|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.08740303725554463, |
|
"eval_steps": 500, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 8.740303725554463e-05, |
|
"grad_norm": 7.065422058105469, |
|
"learning_rate": 0.0001, |
|
"loss": 1.8318, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.00017480607451108925, |
|
"grad_norm": 12.618020057678223, |
|
"learning_rate": 0.0002, |
|
"loss": 2.6514, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0002622091117666339, |
|
"grad_norm": 5.291403770446777, |
|
"learning_rate": 0.0003, |
|
"loss": 1.1527, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0003496121490221785, |
|
"grad_norm": 0.6172698736190796, |
|
"learning_rate": 0.0004, |
|
"loss": 0.9539, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00043701518627772313, |
|
"grad_norm": 2.0148203372955322, |
|
"learning_rate": 0.0005, |
|
"loss": 1.4452, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0005244182235332678, |
|
"grad_norm": 8.47681999206543, |
|
"learning_rate": 0.0004999562784190276, |
|
"loss": 1.8725, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0006118212607888124, |
|
"grad_norm": 1.3222665786743164, |
|
"learning_rate": 0.0004999125568380553, |
|
"loss": 1.4179, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.000699224298044357, |
|
"grad_norm": 2.153110980987549, |
|
"learning_rate": 0.0004998688352570829, |
|
"loss": 1.1031, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0007866273352999017, |
|
"grad_norm": 1.160366415977478, |
|
"learning_rate": 0.0004998251136761106, |
|
"loss": 0.9552, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0008740303725554463, |
|
"grad_norm": 0.7029749155044556, |
|
"learning_rate": 0.0004997813920951382, |
|
"loss": 1.0771, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.000961433409810991, |
|
"grad_norm": 0.7599214315414429, |
|
"learning_rate": 0.0004997376705141658, |
|
"loss": 1.0371, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0010488364470665355, |
|
"grad_norm": 1.3291207551956177, |
|
"learning_rate": 0.0004996939489331935, |
|
"loss": 0.7945, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.00113623948432208, |
|
"grad_norm": 0.6687347888946533, |
|
"learning_rate": 0.0004996502273522211, |
|
"loss": 0.9751, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0012236425215776249, |
|
"grad_norm": 0.5787840485572815, |
|
"learning_rate": 0.0004996065057712488, |
|
"loss": 1.234, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0013110455588331695, |
|
"grad_norm": 0.8155117034912109, |
|
"learning_rate": 0.0004995627841902764, |
|
"loss": 1.2566, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.001398448596088714, |
|
"grad_norm": 0.5109673142433167, |
|
"learning_rate": 0.0004995190626093039, |
|
"loss": 0.8717, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0014858516333442588, |
|
"grad_norm": 0.4625360667705536, |
|
"learning_rate": 0.0004994753410283315, |
|
"loss": 0.8922, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0015732546705998034, |
|
"grad_norm": 0.714952826499939, |
|
"learning_rate": 0.0004994316194473592, |
|
"loss": 0.921, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.001660657707855348, |
|
"grad_norm": 0.48220372200012207, |
|
"learning_rate": 0.0004993878978663869, |
|
"loss": 1.0207, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0017480607451108925, |
|
"grad_norm": 5.717684745788574, |
|
"learning_rate": 0.0004993441762854145, |
|
"loss": 1.3551, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0018354637823664373, |
|
"grad_norm": 0.5429579615592957, |
|
"learning_rate": 0.0004993004547044421, |
|
"loss": 0.8929, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.001922866819621982, |
|
"grad_norm": 6.894193172454834, |
|
"learning_rate": 0.0004992567331234697, |
|
"loss": 1.2508, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0020102698568775267, |
|
"grad_norm": 0.4427785277366638, |
|
"learning_rate": 0.0004992130115424974, |
|
"loss": 0.9662, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.002097672894133071, |
|
"grad_norm": 0.5576323866844177, |
|
"learning_rate": 0.000499169289961525, |
|
"loss": 1.0545, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.002185075931388616, |
|
"grad_norm": 1.3581053018569946, |
|
"learning_rate": 0.0004991255683805527, |
|
"loss": 1.1777, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.00227247896864416, |
|
"grad_norm": 0.609951376914978, |
|
"learning_rate": 0.0004990818467995803, |
|
"loss": 1.5921, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.002359882005899705, |
|
"grad_norm": 1.3641082048416138, |
|
"learning_rate": 0.0004990381252186079, |
|
"loss": 0.8309, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0024472850431552498, |
|
"grad_norm": 0.5976356267929077, |
|
"learning_rate": 0.0004989944036376356, |
|
"loss": 0.828, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.002534688080410794, |
|
"grad_norm": 0.6889556646347046, |
|
"learning_rate": 0.0004989506820566632, |
|
"loss": 1.4536, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.002622091117666339, |
|
"grad_norm": 0.5091891884803772, |
|
"learning_rate": 0.0004989069604756908, |
|
"loss": 1.054, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0027094941549218837, |
|
"grad_norm": 1.0312514305114746, |
|
"learning_rate": 0.0004988632388947185, |
|
"loss": 0.8454, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.002796897192177428, |
|
"grad_norm": 1.136455774307251, |
|
"learning_rate": 0.000498819517313746, |
|
"loss": 0.9365, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.002884300229432973, |
|
"grad_norm": 0.5671233534812927, |
|
"learning_rate": 0.0004987757957327737, |
|
"loss": 0.9139, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0029717032666885176, |
|
"grad_norm": 0.38321638107299805, |
|
"learning_rate": 0.0004987320741518013, |
|
"loss": 0.9383, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.003059106303944062, |
|
"grad_norm": 0.49962496757507324, |
|
"learning_rate": 0.0004986883525708289, |
|
"loss": 1.1371, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.003146509341199607, |
|
"grad_norm": 0.4470585584640503, |
|
"learning_rate": 0.0004986446309898566, |
|
"loss": 1.2636, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.003233912378455151, |
|
"grad_norm": 0.4494791626930237, |
|
"learning_rate": 0.0004986009094088842, |
|
"loss": 0.8846, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.003321315415710696, |
|
"grad_norm": 1.8432437181472778, |
|
"learning_rate": 0.0004985571878279119, |
|
"loss": 1.0042, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0034087184529662407, |
|
"grad_norm": 0.512199878692627, |
|
"learning_rate": 0.0004985134662469395, |
|
"loss": 0.9648, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.003496121490221785, |
|
"grad_norm": 0.7086130380630493, |
|
"learning_rate": 0.0004984697446659671, |
|
"loss": 0.8634, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.00358352452747733, |
|
"grad_norm": 0.34971296787261963, |
|
"learning_rate": 0.0004984260230849947, |
|
"loss": 1.1422, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0036709275647328747, |
|
"grad_norm": 0.5125827193260193, |
|
"learning_rate": 0.0004983823015040224, |
|
"loss": 0.9885, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.003758330601988419, |
|
"grad_norm": 0.363505482673645, |
|
"learning_rate": 0.0004983385799230501, |
|
"loss": 0.9047, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.003845733639243964, |
|
"grad_norm": 0.36858850717544556, |
|
"learning_rate": 0.0004982948583420777, |
|
"loss": 0.8149, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.003933136676499509, |
|
"grad_norm": 0.3395627439022064, |
|
"learning_rate": 0.0004982511367611053, |
|
"loss": 0.6765, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.004020539713755053, |
|
"grad_norm": 0.8366663455963135, |
|
"learning_rate": 0.0004982074151801329, |
|
"loss": 1.4199, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.004107942751010597, |
|
"grad_norm": 0.4986715614795685, |
|
"learning_rate": 0.0004981636935991606, |
|
"loss": 1.0475, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.004195345788266142, |
|
"grad_norm": 0.39106953144073486, |
|
"learning_rate": 0.0004981199720181882, |
|
"loss": 0.8671, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.004282748825521687, |
|
"grad_norm": 1.129980206489563, |
|
"learning_rate": 0.0004980762504372159, |
|
"loss": 0.6251, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.004370151862777232, |
|
"grad_norm": 1.9613661766052246, |
|
"learning_rate": 0.0004980325288562434, |
|
"loss": 1.5782, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0044575549000327765, |
|
"grad_norm": 0.3839377164840698, |
|
"learning_rate": 0.000497988807275271, |
|
"loss": 0.8171, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.00454495793728832, |
|
"grad_norm": 1.2072890996932983, |
|
"learning_rate": 0.0004979450856942987, |
|
"loss": 1.3112, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.004632360974543865, |
|
"grad_norm": 0.4228273630142212, |
|
"learning_rate": 0.0004979013641133263, |
|
"loss": 0.8507, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.00471976401179941, |
|
"grad_norm": 0.3379599452018738, |
|
"learning_rate": 0.000497857642532354, |
|
"loss": 0.9112, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.004807167049054955, |
|
"grad_norm": 0.4163492023944855, |
|
"learning_rate": 0.0004978139209513816, |
|
"loss": 0.9839, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.0048945700863104995, |
|
"grad_norm": 1.4194269180297852, |
|
"learning_rate": 0.0004977701993704092, |
|
"loss": 1.194, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.004981973123566044, |
|
"grad_norm": 0.8857583999633789, |
|
"learning_rate": 0.0004977264777894369, |
|
"loss": 0.9047, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.005069376160821588, |
|
"grad_norm": 0.8493141531944275, |
|
"learning_rate": 0.0004976827562084645, |
|
"loss": 0.921, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.005156779198077133, |
|
"grad_norm": 0.6385464668273926, |
|
"learning_rate": 0.0004976390346274922, |
|
"loss": 0.9945, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.005244182235332678, |
|
"grad_norm": 0.6642935872077942, |
|
"learning_rate": 0.0004975953130465198, |
|
"loss": 0.8654, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.005331585272588223, |
|
"grad_norm": 0.5619232654571533, |
|
"learning_rate": 0.0004975515914655474, |
|
"loss": 0.9012, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.005418988309843767, |
|
"grad_norm": 0.37755316495895386, |
|
"learning_rate": 0.0004975078698845751, |
|
"loss": 0.7285, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.005506391347099311, |
|
"grad_norm": 1.3131452798843384, |
|
"learning_rate": 0.0004974641483036027, |
|
"loss": 1.5863, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.005593794384354856, |
|
"grad_norm": 0.48203301429748535, |
|
"learning_rate": 0.0004974204267226304, |
|
"loss": 0.932, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.005681197421610401, |
|
"grad_norm": 1.7584421634674072, |
|
"learning_rate": 0.000497376705141658, |
|
"loss": 1.3908, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.005768600458865946, |
|
"grad_norm": 0.5197044610977173, |
|
"learning_rate": 0.0004973329835606855, |
|
"loss": 0.8429, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0058560034961214905, |
|
"grad_norm": 1.9259709119796753, |
|
"learning_rate": 0.0004972892619797131, |
|
"loss": 0.9317, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.005943406533377035, |
|
"grad_norm": 1.0053375959396362, |
|
"learning_rate": 0.0004972455403987408, |
|
"loss": 0.9276, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.006030809570632579, |
|
"grad_norm": 85.76437377929688, |
|
"learning_rate": 0.0004972018188177684, |
|
"loss": 5.3967, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.006118212607888124, |
|
"grad_norm": 1.9150564670562744, |
|
"learning_rate": 0.0004971580972367961, |
|
"loss": 1.2467, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.006205615645143669, |
|
"grad_norm": 1.286971092224121, |
|
"learning_rate": 0.0004971143756558237, |
|
"loss": 1.055, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.006293018682399214, |
|
"grad_norm": 3.5728204250335693, |
|
"learning_rate": 0.0004970706540748513, |
|
"loss": 0.9154, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.006380421719654758, |
|
"grad_norm": 3.2489278316497803, |
|
"learning_rate": 0.000497026932493879, |
|
"loss": 1.0816, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.006467824756910302, |
|
"grad_norm": 0.7258114218711853, |
|
"learning_rate": 0.0004969832109129066, |
|
"loss": 0.8656, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.006555227794165847, |
|
"grad_norm": 1.0952316522598267, |
|
"learning_rate": 0.0004969394893319343, |
|
"loss": 0.9195, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.006642630831421392, |
|
"grad_norm": 5.054478645324707, |
|
"learning_rate": 0.0004968957677509619, |
|
"loss": 1.2343, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.006730033868676937, |
|
"grad_norm": 2.0239686965942383, |
|
"learning_rate": 0.0004968520461699895, |
|
"loss": 1.6315, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0068174369059324814, |
|
"grad_norm": 1.3708548545837402, |
|
"learning_rate": 0.0004968083245890172, |
|
"loss": 0.8507, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.006904839943188025, |
|
"grad_norm": 0.6372014284133911, |
|
"learning_rate": 0.0004967646030080448, |
|
"loss": 0.9235, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.00699224298044357, |
|
"grad_norm": 1.0243886709213257, |
|
"learning_rate": 0.0004967208814270724, |
|
"loss": 1.0295, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.007079646017699115, |
|
"grad_norm": 0.6127680540084839, |
|
"learning_rate": 0.0004966771598461001, |
|
"loss": 0.8469, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.00716704905495466, |
|
"grad_norm": 0.7449392080307007, |
|
"learning_rate": 0.0004966334382651277, |
|
"loss": 1.5825, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0072544520922102045, |
|
"grad_norm": 0.6267126798629761, |
|
"learning_rate": 0.0004965897166841554, |
|
"loss": 1.0257, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.007341855129465749, |
|
"grad_norm": 5.416685104370117, |
|
"learning_rate": 0.0004965459951031829, |
|
"loss": 1.0654, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.007429258166721293, |
|
"grad_norm": 1.0485210418701172, |
|
"learning_rate": 0.0004965022735222105, |
|
"loss": 0.8979, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.007516661203976838, |
|
"grad_norm": 1.0192244052886963, |
|
"learning_rate": 0.0004964585519412381, |
|
"loss": 1.1117, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.007604064241232383, |
|
"grad_norm": 0.7042039632797241, |
|
"learning_rate": 0.0004964148303602658, |
|
"loss": 0.9955, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.007691467278487928, |
|
"grad_norm": 0.649395227432251, |
|
"learning_rate": 0.0004963711087792935, |
|
"loss": 0.7092, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.007778870315743472, |
|
"grad_norm": 0.8017964959144592, |
|
"learning_rate": 0.0004963273871983211, |
|
"loss": 0.8941, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.007866273352999017, |
|
"grad_norm": 0.4518626630306244, |
|
"learning_rate": 0.0004962836656173487, |
|
"loss": 0.9088, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.007953676390254561, |
|
"grad_norm": 0.4033469259738922, |
|
"learning_rate": 0.0004962399440363763, |
|
"loss": 0.9251, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.008041079427510107, |
|
"grad_norm": 0.8128958940505981, |
|
"learning_rate": 0.000496196222455404, |
|
"loss": 0.975, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.00812848246476565, |
|
"grad_norm": 3.1504242420196533, |
|
"learning_rate": 0.0004961525008744317, |
|
"loss": 1.5942, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.008215885502021195, |
|
"grad_norm": 3.9139645099639893, |
|
"learning_rate": 0.0004961087792934593, |
|
"loss": 1.071, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.00830328853927674, |
|
"grad_norm": 0.7689482569694519, |
|
"learning_rate": 0.0004960650577124869, |
|
"loss": 1.038, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.008390691576532284, |
|
"grad_norm": 0.5784656405448914, |
|
"learning_rate": 0.0004960213361315145, |
|
"loss": 1.0943, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.00847809461378783, |
|
"grad_norm": 0.5716943144798279, |
|
"learning_rate": 0.0004959776145505422, |
|
"loss": 0.8874, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.008565497651043374, |
|
"grad_norm": 0.5122077465057373, |
|
"learning_rate": 0.0004959338929695698, |
|
"loss": 0.951, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.008652900688298918, |
|
"grad_norm": 0.8700870871543884, |
|
"learning_rate": 0.0004958901713885975, |
|
"loss": 0.9632, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.008740303725554463, |
|
"grad_norm": 0.5623646974563599, |
|
"learning_rate": 0.000495846449807625, |
|
"loss": 1.0711, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.008827706762810007, |
|
"grad_norm": 0.589887261390686, |
|
"learning_rate": 0.0004958027282266526, |
|
"loss": 0.781, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.008915109800065553, |
|
"grad_norm": 1.63577401638031, |
|
"learning_rate": 0.0004957590066456803, |
|
"loss": 0.9118, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.009002512837321097, |
|
"grad_norm": 0.7755091786384583, |
|
"learning_rate": 0.0004957152850647079, |
|
"loss": 1.192, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.00908991587457664, |
|
"grad_norm": 0.5463851094245911, |
|
"learning_rate": 0.0004956715634837356, |
|
"loss": 0.894, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.009177318911832186, |
|
"grad_norm": 0.5253966450691223, |
|
"learning_rate": 0.0004956278419027632, |
|
"loss": 0.9432, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.00926472194908773, |
|
"grad_norm": 0.4377374053001404, |
|
"learning_rate": 0.0004955841203217908, |
|
"loss": 1.09, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.009352124986343276, |
|
"grad_norm": 0.5025166869163513, |
|
"learning_rate": 0.0004955403987408185, |
|
"loss": 0.9262, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.00943952802359882, |
|
"grad_norm": 0.45846027135849, |
|
"learning_rate": 0.0004954966771598461, |
|
"loss": 0.9428, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.009526931060854364, |
|
"grad_norm": 0.4219333529472351, |
|
"learning_rate": 0.0004954529555788738, |
|
"loss": 1.026, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.00961433409810991, |
|
"grad_norm": 0.5737212896347046, |
|
"learning_rate": 0.0004954092339979014, |
|
"loss": 1.1012, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.009701737135365453, |
|
"grad_norm": 0.887387752532959, |
|
"learning_rate": 0.000495365512416929, |
|
"loss": 1.3495, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.009789140172620999, |
|
"grad_norm": 0.5145196914672852, |
|
"learning_rate": 0.0004953217908359567, |
|
"loss": 1.0266, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.009876543209876543, |
|
"grad_norm": 1.5954936742782593, |
|
"learning_rate": 0.0004952780692549843, |
|
"loss": 1.254, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.009963946247132089, |
|
"grad_norm": 0.9585819840431213, |
|
"learning_rate": 0.0004952343476740119, |
|
"loss": 1.4545, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.010051349284387633, |
|
"grad_norm": 0.8477827310562134, |
|
"learning_rate": 0.0004951906260930396, |
|
"loss": 0.9454, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.010138752321643177, |
|
"grad_norm": 1.2712616920471191, |
|
"learning_rate": 0.0004951469045120672, |
|
"loss": 0.9497, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.010226155358898722, |
|
"grad_norm": 0.5731809139251709, |
|
"learning_rate": 0.0004951031829310947, |
|
"loss": 1.0611, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.010313558396154266, |
|
"grad_norm": 2.106234550476074, |
|
"learning_rate": 0.0004950594613501224, |
|
"loss": 1.0015, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.010400961433409812, |
|
"grad_norm": 0.7425693273544312, |
|
"learning_rate": 0.00049501573976915, |
|
"loss": 1.0588, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.010488364470665356, |
|
"grad_norm": 0.5987507700920105, |
|
"learning_rate": 0.0004949720181881777, |
|
"loss": 1.0016, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.0105757675079209, |
|
"grad_norm": 0.3802410364151001, |
|
"learning_rate": 0.0004949282966072053, |
|
"loss": 0.9133, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.010663170545176445, |
|
"grad_norm": 0.42108240723609924, |
|
"learning_rate": 0.0004948845750262329, |
|
"loss": 0.8675, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.01075057358243199, |
|
"grad_norm": 0.6281617879867554, |
|
"learning_rate": 0.0004948408534452606, |
|
"loss": 0.8294, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.010837976619687535, |
|
"grad_norm": 0.8346467614173889, |
|
"learning_rate": 0.0004947971318642882, |
|
"loss": 0.8333, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.010925379656943079, |
|
"grad_norm": 0.5090304613113403, |
|
"learning_rate": 0.0004947534102833158, |
|
"loss": 1.0423, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.011012782694198623, |
|
"grad_norm": 0.39572426676750183, |
|
"learning_rate": 0.0004947096887023435, |
|
"loss": 0.8565, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.011100185731454168, |
|
"grad_norm": 1.1466861963272095, |
|
"learning_rate": 0.0004946659671213711, |
|
"loss": 1.4358, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.011187588768709712, |
|
"grad_norm": 0.36562782526016235, |
|
"learning_rate": 0.0004946222455403988, |
|
"loss": 0.8373, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.011274991805965258, |
|
"grad_norm": 0.49587374925613403, |
|
"learning_rate": 0.0004945785239594264, |
|
"loss": 1.3961, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.011362394843220802, |
|
"grad_norm": 0.4852742850780487, |
|
"learning_rate": 0.000494534802378454, |
|
"loss": 1.0804, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.011449797880476346, |
|
"grad_norm": 0.4050949215888977, |
|
"learning_rate": 0.0004944910807974817, |
|
"loss": 1.0482, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.011537200917731891, |
|
"grad_norm": 0.35284534096717834, |
|
"learning_rate": 0.0004944473592165093, |
|
"loss": 0.9467, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.011624603954987435, |
|
"grad_norm": 1.6482305526733398, |
|
"learning_rate": 0.000494403637635537, |
|
"loss": 1.0678, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.011712006992242981, |
|
"grad_norm": 1.103427767753601, |
|
"learning_rate": 0.0004943599160545645, |
|
"loss": 0.9495, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.011799410029498525, |
|
"grad_norm": 0.45183080434799194, |
|
"learning_rate": 0.0004943161944735921, |
|
"loss": 0.9117, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.01188681306675407, |
|
"grad_norm": 0.3565897047519684, |
|
"learning_rate": 0.0004942724728926198, |
|
"loss": 0.8209, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.011974216104009614, |
|
"grad_norm": 0.6118256449699402, |
|
"learning_rate": 0.0004942287513116474, |
|
"loss": 1.0973, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.012061619141265158, |
|
"grad_norm": 0.40304186940193176, |
|
"learning_rate": 0.0004941850297306751, |
|
"loss": 1.1167, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.012149022178520704, |
|
"grad_norm": 0.46548163890838623, |
|
"learning_rate": 0.0004941413081497027, |
|
"loss": 0.9813, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.012236425215776248, |
|
"grad_norm": 0.4140109121799469, |
|
"learning_rate": 0.0004940975865687303, |
|
"loss": 0.9859, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.012323828253031794, |
|
"grad_norm": 0.7219896912574768, |
|
"learning_rate": 0.0004940538649877579, |
|
"loss": 0.9464, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.012411231290287338, |
|
"grad_norm": 1.1531212329864502, |
|
"learning_rate": 0.0004940101434067856, |
|
"loss": 0.9439, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.012498634327542881, |
|
"grad_norm": 0.5690356492996216, |
|
"learning_rate": 0.0004939664218258133, |
|
"loss": 0.897, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.012586037364798427, |
|
"grad_norm": 4.290929317474365, |
|
"learning_rate": 0.0004939227002448409, |
|
"loss": 0.9462, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.012673440402053971, |
|
"grad_norm": 0.8283594250679016, |
|
"learning_rate": 0.0004938789786638685, |
|
"loss": 0.8452, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.012760843439309517, |
|
"grad_norm": 0.7647207975387573, |
|
"learning_rate": 0.0004938352570828961, |
|
"loss": 0.8869, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.01284824647656506, |
|
"grad_norm": 0.4244186580181122, |
|
"learning_rate": 0.0004937915355019238, |
|
"loss": 1.0727, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.012935649513820605, |
|
"grad_norm": 0.6509714722633362, |
|
"learning_rate": 0.0004937478139209514, |
|
"loss": 1.3135, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.01302305255107615, |
|
"grad_norm": 0.5276227593421936, |
|
"learning_rate": 0.0004937040923399791, |
|
"loss": 0.9124, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.013110455588331694, |
|
"grad_norm": 0.6556555032730103, |
|
"learning_rate": 0.0004936603707590067, |
|
"loss": 1.0882, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.01319785862558724, |
|
"grad_norm": 0.5422887802124023, |
|
"learning_rate": 0.0004936166491780342, |
|
"loss": 0.787, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.013285261662842784, |
|
"grad_norm": 0.4304672181606293, |
|
"learning_rate": 0.0004935729275970619, |
|
"loss": 0.9496, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.013372664700098328, |
|
"grad_norm": 1.1699761152267456, |
|
"learning_rate": 0.0004935292060160895, |
|
"loss": 2.1129, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.013460067737353873, |
|
"grad_norm": 2.376859664916992, |
|
"learning_rate": 0.0004934854844351172, |
|
"loss": 1.0353, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.013547470774609417, |
|
"grad_norm": 0.6845773458480835, |
|
"learning_rate": 0.0004934417628541448, |
|
"loss": 0.739, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.013634873811864963, |
|
"grad_norm": 1.45736563205719, |
|
"learning_rate": 0.0004933980412731724, |
|
"loss": 0.9946, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.013722276849120507, |
|
"grad_norm": 0.8025717735290527, |
|
"learning_rate": 0.0004933543196922001, |
|
"loss": 0.7987, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.01380967988637605, |
|
"grad_norm": 0.4995729625225067, |
|
"learning_rate": 0.0004933105981112277, |
|
"loss": 0.8258, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.013897082923631596, |
|
"grad_norm": 0.3529548645019531, |
|
"learning_rate": 0.0004932668765302554, |
|
"loss": 0.7891, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.01398448596088714, |
|
"grad_norm": 0.3970806300640106, |
|
"learning_rate": 0.000493223154949283, |
|
"loss": 0.8748, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.014071888998142686, |
|
"grad_norm": 0.46492478251457214, |
|
"learning_rate": 0.0004931794333683106, |
|
"loss": 0.83, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.01415929203539823, |
|
"grad_norm": 0.39829567074775696, |
|
"learning_rate": 0.0004931357117873383, |
|
"loss": 0.8678, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.014246695072653776, |
|
"grad_norm": 0.44665223360061646, |
|
"learning_rate": 0.0004930919902063659, |
|
"loss": 0.8311, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.01433409810990932, |
|
"grad_norm": 0.3569469451904297, |
|
"learning_rate": 0.0004930482686253935, |
|
"loss": 0.7291, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.014421501147164863, |
|
"grad_norm": 0.5544111132621765, |
|
"learning_rate": 0.0004930045470444212, |
|
"loss": 0.7815, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.014508904184420409, |
|
"grad_norm": 0.350799024105072, |
|
"learning_rate": 0.0004929608254634488, |
|
"loss": 0.7029, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.014596307221675953, |
|
"grad_norm": 0.8473671078681946, |
|
"learning_rate": 0.0004929171038824765, |
|
"loss": 0.929, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.014683710258931499, |
|
"grad_norm": 0.46682775020599365, |
|
"learning_rate": 0.000492873382301504, |
|
"loss": 0.9511, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.014771113296187043, |
|
"grad_norm": 0.40774253010749817, |
|
"learning_rate": 0.0004928296607205316, |
|
"loss": 0.9113, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.014858516333442586, |
|
"grad_norm": 0.38683247566223145, |
|
"learning_rate": 0.0004927859391395592, |
|
"loss": 0.8733, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.014945919370698132, |
|
"grad_norm": 0.3632119297981262, |
|
"learning_rate": 0.0004927422175585869, |
|
"loss": 0.802, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.015033322407953676, |
|
"grad_norm": 0.43275561928749084, |
|
"learning_rate": 0.0004926984959776145, |
|
"loss": 0.869, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.015120725445209222, |
|
"grad_norm": 0.34049132466316223, |
|
"learning_rate": 0.0004926547743966422, |
|
"loss": 0.9312, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.015208128482464766, |
|
"grad_norm": 0.3519800901412964, |
|
"learning_rate": 0.0004926110528156698, |
|
"loss": 0.9362, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.01529553151972031, |
|
"grad_norm": 0.47325399518013, |
|
"learning_rate": 0.0004925673312346974, |
|
"loss": 0.9907, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.015382934556975855, |
|
"grad_norm": 0.3297930359840393, |
|
"learning_rate": 0.0004925236096537251, |
|
"loss": 0.9065, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.0154703375942314, |
|
"grad_norm": 0.3259631097316742, |
|
"learning_rate": 0.0004924798880727527, |
|
"loss": 0.76, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.015557740631486945, |
|
"grad_norm": 0.3202175498008728, |
|
"learning_rate": 0.0004924361664917804, |
|
"loss": 0.8182, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.01564514366874249, |
|
"grad_norm": 1.7625497579574585, |
|
"learning_rate": 0.000492392444910808, |
|
"loss": 1.0324, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.015732546705998034, |
|
"grad_norm": 0.31030330061912537, |
|
"learning_rate": 0.0004923487233298356, |
|
"loss": 0.7945, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.015819949743253577, |
|
"grad_norm": 0.416181743144989, |
|
"learning_rate": 0.0004923050017488633, |
|
"loss": 0.829, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.015907352780509122, |
|
"grad_norm": 0.42921754717826843, |
|
"learning_rate": 0.0004922612801678909, |
|
"loss": 0.7401, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.015994755817764668, |
|
"grad_norm": 0.2919391989707947, |
|
"learning_rate": 0.0004922175585869186, |
|
"loss": 0.8488, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.016082158855020214, |
|
"grad_norm": 0.314208447933197, |
|
"learning_rate": 0.0004921738370059462, |
|
"loss": 0.7946, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.016169561892275756, |
|
"grad_norm": 0.503778338432312, |
|
"learning_rate": 0.0004921301154249737, |
|
"loss": 0.8052, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.0162569649295313, |
|
"grad_norm": 0.36193403601646423, |
|
"learning_rate": 0.0004920863938440014, |
|
"loss": 0.8649, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.016344367966786847, |
|
"grad_norm": 0.631439208984375, |
|
"learning_rate": 0.000492042672263029, |
|
"loss": 0.7121, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.01643177100404239, |
|
"grad_norm": 0.3578779399394989, |
|
"learning_rate": 0.0004919989506820567, |
|
"loss": 0.9566, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.016519174041297935, |
|
"grad_norm": 0.3394636809825897, |
|
"learning_rate": 0.0004919552291010843, |
|
"loss": 0.7892, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.01660657707855348, |
|
"grad_norm": 0.3014313876628876, |
|
"learning_rate": 0.0004919115075201119, |
|
"loss": 0.9773, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.016693980115809023, |
|
"grad_norm": 0.464288592338562, |
|
"learning_rate": 0.0004918677859391395, |
|
"loss": 0.8351, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.01678138315306457, |
|
"grad_norm": 0.3988270163536072, |
|
"learning_rate": 0.0004918240643581672, |
|
"loss": 0.9227, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.016868786190320114, |
|
"grad_norm": 0.3190634250640869, |
|
"learning_rate": 0.0004917803427771949, |
|
"loss": 1.0606, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.01695618922757566, |
|
"grad_norm": 0.6769363880157471, |
|
"learning_rate": 0.0004917366211962225, |
|
"loss": 1.0602, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.017043592264831202, |
|
"grad_norm": 0.3352043330669403, |
|
"learning_rate": 0.0004916928996152501, |
|
"loss": 0.9759, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.017130995302086748, |
|
"grad_norm": 0.32745465636253357, |
|
"learning_rate": 0.0004916491780342777, |
|
"loss": 0.7544, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.017218398339342293, |
|
"grad_norm": 0.6321395635604858, |
|
"learning_rate": 0.0004916054564533054, |
|
"loss": 0.6861, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.017305801376597835, |
|
"grad_norm": 0.32094526290893555, |
|
"learning_rate": 0.000491561734872333, |
|
"loss": 0.8258, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.01739320441385338, |
|
"grad_norm": 0.3911696970462799, |
|
"learning_rate": 0.0004915180132913607, |
|
"loss": 0.9963, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.017480607451108927, |
|
"grad_norm": 0.2953476905822754, |
|
"learning_rate": 0.0004914742917103883, |
|
"loss": 0.8456, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.017568010488364472, |
|
"grad_norm": 0.3092620372772217, |
|
"learning_rate": 0.0004914305701294158, |
|
"loss": 0.8644, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.017655413525620015, |
|
"grad_norm": 0.6630509495735168, |
|
"learning_rate": 0.0004913868485484435, |
|
"loss": 0.9363, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.01774281656287556, |
|
"grad_norm": 0.3516843616962433, |
|
"learning_rate": 0.0004913431269674711, |
|
"loss": 1.1422, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.017830219600131106, |
|
"grad_norm": 0.43253111839294434, |
|
"learning_rate": 0.0004912994053864988, |
|
"loss": 0.852, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.017917622637386648, |
|
"grad_norm": 0.324238657951355, |
|
"learning_rate": 0.0004912556838055264, |
|
"loss": 0.8587, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.018005025674642194, |
|
"grad_norm": 0.28279510140419006, |
|
"learning_rate": 0.000491211962224554, |
|
"loss": 1.0088, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.01809242871189774, |
|
"grad_norm": 1.4974584579467773, |
|
"learning_rate": 0.0004911682406435817, |
|
"loss": 1.0296, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.01817983174915328, |
|
"grad_norm": 0.3786958158016205, |
|
"learning_rate": 0.0004911245190626093, |
|
"loss": 1.0741, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.018267234786408827, |
|
"grad_norm": 0.294880747795105, |
|
"learning_rate": 0.0004910807974816369, |
|
"loss": 1.021, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.018354637823664373, |
|
"grad_norm": 0.36885932087898254, |
|
"learning_rate": 0.0004910370759006646, |
|
"loss": 0.9023, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.01844204086091992, |
|
"grad_norm": 0.37099695205688477, |
|
"learning_rate": 0.0004909933543196922, |
|
"loss": 0.961, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.01852944389817546, |
|
"grad_norm": 0.3451802432537079, |
|
"learning_rate": 0.0004909496327387199, |
|
"loss": 0.8744, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.018616846935431006, |
|
"grad_norm": 0.34541890025138855, |
|
"learning_rate": 0.0004909059111577475, |
|
"loss": 0.9766, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.018704249972686552, |
|
"grad_norm": 0.2827027440071106, |
|
"learning_rate": 0.0004908621895767751, |
|
"loss": 0.8569, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.018791653009942094, |
|
"grad_norm": 0.3254356384277344, |
|
"learning_rate": 0.0004908184679958028, |
|
"loss": 0.9091, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.01887905604719764, |
|
"grad_norm": 0.29408493638038635, |
|
"learning_rate": 0.0004907747464148304, |
|
"loss": 0.823, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.018966459084453186, |
|
"grad_norm": 0.3414423167705536, |
|
"learning_rate": 0.0004907310248338581, |
|
"loss": 0.8197, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.019053862121708728, |
|
"grad_norm": 0.33818957209587097, |
|
"learning_rate": 0.0004906873032528857, |
|
"loss": 0.8553, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.019141265158964273, |
|
"grad_norm": 0.28477659821510315, |
|
"learning_rate": 0.0004906435816719132, |
|
"loss": 0.9008, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.01922866819621982, |
|
"grad_norm": 0.30363160371780396, |
|
"learning_rate": 0.0004905998600909408, |
|
"loss": 0.8077, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.019316071233475365, |
|
"grad_norm": 0.5011153221130371, |
|
"learning_rate": 0.0004905561385099685, |
|
"loss": 0.8938, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.019403474270730907, |
|
"grad_norm": 0.33721473813056946, |
|
"learning_rate": 0.0004905124169289961, |
|
"loss": 0.7798, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.019490877307986453, |
|
"grad_norm": 0.3752390742301941, |
|
"learning_rate": 0.0004904686953480238, |
|
"loss": 0.9064, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.019578280345241998, |
|
"grad_norm": 0.32278257608413696, |
|
"learning_rate": 0.0004904249737670514, |
|
"loss": 1.0019, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.01966568338249754, |
|
"grad_norm": 0.5604023933410645, |
|
"learning_rate": 0.000490381252186079, |
|
"loss": 0.9579, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.019753086419753086, |
|
"grad_norm": 0.26056113839149475, |
|
"learning_rate": 0.0004903375306051067, |
|
"loss": 0.7596, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.01984048945700863, |
|
"grad_norm": 0.3333994448184967, |
|
"learning_rate": 0.0004902938090241343, |
|
"loss": 1.0804, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.019927892494264177, |
|
"grad_norm": 0.3021886944770813, |
|
"learning_rate": 0.000490250087443162, |
|
"loss": 0.959, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.02001529553151972, |
|
"grad_norm": 0.2865878641605377, |
|
"learning_rate": 0.0004902063658621896, |
|
"loss": 0.9816, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.020102698568775265, |
|
"grad_norm": 0.2981945276260376, |
|
"learning_rate": 0.0004901626442812172, |
|
"loss": 0.8672, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.02019010160603081, |
|
"grad_norm": 0.34836679697036743, |
|
"learning_rate": 0.0004901189227002449, |
|
"loss": 0.9012, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.020277504643286353, |
|
"grad_norm": 0.7560614347457886, |
|
"learning_rate": 0.0004900752011192725, |
|
"loss": 1.2521, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.0203649076805419, |
|
"grad_norm": 0.2899073362350464, |
|
"learning_rate": 0.0004900314795383002, |
|
"loss": 0.9376, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.020452310717797444, |
|
"grad_norm": 0.2944093644618988, |
|
"learning_rate": 0.0004899877579573278, |
|
"loss": 0.9158, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.020539713755052987, |
|
"grad_norm": 0.2837924361228943, |
|
"learning_rate": 0.0004899440363763553, |
|
"loss": 0.9397, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.020627116792308532, |
|
"grad_norm": 0.3069987893104553, |
|
"learning_rate": 0.000489900314795383, |
|
"loss": 0.9635, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.020714519829564078, |
|
"grad_norm": 0.29966363310813904, |
|
"learning_rate": 0.0004898565932144106, |
|
"loss": 0.9103, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.020801922866819623, |
|
"grad_norm": 0.3086193799972534, |
|
"learning_rate": 0.0004898128716334383, |
|
"loss": 0.9797, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.020889325904075166, |
|
"grad_norm": 0.28495675325393677, |
|
"learning_rate": 0.0004897691500524659, |
|
"loss": 0.8221, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.02097672894133071, |
|
"grad_norm": 0.27056995034217834, |
|
"learning_rate": 0.0004897254284714935, |
|
"loss": 0.9584, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.021064131978586257, |
|
"grad_norm": 0.2837945818901062, |
|
"learning_rate": 0.0004896817068905211, |
|
"loss": 1.0047, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.0211515350158418, |
|
"grad_norm": 0.4288729429244995, |
|
"learning_rate": 0.0004896379853095488, |
|
"loss": 1.3211, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.021238938053097345, |
|
"grad_norm": 1.1985094547271729, |
|
"learning_rate": 0.0004895942637285765, |
|
"loss": 1.4015, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.02132634109035289, |
|
"grad_norm": 0.3171183466911316, |
|
"learning_rate": 0.0004895505421476041, |
|
"loss": 0.7096, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.021413744127608433, |
|
"grad_norm": 3.1765527725219727, |
|
"learning_rate": 0.0004895068205666317, |
|
"loss": 1.5594, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.02150114716486398, |
|
"grad_norm": 0.35891321301460266, |
|
"learning_rate": 0.0004894630989856593, |
|
"loss": 1.0663, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.021588550202119524, |
|
"grad_norm": 0.7044485807418823, |
|
"learning_rate": 0.000489419377404687, |
|
"loss": 1.4146, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.02167595323937507, |
|
"grad_norm": 0.361392617225647, |
|
"learning_rate": 0.0004893756558237146, |
|
"loss": 0.7964, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.021763356276630612, |
|
"grad_norm": 0.31394776701927185, |
|
"learning_rate": 0.0004893319342427423, |
|
"loss": 0.8608, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.021850759313886157, |
|
"grad_norm": 0.2853809893131256, |
|
"learning_rate": 0.0004892882126617699, |
|
"loss": 0.8628, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.021938162351141703, |
|
"grad_norm": 0.3122541904449463, |
|
"learning_rate": 0.0004892444910807975, |
|
"loss": 0.7246, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.022025565388397245, |
|
"grad_norm": 12.120355606079102, |
|
"learning_rate": 0.0004892007694998252, |
|
"loss": 1.3082, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.02211296842565279, |
|
"grad_norm": 0.3758118450641632, |
|
"learning_rate": 0.0004891570479188527, |
|
"loss": 1.0478, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.022200371462908337, |
|
"grad_norm": 1.1910297870635986, |
|
"learning_rate": 0.0004891133263378804, |
|
"loss": 1.2477, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.022287774500163882, |
|
"grad_norm": 0.8632226586341858, |
|
"learning_rate": 0.000489069604756908, |
|
"loss": 1.0988, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.022375177537419425, |
|
"grad_norm": 0.381533145904541, |
|
"learning_rate": 0.0004890258831759356, |
|
"loss": 0.8892, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.02246258057467497, |
|
"grad_norm": 0.43683141469955444, |
|
"learning_rate": 0.0004889821615949633, |
|
"loss": 0.8526, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.022549983611930516, |
|
"grad_norm": 0.6212348341941833, |
|
"learning_rate": 0.0004889384400139909, |
|
"loss": 0.9791, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.022637386649186058, |
|
"grad_norm": 0.44247013330459595, |
|
"learning_rate": 0.0004888947184330185, |
|
"loss": 1.0408, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.022724789686441604, |
|
"grad_norm": 0.5239019989967346, |
|
"learning_rate": 0.0004888509968520462, |
|
"loss": 0.8948, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.02281219272369715, |
|
"grad_norm": 0.7413169145584106, |
|
"learning_rate": 0.0004888072752710738, |
|
"loss": 0.7135, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.02289959576095269, |
|
"grad_norm": 0.39856553077697754, |
|
"learning_rate": 0.0004887635536901015, |
|
"loss": 0.8587, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.022986998798208237, |
|
"grad_norm": 0.534248411655426, |
|
"learning_rate": 0.0004887198321091291, |
|
"loss": 0.9006, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.023074401835463783, |
|
"grad_norm": 0.4782329499721527, |
|
"learning_rate": 0.0004886761105281567, |
|
"loss": 0.9292, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.02316180487271933, |
|
"grad_norm": 2.2424156665802, |
|
"learning_rate": 0.0004886323889471843, |
|
"loss": 1.1921, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.02324920790997487, |
|
"grad_norm": 0.5274596810340881, |
|
"learning_rate": 0.000488588667366212, |
|
"loss": 0.9732, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.023336610947230416, |
|
"grad_norm": 1.5465450286865234, |
|
"learning_rate": 0.0004885449457852397, |
|
"loss": 0.9304, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.023424013984485962, |
|
"grad_norm": 0.5691818594932556, |
|
"learning_rate": 0.0004885012242042673, |
|
"loss": 0.9713, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.023511417021741504, |
|
"grad_norm": 0.7849003672599792, |
|
"learning_rate": 0.0004884575026232948, |
|
"loss": 0.957, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.02359882005899705, |
|
"grad_norm": 0.5940591096878052, |
|
"learning_rate": 0.0004884137810423224, |
|
"loss": 0.8786, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.023686223096252595, |
|
"grad_norm": 0.592288076877594, |
|
"learning_rate": 0.0004883700594613501, |
|
"loss": 0.8695, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.02377362613350814, |
|
"grad_norm": 0.3618888556957245, |
|
"learning_rate": 0.0004883263378803777, |
|
"loss": 0.9204, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.023861029170763683, |
|
"grad_norm": 0.5957768559455872, |
|
"learning_rate": 0.0004882826162994054, |
|
"loss": 0.9289, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.02394843220801923, |
|
"grad_norm": 2.2828385829925537, |
|
"learning_rate": 0.000488238894718433, |
|
"loss": 0.9809, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.024035835245274775, |
|
"grad_norm": 0.5379523634910583, |
|
"learning_rate": 0.00048819517313746066, |
|
"loss": 0.934, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.024123238282530317, |
|
"grad_norm": 1.698805809020996, |
|
"learning_rate": 0.00048815145155648826, |
|
"loss": 0.9954, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.024210641319785862, |
|
"grad_norm": 4.479689121246338, |
|
"learning_rate": 0.00048810772997551595, |
|
"loss": 1.3687, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.024298044357041408, |
|
"grad_norm": 2.58227276802063, |
|
"learning_rate": 0.00048806400839454355, |
|
"loss": 0.9305, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.02438544739429695, |
|
"grad_norm": 0.8035925030708313, |
|
"learning_rate": 0.0004880202868135712, |
|
"loss": 1.1649, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.024472850431552496, |
|
"grad_norm": 0.560945451259613, |
|
"learning_rate": 0.00048797656523259884, |
|
"loss": 0.7542, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.02456025346880804, |
|
"grad_norm": 1.6739729642868042, |
|
"learning_rate": 0.0004879328436516264, |
|
"loss": 1.5675, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.024647656506063587, |
|
"grad_norm": 1.0051480531692505, |
|
"learning_rate": 0.0004878891220706541, |
|
"loss": 0.9312, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.02473505954331913, |
|
"grad_norm": 0.43883591890335083, |
|
"learning_rate": 0.0004878454004896817, |
|
"loss": 0.9779, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.024822462580574675, |
|
"grad_norm": 0.668854832649231, |
|
"learning_rate": 0.00048780167890870936, |
|
"loss": 0.9906, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.02490986561783022, |
|
"grad_norm": 2.1563730239868164, |
|
"learning_rate": 0.00048775795732773695, |
|
"loss": 0.9536, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.024997268655085763, |
|
"grad_norm": 1.1613394021987915, |
|
"learning_rate": 0.0004877142357467646, |
|
"loss": 0.9793, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.02508467169234131, |
|
"grad_norm": 0.5452724695205688, |
|
"learning_rate": 0.00048767051416579224, |
|
"loss": 1.11, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.025172074729596854, |
|
"grad_norm": 1.7393804788589478, |
|
"learning_rate": 0.0004876267925848199, |
|
"loss": 1.249, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.025259477766852396, |
|
"grad_norm": 15.148497581481934, |
|
"learning_rate": 0.00048758307100384753, |
|
"loss": 1.4897, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.025346880804107942, |
|
"grad_norm": 0.8102678060531616, |
|
"learning_rate": 0.0004875393494228751, |
|
"loss": 1.0192, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.025434283841363488, |
|
"grad_norm": 3.7395308017730713, |
|
"learning_rate": 0.00048749562784190277, |
|
"loss": 1.05, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.025521686878619033, |
|
"grad_norm": 0.6473442316055298, |
|
"learning_rate": 0.0004874519062609304, |
|
"loss": 0.9341, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.025609089915874576, |
|
"grad_norm": 1.2162256240844727, |
|
"learning_rate": 0.000487408184679958, |
|
"loss": 0.9426, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.02569649295313012, |
|
"grad_norm": 0.7783584594726562, |
|
"learning_rate": 0.0004873644630989857, |
|
"loss": 0.9343, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.025783895990385667, |
|
"grad_norm": 0.7198899388313293, |
|
"learning_rate": 0.0004873207415180133, |
|
"loss": 0.89, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.02587129902764121, |
|
"grad_norm": 0.6314525604248047, |
|
"learning_rate": 0.00048727701993704094, |
|
"loss": 0.9523, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.025958702064896755, |
|
"grad_norm": 3.2664554119110107, |
|
"learning_rate": 0.00048723329835606853, |
|
"loss": 1.3729, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.0260461051021523, |
|
"grad_norm": 0.9869332909584045, |
|
"learning_rate": 0.0004871895767750962, |
|
"loss": 0.978, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.026133508139407846, |
|
"grad_norm": 0.9169254302978516, |
|
"learning_rate": 0.0004871458551941239, |
|
"loss": 0.7641, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.02622091117666339, |
|
"grad_norm": 2.386565685272217, |
|
"learning_rate": 0.00048710213361315147, |
|
"loss": 0.9728, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.026308314213918934, |
|
"grad_norm": 2.5879757404327393, |
|
"learning_rate": 0.0004870584120321791, |
|
"loss": 1.0264, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.02639571725117448, |
|
"grad_norm": 1.059586763381958, |
|
"learning_rate": 0.0004870146904512067, |
|
"loss": 0.9235, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.026483120288430022, |
|
"grad_norm": 1.9793821573257446, |
|
"learning_rate": 0.00048697096887023435, |
|
"loss": 1.5626, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.026570523325685567, |
|
"grad_norm": 1.2389543056488037, |
|
"learning_rate": 0.00048692724728926194, |
|
"loss": 0.9666, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.026657926362941113, |
|
"grad_norm": 1.1373975276947021, |
|
"learning_rate": 0.00048688352570828964, |
|
"loss": 0.993, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.026745329400196655, |
|
"grad_norm": 5.966507434844971, |
|
"learning_rate": 0.0004868398041273173, |
|
"loss": 1.0113, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.0268327324374522, |
|
"grad_norm": 1.2714189291000366, |
|
"learning_rate": 0.0004867960825463449, |
|
"loss": 0.9462, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.026920135474707747, |
|
"grad_norm": 1.397048830986023, |
|
"learning_rate": 0.0004867523609653725, |
|
"loss": 0.9511, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.027007538511963292, |
|
"grad_norm": 1.2888479232788086, |
|
"learning_rate": 0.0004867086393844001, |
|
"loss": 1.014, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.027094941549218834, |
|
"grad_norm": 3.5597853660583496, |
|
"learning_rate": 0.0004866649178034278, |
|
"loss": 1.2336, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.02718234458647438, |
|
"grad_norm": 1.4104827642440796, |
|
"learning_rate": 0.00048662119622245545, |
|
"loss": 1.0148, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.027269747623729926, |
|
"grad_norm": 1.064355492591858, |
|
"learning_rate": 0.00048657747464148305, |
|
"loss": 1.0645, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.027357150660985468, |
|
"grad_norm": 0.819186806678772, |
|
"learning_rate": 0.0004865337530605107, |
|
"loss": 0.8948, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.027444553698241014, |
|
"grad_norm": 3.036085605621338, |
|
"learning_rate": 0.0004864900314795383, |
|
"loss": 1.1567, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.02753195673549656, |
|
"grad_norm": 1.4990466833114624, |
|
"learning_rate": 0.0004864463098985659, |
|
"loss": 0.9445, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.0276193597727521, |
|
"grad_norm": 1.889307975769043, |
|
"learning_rate": 0.00048640258831759357, |
|
"loss": 1.1844, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.027706762810007647, |
|
"grad_norm": 2.072758913040161, |
|
"learning_rate": 0.0004863588667366212, |
|
"loss": 1.0734, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.027794165847263193, |
|
"grad_norm": 2.2393903732299805, |
|
"learning_rate": 0.00048631514515564886, |
|
"loss": 1.1427, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.02788156888451874, |
|
"grad_norm": 4.34975528717041, |
|
"learning_rate": 0.00048627142357467645, |
|
"loss": 1.2473, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.02796897192177428, |
|
"grad_norm": 2.8603451251983643, |
|
"learning_rate": 0.0004862277019937041, |
|
"loss": 1.1657, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.028056374959029826, |
|
"grad_norm": 3.665041923522949, |
|
"learning_rate": 0.0004861839804127317, |
|
"loss": 1.6031, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.028143777996285372, |
|
"grad_norm": 3.366703748703003, |
|
"learning_rate": 0.0004861402588317594, |
|
"loss": 1.0769, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.028231181033540914, |
|
"grad_norm": 1.470408320426941, |
|
"learning_rate": 0.00048609653725078703, |
|
"loss": 1.2034, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.02831858407079646, |
|
"grad_norm": 1.0659921169281006, |
|
"learning_rate": 0.0004860528156698146, |
|
"loss": 0.984, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.028405987108052005, |
|
"grad_norm": 4.098123550415039, |
|
"learning_rate": 0.00048600909408884227, |
|
"loss": 1.2241, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.02849339014530755, |
|
"grad_norm": 11.896109580993652, |
|
"learning_rate": 0.00048596537250786986, |
|
"loss": 2.0891, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.028580793182563093, |
|
"grad_norm": 3.2453126907348633, |
|
"learning_rate": 0.00048592165092689756, |
|
"loss": 1.1273, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.02866819621981864, |
|
"grad_norm": 2.6395857334136963, |
|
"learning_rate": 0.00048587792934592515, |
|
"loss": 1.6087, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.028755599257074185, |
|
"grad_norm": 2.1530113220214844, |
|
"learning_rate": 0.0004858342077649528, |
|
"loss": 1.2749, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.028843002294329727, |
|
"grad_norm": 4.572982311248779, |
|
"learning_rate": 0.00048579048618398044, |
|
"loss": 1.4111, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.028930405331585272, |
|
"grad_norm": 3.029306173324585, |
|
"learning_rate": 0.00048574676460300803, |
|
"loss": 1.2926, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.029017808368840818, |
|
"grad_norm": 1.7193225622177124, |
|
"learning_rate": 0.0004857030430220357, |
|
"loss": 1.1767, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.02910521140609636, |
|
"grad_norm": 10.779121398925781, |
|
"learning_rate": 0.0004856593214410633, |
|
"loss": 1.3369, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.029192614443351906, |
|
"grad_norm": 2.478919744491577, |
|
"learning_rate": 0.00048561559986009097, |
|
"loss": 1.093, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.02928001748060745, |
|
"grad_norm": 2.2353742122650146, |
|
"learning_rate": 0.00048557187827911856, |
|
"loss": 1.1168, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.029367420517862997, |
|
"grad_norm": 2.8225460052490234, |
|
"learning_rate": 0.0004855281566981462, |
|
"loss": 1.3248, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.02945482355511854, |
|
"grad_norm": 2.1292366981506348, |
|
"learning_rate": 0.00048548443511717385, |
|
"loss": 1.344, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.029542226592374085, |
|
"grad_norm": 7.299522399902344, |
|
"learning_rate": 0.0004854407135362015, |
|
"loss": 1.8145, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.02962962962962963, |
|
"grad_norm": 1.5046287775039673, |
|
"learning_rate": 0.00048539699195522914, |
|
"loss": 1.388, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.029717032666885173, |
|
"grad_norm": 3.0877699851989746, |
|
"learning_rate": 0.00048535327037425673, |
|
"loss": 1.3291, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.02980443570414072, |
|
"grad_norm": 3.4899399280548096, |
|
"learning_rate": 0.0004853095487932844, |
|
"loss": 2.0677, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.029891838741396264, |
|
"grad_norm": 11.234345436096191, |
|
"learning_rate": 0.000485265827212312, |
|
"loss": 1.625, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.029979241778651806, |
|
"grad_norm": 2.1975765228271484, |
|
"learning_rate": 0.0004852221056313396, |
|
"loss": 1.4517, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.030066644815907352, |
|
"grad_norm": 8.629820823669434, |
|
"learning_rate": 0.0004851783840503673, |
|
"loss": 1.5853, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.030154047853162898, |
|
"grad_norm": 2.3949103355407715, |
|
"learning_rate": 0.0004851346624693949, |
|
"loss": 1.2549, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.030241450890418443, |
|
"grad_norm": 159.31179809570312, |
|
"learning_rate": 0.00048509094088842255, |
|
"loss": 1.5771, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.030328853927673986, |
|
"grad_norm": 11.36462688446045, |
|
"learning_rate": 0.00048504721930745014, |
|
"loss": 1.9178, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.03041625696492953, |
|
"grad_norm": 7.807027816772461, |
|
"learning_rate": 0.0004850034977264778, |
|
"loss": 1.9789, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.030503660002185077, |
|
"grad_norm": 8.663688659667969, |
|
"learning_rate": 0.0004849597761455054, |
|
"loss": 2.0506, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.03059106303944062, |
|
"grad_norm": 2.205583095550537, |
|
"learning_rate": 0.00048491605456453307, |
|
"loss": 1.8671, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.030678466076696165, |
|
"grad_norm": 3.150911808013916, |
|
"learning_rate": 0.0004848723329835607, |
|
"loss": 1.333, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.03076586911395171, |
|
"grad_norm": 4.053075790405273, |
|
"learning_rate": 0.0004848286114025883, |
|
"loss": 1.5273, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.030853272151207256, |
|
"grad_norm": 2.823411703109741, |
|
"learning_rate": 0.00048478488982161595, |
|
"loss": 1.4247, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.0309406751884628, |
|
"grad_norm": 3.0909945964813232, |
|
"learning_rate": 0.0004847411682406436, |
|
"loss": 1.2206, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.031028078225718344, |
|
"grad_norm": 3.38694167137146, |
|
"learning_rate": 0.00048469744665967124, |
|
"loss": 1.3954, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.03111548126297389, |
|
"grad_norm": 1.5531120300292969, |
|
"learning_rate": 0.0004846537250786989, |
|
"loss": 1.4665, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.031202884300229432, |
|
"grad_norm": 2.2059831619262695, |
|
"learning_rate": 0.0004846100034977265, |
|
"loss": 1.6022, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.03129028733748498, |
|
"grad_norm": 5.113000869750977, |
|
"learning_rate": 0.0004845662819167541, |
|
"loss": 1.5966, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.03137769037474052, |
|
"grad_norm": 8.374882698059082, |
|
"learning_rate": 0.0004845225603357817, |
|
"loss": 1.7198, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.03146509341199607, |
|
"grad_norm": 6.680134296417236, |
|
"learning_rate": 0.00048447883875480936, |
|
"loss": 1.4896, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.031552496449251614, |
|
"grad_norm": 4.67073392868042, |
|
"learning_rate": 0.00048443511717383706, |
|
"loss": 1.8682, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.03163989948650715, |
|
"grad_norm": 4.780435562133789, |
|
"learning_rate": 0.00048439139559286465, |
|
"loss": 1.7389, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.0317273025237627, |
|
"grad_norm": 3.4517061710357666, |
|
"learning_rate": 0.0004843476740118923, |
|
"loss": 1.8797, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.031814705561018244, |
|
"grad_norm": 2.4916350841522217, |
|
"learning_rate": 0.0004843039524309199, |
|
"loss": 1.4436, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.03190210859827379, |
|
"grad_norm": 3.9899487495422363, |
|
"learning_rate": 0.00048426023084994753, |
|
"loss": 1.5546, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.031989511635529336, |
|
"grad_norm": 8.799160957336426, |
|
"learning_rate": 0.0004842165092689752, |
|
"loss": 1.6344, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.03207691467278488, |
|
"grad_norm": 2.636903762817383, |
|
"learning_rate": 0.0004841727876880028, |
|
"loss": 1.5937, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.03216431771004043, |
|
"grad_norm": 2.600330352783203, |
|
"learning_rate": 0.00048412906610703047, |
|
"loss": 1.5617, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.032251720747295966, |
|
"grad_norm": 2.9146833419799805, |
|
"learning_rate": 0.00048408534452605806, |
|
"loss": 2.2708, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.03233912378455151, |
|
"grad_norm": 1.6746532917022705, |
|
"learning_rate": 0.0004840416229450857, |
|
"loss": 1.3178, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.03242652682180706, |
|
"grad_norm": 2.1965625286102295, |
|
"learning_rate": 0.0004839979013641133, |
|
"loss": 1.2351, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.0325139298590626, |
|
"grad_norm": 4.235499858856201, |
|
"learning_rate": 0.000483954179783141, |
|
"loss": 1.8627, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.03260133289631815, |
|
"grad_norm": 1.5351746082305908, |
|
"learning_rate": 0.00048391045820216864, |
|
"loss": 1.2413, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.032688735933573694, |
|
"grad_norm": 1.5462607145309448, |
|
"learning_rate": 0.00048386673662119623, |
|
"loss": 1.3282, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.03277613897082924, |
|
"grad_norm": 2.4433155059814453, |
|
"learning_rate": 0.0004838230150402239, |
|
"loss": 1.3913, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.03286354200808478, |
|
"grad_norm": 2.431323528289795, |
|
"learning_rate": 0.00048377929345925146, |
|
"loss": 1.4269, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.032950945045340324, |
|
"grad_norm": 1.4146811962127686, |
|
"learning_rate": 0.0004837355718782791, |
|
"loss": 1.225, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.03303834808259587, |
|
"grad_norm": 1.0660099983215332, |
|
"learning_rate": 0.00048369185029730675, |
|
"loss": 1.2465, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.033125751119851415, |
|
"grad_norm": 16.820344924926758, |
|
"learning_rate": 0.0004836481287163344, |
|
"loss": 1.2228, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.03321315415710696, |
|
"grad_norm": 1.6520887613296509, |
|
"learning_rate": 0.00048360440713536204, |
|
"loss": 1.0955, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.03330055719436251, |
|
"grad_norm": 3.057648181915283, |
|
"learning_rate": 0.00048356068555438964, |
|
"loss": 1.3929, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.033387960231618045, |
|
"grad_norm": 5.74190092086792, |
|
"learning_rate": 0.0004835169639734173, |
|
"loss": 1.3873, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.03347536326887359, |
|
"grad_norm": 2.451111078262329, |
|
"learning_rate": 0.0004834732423924449, |
|
"loss": 1.2411, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.03356276630612914, |
|
"grad_norm": 7.096491813659668, |
|
"learning_rate": 0.00048342952081147257, |
|
"loss": 1.1512, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.03365016934338468, |
|
"grad_norm": 1.7510989904403687, |
|
"learning_rate": 0.0004833857992305002, |
|
"loss": 1.7508, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.03373757238064023, |
|
"grad_norm": 1.9392039775848389, |
|
"learning_rate": 0.0004833420776495278, |
|
"loss": 1.3254, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.033824975417895774, |
|
"grad_norm": 1.3087763786315918, |
|
"learning_rate": 0.00048329835606855545, |
|
"loss": 1.167, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.03391237845515132, |
|
"grad_norm": 1.0963687896728516, |
|
"learning_rate": 0.00048325463448758304, |
|
"loss": 1.193, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.03399978149240686, |
|
"grad_norm": 0.7981585264205933, |
|
"learning_rate": 0.00048321091290661074, |
|
"loss": 1.1383, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.034087184529662404, |
|
"grad_norm": 0.9217828512191772, |
|
"learning_rate": 0.00048316719132563833, |
|
"loss": 1.0119, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.03417458756691795, |
|
"grad_norm": 1.242906093597412, |
|
"learning_rate": 0.000483123469744666, |
|
"loss": 1.1663, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.034261990604173495, |
|
"grad_norm": 0.9021317362785339, |
|
"learning_rate": 0.0004830797481636936, |
|
"loss": 1.1384, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.03434939364142904, |
|
"grad_norm": 0.9118911623954773, |
|
"learning_rate": 0.0004830360265827212, |
|
"loss": 1.3087, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.034436796678684586, |
|
"grad_norm": 1.754934549331665, |
|
"learning_rate": 0.0004829923050017489, |
|
"loss": 1.3614, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.03452419971594013, |
|
"grad_norm": 0.8837860822677612, |
|
"learning_rate": 0.0004829485834207765, |
|
"loss": 1.1244, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.03461160275319567, |
|
"grad_norm": 2.6078360080718994, |
|
"learning_rate": 0.00048290486183980415, |
|
"loss": 1.0216, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.034699005790451216, |
|
"grad_norm": 5.406350135803223, |
|
"learning_rate": 0.00048286114025883174, |
|
"loss": 1.0928, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.03478640882770676, |
|
"grad_norm": 2.1140406131744385, |
|
"learning_rate": 0.0004828174186778594, |
|
"loss": 1.1857, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.03487381186496231, |
|
"grad_norm": 7.267689228057861, |
|
"learning_rate": 0.00048277369709688703, |
|
"loss": 1.7055, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.03496121490221785, |
|
"grad_norm": 1.1019072532653809, |
|
"learning_rate": 0.0004827299755159147, |
|
"loss": 2.0105, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.0350486179394734, |
|
"grad_norm": 7.888851165771484, |
|
"learning_rate": 0.0004826862539349423, |
|
"loss": 1.9483, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.035136020976728945, |
|
"grad_norm": 1.299735188484192, |
|
"learning_rate": 0.0004826425323539699, |
|
"loss": 1.2644, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.03522342401398448, |
|
"grad_norm": 1.5624737739562988, |
|
"learning_rate": 0.00048259881077299756, |
|
"loss": 1.0429, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.03531082705124003, |
|
"grad_norm": 1.350966453552246, |
|
"learning_rate": 0.0004825550891920252, |
|
"loss": 1.1749, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.035398230088495575, |
|
"grad_norm": 1.5936487913131714, |
|
"learning_rate": 0.0004825113676110528, |
|
"loss": 1.1733, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.03548563312575112, |
|
"grad_norm": 1.0757735967636108, |
|
"learning_rate": 0.0004824676460300805, |
|
"loss": 0.9944, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.035573036163006666, |
|
"grad_norm": 0.7153262495994568, |
|
"learning_rate": 0.0004824239244491081, |
|
"loss": 1.1921, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.03566043920026221, |
|
"grad_norm": 1.0734481811523438, |
|
"learning_rate": 0.00048238020286813573, |
|
"loss": 1.1752, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.03574784223751775, |
|
"grad_norm": 0.8831942081451416, |
|
"learning_rate": 0.0004823364812871633, |
|
"loss": 1.1402, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.035835245274773296, |
|
"grad_norm": 0.6179252862930298, |
|
"learning_rate": 0.00048229275970619096, |
|
"loss": 1.2101, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.03592264831202884, |
|
"grad_norm": 1.091264009475708, |
|
"learning_rate": 0.00048224903812521866, |
|
"loss": 1.1421, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.03601005134928439, |
|
"grad_norm": 0.8162115216255188, |
|
"learning_rate": 0.00048220531654424625, |
|
"loss": 1.2952, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.03609745438653993, |
|
"grad_norm": 1.0148085355758667, |
|
"learning_rate": 0.0004821615949632739, |
|
"loss": 0.9862, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.03618485742379548, |
|
"grad_norm": 0.9712663888931274, |
|
"learning_rate": 0.0004821178733823015, |
|
"loss": 1.1402, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.036272260461051024, |
|
"grad_norm": 0.9177207350730896, |
|
"learning_rate": 0.00048207415180132914, |
|
"loss": 1.2027, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.03635966349830656, |
|
"grad_norm": 3.5026392936706543, |
|
"learning_rate": 0.0004820304302203567, |
|
"loss": 1.4284, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.03644706653556211, |
|
"grad_norm": 1.7483121156692505, |
|
"learning_rate": 0.0004819867086393844, |
|
"loss": 1.2328, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.036534469572817654, |
|
"grad_norm": 1.423335075378418, |
|
"learning_rate": 0.00048194298705841207, |
|
"loss": 1.1085, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.0366218726100732, |
|
"grad_norm": 13.332382202148438, |
|
"learning_rate": 0.00048189926547743966, |
|
"loss": 1.2456, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.036709275647328746, |
|
"grad_norm": 1.2808276414871216, |
|
"learning_rate": 0.0004818555438964673, |
|
"loss": 1.1165, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.03679667868458429, |
|
"grad_norm": 1.293886661529541, |
|
"learning_rate": 0.0004818118223154949, |
|
"loss": 1.2171, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.03688408172183984, |
|
"grad_norm": 1.1845675706863403, |
|
"learning_rate": 0.0004817681007345226, |
|
"loss": 2.0462, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.036971484759095376, |
|
"grad_norm": 0.9728288054466248, |
|
"learning_rate": 0.00048172437915355024, |
|
"loss": 1.143, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.03705888779635092, |
|
"grad_norm": 0.816474437713623, |
|
"learning_rate": 0.00048168065757257783, |
|
"loss": 1.2092, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.03714629083360647, |
|
"grad_norm": 0.6224190592765808, |
|
"learning_rate": 0.0004816369359916055, |
|
"loss": 1.0575, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.03723369387086201, |
|
"grad_norm": 0.6718823313713074, |
|
"learning_rate": 0.00048159321441063307, |
|
"loss": 1.0947, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.03732109690811756, |
|
"grad_norm": 0.6595826148986816, |
|
"learning_rate": 0.0004815494928296607, |
|
"loss": 1.4427, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.037408499945373104, |
|
"grad_norm": 11.761706352233887, |
|
"learning_rate": 0.00048150577124868836, |
|
"loss": 1.0676, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.03749590298262865, |
|
"grad_norm": 0.8342620134353638, |
|
"learning_rate": 0.000481462049667716, |
|
"loss": 1.9127, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.03758330601988419, |
|
"grad_norm": 1.1234923601150513, |
|
"learning_rate": 0.00048141832808674365, |
|
"loss": 1.1633, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.037670709057139734, |
|
"grad_norm": 1.9076615571975708, |
|
"learning_rate": 0.00048137460650577124, |
|
"loss": 1.0639, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.03775811209439528, |
|
"grad_norm": 0.6750392913818359, |
|
"learning_rate": 0.0004813308849247989, |
|
"loss": 0.9955, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.037845515131650825, |
|
"grad_norm": 0.6759085655212402, |
|
"learning_rate": 0.0004812871633438265, |
|
"loss": 1.131, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.03793291816890637, |
|
"grad_norm": 1.4919787645339966, |
|
"learning_rate": 0.0004812434417628542, |
|
"loss": 1.6338, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.03802032120616192, |
|
"grad_norm": 0.8407806754112244, |
|
"learning_rate": 0.0004811997201818818, |
|
"loss": 1.6765, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.038107724243417455, |
|
"grad_norm": 0.5378815531730652, |
|
"learning_rate": 0.0004811559986009094, |
|
"loss": 1.1115, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.038195127280673, |
|
"grad_norm": 0.705746054649353, |
|
"learning_rate": 0.00048111227701993706, |
|
"loss": 0.8717, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.03828253031792855, |
|
"grad_norm": 0.6170596480369568, |
|
"learning_rate": 0.00048106855543896465, |
|
"loss": 1.113, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.03836993335518409, |
|
"grad_norm": 0.7694591283798218, |
|
"learning_rate": 0.00048102483385799235, |
|
"loss": 0.9803, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.03845733639243964, |
|
"grad_norm": 0.44214290380477905, |
|
"learning_rate": 0.00048098111227701994, |
|
"loss": 1.0997, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.038544739429695184, |
|
"grad_norm": 1.67384934425354, |
|
"learning_rate": 0.0004809373906960476, |
|
"loss": 1.473, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.03863214246695073, |
|
"grad_norm": 0.906971275806427, |
|
"learning_rate": 0.00048089366911507523, |
|
"loss": 1.4701, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.03871954550420627, |
|
"grad_norm": 1.0720627307891846, |
|
"learning_rate": 0.0004808499475341028, |
|
"loss": 1.2818, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.038806948541461814, |
|
"grad_norm": 0.9048315286636353, |
|
"learning_rate": 0.00048080622595313046, |
|
"loss": 1.0395, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.03889435157871736, |
|
"grad_norm": 0.6810390949249268, |
|
"learning_rate": 0.0004807625043721581, |
|
"loss": 0.9983, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.038981754615972905, |
|
"grad_norm": 2.8892154693603516, |
|
"learning_rate": 0.00048071878279118575, |
|
"loss": 1.4023, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.03906915765322845, |
|
"grad_norm": 2.2658865451812744, |
|
"learning_rate": 0.00048067506121021335, |
|
"loss": 1.2289, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.039156560690483996, |
|
"grad_norm": 0.6239084005355835, |
|
"learning_rate": 0.000480631339629241, |
|
"loss": 1.012, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.03924396372773954, |
|
"grad_norm": 1.147459864616394, |
|
"learning_rate": 0.00048058761804826864, |
|
"loss": 1.0538, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.03933136676499508, |
|
"grad_norm": 0.8646839261054993, |
|
"learning_rate": 0.0004805438964672963, |
|
"loss": 0.965, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.039418769802250626, |
|
"grad_norm": 0.9366894960403442, |
|
"learning_rate": 0.0004805001748863239, |
|
"loss": 0.8447, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.03950617283950617, |
|
"grad_norm": 0.6512202024459839, |
|
"learning_rate": 0.0004804564533053515, |
|
"loss": 1.0594, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.03959357587676172, |
|
"grad_norm": 0.5651702284812927, |
|
"learning_rate": 0.00048041273172437916, |
|
"loss": 1.1249, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.03968097891401726, |
|
"grad_norm": 1.0038714408874512, |
|
"learning_rate": 0.0004803690101434068, |
|
"loss": 1.1198, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.03976838195127281, |
|
"grad_norm": 1.0579853057861328, |
|
"learning_rate": 0.0004803252885624344, |
|
"loss": 1.0889, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.039855784988528355, |
|
"grad_norm": 0.4361538887023926, |
|
"learning_rate": 0.0004802815669814621, |
|
"loss": 0.876, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.03994318802578389, |
|
"grad_norm": 0.8685644865036011, |
|
"learning_rate": 0.0004802378454004897, |
|
"loss": 0.8344, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.04003059106303944, |
|
"grad_norm": 0.5350561141967773, |
|
"learning_rate": 0.00048019412381951733, |
|
"loss": 1.0352, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.040117994100294985, |
|
"grad_norm": 0.7722122669219971, |
|
"learning_rate": 0.0004801504022385449, |
|
"loss": 0.9144, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.04020539713755053, |
|
"grad_norm": 0.5645512938499451, |
|
"learning_rate": 0.00048010668065757257, |
|
"loss": 0.9014, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.040292800174806076, |
|
"grad_norm": 0.5366953015327454, |
|
"learning_rate": 0.00048006295907660027, |
|
"loss": 1.005, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.04038020321206162, |
|
"grad_norm": 0.5673419237136841, |
|
"learning_rate": 0.00048001923749562786, |
|
"loss": 0.9666, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.04046760624931716, |
|
"grad_norm": 0.5309872031211853, |
|
"learning_rate": 0.0004799755159146555, |
|
"loss": 1.017, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.040555009286572706, |
|
"grad_norm": 0.567584753036499, |
|
"learning_rate": 0.0004799317943336831, |
|
"loss": 0.9212, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.04064241232382825, |
|
"grad_norm": 0.5049634575843811, |
|
"learning_rate": 0.00047988807275271074, |
|
"loss": 1.0515, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.0407298153610838, |
|
"grad_norm": 0.5385315418243408, |
|
"learning_rate": 0.00047984435117173833, |
|
"loss": 1.1727, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.04081721839833934, |
|
"grad_norm": 0.4884001910686493, |
|
"learning_rate": 0.00047980062959076603, |
|
"loss": 1.1159, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.04090462143559489, |
|
"grad_norm": 0.7112920880317688, |
|
"learning_rate": 0.0004797569080097937, |
|
"loss": 1.235, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.040992024472850434, |
|
"grad_norm": 0.4838173985481262, |
|
"learning_rate": 0.00047971318642882127, |
|
"loss": 0.9681, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.04107942751010597, |
|
"grad_norm": 0.45457422733306885, |
|
"learning_rate": 0.0004796694648478489, |
|
"loss": 1.1104, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.04116683054736152, |
|
"grad_norm": 0.5703690648078918, |
|
"learning_rate": 0.0004796257432668765, |
|
"loss": 1.1248, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.041254233584617064, |
|
"grad_norm": 0.450735479593277, |
|
"learning_rate": 0.00047958202168590415, |
|
"loss": 0.8925, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.04134163662187261, |
|
"grad_norm": 0.5150513052940369, |
|
"learning_rate": 0.00047953830010493185, |
|
"loss": 1.3525, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.041429039659128156, |
|
"grad_norm": 0.3937002718448639, |
|
"learning_rate": 0.00047949457852395944, |
|
"loss": 0.9275, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.0415164426963837, |
|
"grad_norm": 0.3689919114112854, |
|
"learning_rate": 0.0004794508569429871, |
|
"loss": 1.0588, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.04160384573363925, |
|
"grad_norm": 0.34137895703315735, |
|
"learning_rate": 0.0004794071353620147, |
|
"loss": 1.0148, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.041691248770894786, |
|
"grad_norm": 0.33478084206581116, |
|
"learning_rate": 0.0004793634137810423, |
|
"loss": 1.1783, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.04177865180815033, |
|
"grad_norm": 0.36996185779571533, |
|
"learning_rate": 0.00047931969220006996, |
|
"loss": 0.9166, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.04186605484540588, |
|
"grad_norm": 0.40458017587661743, |
|
"learning_rate": 0.0004792759706190976, |
|
"loss": 1.039, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.04195345788266142, |
|
"grad_norm": 0.5270059704780579, |
|
"learning_rate": 0.00047923224903812525, |
|
"loss": 0.9331, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.04204086091991697, |
|
"grad_norm": 0.38086146116256714, |
|
"learning_rate": 0.00047918852745715285, |
|
"loss": 1.2488, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.042128263957172514, |
|
"grad_norm": 0.4206714332103729, |
|
"learning_rate": 0.0004791448058761805, |
|
"loss": 0.9509, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.04221566699442806, |
|
"grad_norm": 0.45416519045829773, |
|
"learning_rate": 0.0004791010842952081, |
|
"loss": 1.0384, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.0423030700316836, |
|
"grad_norm": 0.312229722738266, |
|
"learning_rate": 0.0004790573627142358, |
|
"loss": 1.0349, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.042390473068939144, |
|
"grad_norm": 0.4084686040878296, |
|
"learning_rate": 0.0004790136411332634, |
|
"loss": 0.9074, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.04247787610619469, |
|
"grad_norm": 12.558296203613281, |
|
"learning_rate": 0.000478969919552291, |
|
"loss": 1.4943, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.042565279143450235, |
|
"grad_norm": 0.5897109508514404, |
|
"learning_rate": 0.00047892619797131866, |
|
"loss": 1.0668, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.04265268218070578, |
|
"grad_norm": 0.6350471377372742, |
|
"learning_rate": 0.00047888247639034625, |
|
"loss": 0.9479, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.04274008521796133, |
|
"grad_norm": 0.4891508221626282, |
|
"learning_rate": 0.00047883875480937395, |
|
"loss": 1.1157, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.042827488255216865, |
|
"grad_norm": 0.3619961142539978, |
|
"learning_rate": 0.00047879503322840154, |
|
"loss": 0.9912, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.04291489129247241, |
|
"grad_norm": 0.3376581072807312, |
|
"learning_rate": 0.0004787513116474292, |
|
"loss": 0.8494, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.04300229432972796, |
|
"grad_norm": 0.6040793061256409, |
|
"learning_rate": 0.00047870759006645683, |
|
"loss": 1.3237, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.0430896973669835, |
|
"grad_norm": 2.6606392860412598, |
|
"learning_rate": 0.0004786638684854844, |
|
"loss": 1.7359, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.04317710040423905, |
|
"grad_norm": 0.5396057367324829, |
|
"learning_rate": 0.00047862014690451207, |
|
"loss": 1.552, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.043264503441494594, |
|
"grad_norm": 0.42991939187049866, |
|
"learning_rate": 0.0004785764253235397, |
|
"loss": 0.99, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.04335190647875014, |
|
"grad_norm": 0.40487632155418396, |
|
"learning_rate": 0.00047853270374256736, |
|
"loss": 1.0104, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.04343930951600568, |
|
"grad_norm": 0.9767838716506958, |
|
"learning_rate": 0.00047848898216159495, |
|
"loss": 1.0582, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.043526712553261224, |
|
"grad_norm": 0.3633114695549011, |
|
"learning_rate": 0.0004784452605806226, |
|
"loss": 0.92, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.04361411559051677, |
|
"grad_norm": 0.6365157961845398, |
|
"learning_rate": 0.00047840153899965024, |
|
"loss": 0.9564, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.043701518627772315, |
|
"grad_norm": 0.4060046076774597, |
|
"learning_rate": 0.00047835781741867783, |
|
"loss": 1.046, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.04378892166502786, |
|
"grad_norm": 0.3747900128364563, |
|
"learning_rate": 0.00047831409583770553, |
|
"loss": 1.0201, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.043876324702283406, |
|
"grad_norm": 0.3672393262386322, |
|
"learning_rate": 0.0004782703742567331, |
|
"loss": 1.0021, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.04396372773953895, |
|
"grad_norm": 0.3505338132381439, |
|
"learning_rate": 0.00047822665267576077, |
|
"loss": 1.0002, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.04405113077679449, |
|
"grad_norm": 5.722542762756348, |
|
"learning_rate": 0.0004781829310947884, |
|
"loss": 2.5431, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.044138533814050036, |
|
"grad_norm": 0.5349693298339844, |
|
"learning_rate": 0.000478139209513816, |
|
"loss": 1.151, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.04422593685130558, |
|
"grad_norm": 0.4468895494937897, |
|
"learning_rate": 0.0004780954879328437, |
|
"loss": 0.9958, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.04431333988856113, |
|
"grad_norm": 0.47205036878585815, |
|
"learning_rate": 0.0004780517663518713, |
|
"loss": 0.9401, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.04440074292581667, |
|
"grad_norm": 0.35336941480636597, |
|
"learning_rate": 0.00047800804477089894, |
|
"loss": 1.0982, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.04448814596307222, |
|
"grad_norm": 1.8884743452072144, |
|
"learning_rate": 0.00047796432318992653, |
|
"loss": 0.9199, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.044575549000327765, |
|
"grad_norm": 0.4091229736804962, |
|
"learning_rate": 0.0004779206016089542, |
|
"loss": 0.8953, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.0446629520375833, |
|
"grad_norm": 0.4730583131313324, |
|
"learning_rate": 0.0004778768800279818, |
|
"loss": 0.8085, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.04475035507483885, |
|
"grad_norm": 0.3801075220108032, |
|
"learning_rate": 0.00047783315844700946, |
|
"loss": 0.9914, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.044837758112094395, |
|
"grad_norm": 0.3660631477832794, |
|
"learning_rate": 0.0004777894368660371, |
|
"loss": 0.9804, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.04492516114934994, |
|
"grad_norm": 0.8466418981552124, |
|
"learning_rate": 0.0004777457152850647, |
|
"loss": 1.1207, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.045012564186605486, |
|
"grad_norm": 0.3560774624347687, |
|
"learning_rate": 0.00047770199370409234, |
|
"loss": 0.8773, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.04509996722386103, |
|
"grad_norm": 0.49633318185806274, |
|
"learning_rate": 0.00047765827212312, |
|
"loss": 1.0111, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.04518737026111657, |
|
"grad_norm": 0.6001185178756714, |
|
"learning_rate": 0.00047761455054214764, |
|
"loss": 1.2566, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.045274773298372116, |
|
"grad_norm": 0.7423095703125, |
|
"learning_rate": 0.0004775708289611753, |
|
"loss": 1.1431, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.04536217633562766, |
|
"grad_norm": 0.34218892455101013, |
|
"learning_rate": 0.00047752710738020287, |
|
"loss": 0.9254, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.04544957937288321, |
|
"grad_norm": 0.336230605840683, |
|
"learning_rate": 0.0004774833857992305, |
|
"loss": 1.0015, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.04553698241013875, |
|
"grad_norm": 0.39158111810684204, |
|
"learning_rate": 0.0004774396642182581, |
|
"loss": 0.8319, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.0456243854473943, |
|
"grad_norm": 0.4045357406139374, |
|
"learning_rate": 0.00047739594263728575, |
|
"loss": 0.8531, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.045711788484649844, |
|
"grad_norm": 0.5861966013908386, |
|
"learning_rate": 0.00047735222105631345, |
|
"loss": 0.9975, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.04579919152190538, |
|
"grad_norm": 0.33865249156951904, |
|
"learning_rate": 0.00047730849947534104, |
|
"loss": 0.94, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.04588659455916093, |
|
"grad_norm": 0.4759502112865448, |
|
"learning_rate": 0.0004772647778943687, |
|
"loss": 0.9581, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.045973997596416474, |
|
"grad_norm": 0.492929607629776, |
|
"learning_rate": 0.0004772210563133963, |
|
"loss": 1.3563, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.04606140063367202, |
|
"grad_norm": 0.31947705149650574, |
|
"learning_rate": 0.0004771773347324239, |
|
"loss": 0.8052, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.046148803670927566, |
|
"grad_norm": 0.3842394948005676, |
|
"learning_rate": 0.0004771336131514515, |
|
"loss": 0.9723, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.04623620670818311, |
|
"grad_norm": 0.338451623916626, |
|
"learning_rate": 0.0004770898915704792, |
|
"loss": 1.0315, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.04632360974543866, |
|
"grad_norm": 1.9640684127807617, |
|
"learning_rate": 0.00047704616998950686, |
|
"loss": 1.2013, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.046411012782694196, |
|
"grad_norm": 0.501758337020874, |
|
"learning_rate": 0.00047700244840853445, |
|
"loss": 1.0096, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.04649841581994974, |
|
"grad_norm": 0.5867491960525513, |
|
"learning_rate": 0.0004769587268275621, |
|
"loss": 0.9708, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.04658581885720529, |
|
"grad_norm": 2.1122539043426514, |
|
"learning_rate": 0.0004769150052465897, |
|
"loss": 0.8145, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.04667322189446083, |
|
"grad_norm": 0.7969621419906616, |
|
"learning_rate": 0.0004768712836656174, |
|
"loss": 0.829, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.04676062493171638, |
|
"grad_norm": 0.4205247461795807, |
|
"learning_rate": 0.00047682756208464503, |
|
"loss": 1.0063, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.046848027968971924, |
|
"grad_norm": 0.3231610059738159, |
|
"learning_rate": 0.0004767838405036726, |
|
"loss": 0.968, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.04693543100622747, |
|
"grad_norm": 1.369025707244873, |
|
"learning_rate": 0.00047674011892270027, |
|
"loss": 1.7445, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.04702283404348301, |
|
"grad_norm": 0.42706942558288574, |
|
"learning_rate": 0.00047669639734172786, |
|
"loss": 1.1781, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.047110237080738554, |
|
"grad_norm": 0.36257731914520264, |
|
"learning_rate": 0.0004766526757607555, |
|
"loss": 1.0557, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.0471976401179941, |
|
"grad_norm": 0.4783022105693817, |
|
"learning_rate": 0.00047660895417978315, |
|
"loss": 1.053, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.047285043155249645, |
|
"grad_norm": 0.3079909384250641, |
|
"learning_rate": 0.0004765652325988108, |
|
"loss": 1.1313, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.04737244619250519, |
|
"grad_norm": 0.4072510302066803, |
|
"learning_rate": 0.00047652151101783844, |
|
"loss": 0.8678, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.04745984922976074, |
|
"grad_norm": 0.36985546350479126, |
|
"learning_rate": 0.00047647778943686603, |
|
"loss": 0.9387, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.04754725226701628, |
|
"grad_norm": 0.4222630262374878, |
|
"learning_rate": 0.0004764340678558937, |
|
"loss": 0.9083, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.04763465530427182, |
|
"grad_norm": 0.39896291494369507, |
|
"learning_rate": 0.0004763903462749213, |
|
"loss": 0.9773, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.04772205834152737, |
|
"grad_norm": 0.3235687017440796, |
|
"learning_rate": 0.00047634662469394896, |
|
"loss": 0.9484, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.04780946137878291, |
|
"grad_norm": 0.3377327620983124, |
|
"learning_rate": 0.0004763029031129766, |
|
"loss": 0.9319, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.04789686441603846, |
|
"grad_norm": 0.37998026609420776, |
|
"learning_rate": 0.0004762591815320042, |
|
"loss": 1.3499, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.047984267453294004, |
|
"grad_norm": 0.37219107151031494, |
|
"learning_rate": 0.00047621545995103184, |
|
"loss": 1.1132, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.04807167049054955, |
|
"grad_norm": 0.3147220313549042, |
|
"learning_rate": 0.00047617173837005944, |
|
"loss": 0.9306, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.04815907352780509, |
|
"grad_norm": 0.3832624852657318, |
|
"learning_rate": 0.00047612801678908713, |
|
"loss": 0.8518, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.048246476565060634, |
|
"grad_norm": 0.3098907172679901, |
|
"learning_rate": 0.0004760842952081147, |
|
"loss": 0.8183, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.04833387960231618, |
|
"grad_norm": 0.3062676191329956, |
|
"learning_rate": 0.00047604057362714237, |
|
"loss": 0.9226, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.048421282639571725, |
|
"grad_norm": 0.3292568624019623, |
|
"learning_rate": 0.00047599685204617, |
|
"loss": 0.9204, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.04850868567682727, |
|
"grad_norm": 0.45942652225494385, |
|
"learning_rate": 0.0004759531304651976, |
|
"loss": 1.1571, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.048596088714082816, |
|
"grad_norm": 0.3519571125507355, |
|
"learning_rate": 0.00047590940888422525, |
|
"loss": 0.9566, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.04868349175133836, |
|
"grad_norm": 0.3418327569961548, |
|
"learning_rate": 0.0004758656873032529, |
|
"loss": 1.146, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.0487708947885939, |
|
"grad_norm": 0.3338674008846283, |
|
"learning_rate": 0.00047582196572228054, |
|
"loss": 1.0859, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.048858297825849446, |
|
"grad_norm": 1.2700949907302856, |
|
"learning_rate": 0.00047577824414130813, |
|
"loss": 1.3166, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.04894570086310499, |
|
"grad_norm": 0.706069827079773, |
|
"learning_rate": 0.0004757345225603358, |
|
"loss": 1.2259, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.04903310390036054, |
|
"grad_norm": 0.5171198844909668, |
|
"learning_rate": 0.0004756908009793634, |
|
"loss": 0.7985, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.04912050693761608, |
|
"grad_norm": 0.8621017932891846, |
|
"learning_rate": 0.00047564707939839107, |
|
"loss": 1.0042, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.04920790997487163, |
|
"grad_norm": 0.926487922668457, |
|
"learning_rate": 0.0004756033578174187, |
|
"loss": 0.9945, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.049295313012127175, |
|
"grad_norm": 0.9586560726165771, |
|
"learning_rate": 0.0004755596362364463, |
|
"loss": 1.5266, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.04938271604938271, |
|
"grad_norm": 0.507824182510376, |
|
"learning_rate": 0.00047551591465547395, |
|
"loss": 0.8737, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.04947011908663826, |
|
"grad_norm": 0.38291049003601074, |
|
"learning_rate": 0.0004754721930745016, |
|
"loss": 0.7636, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.049557522123893805, |
|
"grad_norm": 0.40479573607444763, |
|
"learning_rate": 0.0004754284714935292, |
|
"loss": 0.781, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.04964492516114935, |
|
"grad_norm": 0.6375040411949158, |
|
"learning_rate": 0.0004753847499125569, |
|
"loss": 1.1493, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.049732328198404896, |
|
"grad_norm": 0.3949948847293854, |
|
"learning_rate": 0.0004753410283315845, |
|
"loss": 0.9626, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.04981973123566044, |
|
"grad_norm": 0.3734526038169861, |
|
"learning_rate": 0.0004752973067506121, |
|
"loss": 0.9207, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.04990713427291599, |
|
"grad_norm": 0.5179705619812012, |
|
"learning_rate": 0.0004752535851696397, |
|
"loss": 1.3906, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.049994537310171526, |
|
"grad_norm": 0.4602389931678772, |
|
"learning_rate": 0.00047520986358866736, |
|
"loss": 1.0577, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.05008194034742707, |
|
"grad_norm": 0.30401960015296936, |
|
"learning_rate": 0.00047516614200769506, |
|
"loss": 1.13, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.05016934338468262, |
|
"grad_norm": 0.3481753170490265, |
|
"learning_rate": 0.00047512242042672265, |
|
"loss": 0.857, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.05025674642193816, |
|
"grad_norm": 0.4005964398384094, |
|
"learning_rate": 0.0004750786988457503, |
|
"loss": 0.9569, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.05034414945919371, |
|
"grad_norm": 0.43765851855278015, |
|
"learning_rate": 0.0004750349772647779, |
|
"loss": 1.2156, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.050431552496449254, |
|
"grad_norm": 0.3252186179161072, |
|
"learning_rate": 0.00047499125568380553, |
|
"loss": 1.0392, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.05051895553370479, |
|
"grad_norm": 0.3639061152935028, |
|
"learning_rate": 0.0004749475341028331, |
|
"loss": 0.914, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.05060635857096034, |
|
"grad_norm": 0.3080824911594391, |
|
"learning_rate": 0.0004749038125218608, |
|
"loss": 0.9735, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.050693761608215884, |
|
"grad_norm": 0.33566662669181824, |
|
"learning_rate": 0.00047486009094088846, |
|
"loss": 1.1619, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.05078116464547143, |
|
"grad_norm": 0.2990110218524933, |
|
"learning_rate": 0.00047481636935991605, |
|
"loss": 0.97, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.050868567682726976, |
|
"grad_norm": 0.3264564871788025, |
|
"learning_rate": 0.0004747726477789437, |
|
"loss": 0.824, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.05095597071998252, |
|
"grad_norm": 0.37740233540534973, |
|
"learning_rate": 0.0004747289261979713, |
|
"loss": 1.1715, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.05104337375723807, |
|
"grad_norm": 0.39894765615463257, |
|
"learning_rate": 0.00047468520461699894, |
|
"loss": 1.3263, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.051130776794493606, |
|
"grad_norm": 0.3279603123664856, |
|
"learning_rate": 0.00047464148303602663, |
|
"loss": 0.8633, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.05121817983174915, |
|
"grad_norm": 0.30895987153053284, |
|
"learning_rate": 0.0004745977614550542, |
|
"loss": 0.9019, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.0513055828690047, |
|
"grad_norm": 0.8510332703590393, |
|
"learning_rate": 0.00047455403987408187, |
|
"loss": 0.9492, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.05139298590626024, |
|
"grad_norm": 0.5336425304412842, |
|
"learning_rate": 0.00047451031829310946, |
|
"loss": 0.8209, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.05148038894351579, |
|
"grad_norm": 0.3380926847457886, |
|
"learning_rate": 0.0004744665967121371, |
|
"loss": 0.8024, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.051567791980771334, |
|
"grad_norm": 0.3537689447402954, |
|
"learning_rate": 0.00047442287513116475, |
|
"loss": 1.1219, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.05165519501802688, |
|
"grad_norm": 0.5417413711547852, |
|
"learning_rate": 0.0004743791535501924, |
|
"loss": 1.0341, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.05174259805528242, |
|
"grad_norm": 0.4394038915634155, |
|
"learning_rate": 0.00047433543196922004, |
|
"loss": 0.934, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.051830001092537964, |
|
"grad_norm": 0.738370954990387, |
|
"learning_rate": 0.00047429171038824763, |
|
"loss": 1.1953, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.05191740412979351, |
|
"grad_norm": 0.33024734258651733, |
|
"learning_rate": 0.0004742479888072753, |
|
"loss": 0.687, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.052004807167049055, |
|
"grad_norm": 0.3696803152561188, |
|
"learning_rate": 0.00047420426722630287, |
|
"loss": 1.0533, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.0520922102043046, |
|
"grad_norm": 0.31398460268974304, |
|
"learning_rate": 0.00047416054564533057, |
|
"loss": 1.0434, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.05217961324156015, |
|
"grad_norm": 0.3482360541820526, |
|
"learning_rate": 0.0004741168240643582, |
|
"loss": 1.2415, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.05226701627881569, |
|
"grad_norm": 0.32207486033439636, |
|
"learning_rate": 0.0004740731024833858, |
|
"loss": 1.1465, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.05235441931607123, |
|
"grad_norm": 0.2964969277381897, |
|
"learning_rate": 0.00047402938090241345, |
|
"loss": 0.8746, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.05244182235332678, |
|
"grad_norm": 0.26993119716644287, |
|
"learning_rate": 0.00047398565932144104, |
|
"loss": 0.9161, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.05252922539058232, |
|
"grad_norm": 0.31088942289352417, |
|
"learning_rate": 0.00047394193774046874, |
|
"loss": 0.938, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.05261662842783787, |
|
"grad_norm": 0.2921091318130493, |
|
"learning_rate": 0.00047389821615949633, |
|
"loss": 0.914, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.052704031465093414, |
|
"grad_norm": 0.4693572223186493, |
|
"learning_rate": 0.000473854494578524, |
|
"loss": 0.9083, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.05279143450234896, |
|
"grad_norm": 0.6201152801513672, |
|
"learning_rate": 0.0004738107729975516, |
|
"loss": 1.1098, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.0528788375396045, |
|
"grad_norm": 0.48871442675590515, |
|
"learning_rate": 0.0004737670514165792, |
|
"loss": 1.1571, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.052966240576860044, |
|
"grad_norm": 0.26332658529281616, |
|
"learning_rate": 0.00047372332983560686, |
|
"loss": 0.995, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.05305364361411559, |
|
"grad_norm": 0.7663961052894592, |
|
"learning_rate": 0.0004736796082546345, |
|
"loss": 1.0206, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.053141046651371135, |
|
"grad_norm": 0.3350706100463867, |
|
"learning_rate": 0.00047363588667366215, |
|
"loss": 1.0328, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.05322844968862668, |
|
"grad_norm": 0.30147233605384827, |
|
"learning_rate": 0.00047359216509268974, |
|
"loss": 0.8874, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.053315852725882226, |
|
"grad_norm": 0.4487704038619995, |
|
"learning_rate": 0.0004735484435117174, |
|
"loss": 0.8327, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.05340325576313777, |
|
"grad_norm": 0.474685400724411, |
|
"learning_rate": 0.00047350472193074503, |
|
"loss": 0.8405, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.05349065880039331, |
|
"grad_norm": 0.6512682437896729, |
|
"learning_rate": 0.0004734610003497726, |
|
"loss": 1.418, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.053578061837648856, |
|
"grad_norm": 0.3829117715358734, |
|
"learning_rate": 0.0004734172787688003, |
|
"loss": 0.9036, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.0536654648749044, |
|
"grad_norm": 0.3626525402069092, |
|
"learning_rate": 0.0004733735571878279, |
|
"loss": 0.9919, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.05375286791215995, |
|
"grad_norm": 0.6899876594543457, |
|
"learning_rate": 0.00047332983560685555, |
|
"loss": 0.8781, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.05384027094941549, |
|
"grad_norm": 0.33936572074890137, |
|
"learning_rate": 0.0004732861140258832, |
|
"loss": 0.7375, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.05392767398667104, |
|
"grad_norm": 0.45376959443092346, |
|
"learning_rate": 0.0004732423924449108, |
|
"loss": 0.868, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.054015077023926585, |
|
"grad_norm": 0.5580937266349792, |
|
"learning_rate": 0.0004731986708639385, |
|
"loss": 1.182, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.05410248006118212, |
|
"grad_norm": 0.3207378685474396, |
|
"learning_rate": 0.0004731549492829661, |
|
"loss": 0.9069, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.05418988309843767, |
|
"grad_norm": 0.3553832769393921, |
|
"learning_rate": 0.0004731112277019937, |
|
"loss": 1.4, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.054277286135693215, |
|
"grad_norm": 0.3708738386631012, |
|
"learning_rate": 0.0004730675061210213, |
|
"loss": 1.1475, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.05436468917294876, |
|
"grad_norm": 0.35041436553001404, |
|
"learning_rate": 0.00047302378454004896, |
|
"loss": 0.9505, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.054452092210204306, |
|
"grad_norm": 0.37304723262786865, |
|
"learning_rate": 0.0004729800629590766, |
|
"loss": 0.8858, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.05453949524745985, |
|
"grad_norm": 0.34602999687194824, |
|
"learning_rate": 0.00047293634137810425, |
|
"loss": 1.0687, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.0546268982847154, |
|
"grad_norm": 0.3194156587123871, |
|
"learning_rate": 0.0004728926197971319, |
|
"loss": 0.9222, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.054714301321970936, |
|
"grad_norm": 0.34864407777786255, |
|
"learning_rate": 0.0004728488982161595, |
|
"loss": 1.1291, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.05480170435922648, |
|
"grad_norm": 0.27222639322280884, |
|
"learning_rate": 0.00047280517663518713, |
|
"loss": 0.9762, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.05488910739648203, |
|
"grad_norm": 0.289035826921463, |
|
"learning_rate": 0.0004727614550542148, |
|
"loss": 0.84, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.05497651043373757, |
|
"grad_norm": 1.1678911447525024, |
|
"learning_rate": 0.0004727177334732424, |
|
"loss": 0.8835, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.05506391347099312, |
|
"grad_norm": 0.32149800658226013, |
|
"learning_rate": 0.00047267401189227007, |
|
"loss": 0.8814, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.055151316508248664, |
|
"grad_norm": 0.3312610387802124, |
|
"learning_rate": 0.00047263029031129766, |
|
"loss": 0.9001, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.0552387195455042, |
|
"grad_norm": 0.32734236121177673, |
|
"learning_rate": 0.0004725865687303253, |
|
"loss": 0.6587, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.05532612258275975, |
|
"grad_norm": 0.780978798866272, |
|
"learning_rate": 0.0004725428471493529, |
|
"loss": 1.1513, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.055413525620015294, |
|
"grad_norm": 0.3088547885417938, |
|
"learning_rate": 0.00047249912556838054, |
|
"loss": 0.8629, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.05550092865727084, |
|
"grad_norm": 0.34646108746528625, |
|
"learning_rate": 0.00047245540398740824, |
|
"loss": 0.8972, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.055588331694526386, |
|
"grad_norm": 0.47034963965415955, |
|
"learning_rate": 0.00047241168240643583, |
|
"loss": 1.414, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.05567573473178193, |
|
"grad_norm": 0.3200039565563202, |
|
"learning_rate": 0.0004723679608254635, |
|
"loss": 1.0516, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.05576313776903748, |
|
"grad_norm": 0.3332134187221527, |
|
"learning_rate": 0.00047232423924449107, |
|
"loss": 0.9086, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.055850540806293016, |
|
"grad_norm": 0.4804655611515045, |
|
"learning_rate": 0.0004722805176635187, |
|
"loss": 0.9719, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.05593794384354856, |
|
"grad_norm": 0.3591998219490051, |
|
"learning_rate": 0.0004722367960825463, |
|
"loss": 0.7201, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.05602534688080411, |
|
"grad_norm": 0.3319551944732666, |
|
"learning_rate": 0.000472193074501574, |
|
"loss": 1.1264, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.05611274991805965, |
|
"grad_norm": 0.3312825858592987, |
|
"learning_rate": 0.00047214935292060165, |
|
"loss": 1.0482, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.0562001529553152, |
|
"grad_norm": 0.3713119328022003, |
|
"learning_rate": 0.00047210563133962924, |
|
"loss": 1.1576, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.056287555992570744, |
|
"grad_norm": 0.35899418592453003, |
|
"learning_rate": 0.0004720619097586569, |
|
"loss": 0.7906, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.05637495902982629, |
|
"grad_norm": 0.31557363271713257, |
|
"learning_rate": 0.0004720181881776845, |
|
"loss": 0.9632, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.05646236206708183, |
|
"grad_norm": 0.40129950642585754, |
|
"learning_rate": 0.00047197446659671217, |
|
"loss": 1.3243, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.056549765104337374, |
|
"grad_norm": 0.3548416495323181, |
|
"learning_rate": 0.0004719307450157398, |
|
"loss": 1.0228, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.05663716814159292, |
|
"grad_norm": 0.5984897017478943, |
|
"learning_rate": 0.0004718870234347674, |
|
"loss": 0.9532, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.056724571178848465, |
|
"grad_norm": 0.2719477117061615, |
|
"learning_rate": 0.00047184330185379505, |
|
"loss": 0.9909, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.05681197421610401, |
|
"grad_norm": 0.2690770626068115, |
|
"learning_rate": 0.00047179958027282264, |
|
"loss": 0.9754, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.05689937725335956, |
|
"grad_norm": 0.3287508189678192, |
|
"learning_rate": 0.0004717558586918503, |
|
"loss": 0.823, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.0569867802906151, |
|
"grad_norm": 0.6442591547966003, |
|
"learning_rate": 0.00047171213711087793, |
|
"loss": 1.1211, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.05707418332787064, |
|
"grad_norm": 0.3647923469543457, |
|
"learning_rate": 0.0004716684155299056, |
|
"loss": 0.8892, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.05716158636512619, |
|
"grad_norm": 0.3035934269428253, |
|
"learning_rate": 0.0004716246939489332, |
|
"loss": 0.9781, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.05724898940238173, |
|
"grad_norm": 0.2986050546169281, |
|
"learning_rate": 0.0004715809723679608, |
|
"loss": 0.873, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.05733639243963728, |
|
"grad_norm": 0.3101188540458679, |
|
"learning_rate": 0.00047153725078698846, |
|
"loss": 1.1788, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.057423795476892824, |
|
"grad_norm": 1.2602791786193848, |
|
"learning_rate": 0.0004714935292060161, |
|
"loss": 1.376, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.05751119851414837, |
|
"grad_norm": 0.374224454164505, |
|
"learning_rate": 0.00047144980762504375, |
|
"loss": 0.9379, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.05759860155140391, |
|
"grad_norm": 0.35825932025909424, |
|
"learning_rate": 0.0004714060860440714, |
|
"loss": 0.9601, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.057686004588659454, |
|
"grad_norm": 0.37547796964645386, |
|
"learning_rate": 0.000471362364463099, |
|
"loss": 1.5432, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.057773407625915, |
|
"grad_norm": 0.30925118923187256, |
|
"learning_rate": 0.00047131864288212663, |
|
"loss": 0.9129, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.057860810663170545, |
|
"grad_norm": 0.43315598368644714, |
|
"learning_rate": 0.0004712749213011542, |
|
"loss": 0.7993, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.05794821370042609, |
|
"grad_norm": 1.0459505319595337, |
|
"learning_rate": 0.0004712311997201819, |
|
"loss": 1.4232, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.058035616737681636, |
|
"grad_norm": 0.4363897740840912, |
|
"learning_rate": 0.0004711874781392095, |
|
"loss": 1.3812, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.05812301977493718, |
|
"grad_norm": 0.2475530058145523, |
|
"learning_rate": 0.00047114375655823716, |
|
"loss": 0.8574, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.05821042281219272, |
|
"grad_norm": 0.352760910987854, |
|
"learning_rate": 0.0004711000349772648, |
|
"loss": 1.1236, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.058297825849448266, |
|
"grad_norm": 0.5032192468643188, |
|
"learning_rate": 0.0004710563133962924, |
|
"loss": 1.1754, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.05838522888670381, |
|
"grad_norm": 0.35939404368400574, |
|
"learning_rate": 0.0004710125918153201, |
|
"loss": 0.963, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.05847263192395936, |
|
"grad_norm": 0.4467969834804535, |
|
"learning_rate": 0.0004709688702343477, |
|
"loss": 2.0293, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.0585600349612149, |
|
"grad_norm": 0.3420664966106415, |
|
"learning_rate": 0.00047092514865337533, |
|
"loss": 1.0342, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.05864743799847045, |
|
"grad_norm": 0.3728554844856262, |
|
"learning_rate": 0.0004708814270724029, |
|
"loss": 0.9747, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.058734841035725995, |
|
"grad_norm": 1.2405109405517578, |
|
"learning_rate": 0.00047083770549143057, |
|
"loss": 1.6034, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.05882224407298153, |
|
"grad_norm": 0.3643404543399811, |
|
"learning_rate": 0.0004707939839104582, |
|
"loss": 0.7948, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.05890964711023708, |
|
"grad_norm": 0.31262850761413574, |
|
"learning_rate": 0.00047075026232948586, |
|
"loss": 0.8154, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.058997050147492625, |
|
"grad_norm": 0.49073535203933716, |
|
"learning_rate": 0.0004707065407485135, |
|
"loss": 0.9082, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.05908445318474817, |
|
"grad_norm": 0.39412635564804077, |
|
"learning_rate": 0.0004706628191675411, |
|
"loss": 1.0025, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.059171856222003716, |
|
"grad_norm": 0.40831953287124634, |
|
"learning_rate": 0.00047061909758656874, |
|
"loss": 1.0005, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.05925925925925926, |
|
"grad_norm": 0.5391172766685486, |
|
"learning_rate": 0.0004705753760055964, |
|
"loss": 0.9031, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.05934666229651481, |
|
"grad_norm": 0.31176143884658813, |
|
"learning_rate": 0.000470531654424624, |
|
"loss": 0.9589, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.059434065333770346, |
|
"grad_norm": 0.4320748448371887, |
|
"learning_rate": 0.00047048793284365167, |
|
"loss": 1.0996, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.05952146837102589, |
|
"grad_norm": 0.4102902412414551, |
|
"learning_rate": 0.00047044421126267926, |
|
"loss": 2.0338, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.05960887140828144, |
|
"grad_norm": 0.36022135615348816, |
|
"learning_rate": 0.0004704004896817069, |
|
"loss": 0.9675, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.05969627444553698, |
|
"grad_norm": 0.34680843353271484, |
|
"learning_rate": 0.0004703567681007345, |
|
"loss": 0.8765, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.05978367748279253, |
|
"grad_norm": 0.29740166664123535, |
|
"learning_rate": 0.00047031304651976214, |
|
"loss": 1.0053, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.059871080520048074, |
|
"grad_norm": 0.31341496109962463, |
|
"learning_rate": 0.00047026932493878984, |
|
"loss": 1.0295, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.05995848355730361, |
|
"grad_norm": 2.076716184616089, |
|
"learning_rate": 0.00047022560335781743, |
|
"loss": 1.5646, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.06004588659455916, |
|
"grad_norm": 0.2896002531051636, |
|
"learning_rate": 0.0004701818817768451, |
|
"loss": 0.9136, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.060133289631814704, |
|
"grad_norm": 0.37143734097480774, |
|
"learning_rate": 0.00047013816019587267, |
|
"loss": 0.8871, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.06022069266907025, |
|
"grad_norm": 0.49429547786712646, |
|
"learning_rate": 0.0004700944386149003, |
|
"loss": 1.1602, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.060308095706325796, |
|
"grad_norm": 0.3905726671218872, |
|
"learning_rate": 0.0004700507170339279, |
|
"loss": 1.1543, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.06039549874358134, |
|
"grad_norm": 0.3924982249736786, |
|
"learning_rate": 0.0004700069954529556, |
|
"loss": 0.8275, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.06048290178083689, |
|
"grad_norm": 0.27903103828430176, |
|
"learning_rate": 0.00046996327387198325, |
|
"loss": 0.8494, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.060570304818092426, |
|
"grad_norm": 0.382907897233963, |
|
"learning_rate": 0.00046991955229101084, |
|
"loss": 0.9531, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.06065770785534797, |
|
"grad_norm": 0.37153640389442444, |
|
"learning_rate": 0.0004698758307100385, |
|
"loss": 0.9131, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.06074511089260352, |
|
"grad_norm": 0.3007877767086029, |
|
"learning_rate": 0.0004698321091290661, |
|
"loss": 0.9513, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.06083251392985906, |
|
"grad_norm": 0.2546001672744751, |
|
"learning_rate": 0.0004697883875480938, |
|
"loss": 0.944, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.06091991696711461, |
|
"grad_norm": 0.27665847539901733, |
|
"learning_rate": 0.0004697446659671214, |
|
"loss": 0.7422, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.061007320004370154, |
|
"grad_norm": 0.28401628136634827, |
|
"learning_rate": 0.000469700944386149, |
|
"loss": 0.8458, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.0610947230416257, |
|
"grad_norm": 0.5097898840904236, |
|
"learning_rate": 0.00046965722280517666, |
|
"loss": 1.0018, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.06118212607888124, |
|
"grad_norm": 0.44888317584991455, |
|
"learning_rate": 0.00046961350122420425, |
|
"loss": 1.1203, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.061269529116136784, |
|
"grad_norm": 0.25764307379722595, |
|
"learning_rate": 0.0004695697796432319, |
|
"loss": 1.0156, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.06135693215339233, |
|
"grad_norm": 0.31590837240219116, |
|
"learning_rate": 0.00046952605806225954, |
|
"loss": 0.8823, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.061444335190647875, |
|
"grad_norm": 0.6337835192680359, |
|
"learning_rate": 0.0004694823364812872, |
|
"loss": 1.1565, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.06153173822790342, |
|
"grad_norm": 0.34477898478507996, |
|
"learning_rate": 0.00046943861490031483, |
|
"loss": 0.7563, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.061619141265158967, |
|
"grad_norm": 0.39787057042121887, |
|
"learning_rate": 0.0004693948933193424, |
|
"loss": 0.9804, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.06170654430241451, |
|
"grad_norm": 0.28919321298599243, |
|
"learning_rate": 0.00046935117173837007, |
|
"loss": 1.0019, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.06179394733967005, |
|
"grad_norm": 0.25737130641937256, |
|
"learning_rate": 0.00046930745015739766, |
|
"loss": 0.8751, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.0618813503769256, |
|
"grad_norm": 0.2699412703514099, |
|
"learning_rate": 0.00046926372857642536, |
|
"loss": 0.8999, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.06196875341418114, |
|
"grad_norm": 0.2957920730113983, |
|
"learning_rate": 0.000469220006995453, |
|
"loss": 0.9083, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.06205615645143669, |
|
"grad_norm": 0.2826875150203705, |
|
"learning_rate": 0.0004691762854144806, |
|
"loss": 0.946, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.062143559488692234, |
|
"grad_norm": 0.29016223549842834, |
|
"learning_rate": 0.00046913256383350824, |
|
"loss": 0.8126, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.06223096252594778, |
|
"grad_norm": 0.3504863679409027, |
|
"learning_rate": 0.00046908884225253583, |
|
"loss": 0.9127, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.06231836556320332, |
|
"grad_norm": 0.2627776861190796, |
|
"learning_rate": 0.00046904512067156353, |
|
"loss": 0.9476, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.062405768600458864, |
|
"grad_norm": 0.3002050220966339, |
|
"learning_rate": 0.0004690013990905911, |
|
"loss": 0.9444, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.06249317163771441, |
|
"grad_norm": 0.8539018630981445, |
|
"learning_rate": 0.00046895767750961876, |
|
"loss": 0.8977, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.06258057467496995, |
|
"grad_norm": 0.25260186195373535, |
|
"learning_rate": 0.0004689139559286464, |
|
"loss": 0.9615, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.0626679777122255, |
|
"grad_norm": 0.25615084171295166, |
|
"learning_rate": 0.000468870234347674, |
|
"loss": 0.8912, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.06275538074948105, |
|
"grad_norm": 0.3263600170612335, |
|
"learning_rate": 0.00046882651276670164, |
|
"loss": 0.843, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.06284278378673659, |
|
"grad_norm": 0.5694889426231384, |
|
"learning_rate": 0.0004687827911857293, |
|
"loss": 1.1624, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.06293018682399214, |
|
"grad_norm": 0.3248819410800934, |
|
"learning_rate": 0.00046873906960475693, |
|
"loss": 0.9452, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.06301758986124768, |
|
"grad_norm": 0.40857037901878357, |
|
"learning_rate": 0.0004686953480237845, |
|
"loss": 0.9117, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.06310499289850323, |
|
"grad_norm": 0.3211118280887604, |
|
"learning_rate": 0.00046865162644281217, |
|
"loss": 0.794, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.06319239593575877, |
|
"grad_norm": 0.32386934757232666, |
|
"learning_rate": 0.0004686079048618398, |
|
"loss": 1.2288, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.0632797989730143, |
|
"grad_norm": 0.3044579029083252, |
|
"learning_rate": 0.00046856418328086746, |
|
"loss": 0.9187, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.06336720201026985, |
|
"grad_norm": 0.6175875067710876, |
|
"learning_rate": 0.0004685204616998951, |
|
"loss": 0.8695, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.0634546050475254, |
|
"grad_norm": 0.7931004166603088, |
|
"learning_rate": 0.0004684767401189227, |
|
"loss": 1.3616, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.06354200808478094, |
|
"grad_norm": 0.337348997592926, |
|
"learning_rate": 0.00046843301853795034, |
|
"loss": 0.8654, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.06362941112203649, |
|
"grad_norm": 0.4152870178222656, |
|
"learning_rate": 0.000468389296956978, |
|
"loss": 1.2349, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.06371681415929203, |
|
"grad_norm": 0.3474035859107971, |
|
"learning_rate": 0.0004683455753760056, |
|
"loss": 0.9225, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.06380421719654758, |
|
"grad_norm": 0.35225990414619446, |
|
"learning_rate": 0.0004683018537950333, |
|
"loss": 0.9248, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.06389162023380313, |
|
"grad_norm": 0.24920597672462463, |
|
"learning_rate": 0.00046825813221406087, |
|
"loss": 0.8138, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.06397902327105867, |
|
"grad_norm": 0.3522126376628876, |
|
"learning_rate": 0.0004682144106330885, |
|
"loss": 0.9314, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.06406642630831422, |
|
"grad_norm": 0.4510492980480194, |
|
"learning_rate": 0.0004681706890521161, |
|
"loss": 0.8733, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.06415382934556976, |
|
"grad_norm": 0.2538619935512543, |
|
"learning_rate": 0.00046812696747114375, |
|
"loss": 0.8893, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.06424123238282531, |
|
"grad_norm": 0.39753592014312744, |
|
"learning_rate": 0.0004680832458901714, |
|
"loss": 1.0493, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.06432863542008085, |
|
"grad_norm": 0.40073463320732117, |
|
"learning_rate": 0.00046803952430919904, |
|
"loss": 0.8895, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.06441603845733639, |
|
"grad_norm": 0.31110239028930664, |
|
"learning_rate": 0.0004679958027282267, |
|
"loss": 0.8689, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.06450344149459193, |
|
"grad_norm": 0.29956865310668945, |
|
"learning_rate": 0.0004679520811472543, |
|
"loss": 0.8385, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.06459084453184748, |
|
"grad_norm": 0.3735499382019043, |
|
"learning_rate": 0.0004679083595662819, |
|
"loss": 0.8552, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.06467824756910302, |
|
"grad_norm": 0.4668900966644287, |
|
"learning_rate": 0.0004678646379853095, |
|
"loss": 1.4957, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.06476565060635857, |
|
"grad_norm": 0.363799512386322, |
|
"learning_rate": 0.0004678209164043372, |
|
"loss": 1.0365, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.06485305364361411, |
|
"grad_norm": 0.3261052668094635, |
|
"learning_rate": 0.00046777719482336486, |
|
"loss": 0.8972, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.06494045668086966, |
|
"grad_norm": 0.27814945578575134, |
|
"learning_rate": 0.00046773347324239245, |
|
"loss": 0.8051, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.0650278597181252, |
|
"grad_norm": 0.37245509028434753, |
|
"learning_rate": 0.0004676897516614201, |
|
"loss": 0.9421, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.06511526275538075, |
|
"grad_norm": 0.2978193163871765, |
|
"learning_rate": 0.0004676460300804477, |
|
"loss": 0.8464, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.0652026657926363, |
|
"grad_norm": 0.41827908158302307, |
|
"learning_rate": 0.00046760230849947533, |
|
"loss": 1.3154, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.06529006882989184, |
|
"grad_norm": 0.28153055906295776, |
|
"learning_rate": 0.000467558586918503, |
|
"loss": 0.812, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.06537747186714739, |
|
"grad_norm": 0.3568740487098694, |
|
"learning_rate": 0.0004675148653375306, |
|
"loss": 0.9333, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.06546487490440293, |
|
"grad_norm": 0.5805249810218811, |
|
"learning_rate": 0.00046747114375655826, |
|
"loss": 1.3821, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.06555227794165848, |
|
"grad_norm": 0.30053797364234924, |
|
"learning_rate": 0.00046742742217558585, |
|
"loss": 0.9358, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.06563968097891401, |
|
"grad_norm": 0.3179711699485779, |
|
"learning_rate": 0.0004673837005946135, |
|
"loss": 0.9094, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.06572708401616956, |
|
"grad_norm": 0.2717473804950714, |
|
"learning_rate": 0.00046733997901364114, |
|
"loss": 0.7255, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.0658144870534251, |
|
"grad_norm": 0.24072229862213135, |
|
"learning_rate": 0.0004672962574326688, |
|
"loss": 1.1008, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.06590189009068065, |
|
"grad_norm": 0.3099074363708496, |
|
"learning_rate": 0.00046725253585169643, |
|
"loss": 0.8751, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.0659892931279362, |
|
"grad_norm": 0.31873032450675964, |
|
"learning_rate": 0.000467208814270724, |
|
"loss": 0.8932, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.06607669616519174, |
|
"grad_norm": 0.31468328833580017, |
|
"learning_rate": 0.00046716509268975167, |
|
"loss": 0.8792, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.06616409920244729, |
|
"grad_norm": 0.35658881068229675, |
|
"learning_rate": 0.00046712137110877926, |
|
"loss": 0.8955, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.06625150223970283, |
|
"grad_norm": 0.3107976019382477, |
|
"learning_rate": 0.00046707764952780696, |
|
"loss": 0.9174, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.06633890527695838, |
|
"grad_norm": 0.2277815192937851, |
|
"learning_rate": 0.0004670339279468346, |
|
"loss": 0.7611, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.06642630831421392, |
|
"grad_norm": 0.25561246275901794, |
|
"learning_rate": 0.0004669902063658622, |
|
"loss": 0.8041, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.06651371135146947, |
|
"grad_norm": 0.2826947271823883, |
|
"learning_rate": 0.00046694648478488984, |
|
"loss": 0.7732, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.06660111438872501, |
|
"grad_norm": 0.2515583038330078, |
|
"learning_rate": 0.00046690276320391743, |
|
"loss": 1.0321, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.06668851742598056, |
|
"grad_norm": 0.26518338918685913, |
|
"learning_rate": 0.0004668590416229451, |
|
"loss": 1.1347, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.06677592046323609, |
|
"grad_norm": 0.2963607609272003, |
|
"learning_rate": 0.0004668153200419727, |
|
"loss": 0.9982, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.06686332350049164, |
|
"grad_norm": 0.2876517176628113, |
|
"learning_rate": 0.00046677159846100037, |
|
"loss": 0.6918, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.06695072653774718, |
|
"grad_norm": 0.3714672923088074, |
|
"learning_rate": 0.000466727876880028, |
|
"loss": 0.9023, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.06703812957500273, |
|
"grad_norm": 0.3568623960018158, |
|
"learning_rate": 0.0004666841552990556, |
|
"loss": 0.8378, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.06712553261225827, |
|
"grad_norm": 0.4770544469356537, |
|
"learning_rate": 0.00046664043371808325, |
|
"loss": 1.0266, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.06721293564951382, |
|
"grad_norm": 0.2760886549949646, |
|
"learning_rate": 0.0004665967121371109, |
|
"loss": 0.8276, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.06730033868676936, |
|
"grad_norm": 0.31360816955566406, |
|
"learning_rate": 0.00046655299055613854, |
|
"loss": 0.8646, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.06738774172402491, |
|
"grad_norm": 0.3075156509876251, |
|
"learning_rate": 0.00046650926897516613, |
|
"loss": 1.1144, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.06747514476128046, |
|
"grad_norm": 0.3104390501976013, |
|
"learning_rate": 0.0004664655473941938, |
|
"loss": 0.8923, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.067562547798536, |
|
"grad_norm": 0.3964294493198395, |
|
"learning_rate": 0.0004664218258132214, |
|
"loss": 1.0969, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.06764995083579155, |
|
"grad_norm": 0.3698040843009949, |
|
"learning_rate": 0.000466378104232249, |
|
"loss": 0.9078, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.0677373538730471, |
|
"grad_norm": 0.28510838747024536, |
|
"learning_rate": 0.0004663343826512767, |
|
"loss": 1.0075, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.06782475691030264, |
|
"grad_norm": 0.25500908493995667, |
|
"learning_rate": 0.0004662906610703043, |
|
"loss": 0.8457, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.06791215994755818, |
|
"grad_norm": 0.27927708625793457, |
|
"learning_rate": 0.00046624693948933195, |
|
"loss": 1.01, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.06799956298481372, |
|
"grad_norm": 0.2683468461036682, |
|
"learning_rate": 0.0004662032179083596, |
|
"loss": 1.0491, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.06808696602206926, |
|
"grad_norm": 0.31843262910842896, |
|
"learning_rate": 0.0004661594963273872, |
|
"loss": 0.9467, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.06817436905932481, |
|
"grad_norm": 0.27564141154289246, |
|
"learning_rate": 0.0004661157747464149, |
|
"loss": 0.9487, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.06826177209658035, |
|
"grad_norm": 0.2407764047384262, |
|
"learning_rate": 0.00046607205316544247, |
|
"loss": 0.8939, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.0683491751338359, |
|
"grad_norm": 0.3025217652320862, |
|
"learning_rate": 0.0004660283315844701, |
|
"loss": 0.9859, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.06843657817109144, |
|
"grad_norm": 0.2979051470756531, |
|
"learning_rate": 0.0004659846100034977, |
|
"loss": 0.9136, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.06852398120834699, |
|
"grad_norm": 0.28788650035858154, |
|
"learning_rate": 0.00046594088842252535, |
|
"loss": 0.9734, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.06861138424560254, |
|
"grad_norm": 0.2947753667831421, |
|
"learning_rate": 0.000465897166841553, |
|
"loss": 0.735, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.06869878728285808, |
|
"grad_norm": 0.3203105032444, |
|
"learning_rate": 0.00046585344526058064, |
|
"loss": 0.8992, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.06878619032011363, |
|
"grad_norm": 0.2638401985168457, |
|
"learning_rate": 0.0004658097236796083, |
|
"loss": 0.8669, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.06887359335736917, |
|
"grad_norm": 0.26712629199028015, |
|
"learning_rate": 0.0004657660020986359, |
|
"loss": 0.9765, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.06896099639462472, |
|
"grad_norm": 0.4055823087692261, |
|
"learning_rate": 0.0004657222805176635, |
|
"loss": 0.8117, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.06904839943188026, |
|
"grad_norm": 0.2518852651119232, |
|
"learning_rate": 0.00046567855893669117, |
|
"loss": 0.9517, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.0691358024691358, |
|
"grad_norm": 0.27589836716651917, |
|
"learning_rate": 0.00046563483735571876, |
|
"loss": 0.7855, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.06922320550639134, |
|
"grad_norm": 0.2739314138889313, |
|
"learning_rate": 0.00046559111577474646, |
|
"loss": 0.8862, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.06931060854364689, |
|
"grad_norm": 0.3271756172180176, |
|
"learning_rate": 0.00046554739419377405, |
|
"loss": 1.2893, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.06939801158090243, |
|
"grad_norm": 0.27038949728012085, |
|
"learning_rate": 0.0004655036726128017, |
|
"loss": 0.8059, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.06948541461815798, |
|
"grad_norm": 0.2605447471141815, |
|
"learning_rate": 0.0004654599510318293, |
|
"loss": 0.8816, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.06957281765541352, |
|
"grad_norm": 0.2714409828186035, |
|
"learning_rate": 0.00046541622945085693, |
|
"loss": 0.9307, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.06966022069266907, |
|
"grad_norm": 0.2455201894044876, |
|
"learning_rate": 0.00046537250786988463, |
|
"loss": 0.8321, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.06974762372992462, |
|
"grad_norm": 0.29036253690719604, |
|
"learning_rate": 0.0004653287862889122, |
|
"loss": 0.8605, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.06983502676718016, |
|
"grad_norm": 0.24069538712501526, |
|
"learning_rate": 0.00046528506470793987, |
|
"loss": 1.0819, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.0699224298044357, |
|
"grad_norm": 0.254304975271225, |
|
"learning_rate": 0.00046524134312696746, |
|
"loss": 0.7388, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.07000983284169125, |
|
"grad_norm": 0.27309149503707886, |
|
"learning_rate": 0.0004651976215459951, |
|
"loss": 0.7796, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.0700972358789468, |
|
"grad_norm": 0.26903948187828064, |
|
"learning_rate": 0.0004651538999650227, |
|
"loss": 1.0103, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.07018463891620234, |
|
"grad_norm": 0.2526533901691437, |
|
"learning_rate": 0.0004651101783840504, |
|
"loss": 0.8566, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.07027204195345789, |
|
"grad_norm": 0.2822379469871521, |
|
"learning_rate": 0.00046506645680307804, |
|
"loss": 0.9441, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.07035944499071342, |
|
"grad_norm": 0.27883851528167725, |
|
"learning_rate": 0.00046502273522210563, |
|
"loss": 0.9006, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.07044684802796897, |
|
"grad_norm": 0.23839306831359863, |
|
"learning_rate": 0.0004649790136411333, |
|
"loss": 0.8387, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.07053425106522451, |
|
"grad_norm": 0.2352200597524643, |
|
"learning_rate": 0.00046493529206016087, |
|
"loss": 0.8228, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.07062165410248006, |
|
"grad_norm": 0.31958913803100586, |
|
"learning_rate": 0.00046489157047918857, |
|
"loss": 1.0312, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.0707090571397356, |
|
"grad_norm": 0.286045640707016, |
|
"learning_rate": 0.0004648478488982162, |
|
"loss": 0.8427, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.07079646017699115, |
|
"grad_norm": 0.24101607501506805, |
|
"learning_rate": 0.0004648041273172438, |
|
"loss": 0.9986, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.0708838632142467, |
|
"grad_norm": 0.28324073553085327, |
|
"learning_rate": 0.00046476040573627145, |
|
"loss": 0.778, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.07097126625150224, |
|
"grad_norm": 0.30368572473526, |
|
"learning_rate": 0.00046471668415529904, |
|
"loss": 0.9543, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.07105866928875779, |
|
"grad_norm": 0.3159104585647583, |
|
"learning_rate": 0.0004646729625743267, |
|
"loss": 0.9481, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.07114607232601333, |
|
"grad_norm": 0.2856074869632721, |
|
"learning_rate": 0.00046462924099335433, |
|
"loss": 1.0117, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.07123347536326888, |
|
"grad_norm": 0.32605329155921936, |
|
"learning_rate": 0.00046458551941238197, |
|
"loss": 0.8451, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.07132087840052442, |
|
"grad_norm": 0.22008907794952393, |
|
"learning_rate": 0.0004645417978314096, |
|
"loss": 0.8965, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.07140828143777997, |
|
"grad_norm": 0.26317551732063293, |
|
"learning_rate": 0.0004644980762504372, |
|
"loss": 0.8644, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.0714956844750355, |
|
"grad_norm": 0.22049389779567719, |
|
"learning_rate": 0.00046445435466946485, |
|
"loss": 0.8144, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.07158308751229105, |
|
"grad_norm": 0.2786102890968323, |
|
"learning_rate": 0.00046441063308849244, |
|
"loss": 0.8841, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.07167049054954659, |
|
"grad_norm": 0.31796136498451233, |
|
"learning_rate": 0.00046436691150752014, |
|
"loss": 1.0665, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.07175789358680214, |
|
"grad_norm": 0.29958993196487427, |
|
"learning_rate": 0.0004643231899265478, |
|
"loss": 0.8789, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.07184529662405768, |
|
"grad_norm": 0.2706652283668518, |
|
"learning_rate": 0.0004642794683455754, |
|
"loss": 0.8721, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.07193269966131323, |
|
"grad_norm": 0.22537319362163544, |
|
"learning_rate": 0.000464235746764603, |
|
"loss": 0.9403, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.07202010269856877, |
|
"grad_norm": 0.34331005811691284, |
|
"learning_rate": 0.0004641920251836306, |
|
"loss": 1.1497, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.07210750573582432, |
|
"grad_norm": 0.25914907455444336, |
|
"learning_rate": 0.0004641483036026583, |
|
"loss": 1.1589, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.07219490877307987, |
|
"grad_norm": 0.2956130802631378, |
|
"learning_rate": 0.0004641045820216859, |
|
"loss": 0.8587, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.07228231181033541, |
|
"grad_norm": 0.30292391777038574, |
|
"learning_rate": 0.00046406086044071355, |
|
"loss": 0.9224, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.07236971484759096, |
|
"grad_norm": 0.3101223409175873, |
|
"learning_rate": 0.0004640171388597412, |
|
"loss": 0.9115, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.0724571178848465, |
|
"grad_norm": 0.2720979154109955, |
|
"learning_rate": 0.0004639734172787688, |
|
"loss": 0.8112, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.07254452092210205, |
|
"grad_norm": 0.2774461507797241, |
|
"learning_rate": 0.00046392969569779643, |
|
"loss": 0.9776, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.0726319239593576, |
|
"grad_norm": 0.25150200724601746, |
|
"learning_rate": 0.0004638859741168241, |
|
"loss": 1.0255, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.07271932699661313, |
|
"grad_norm": 0.2526938319206238, |
|
"learning_rate": 0.0004638422525358517, |
|
"loss": 0.7242, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.07280673003386867, |
|
"grad_norm": 0.29642441868782043, |
|
"learning_rate": 0.0004637985309548793, |
|
"loss": 1.0944, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.07289413307112422, |
|
"grad_norm": 0.250478595495224, |
|
"learning_rate": 0.00046375480937390696, |
|
"loss": 0.8324, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.07298153610837976, |
|
"grad_norm": 0.28843697905540466, |
|
"learning_rate": 0.0004637110877929346, |
|
"loss": 0.8646, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.07306893914563531, |
|
"grad_norm": 0.22244645655155182, |
|
"learning_rate": 0.00046366736621196225, |
|
"loss": 0.7966, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.07315634218289085, |
|
"grad_norm": 0.2418157458305359, |
|
"learning_rate": 0.0004636236446309899, |
|
"loss": 0.8101, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.0732437452201464, |
|
"grad_norm": 0.2781657874584198, |
|
"learning_rate": 0.0004635799230500175, |
|
"loss": 0.9902, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.07333114825740195, |
|
"grad_norm": 0.24249030649662018, |
|
"learning_rate": 0.00046353620146904513, |
|
"loss": 0.7445, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.07341855129465749, |
|
"grad_norm": 0.23980437219142914, |
|
"learning_rate": 0.0004634924798880728, |
|
"loss": 0.8168, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.07350595433191304, |
|
"grad_norm": 0.3362947106361389, |
|
"learning_rate": 0.00046344875830710037, |
|
"loss": 1.1176, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.07359335736916858, |
|
"grad_norm": 0.23380422592163086, |
|
"learning_rate": 0.00046340503672612807, |
|
"loss": 0.8311, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.07368076040642413, |
|
"grad_norm": 0.2908138632774353, |
|
"learning_rate": 0.00046336131514515566, |
|
"loss": 0.8315, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.07376816344367967, |
|
"grad_norm": 0.2556897699832916, |
|
"learning_rate": 0.0004633175935641833, |
|
"loss": 0.939, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.0738555664809352, |
|
"grad_norm": 0.3416728079319, |
|
"learning_rate": 0.0004632738719832109, |
|
"loss": 0.746, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.07394296951819075, |
|
"grad_norm": 0.2219434678554535, |
|
"learning_rate": 0.00046323015040223854, |
|
"loss": 1.0259, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.0740303725554463, |
|
"grad_norm": 0.3327368497848511, |
|
"learning_rate": 0.0004631864288212662, |
|
"loss": 1.4831, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.07411777559270184, |
|
"grad_norm": 0.28128185868263245, |
|
"learning_rate": 0.00046314270724029383, |
|
"loss": 0.9478, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.07420517862995739, |
|
"grad_norm": 0.29582032561302185, |
|
"learning_rate": 0.00046309898565932147, |
|
"loss": 0.9397, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.07429258166721293, |
|
"grad_norm": 0.26146262884140015, |
|
"learning_rate": 0.00046305526407834906, |
|
"loss": 0.6904, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.07437998470446848, |
|
"grad_norm": 0.3188638389110565, |
|
"learning_rate": 0.0004630115424973767, |
|
"loss": 0.7268, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.07446738774172403, |
|
"grad_norm": 0.2691085934638977, |
|
"learning_rate": 0.0004629678209164043, |
|
"loss": 0.7836, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.07455479077897957, |
|
"grad_norm": 0.2730037569999695, |
|
"learning_rate": 0.000462924099335432, |
|
"loss": 0.8207, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.07464219381623512, |
|
"grad_norm": 0.23849952220916748, |
|
"learning_rate": 0.00046288037775445964, |
|
"loss": 0.9859, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.07472959685349066, |
|
"grad_norm": 0.24940194189548492, |
|
"learning_rate": 0.00046283665617348723, |
|
"loss": 0.7821, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.07481699989074621, |
|
"grad_norm": 0.23495396971702576, |
|
"learning_rate": 0.0004627929345925149, |
|
"loss": 0.8847, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.07490440292800175, |
|
"grad_norm": 0.25201091170310974, |
|
"learning_rate": 0.00046274921301154247, |
|
"loss": 0.8386, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.0749918059652573, |
|
"grad_norm": 0.25054988265037537, |
|
"learning_rate": 0.0004627054914305701, |
|
"loss": 0.9939, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.07507920900251283, |
|
"grad_norm": 0.39931726455688477, |
|
"learning_rate": 0.0004626617698495978, |
|
"loss": 1.1039, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.07516661203976838, |
|
"grad_norm": 0.2789982855319977, |
|
"learning_rate": 0.0004626180482686254, |
|
"loss": 1.1707, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.07525401507702392, |
|
"grad_norm": 0.282528817653656, |
|
"learning_rate": 0.00046257432668765305, |
|
"loss": 0.8738, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.07534141811427947, |
|
"grad_norm": 0.2707865536212921, |
|
"learning_rate": 0.00046253060510668064, |
|
"loss": 0.832, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.07542882115153501, |
|
"grad_norm": 0.19732601940631866, |
|
"learning_rate": 0.0004624868835257083, |
|
"loss": 0.8948, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.07551622418879056, |
|
"grad_norm": 0.2605394721031189, |
|
"learning_rate": 0.00046244316194473593, |
|
"loss": 0.7346, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.0756036272260461, |
|
"grad_norm": 0.26202288269996643, |
|
"learning_rate": 0.0004623994403637636, |
|
"loss": 0.8521, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.07569103026330165, |
|
"grad_norm": 0.3473947048187256, |
|
"learning_rate": 0.0004623557187827912, |
|
"loss": 1.043, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.0757784333005572, |
|
"grad_norm": 0.7824636697769165, |
|
"learning_rate": 0.0004623119972018188, |
|
"loss": 1.2121, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.07586583633781274, |
|
"grad_norm": 0.26076897978782654, |
|
"learning_rate": 0.00046226827562084646, |
|
"loss": 0.8669, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.07595323937506829, |
|
"grad_norm": 0.3360956013202667, |
|
"learning_rate": 0.00046222455403987405, |
|
"loss": 0.8806, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.07604064241232383, |
|
"grad_norm": 0.27572354674339294, |
|
"learning_rate": 0.00046218083245890175, |
|
"loss": 0.8105, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.07612804544957938, |
|
"grad_norm": 0.22802734375, |
|
"learning_rate": 0.0004621371108779294, |
|
"loss": 0.6879, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.07621544848683491, |
|
"grad_norm": 0.31544265151023865, |
|
"learning_rate": 0.000462093389296957, |
|
"loss": 0.835, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.07630285152409046, |
|
"grad_norm": 0.3530902564525604, |
|
"learning_rate": 0.00046204966771598463, |
|
"loss": 0.7543, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.076390254561346, |
|
"grad_norm": 0.28108978271484375, |
|
"learning_rate": 0.0004620059461350122, |
|
"loss": 0.9433, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.07647765759860155, |
|
"grad_norm": 0.2918491065502167, |
|
"learning_rate": 0.00046196222455403987, |
|
"loss": 0.9016, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.0765650606358571, |
|
"grad_norm": 0.3130475580692291, |
|
"learning_rate": 0.0004619185029730675, |
|
"loss": 0.8612, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.07665246367311264, |
|
"grad_norm": 0.2697352468967438, |
|
"learning_rate": 0.00046187478139209516, |
|
"loss": 1.0324, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.07673986671036818, |
|
"grad_norm": 0.3534733057022095, |
|
"learning_rate": 0.0004618310598111228, |
|
"loss": 0.7769, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.07682726974762373, |
|
"grad_norm": 0.46239951252937317, |
|
"learning_rate": 0.0004617873382301504, |
|
"loss": 0.8155, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.07691467278487928, |
|
"grad_norm": 0.2869885265827179, |
|
"learning_rate": 0.00046174361664917804, |
|
"loss": 0.8088, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.07700207582213482, |
|
"grad_norm": 0.544746458530426, |
|
"learning_rate": 0.0004616998950682057, |
|
"loss": 1.0332, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.07708947885939037, |
|
"grad_norm": 0.28001531958580017, |
|
"learning_rate": 0.0004616561734872333, |
|
"loss": 0.8363, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.07717688189664591, |
|
"grad_norm": 0.244185671210289, |
|
"learning_rate": 0.0004616124519062609, |
|
"loss": 0.8611, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.07726428493390146, |
|
"grad_norm": 0.3561322093009949, |
|
"learning_rate": 0.00046156873032528856, |
|
"loss": 0.9298, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.077351687971157, |
|
"grad_norm": 0.2852579355239868, |
|
"learning_rate": 0.0004615250087443162, |
|
"loss": 0.9415, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.07743909100841254, |
|
"grad_norm": 0.3458700180053711, |
|
"learning_rate": 0.0004614812871633438, |
|
"loss": 0.7855, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.07752649404566808, |
|
"grad_norm": 0.33211758732795715, |
|
"learning_rate": 0.0004614375655823715, |
|
"loss": 0.7652, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.07761389708292363, |
|
"grad_norm": 0.2643268406391144, |
|
"learning_rate": 0.0004613938440013991, |
|
"loss": 0.813, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.07770130012017917, |
|
"grad_norm": 0.26717138290405273, |
|
"learning_rate": 0.00046135012242042673, |
|
"loss": 0.673, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.07778870315743472, |
|
"grad_norm": 0.2716834843158722, |
|
"learning_rate": 0.0004613064008394544, |
|
"loss": 1.0343, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.07787610619469026, |
|
"grad_norm": 0.4963998794555664, |
|
"learning_rate": 0.00046126267925848197, |
|
"loss": 1.3856, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.07796350923194581, |
|
"grad_norm": 0.3124493360519409, |
|
"learning_rate": 0.00046121895767750967, |
|
"loss": 1.0451, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.07805091226920136, |
|
"grad_norm": 0.5837683081626892, |
|
"learning_rate": 0.00046117523609653726, |
|
"loss": 1.0501, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.0781383153064569, |
|
"grad_norm": 0.31839168071746826, |
|
"learning_rate": 0.0004611315145155649, |
|
"loss": 0.9903, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.07822571834371245, |
|
"grad_norm": 0.5437602996826172, |
|
"learning_rate": 0.0004610877929345925, |
|
"loss": 1.0399, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.07831312138096799, |
|
"grad_norm": 0.3862234354019165, |
|
"learning_rate": 0.00046104407135362014, |
|
"loss": 1.0355, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.07840052441822354, |
|
"grad_norm": 0.7273140549659729, |
|
"learning_rate": 0.0004610003497726478, |
|
"loss": 0.9339, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.07848792745547908, |
|
"grad_norm": 0.31776732206344604, |
|
"learning_rate": 0.00046095662819167543, |
|
"loss": 1.405, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.07857533049273462, |
|
"grad_norm": 0.33975592255592346, |
|
"learning_rate": 0.0004609129066107031, |
|
"loss": 0.9493, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.07866273352999016, |
|
"grad_norm": 0.3096635937690735, |
|
"learning_rate": 0.00046086918502973067, |
|
"loss": 0.8949, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.07875013656724571, |
|
"grad_norm": 0.22939470410346985, |
|
"learning_rate": 0.0004608254634487583, |
|
"loss": 1.0486, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.07883753960450125, |
|
"grad_norm": 0.27594518661499023, |
|
"learning_rate": 0.0004607817418677859, |
|
"loss": 0.7005, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.0789249426417568, |
|
"grad_norm": 0.38164445757865906, |
|
"learning_rate": 0.0004607380202868136, |
|
"loss": 1.2305, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.07901234567901234, |
|
"grad_norm": 0.26803824305534363, |
|
"learning_rate": 0.00046069429870584125, |
|
"loss": 0.824, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.07909974871626789, |
|
"grad_norm": 0.3049018085002899, |
|
"learning_rate": 0.00046065057712486884, |
|
"loss": 0.8824, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.07918715175352344, |
|
"grad_norm": 0.30478763580322266, |
|
"learning_rate": 0.0004606068555438965, |
|
"loss": 0.9809, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.07927455479077898, |
|
"grad_norm": 0.276212602853775, |
|
"learning_rate": 0.0004605631339629241, |
|
"loss": 0.8166, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.07936195782803453, |
|
"grad_norm": 0.8416312336921692, |
|
"learning_rate": 0.0004605194123819517, |
|
"loss": 1.5118, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.07944936086529007, |
|
"grad_norm": 0.3249102532863617, |
|
"learning_rate": 0.0004604756908009794, |
|
"loss": 0.905, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.07953676390254562, |
|
"grad_norm": 0.3695957064628601, |
|
"learning_rate": 0.000460431969220007, |
|
"loss": 0.809, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.07962416693980116, |
|
"grad_norm": 0.2533642649650574, |
|
"learning_rate": 0.00046038824763903466, |
|
"loss": 0.8706, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.07971156997705671, |
|
"grad_norm": 1.895600438117981, |
|
"learning_rate": 0.00046034452605806225, |
|
"loss": 0.906, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.07979897301431224, |
|
"grad_norm": 0.3041301369667053, |
|
"learning_rate": 0.0004603008044770899, |
|
"loss": 0.8028, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.07988637605156779, |
|
"grad_norm": 0.39580902457237244, |
|
"learning_rate": 0.0004602570828961175, |
|
"loss": 0.8785, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.07997377908882333, |
|
"grad_norm": 0.3260571360588074, |
|
"learning_rate": 0.0004602133613151452, |
|
"loss": 0.908, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.08006118212607888, |
|
"grad_norm": 0.3628925681114197, |
|
"learning_rate": 0.0004601696397341728, |
|
"loss": 0.8364, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.08014858516333442, |
|
"grad_norm": 0.4076823890209198, |
|
"learning_rate": 0.0004601259181532004, |
|
"loss": 1.93, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.08023598820058997, |
|
"grad_norm": 0.6916859149932861, |
|
"learning_rate": 0.00046008219657222806, |
|
"loss": 1.1446, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.08032339123784552, |
|
"grad_norm": 1.301007866859436, |
|
"learning_rate": 0.00046003847499125565, |
|
"loss": 1.117, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.08041079427510106, |
|
"grad_norm": 2.9351885318756104, |
|
"learning_rate": 0.00045999475341028335, |
|
"loss": 1.8147, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.0804981973123566, |
|
"grad_norm": 3.5363566875457764, |
|
"learning_rate": 0.000459951031829311, |
|
"loss": 1.4487, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.08058560034961215, |
|
"grad_norm": 1.0070669651031494, |
|
"learning_rate": 0.0004599073102483386, |
|
"loss": 0.9901, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.0806730033868677, |
|
"grad_norm": 0.42096540331840515, |
|
"learning_rate": 0.00045986358866736623, |
|
"loss": 0.8757, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.08076040642412324, |
|
"grad_norm": 0.7990926504135132, |
|
"learning_rate": 0.0004598198670863938, |
|
"loss": 1.1409, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.08084780946137879, |
|
"grad_norm": 0.6880809664726257, |
|
"learning_rate": 0.00045977614550542147, |
|
"loss": 0.9678, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.08093521249863432, |
|
"grad_norm": 0.7126320004463196, |
|
"learning_rate": 0.0004597324239244491, |
|
"loss": 0.8932, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.08102261553588987, |
|
"grad_norm": 1.2712117433547974, |
|
"learning_rate": 0.00045968870234347676, |
|
"loss": 1.7774, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.08111001857314541, |
|
"grad_norm": 1.9836965799331665, |
|
"learning_rate": 0.0004596449807625044, |
|
"loss": 1.1419, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.08119742161040096, |
|
"grad_norm": 0.6894294023513794, |
|
"learning_rate": 0.000459601259181532, |
|
"loss": 0.9666, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.0812848246476565, |
|
"grad_norm": 2.2530252933502197, |
|
"learning_rate": 0.00045955753760055964, |
|
"loss": 1.5093, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.08137222768491205, |
|
"grad_norm": 14.37427043914795, |
|
"learning_rate": 0.0004595138160195873, |
|
"loss": 1.3134, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.0814596307221676, |
|
"grad_norm": 3.392730236053467, |
|
"learning_rate": 0.00045947009443861493, |
|
"loss": 1.0883, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.08154703375942314, |
|
"grad_norm": 1.097122073173523, |
|
"learning_rate": 0.0004594263728576425, |
|
"loss": 1.0587, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.08163443679667869, |
|
"grad_norm": 0.7270208597183228, |
|
"learning_rate": 0.00045938265127667017, |
|
"loss": 1.1386, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.08172183983393423, |
|
"grad_norm": 3.5602266788482666, |
|
"learning_rate": 0.0004593389296956978, |
|
"loss": 1.1204, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.08180924287118978, |
|
"grad_norm": 1.953038215637207, |
|
"learning_rate": 0.0004592952081147254, |
|
"loss": 1.2367, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.08189664590844532, |
|
"grad_norm": 1.90444016456604, |
|
"learning_rate": 0.0004592514865337531, |
|
"loss": 1.1981, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.08198404894570087, |
|
"grad_norm": 9.526935577392578, |
|
"learning_rate": 0.0004592077649527807, |
|
"loss": 1.4363, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.08207145198295641, |
|
"grad_norm": 5.361575603485107, |
|
"learning_rate": 0.00045916404337180834, |
|
"loss": 1.4758, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.08215885502021195, |
|
"grad_norm": 49.836151123046875, |
|
"learning_rate": 0.000459120321790836, |
|
"loss": 3.2272, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.08224625805746749, |
|
"grad_norm": 6.1282877922058105, |
|
"learning_rate": 0.0004590766002098636, |
|
"loss": 2.0861, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.08233366109472304, |
|
"grad_norm": 9.320550918579102, |
|
"learning_rate": 0.0004590328786288912, |
|
"loss": 2.0217, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.08242106413197858, |
|
"grad_norm": 3.1131937503814697, |
|
"learning_rate": 0.00045898915704791887, |
|
"loss": 1.4848, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.08250846716923413, |
|
"grad_norm": 51.67763137817383, |
|
"learning_rate": 0.0004589454354669465, |
|
"loss": 3.2458, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.08259587020648967, |
|
"grad_norm": 7.247336387634277, |
|
"learning_rate": 0.0004589017138859741, |
|
"loss": 2.6957, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.08268327324374522, |
|
"grad_norm": 3.2208497524261475, |
|
"learning_rate": 0.00045885799230500175, |
|
"loss": 1.9059, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.08277067628100077, |
|
"grad_norm": 78.9037094116211, |
|
"learning_rate": 0.0004588142707240294, |
|
"loss": 5.5682, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.08285807931825631, |
|
"grad_norm": 4.832467079162598, |
|
"learning_rate": 0.00045877054914305704, |
|
"loss": 1.6731, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.08294548235551186, |
|
"grad_norm": 7.1308674812316895, |
|
"learning_rate": 0.0004587268275620847, |
|
"loss": 2.2772, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.0830328853927674, |
|
"grad_norm": 4.155465126037598, |
|
"learning_rate": 0.00045868310598111227, |
|
"loss": 2.2794, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.08312028843002295, |
|
"grad_norm": 51.88750457763672, |
|
"learning_rate": 0.0004586393844001399, |
|
"loss": 4.0774, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.0832076914672785, |
|
"grad_norm": 2.969212532043457, |
|
"learning_rate": 0.00045859566281916756, |
|
"loss": 1.9225, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.08329509450453403, |
|
"grad_norm": 3.454350233078003, |
|
"learning_rate": 0.00045855194123819515, |
|
"loss": 1.6258, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.08338249754178957, |
|
"grad_norm": 46.18666458129883, |
|
"learning_rate": 0.00045850821965722285, |
|
"loss": 1.7273, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.08346990057904512, |
|
"grad_norm": 13.307456016540527, |
|
"learning_rate": 0.00045846449807625044, |
|
"loss": 2.1933, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.08355730361630066, |
|
"grad_norm": 8.283126831054688, |
|
"learning_rate": 0.0004584207764952781, |
|
"loss": 2.499, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.08364470665355621, |
|
"grad_norm": 6.291905403137207, |
|
"learning_rate": 0.0004583770549143057, |
|
"loss": 1.8399, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.08373210969081175, |
|
"grad_norm": 19.28121566772461, |
|
"learning_rate": 0.0004583333333333333, |
|
"loss": 2.6815, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.0838195127280673, |
|
"grad_norm": 9.661205291748047, |
|
"learning_rate": 0.000458289611752361, |
|
"loss": 2.3274, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.08390691576532285, |
|
"grad_norm": 15.012873649597168, |
|
"learning_rate": 0.0004582458901713886, |
|
"loss": 2.1736, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.08399431880257839, |
|
"grad_norm": 10.02956485748291, |
|
"learning_rate": 0.00045820216859041626, |
|
"loss": 2.4168, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.08408172183983394, |
|
"grad_norm": 2.234221935272217, |
|
"learning_rate": 0.00045815844700944385, |
|
"loss": 1.7808, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.08416912487708948, |
|
"grad_norm": 7.04872989654541, |
|
"learning_rate": 0.0004581147254284715, |
|
"loss": 2.1456, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.08425652791434503, |
|
"grad_norm": 3.498042106628418, |
|
"learning_rate": 0.0004580710038474991, |
|
"loss": 1.6212, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.08434393095160057, |
|
"grad_norm": 2.731658935546875, |
|
"learning_rate": 0.0004580272822665268, |
|
"loss": 1.6905, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.08443133398885612, |
|
"grad_norm": 4.867488384246826, |
|
"learning_rate": 0.00045798356068555443, |
|
"loss": 1.4945, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.08451873702611165, |
|
"grad_norm": 10.225361824035645, |
|
"learning_rate": 0.000457939839104582, |
|
"loss": 2.4163, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.0846061400633672, |
|
"grad_norm": 2.749767780303955, |
|
"learning_rate": 0.00045789611752360967, |
|
"loss": 1.49, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.08469354310062274, |
|
"grad_norm": 14.945262908935547, |
|
"learning_rate": 0.00045785239594263726, |
|
"loss": 2.4579, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.08478094613787829, |
|
"grad_norm": 4.0551228523254395, |
|
"learning_rate": 0.0004578086743616649, |
|
"loss": 1.6358, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.08486834917513383, |
|
"grad_norm": 2.8462789058685303, |
|
"learning_rate": 0.0004577649527806926, |
|
"loss": 1.6568, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.08495575221238938, |
|
"grad_norm": 3.82456111907959, |
|
"learning_rate": 0.0004577212311997202, |
|
"loss": 1.696, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.08504315524964493, |
|
"grad_norm": 2.9463558197021484, |
|
"learning_rate": 0.00045767750961874784, |
|
"loss": 1.8359, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.08513055828690047, |
|
"grad_norm": 2.811894416809082, |
|
"learning_rate": 0.00045763378803777543, |
|
"loss": 1.369, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.08521796132415602, |
|
"grad_norm": 2.092231512069702, |
|
"learning_rate": 0.0004575900664568031, |
|
"loss": 1.5433, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.08530536436141156, |
|
"grad_norm": 4.028072357177734, |
|
"learning_rate": 0.0004575463448758307, |
|
"loss": 2.4999, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.08539276739866711, |
|
"grad_norm": 10.593165397644043, |
|
"learning_rate": 0.00045750262329485836, |
|
"loss": 1.5753, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.08548017043592265, |
|
"grad_norm": 6.811407089233398, |
|
"learning_rate": 0.000457458901713886, |
|
"loss": 1.7268, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.0855675734731782, |
|
"grad_norm": 2.3520467281341553, |
|
"learning_rate": 0.0004574151801329136, |
|
"loss": 1.4044, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.08565497651043373, |
|
"grad_norm": 3.668078660964966, |
|
"learning_rate": 0.00045737145855194125, |
|
"loss": 1.718, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.08574237954768928, |
|
"grad_norm": 10.229111671447754, |
|
"learning_rate": 0.00045732773697096884, |
|
"loss": 1.7006, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.08582978258494482, |
|
"grad_norm": 5.428765773773193, |
|
"learning_rate": 0.00045728401538999654, |
|
"loss": 2.2021, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.08591718562220037, |
|
"grad_norm": 2.0686569213867188, |
|
"learning_rate": 0.0004572402938090242, |
|
"loss": 1.687, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.08600458865945591, |
|
"grad_norm": 2.371243715286255, |
|
"learning_rate": 0.00045719657222805177, |
|
"loss": 1.6734, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.08609199169671146, |
|
"grad_norm": 1.6429576873779297, |
|
"learning_rate": 0.0004571528506470794, |
|
"loss": 1.8382, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.086179394733967, |
|
"grad_norm": 2.408743381500244, |
|
"learning_rate": 0.000457109129066107, |
|
"loss": 1.45, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.08626679777122255, |
|
"grad_norm": 4.068368434906006, |
|
"learning_rate": 0.0004570654074851347, |
|
"loss": 1.7464, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.0863542008084781, |
|
"grad_norm": 1.9330801963806152, |
|
"learning_rate": 0.0004570216859041623, |
|
"loss": 1.6335, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.08644160384573364, |
|
"grad_norm": 4.200726509094238, |
|
"learning_rate": 0.00045697796432318994, |
|
"loss": 1.6781, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.08652900688298919, |
|
"grad_norm": 4.335032939910889, |
|
"learning_rate": 0.0004569342427422176, |
|
"loss": 1.7382, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.08661640992024473, |
|
"grad_norm": 2.2428669929504395, |
|
"learning_rate": 0.0004568905211612452, |
|
"loss": 1.4791, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.08670381295750028, |
|
"grad_norm": 2.2247121334075928, |
|
"learning_rate": 0.0004568467995802728, |
|
"loss": 1.8668, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.08679121599475582, |
|
"grad_norm": 2.013319492340088, |
|
"learning_rate": 0.00045680307799930047, |
|
"loss": 1.4925, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.08687861903201136, |
|
"grad_norm": 1.5773614645004272, |
|
"learning_rate": 0.0004567593564183281, |
|
"loss": 1.3334, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.0869660220692669, |
|
"grad_norm": 1.1663486957550049, |
|
"learning_rate": 0.0004567156348373557, |
|
"loss": 1.5022, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.08705342510652245, |
|
"grad_norm": 1.763238549232483, |
|
"learning_rate": 0.00045667191325638335, |
|
"loss": 1.5118, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.08714082814377799, |
|
"grad_norm": 1.4888843297958374, |
|
"learning_rate": 0.000456628191675411, |
|
"loss": 1.6713, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.08722823118103354, |
|
"grad_norm": 2.5363516807556152, |
|
"learning_rate": 0.0004565844700944386, |
|
"loss": 1.4999, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.08731563421828908, |
|
"grad_norm": 2.134773015975952, |
|
"learning_rate": 0.0004565407485134663, |
|
"loss": 1.5086, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.08740303725554463, |
|
"grad_norm": 15.75776481628418, |
|
"learning_rate": 0.0004564970269324939, |
|
"loss": 2.11, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 11441, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.60783873359872e+17, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|