{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.8819774863641638, "eval_steps": 500, "global_step": 3800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.00023209933851688522, "grad_norm": 3.2038071155548096, "learning_rate": 0.00019999999893639996, "loss": 1.7651, "step": 1 }, { "epoch": 0.00046419867703377044, "grad_norm": null, "learning_rate": 0.00019999999893639996, "loss": 2.173, "step": 2 }, { "epoch": 0.0006962980155506556, "grad_norm": null, "learning_rate": 0.00019999999893639996, "loss": 2.1661, "step": 3 }, { "epoch": 0.0009283973540675409, "grad_norm": 16.338359832763672, "learning_rate": 0.0001999999957455998, "loss": 2.2505, "step": 4 }, { "epoch": 0.0011604966925844262, "grad_norm": 10.540064811706543, "learning_rate": 0.00019999999042759966, "loss": 1.9261, "step": 5 }, { "epoch": 0.0013925960311013113, "grad_norm": 8.137545585632324, "learning_rate": 0.00019999998298239957, "loss": 1.6788, "step": 6 }, { "epoch": 0.0016246953696181965, "grad_norm": 10.819408416748047, "learning_rate": 0.00019999997340999976, "loss": 1.7106, "step": 7 }, { "epoch": 0.0018567947081350818, "grad_norm": 5.37817907333374, "learning_rate": 0.0001999999617104004, "loss": 1.6442, "step": 8 }, { "epoch": 0.002088894046651967, "grad_norm": 5.0646071434021, "learning_rate": 0.00019999994788360174, "loss": 1.662, "step": 9 }, { "epoch": 0.0023209933851688525, "grad_norm": 4.481884956359863, "learning_rate": 0.00019999993192960412, "loss": 1.5318, "step": 10 }, { "epoch": 0.0025530927236857375, "grad_norm": 4.167725086212158, "learning_rate": 0.0001999999138484078, "loss": 1.4225, "step": 11 }, { "epoch": 0.0027851920622026225, "grad_norm": 3.317434072494507, "learning_rate": 0.0001999998936400132, "loss": 1.4752, "step": 12 }, { "epoch": 0.003017291400719508, "grad_norm": 2.7889750003814697, "learning_rate": 0.00019999987130442075, "loss": 1.4986, "step": 13 }, { "epoch": 
0.003249390739236393, "grad_norm": 2.7518820762634277, "learning_rate": 0.00019999984684163097, "loss": 1.421, "step": 14 }, { "epoch": 0.0034814900777532785, "grad_norm": 2.1498594284057617, "learning_rate": 0.00019999982025164427, "loss": 1.4375, "step": 15 }, { "epoch": 0.0037135894162701635, "grad_norm": 2.5411746501922607, "learning_rate": 0.00019999979153446134, "loss": 1.4187, "step": 16 }, { "epoch": 0.003945688754787049, "grad_norm": 2.192904233932495, "learning_rate": 0.0001999997606900827, "loss": 1.38, "step": 17 }, { "epoch": 0.004177788093303934, "grad_norm": 1.6563782691955566, "learning_rate": 0.00019999972771850906, "loss": 1.4276, "step": 18 }, { "epoch": 0.004409887431820819, "grad_norm": 2.560487747192383, "learning_rate": 0.0001999996926197411, "loss": 1.39, "step": 19 }, { "epoch": 0.004641986770337705, "grad_norm": 1.4510043859481812, "learning_rate": 0.00019999965539377955, "loss": 1.3103, "step": 20 }, { "epoch": 0.00487408610885459, "grad_norm": 3.1493489742279053, "learning_rate": 0.00019999961604062527, "loss": 1.2738, "step": 21 }, { "epoch": 0.005106185447371475, "grad_norm": 1.5094335079193115, "learning_rate": 0.00019999957456027901, "loss": 1.3622, "step": 22 }, { "epoch": 0.00533828478588836, "grad_norm": 2.2526330947875977, "learning_rate": 0.0001999995309527417, "loss": 1.3611, "step": 23 }, { "epoch": 0.005570384124405245, "grad_norm": 1.4780999422073364, "learning_rate": 0.00019999948521801425, "loss": 1.3363, "step": 24 }, { "epoch": 0.005802483462922131, "grad_norm": 2.157604455947876, "learning_rate": 0.00019999943735609765, "loss": 1.4177, "step": 25 }, { "epoch": 0.006034582801439016, "grad_norm": 1.5941506624221802, "learning_rate": 0.0001999993873669929, "loss": 1.3707, "step": 26 }, { "epoch": 0.006266682139955901, "grad_norm": 1.8606597185134888, "learning_rate": 0.00019999933525070108, "loss": 1.3249, "step": 27 }, { "epoch": 0.006498781478472786, "grad_norm": 1.5392180681228638, "learning_rate": 
0.00019999928100722326, "loss": 1.3107, "step": 28 }, { "epoch": 0.006730880816989672, "grad_norm": 1.683279275894165, "learning_rate": 0.00019999922463656068, "loss": 1.3468, "step": 29 }, { "epoch": 0.006962980155506557, "grad_norm": 1.8321382999420166, "learning_rate": 0.00019999916613871446, "loss": 1.321, "step": 30 }, { "epoch": 0.007195079494023442, "grad_norm": 1.4149234294891357, "learning_rate": 0.00019999910551368586, "loss": 1.3856, "step": 31 }, { "epoch": 0.007427178832540327, "grad_norm": 1.3785655498504639, "learning_rate": 0.00019999904276147618, "loss": 1.3159, "step": 32 }, { "epoch": 0.007659278171057212, "grad_norm": 1.447564721107483, "learning_rate": 0.00019999897788208678, "loss": 1.3619, "step": 33 }, { "epoch": 0.007891377509574098, "grad_norm": 1.6309694051742554, "learning_rate": 0.00019999891087551895, "loss": 1.2771, "step": 34 }, { "epoch": 0.008123476848090982, "grad_norm": 1.4609997272491455, "learning_rate": 0.00019999884174177424, "loss": 1.2926, "step": 35 }, { "epoch": 0.008355576186607868, "grad_norm": 1.683349847793579, "learning_rate": 0.00019999877048085404, "loss": 1.3539, "step": 36 }, { "epoch": 0.008587675525124754, "grad_norm": 1.3834455013275146, "learning_rate": 0.00019999869709275988, "loss": 1.3587, "step": 37 }, { "epoch": 0.008819774863641638, "grad_norm": 1.742599606513977, "learning_rate": 0.0001999986215774933, "loss": 1.2976, "step": 38 }, { "epoch": 0.009051874202158524, "grad_norm": 1.2949187755584717, "learning_rate": 0.00019999854393505596, "loss": 1.3341, "step": 39 }, { "epoch": 0.00928397354067541, "grad_norm": 1.3417593240737915, "learning_rate": 0.0001999984641654495, "loss": 1.274, "step": 40 }, { "epoch": 0.009516072879192294, "grad_norm": 1.2190725803375244, "learning_rate": 0.0001999983822686756, "loss": 1.3872, "step": 41 }, { "epoch": 0.00974817221770918, "grad_norm": 1.4673843383789062, "learning_rate": 0.00019999829824473596, "loss": 1.332, "step": 42 }, { "epoch": 0.009980271556226064, 
"grad_norm": 1.1995548009872437, "learning_rate": 0.00019999821209363246, "loss": 1.2863, "step": 43 }, { "epoch": 0.01021237089474295, "grad_norm": 1.6070772409439087, "learning_rate": 0.00019999812381536685, "loss": 1.2777, "step": 44 }, { "epoch": 0.010444470233259836, "grad_norm": 1.6723705530166626, "learning_rate": 0.00019999803340994105, "loss": 1.1997, "step": 45 }, { "epoch": 0.01067656957177672, "grad_norm": 1.1233357191085815, "learning_rate": 0.00019999794087735697, "loss": 1.3104, "step": 46 }, { "epoch": 0.010908668910293606, "grad_norm": 2.060980796813965, "learning_rate": 0.00019999784621761658, "loss": 1.2477, "step": 47 }, { "epoch": 0.01114076824881049, "grad_norm": 1.4882616996765137, "learning_rate": 0.00019999774943072192, "loss": 1.2435, "step": 48 }, { "epoch": 0.011372867587327376, "grad_norm": 1.8072844743728638, "learning_rate": 0.000199997650516675, "loss": 1.319, "step": 49 }, { "epoch": 0.011604966925844262, "grad_norm": 1.5391795635223389, "learning_rate": 0.00019999754947547797, "loss": 1.2641, "step": 50 }, { "epoch": 0.011837066264361146, "grad_norm": 1.5926071405410767, "learning_rate": 0.00019999744630713292, "loss": 1.272, "step": 51 }, { "epoch": 0.012069165602878032, "grad_norm": 1.6202788352966309, "learning_rate": 0.00019999734101164212, "loss": 1.2903, "step": 52 }, { "epoch": 0.012301264941394918, "grad_norm": 1.1898560523986816, "learning_rate": 0.00019999723358900775, "loss": 1.2918, "step": 53 }, { "epoch": 0.012533364279911802, "grad_norm": 1.6437476873397827, "learning_rate": 0.0001999971240392321, "loss": 1.2732, "step": 54 }, { "epoch": 0.012765463618428688, "grad_norm": 1.2351772785186768, "learning_rate": 0.00019999701236231757, "loss": 1.2665, "step": 55 }, { "epoch": 0.012997562956945572, "grad_norm": 2.001274824142456, "learning_rate": 0.00019999689855826645, "loss": 1.2996, "step": 56 }, { "epoch": 0.013229662295462458, "grad_norm": 2.2652699947357178, "learning_rate": 0.00019999678262708123, "loss": 1.252, 
"step": 57 }, { "epoch": 0.013461761633979344, "grad_norm": 1.4783529043197632, "learning_rate": 0.00019999666456876428, "loss": 1.2925, "step": 58 }, { "epoch": 0.013693860972496228, "grad_norm": 1.6773056983947754, "learning_rate": 0.0001999965443833182, "loss": 1.2147, "step": 59 }, { "epoch": 0.013925960311013114, "grad_norm": 1.385788083076477, "learning_rate": 0.00019999642207074554, "loss": 1.2712, "step": 60 }, { "epoch": 0.014158059649529998, "grad_norm": 1.6196304559707642, "learning_rate": 0.00019999629763104885, "loss": 1.2451, "step": 61 }, { "epoch": 0.014390158988046884, "grad_norm": 1.318074345588684, "learning_rate": 0.00019999617106423082, "loss": 1.2352, "step": 62 }, { "epoch": 0.01462225832656377, "grad_norm": 1.8781934976577759, "learning_rate": 0.00019999604237029412, "loss": 1.2094, "step": 63 }, { "epoch": 0.014854357665080654, "grad_norm": 1.1900345087051392, "learning_rate": 0.0001999959115492415, "loss": 1.3217, "step": 64 }, { "epoch": 0.01508645700359754, "grad_norm": 1.223382830619812, "learning_rate": 0.00019999577860107577, "loss": 1.2869, "step": 65 }, { "epoch": 0.015318556342114424, "grad_norm": 1.320986032485962, "learning_rate": 0.0001999956435257997, "loss": 1.2772, "step": 66 }, { "epoch": 0.01555065568063131, "grad_norm": 1.2489218711853027, "learning_rate": 0.0001999955063234162, "loss": 1.236, "step": 67 }, { "epoch": 0.015782755019148196, "grad_norm": 1.5126451253890991, "learning_rate": 0.00019999536699392815, "loss": 1.2395, "step": 68 }, { "epoch": 0.01601485435766508, "grad_norm": 1.2283551692962646, "learning_rate": 0.00019999522553733857, "loss": 1.2065, "step": 69 }, { "epoch": 0.016246953696181964, "grad_norm": 1.7732326984405518, "learning_rate": 0.00019999508195365048, "loss": 1.2396, "step": 70 }, { "epoch": 0.016479053034698852, "grad_norm": 1.08442223072052, "learning_rate": 0.0001999949362428668, "loss": 1.2728, "step": 71 }, { "epoch": 0.016711152373215736, "grad_norm": 1.9178045988082886, "learning_rate": 
0.0001999947884049908, "loss": 1.2229, "step": 72 }, { "epoch": 0.01694325171173262, "grad_norm": 1.3670941591262817, "learning_rate": 0.00019999463844002556, "loss": 1.2209, "step": 73 }, { "epoch": 0.017175351050249508, "grad_norm": 1.811126947402954, "learning_rate": 0.00019999448634797423, "loss": 1.2614, "step": 74 }, { "epoch": 0.017407450388766392, "grad_norm": 1.8345831632614136, "learning_rate": 0.00019999433212884008, "loss": 1.2384, "step": 75 }, { "epoch": 0.017639549727283276, "grad_norm": 1.017521619796753, "learning_rate": 0.00019999417578262638, "loss": 1.2412, "step": 76 }, { "epoch": 0.017871649065800164, "grad_norm": 1.595546841621399, "learning_rate": 0.0001999940173093365, "loss": 1.2285, "step": 77 }, { "epoch": 0.018103748404317048, "grad_norm": 0.9834617972373962, "learning_rate": 0.00019999385670897374, "loss": 1.2118, "step": 78 }, { "epoch": 0.018335847742833932, "grad_norm": 1.542934775352478, "learning_rate": 0.00019999369398154157, "loss": 1.2347, "step": 79 }, { "epoch": 0.01856794708135082, "grad_norm": 1.136288046836853, "learning_rate": 0.0001999935291270434, "loss": 1.2005, "step": 80 }, { "epoch": 0.018800046419867704, "grad_norm": 1.359007716178894, "learning_rate": 0.0001999933621454828, "loss": 1.1772, "step": 81 }, { "epoch": 0.019032145758384588, "grad_norm": 1.1479077339172363, "learning_rate": 0.00019999319303686328, "loss": 1.2875, "step": 82 }, { "epoch": 0.019264245096901472, "grad_norm": 1.1424561738967896, "learning_rate": 0.00019999302180118847, "loss": 1.2808, "step": 83 }, { "epoch": 0.01949634443541836, "grad_norm": 1.16828191280365, "learning_rate": 0.00019999284843846195, "loss": 1.2416, "step": 84 }, { "epoch": 0.019728443773935244, "grad_norm": 1.3842523097991943, "learning_rate": 0.0001999926729486875, "loss": 1.1973, "step": 85 }, { "epoch": 0.019960543112452128, "grad_norm": 1.1938616037368774, "learning_rate": 0.00019999249533186875, "loss": 1.2025, "step": 86 }, { "epoch": 0.020192642450969016, 
"grad_norm": 1.51042902469635, "learning_rate": 0.00019999231558800955, "loss": 1.2007, "step": 87 }, { "epoch": 0.0204247417894859, "grad_norm": 1.30199134349823, "learning_rate": 0.00019999213371711368, "loss": 1.1959, "step": 88 }, { "epoch": 0.020656841128002784, "grad_norm": 1.5003533363342285, "learning_rate": 0.00019999194971918508, "loss": 1.2399, "step": 89 }, { "epoch": 0.020888940466519672, "grad_norm": 1.192273497581482, "learning_rate": 0.00019999176359422757, "loss": 1.1765, "step": 90 }, { "epoch": 0.021121039805036556, "grad_norm": 1.4001610279083252, "learning_rate": 0.0001999915753422452, "loss": 1.1962, "step": 91 }, { "epoch": 0.02135313914355344, "grad_norm": 1.296350359916687, "learning_rate": 0.0001999913849632419, "loss": 1.1489, "step": 92 }, { "epoch": 0.021585238482070328, "grad_norm": 1.0739744901657104, "learning_rate": 0.00019999119245722177, "loss": 1.2623, "step": 93 }, { "epoch": 0.021817337820587212, "grad_norm": 1.1202659606933594, "learning_rate": 0.00019999099782418888, "loss": 1.2235, "step": 94 }, { "epoch": 0.022049437159104096, "grad_norm": 1.1871074438095093, "learning_rate": 0.00019999080106414735, "loss": 1.1408, "step": 95 }, { "epoch": 0.02228153649762098, "grad_norm": 1.1671167612075806, "learning_rate": 0.00019999060217710138, "loss": 1.1915, "step": 96 }, { "epoch": 0.022513635836137868, "grad_norm": 1.1107845306396484, "learning_rate": 0.00019999040116305526, "loss": 1.1829, "step": 97 }, { "epoch": 0.022745735174654752, "grad_norm": 1.3131628036499023, "learning_rate": 0.00019999019802201316, "loss": 1.3263, "step": 98 }, { "epoch": 0.022977834513171636, "grad_norm": 1.1934670209884644, "learning_rate": 0.00019998999275397953, "loss": 1.2026, "step": 99 }, { "epoch": 0.023209933851688524, "grad_norm": 1.1404023170471191, "learning_rate": 0.0001999897853589586, "loss": 1.2138, "step": 100 }, { "epoch": 0.023442033190205408, "grad_norm": 1.2095048427581787, "learning_rate": 0.00019998957583695491, "loss": 1.1886, 
"step": 101 }, { "epoch": 0.023674132528722292, "grad_norm": 0.9044592380523682, "learning_rate": 0.00019998936418797277, "loss": 1.2356, "step": 102 }, { "epoch": 0.02390623186723918, "grad_norm": 1.146653175354004, "learning_rate": 0.00019998915041201683, "loss": 1.214, "step": 103 }, { "epoch": 0.024138331205756064, "grad_norm": 0.9014309048652649, "learning_rate": 0.00019998893450909154, "loss": 1.2397, "step": 104 }, { "epoch": 0.024370430544272948, "grad_norm": 1.0998764038085938, "learning_rate": 0.00019998871647920155, "loss": 1.2164, "step": 105 }, { "epoch": 0.024602529882789836, "grad_norm": 0.9546407461166382, "learning_rate": 0.00019998849632235145, "loss": 1.2233, "step": 106 }, { "epoch": 0.02483462922130672, "grad_norm": 1.0850448608398438, "learning_rate": 0.00019998827403854596, "loss": 1.1951, "step": 107 }, { "epoch": 0.025066728559823604, "grad_norm": 1.0332109928131104, "learning_rate": 0.00019998804962778982, "loss": 1.1932, "step": 108 }, { "epoch": 0.025298827898340488, "grad_norm": 0.8607396483421326, "learning_rate": 0.00019998782309008775, "loss": 1.2299, "step": 109 }, { "epoch": 0.025530927236857376, "grad_norm": 0.8929436802864075, "learning_rate": 0.00019998759442544463, "loss": 1.1355, "step": 110 }, { "epoch": 0.02576302657537426, "grad_norm": 0.9388677477836609, "learning_rate": 0.00019998736363386523, "loss": 1.1908, "step": 111 }, { "epoch": 0.025995125913891144, "grad_norm": 0.8951278924942017, "learning_rate": 0.00019998713071535455, "loss": 1.1533, "step": 112 }, { "epoch": 0.026227225252408032, "grad_norm": 0.7739974856376648, "learning_rate": 0.00019998689566991753, "loss": 1.2247, "step": 113 }, { "epoch": 0.026459324590924916, "grad_norm": 0.9300251007080078, "learning_rate": 0.00019998665849755917, "loss": 1.2084, "step": 114 }, { "epoch": 0.0266914239294418, "grad_norm": 1.0354303121566772, "learning_rate": 0.00019998641919828443, "loss": 1.1797, "step": 115 }, { "epoch": 0.026923523267958688, "grad_norm": 
0.9980953335762024, "learning_rate": 0.00019998617777209853, "loss": 1.1736, "step": 116 }, { "epoch": 0.027155622606475572, "grad_norm": 0.8998697400093079, "learning_rate": 0.00019998593421900656, "loss": 1.2219, "step": 117 }, { "epoch": 0.027387721944992456, "grad_norm": 1.0656938552856445, "learning_rate": 0.00019998568853901364, "loss": 1.1859, "step": 118 }, { "epoch": 0.02761982128350934, "grad_norm": 1.1336939334869385, "learning_rate": 0.00019998544073212507, "loss": 1.1496, "step": 119 }, { "epoch": 0.027851920622026228, "grad_norm": 0.9787673354148865, "learning_rate": 0.0001999851907983461, "loss": 1.1571, "step": 120 }, { "epoch": 0.028084019960543112, "grad_norm": 1.0923672914505005, "learning_rate": 0.00019998493873768202, "loss": 1.1126, "step": 121 }, { "epoch": 0.028316119299059996, "grad_norm": 0.984729528427124, "learning_rate": 0.00019998468455013823, "loss": 1.2217, "step": 122 }, { "epoch": 0.028548218637576884, "grad_norm": 1.228933334350586, "learning_rate": 0.00019998442823572013, "loss": 1.1337, "step": 123 }, { "epoch": 0.028780317976093768, "grad_norm": 0.7867931723594666, "learning_rate": 0.00019998416979443313, "loss": 1.1504, "step": 124 }, { "epoch": 0.029012417314610652, "grad_norm": 1.375267744064331, "learning_rate": 0.0001999839092262828, "loss": 1.1411, "step": 125 }, { "epoch": 0.02924451665312754, "grad_norm": 0.9629110097885132, "learning_rate": 0.0001999836465312746, "loss": 1.1656, "step": 126 }, { "epoch": 0.029476615991644424, "grad_norm": 1.013222575187683, "learning_rate": 0.0001999833817094142, "loss": 1.1645, "step": 127 }, { "epoch": 0.029708715330161308, "grad_norm": 0.9050752520561218, "learning_rate": 0.0001999831147607072, "loss": 1.2246, "step": 128 }, { "epoch": 0.029940814668678196, "grad_norm": 0.9478861093521118, "learning_rate": 0.00019998284568515924, "loss": 1.219, "step": 129 }, { "epoch": 0.03017291400719508, "grad_norm": 1.0421406030654907, "learning_rate": 0.0001999825744827761, "loss": 1.2109, 
"step": 130 }, { "epoch": 0.030405013345711964, "grad_norm": 0.9947962164878845, "learning_rate": 0.00019998230115356348, "loss": 1.1926, "step": 131 }, { "epoch": 0.030637112684228848, "grad_norm": 1.1830108165740967, "learning_rate": 0.00019998202569752727, "loss": 1.1984, "step": 132 }, { "epoch": 0.030869212022745736, "grad_norm": 0.7691541910171509, "learning_rate": 0.00019998174811467333, "loss": 1.2591, "step": 133 }, { "epoch": 0.03110131136126262, "grad_norm": 1.1075387001037598, "learning_rate": 0.0001999814684050075, "loss": 1.1312, "step": 134 }, { "epoch": 0.031333410699779504, "grad_norm": 1.0531169176101685, "learning_rate": 0.0001999811865685358, "loss": 1.1995, "step": 135 }, { "epoch": 0.03156551003829639, "grad_norm": 1.0216295719146729, "learning_rate": 0.00019998090260526418, "loss": 1.1813, "step": 136 }, { "epoch": 0.03179760937681328, "grad_norm": 1.0923082828521729, "learning_rate": 0.00019998061651519867, "loss": 1.1662, "step": 137 }, { "epoch": 0.03202970871533016, "grad_norm": 1.0432199239730835, "learning_rate": 0.00019998032829834537, "loss": 1.1983, "step": 138 }, { "epoch": 0.03226180805384705, "grad_norm": 0.9476574659347534, "learning_rate": 0.00019998003795471044, "loss": 1.176, "step": 139 }, { "epoch": 0.03249390739236393, "grad_norm": 0.9106845259666443, "learning_rate": 0.00019997974548430003, "loss": 1.2096, "step": 140 }, { "epoch": 0.032726006730880816, "grad_norm": 1.0149940252304077, "learning_rate": 0.00019997945088712037, "loss": 1.2198, "step": 141 }, { "epoch": 0.032958106069397704, "grad_norm": 1.087999939918518, "learning_rate": 0.00019997915416317767, "loss": 1.1762, "step": 142 }, { "epoch": 0.033190205407914584, "grad_norm": 1.15151047706604, "learning_rate": 0.00019997885531247836, "loss": 1.2268, "step": 143 }, { "epoch": 0.03342230474643147, "grad_norm": 1.101580262184143, "learning_rate": 0.0001999785543350287, "loss": 1.195, "step": 144 }, { "epoch": 0.03365440408494836, "grad_norm": 1.3661648035049438, 
"learning_rate": 0.00019997825123083511, "loss": 1.1911, "step": 145 }, { "epoch": 0.03388650342346524, "grad_norm": 1.1633622646331787, "learning_rate": 0.00019997794599990404, "loss": 1.1641, "step": 146 }, { "epoch": 0.03411860276198213, "grad_norm": 1.5635263919830322, "learning_rate": 0.00019997763864224204, "loss": 1.1746, "step": 147 }, { "epoch": 0.034350702100499016, "grad_norm": 1.1527385711669922, "learning_rate": 0.0001999773291578556, "loss": 1.1754, "step": 148 }, { "epoch": 0.034582801439015896, "grad_norm": 1.6002782583236694, "learning_rate": 0.00019997701754675124, "loss": 1.152, "step": 149 }, { "epoch": 0.034814900777532784, "grad_norm": 1.3039273023605347, "learning_rate": 0.00019997670380893573, "loss": 1.228, "step": 150 }, { "epoch": 0.03504700011604967, "grad_norm": 1.2945127487182617, "learning_rate": 0.0001999763879444156, "loss": 1.2359, "step": 151 }, { "epoch": 0.03527909945456655, "grad_norm": 1.1199839115142822, "learning_rate": 0.00019997606995319768, "loss": 1.1973, "step": 152 }, { "epoch": 0.03551119879308344, "grad_norm": 1.1038458347320557, "learning_rate": 0.0001999757498352887, "loss": 1.1478, "step": 153 }, { "epoch": 0.03574329813160033, "grad_norm": 1.36546790599823, "learning_rate": 0.00019997542759069543, "loss": 1.1392, "step": 154 }, { "epoch": 0.03597539747011721, "grad_norm": 1.0823314189910889, "learning_rate": 0.00019997510321942476, "loss": 1.1637, "step": 155 }, { "epoch": 0.036207496808634096, "grad_norm": 1.2991116046905518, "learning_rate": 0.0001999747767214836, "loss": 1.1674, "step": 156 }, { "epoch": 0.036439596147150984, "grad_norm": 1.3077826499938965, "learning_rate": 0.00019997444809687886, "loss": 1.1381, "step": 157 }, { "epoch": 0.036671695485667864, "grad_norm": 1.1962543725967407, "learning_rate": 0.00019997411734561755, "loss": 1.121, "step": 158 }, { "epoch": 0.03690379482418475, "grad_norm": 1.2830172777175903, "learning_rate": 0.0001999737844677067, "loss": 1.1628, "step": 159 }, { "epoch": 
0.03713589416270164, "grad_norm": 1.0819096565246582, "learning_rate": 0.00019997344946315343, "loss": 1.1207, "step": 160 }, { "epoch": 0.03736799350121852, "grad_norm": 1.4033743143081665, "learning_rate": 0.00019997311233196482, "loss": 1.091, "step": 161 }, { "epoch": 0.03760009283973541, "grad_norm": 1.168992280960083, "learning_rate": 0.00019997277307414805, "loss": 1.1408, "step": 162 }, { "epoch": 0.03783219217825229, "grad_norm": 1.4940844774246216, "learning_rate": 0.00019997243168971035, "loss": 1.123, "step": 163 }, { "epoch": 0.038064291516769176, "grad_norm": 1.0344845056533813, "learning_rate": 0.00019997208817865896, "loss": 1.1097, "step": 164 }, { "epoch": 0.038296390855286064, "grad_norm": 1.2433559894561768, "learning_rate": 0.00019997174254100122, "loss": 1.1901, "step": 165 }, { "epoch": 0.038528490193802944, "grad_norm": 1.0224947929382324, "learning_rate": 0.00019997139477674446, "loss": 1.1009, "step": 166 }, { "epoch": 0.03876058953231983, "grad_norm": 1.2136378288269043, "learning_rate": 0.00019997104488589607, "loss": 1.1661, "step": 167 }, { "epoch": 0.03899268887083672, "grad_norm": 1.22763991355896, "learning_rate": 0.00019997069286846347, "loss": 1.2112, "step": 168 }, { "epoch": 0.0392247882093536, "grad_norm": 1.0742839574813843, "learning_rate": 0.00019997033872445424, "loss": 1.116, "step": 169 }, { "epoch": 0.03945688754787049, "grad_norm": 1.3394129276275635, "learning_rate": 0.00019996998245387583, "loss": 1.149, "step": 170 }, { "epoch": 0.039688986886387376, "grad_norm": 1.1060538291931152, "learning_rate": 0.00019996962405673585, "loss": 1.0631, "step": 171 }, { "epoch": 0.039921086224904256, "grad_norm": 1.297677993774414, "learning_rate": 0.00019996926353304192, "loss": 1.2356, "step": 172 }, { "epoch": 0.040153185563421144, "grad_norm": 0.9665383100509644, "learning_rate": 0.0001999689008828017, "loss": 1.24, "step": 173 }, { "epoch": 0.04038528490193803, "grad_norm": 1.05509352684021, "learning_rate": 
0.00019996853610602288, "loss": 1.1544, "step": 174 }, { "epoch": 0.04061738424045491, "grad_norm": 1.2445751428604126, "learning_rate": 0.00019996816920271328, "loss": 1.1736, "step": 175 }, { "epoch": 0.0408494835789718, "grad_norm": 0.9968123435974121, "learning_rate": 0.00019996780017288068, "loss": 1.1915, "step": 176 }, { "epoch": 0.04108158291748869, "grad_norm": 1.2577074766159058, "learning_rate": 0.00019996742901653294, "loss": 1.1177, "step": 177 }, { "epoch": 0.04131368225600557, "grad_norm": 1.0380483865737915, "learning_rate": 0.0001999670557336779, "loss": 1.1283, "step": 178 }, { "epoch": 0.041545781594522456, "grad_norm": 1.063004493713379, "learning_rate": 0.00019996668032432359, "loss": 1.1113, "step": 179 }, { "epoch": 0.041777880933039344, "grad_norm": 1.2589250802993774, "learning_rate": 0.0001999663027884779, "loss": 1.1009, "step": 180 }, { "epoch": 0.042009980271556224, "grad_norm": 1.0601253509521484, "learning_rate": 0.00019996592312614895, "loss": 1.264, "step": 181 }, { "epoch": 0.04224207961007311, "grad_norm": 0.8920071721076965, "learning_rate": 0.00019996554133734474, "loss": 1.1354, "step": 182 }, { "epoch": 0.04247417894859, "grad_norm": 1.1329562664031982, "learning_rate": 0.00019996515742207342, "loss": 1.1173, "step": 183 }, { "epoch": 0.04270627828710688, "grad_norm": 0.9925181269645691, "learning_rate": 0.00019996477138034318, "loss": 1.1003, "step": 184 }, { "epoch": 0.04293837762562377, "grad_norm": 0.8460555672645569, "learning_rate": 0.0001999643832121622, "loss": 1.1545, "step": 185 }, { "epoch": 0.043170476964140655, "grad_norm": 1.0797373056411743, "learning_rate": 0.0001999639929175388, "loss": 1.1961, "step": 186 }, { "epoch": 0.043402576302657536, "grad_norm": 1.0007143020629883, "learning_rate": 0.0001999636004964812, "loss": 1.171, "step": 187 }, { "epoch": 0.043634675641174424, "grad_norm": 1.1414363384246826, "learning_rate": 0.00019996320594899776, "loss": 1.0893, "step": 188 }, { "epoch": 0.043866774979691305, 
"grad_norm": 0.9441143870353699, "learning_rate": 0.00019996280927509692, "loss": 1.128, "step": 189 }, { "epoch": 0.04409887431820819, "grad_norm": 0.9933585524559021, "learning_rate": 0.0001999624104747871, "loss": 1.1309, "step": 190 }, { "epoch": 0.04433097365672508, "grad_norm": 0.9130701422691345, "learning_rate": 0.00019996200954807672, "loss": 1.1608, "step": 191 }, { "epoch": 0.04456307299524196, "grad_norm": 1.0881755352020264, "learning_rate": 0.0001999616064949744, "loss": 1.1691, "step": 192 }, { "epoch": 0.04479517233375885, "grad_norm": 1.1120761632919312, "learning_rate": 0.00019996120131548869, "loss": 1.0689, "step": 193 }, { "epoch": 0.045027271672275736, "grad_norm": 0.9563672542572021, "learning_rate": 0.00019996079400962818, "loss": 1.1315, "step": 194 }, { "epoch": 0.045259371010792616, "grad_norm": 1.0514744520187378, "learning_rate": 0.00019996038457740153, "loss": 1.1043, "step": 195 }, { "epoch": 0.045491470349309504, "grad_norm": 1.2425557374954224, "learning_rate": 0.0001999599730188175, "loss": 1.1051, "step": 196 }, { "epoch": 0.04572356968782639, "grad_norm": 0.8657141327857971, "learning_rate": 0.0001999595593338848, "loss": 1.2132, "step": 197 }, { "epoch": 0.04595566902634327, "grad_norm": 1.210310459136963, "learning_rate": 0.00019995914352261227, "loss": 1.0302, "step": 198 }, { "epoch": 0.04618776836486016, "grad_norm": 1.2979429960250854, "learning_rate": 0.0001999587255850087, "loss": 1.091, "step": 199 }, { "epoch": 0.04641986770337705, "grad_norm": 0.9033575057983398, "learning_rate": 0.00019995830552108301, "loss": 1.1027, "step": 200 }, { "epoch": 0.04665196704189393, "grad_norm": 0.9414019584655762, "learning_rate": 0.00019995788333084416, "loss": 1.1225, "step": 201 }, { "epoch": 0.046884066380410816, "grad_norm": 1.199793815612793, "learning_rate": 0.0001999574590143011, "loss": 1.143, "step": 202 }, { "epoch": 0.047116165718927704, "grad_norm": 0.8170751929283142, "learning_rate": 0.00019995703257146284, "loss": 
1.1603, "step": 203 }, { "epoch": 0.047348265057444584, "grad_norm": 1.1768051385879517, "learning_rate": 0.00019995660400233848, "loss": 1.0992, "step": 204 }, { "epoch": 0.04758036439596147, "grad_norm": 0.8381533026695251, "learning_rate": 0.00019995617330693716, "loss": 1.11, "step": 205 }, { "epoch": 0.04781246373447836, "grad_norm": 1.0177040100097656, "learning_rate": 0.00019995574048526797, "loss": 1.2031, "step": 206 }, { "epoch": 0.04804456307299524, "grad_norm": 1.1447888612747192, "learning_rate": 0.0001999553055373402, "loss": 1.1489, "step": 207 }, { "epoch": 0.04827666241151213, "grad_norm": 0.9254429340362549, "learning_rate": 0.00019995486846316305, "loss": 1.1429, "step": 208 }, { "epoch": 0.048508761750029016, "grad_norm": 0.9362263083457947, "learning_rate": 0.0001999544292627458, "loss": 1.1075, "step": 209 }, { "epoch": 0.048740861088545896, "grad_norm": 1.0369575023651123, "learning_rate": 0.00019995398793609784, "loss": 1.2028, "step": 210 }, { "epoch": 0.048972960427062784, "grad_norm": 1.0828003883361816, "learning_rate": 0.00019995354448322855, "loss": 1.047, "step": 211 }, { "epoch": 0.04920505976557967, "grad_norm": 0.9045186638832092, "learning_rate": 0.00019995309890414732, "loss": 1.2051, "step": 212 }, { "epoch": 0.04943715910409655, "grad_norm": 0.8883135318756104, "learning_rate": 0.00019995265119886367, "loss": 1.0837, "step": 213 }, { "epoch": 0.04966925844261344, "grad_norm": 1.0592052936553955, "learning_rate": 0.00019995220136738716, "loss": 1.0946, "step": 214 }, { "epoch": 0.04990135778113032, "grad_norm": 0.8543261289596558, "learning_rate": 0.00019995174940972728, "loss": 1.1044, "step": 215 }, { "epoch": 0.05013345711964721, "grad_norm": 0.9892598986625671, "learning_rate": 0.00019995129532589365, "loss": 1.1275, "step": 216 }, { "epoch": 0.050365556458164096, "grad_norm": 0.9829018712043762, "learning_rate": 0.000199950839115896, "loss": 1.0906, "step": 217 }, { "epoch": 0.050597655796680976, "grad_norm": 
1.2495214939117432, "learning_rate": 0.00019995038077974398, "loss": 1.1874, "step": 218 }, { "epoch": 0.050829755135197864, "grad_norm": 1.0131596326828003, "learning_rate": 0.00019994992031744735, "loss": 1.1523, "step": 219 }, { "epoch": 0.05106185447371475, "grad_norm": 1.0469106435775757, "learning_rate": 0.00019994945772901587, "loss": 1.1159, "step": 220 }, { "epoch": 0.05129395381223163, "grad_norm": 1.024359107017517, "learning_rate": 0.00019994899301445946, "loss": 1.1937, "step": 221 }, { "epoch": 0.05152605315074852, "grad_norm": 1.1803243160247803, "learning_rate": 0.00019994852617378795, "loss": 1.1823, "step": 222 }, { "epoch": 0.05175815248926541, "grad_norm": 1.1598308086395264, "learning_rate": 0.0001999480572070113, "loss": 1.143, "step": 223 }, { "epoch": 0.05199025182778229, "grad_norm": 1.1717250347137451, "learning_rate": 0.00019994758611413942, "loss": 1.0783, "step": 224 }, { "epoch": 0.052222351166299176, "grad_norm": 1.1139521598815918, "learning_rate": 0.0001999471128951824, "loss": 1.1184, "step": 225 }, { "epoch": 0.052454450504816064, "grad_norm": 1.1323457956314087, "learning_rate": 0.0001999466375501503, "loss": 1.1565, "step": 226 }, { "epoch": 0.052686549843332944, "grad_norm": 1.2254990339279175, "learning_rate": 0.00019994616007905318, "loss": 1.1377, "step": 227 }, { "epoch": 0.05291864918184983, "grad_norm": 1.0691893100738525, "learning_rate": 0.00019994568048190128, "loss": 1.1257, "step": 228 }, { "epoch": 0.05315074852036672, "grad_norm": 0.9855639934539795, "learning_rate": 0.0001999451987587047, "loss": 1.0799, "step": 229 }, { "epoch": 0.0533828478588836, "grad_norm": 1.0207661390304565, "learning_rate": 0.0001999447149094738, "loss": 1.1311, "step": 230 }, { "epoch": 0.05361494719740049, "grad_norm": 0.8670130968093872, "learning_rate": 0.00019994422893421877, "loss": 1.1173, "step": 231 }, { "epoch": 0.053847046535917376, "grad_norm": 0.9034013152122498, "learning_rate": 0.00019994374083295003, "loss": 1.1558, "step": 
232 }, { "epoch": 0.054079145874434256, "grad_norm": 0.8590165972709656, "learning_rate": 0.0001999432506056779, "loss": 1.1088, "step": 233 }, { "epoch": 0.054311245212951144, "grad_norm": 0.9429453015327454, "learning_rate": 0.00019994275825241286, "loss": 1.1139, "step": 234 }, { "epoch": 0.05454334455146803, "grad_norm": 0.9882816076278687, "learning_rate": 0.00019994226377316533, "loss": 1.0784, "step": 235 }, { "epoch": 0.05477544388998491, "grad_norm": 0.7824612855911255, "learning_rate": 0.00019994176716794587, "loss": 1.1349, "step": 236 }, { "epoch": 0.0550075432285018, "grad_norm": 1.1462411880493164, "learning_rate": 0.000199941268436765, "loss": 1.123, "step": 237 }, { "epoch": 0.05523964256701868, "grad_norm": 0.8474573493003845, "learning_rate": 0.0001999407675796334, "loss": 1.0541, "step": 238 }, { "epoch": 0.05547174190553557, "grad_norm": 1.2329221963882446, "learning_rate": 0.0001999402645965617, "loss": 1.1719, "step": 239 }, { "epoch": 0.055703841244052456, "grad_norm": 1.0721195936203003, "learning_rate": 0.00019993975948756054, "loss": 1.1295, "step": 240 }, { "epoch": 0.055935940582569336, "grad_norm": 1.0087839365005493, "learning_rate": 0.0001999392522526407, "loss": 1.1494, "step": 241 }, { "epoch": 0.056168039921086224, "grad_norm": 1.0350946187973022, "learning_rate": 0.000199938742891813, "loss": 1.1913, "step": 242 }, { "epoch": 0.05640013925960311, "grad_norm": 0.825470507144928, "learning_rate": 0.00019993823140508822, "loss": 1.1028, "step": 243 }, { "epoch": 0.05663223859811999, "grad_norm": 1.086072325706482, "learning_rate": 0.00019993771779247733, "loss": 1.1317, "step": 244 }, { "epoch": 0.05686433793663688, "grad_norm": 1.0059558153152466, "learning_rate": 0.00019993720205399113, "loss": 1.133, "step": 245 }, { "epoch": 0.05709643727515377, "grad_norm": 1.0169835090637207, "learning_rate": 0.0001999366841896407, "loss": 1.1286, "step": 246 }, { "epoch": 0.05732853661367065, "grad_norm": 1.1611257791519165, "learning_rate": 
0.000199936164199437, "loss": 1.1164, "step": 247 }, { "epoch": 0.057560635952187536, "grad_norm": 0.9916123151779175, "learning_rate": 0.0001999356420833911, "loss": 1.069, "step": 248 }, { "epoch": 0.057792735290704424, "grad_norm": 1.0158668756484985, "learning_rate": 0.00019993511784151412, "loss": 1.0955, "step": 249 }, { "epoch": 0.058024834629221304, "grad_norm": 0.9248602390289307, "learning_rate": 0.00019993459147381724, "loss": 1.1601, "step": 250 }, { "epoch": 0.05825693396773819, "grad_norm": 0.8843005895614624, "learning_rate": 0.00019993406298031156, "loss": 1.1308, "step": 251 }, { "epoch": 0.05848903330625508, "grad_norm": 0.8477474451065063, "learning_rate": 0.00019993353236100842, "loss": 1.0245, "step": 252 }, { "epoch": 0.05872113264477196, "grad_norm": 1.0305787324905396, "learning_rate": 0.00019993299961591906, "loss": 1.1113, "step": 253 }, { "epoch": 0.05895323198328885, "grad_norm": 0.8507434725761414, "learning_rate": 0.00019993246474505483, "loss": 1.099, "step": 254 }, { "epoch": 0.059185331321805736, "grad_norm": 0.8601514101028442, "learning_rate": 0.00019993192774842708, "loss": 1.1129, "step": 255 }, { "epoch": 0.059417430660322616, "grad_norm": 0.8006900548934937, "learning_rate": 0.00019993138862604728, "loss": 1.1423, "step": 256 }, { "epoch": 0.059649529998839504, "grad_norm": 0.8169512152671814, "learning_rate": 0.00019993084737792685, "loss": 1.0753, "step": 257 }, { "epoch": 0.05988162933735639, "grad_norm": 0.8702026605606079, "learning_rate": 0.00019993030400407733, "loss": 1.1078, "step": 258 }, { "epoch": 0.06011372867587327, "grad_norm": 0.9176567196846008, "learning_rate": 0.0001999297585045103, "loss": 1.1244, "step": 259 }, { "epoch": 0.06034582801439016, "grad_norm": 1.0057438611984253, "learning_rate": 0.0001999292108792373, "loss": 1.0926, "step": 260 }, { "epoch": 0.06057792735290705, "grad_norm": 0.7635266780853271, "learning_rate": 0.00019992866112827, "loss": 1.1697, "step": 261 }, { "epoch": 
0.06081002669142393, "grad_norm": 1.1172640323638916, "learning_rate": 0.0001999281092516202, "loss": 1.1948, "step": 262 }, { "epoch": 0.061042126029940816, "grad_norm": 0.9378498792648315, "learning_rate": 0.00019992755524929945, "loss": 1.1268, "step": 263 }, { "epoch": 0.061274225368457697, "grad_norm": 1.1356533765792847, "learning_rate": 0.00019992699912131968, "loss": 1.074, "step": 264 }, { "epoch": 0.061506324706974584, "grad_norm": 0.8633067607879639, "learning_rate": 0.00019992644086769268, "loss": 1.1625, "step": 265 }, { "epoch": 0.06173842404549147, "grad_norm": 1.041566252708435, "learning_rate": 0.00019992588048843032, "loss": 1.1542, "step": 266 }, { "epoch": 0.06197052338400835, "grad_norm": 1.0433952808380127, "learning_rate": 0.00019992531798354452, "loss": 1.0958, "step": 267 }, { "epoch": 0.06220262272252524, "grad_norm": 0.8931392431259155, "learning_rate": 0.00019992475335304724, "loss": 1.1235, "step": 268 }, { "epoch": 0.06243472206104213, "grad_norm": 1.1202656030654907, "learning_rate": 0.0001999241865969505, "loss": 1.0745, "step": 269 }, { "epoch": 0.06266682139955901, "grad_norm": 0.9156639575958252, "learning_rate": 0.00019992361771526635, "loss": 1.1297, "step": 270 }, { "epoch": 0.0628989207380759, "grad_norm": 0.77488112449646, "learning_rate": 0.0001999230467080069, "loss": 1.1159, "step": 271 }, { "epoch": 0.06313102007659278, "grad_norm": 0.8726563453674316, "learning_rate": 0.00019992247357518428, "loss": 1.143, "step": 272 }, { "epoch": 0.06336311941510966, "grad_norm": 0.8080572485923767, "learning_rate": 0.0001999218983168107, "loss": 1.0154, "step": 273 }, { "epoch": 0.06359521875362656, "grad_norm": 1.0385979413986206, "learning_rate": 0.00019992132093289838, "loss": 1.1056, "step": 274 }, { "epoch": 0.06382731809214344, "grad_norm": 0.875795304775238, "learning_rate": 0.00019992074142345962, "loss": 1.1321, "step": 275 }, { "epoch": 0.06405941743066032, "grad_norm": 1.0214327573776245, "learning_rate": 
0.00019992015978850675, "loss": 1.1875, "step": 276 }, { "epoch": 0.0642915167691772, "grad_norm": 0.8299492001533508, "learning_rate": 0.00019991957602805212, "loss": 1.0961, "step": 277 }, { "epoch": 0.0645236161076941, "grad_norm": 1.015297532081604, "learning_rate": 0.00019991899014210816, "loss": 1.1603, "step": 278 }, { "epoch": 0.06475571544621098, "grad_norm": 0.9751666784286499, "learning_rate": 0.00019991840213068733, "loss": 1.1324, "step": 279 }, { "epoch": 0.06498781478472786, "grad_norm": 1.0044399499893188, "learning_rate": 0.00019991781199380214, "loss": 1.1102, "step": 280 }, { "epoch": 0.06521991412324475, "grad_norm": 1.041677713394165, "learning_rate": 0.00019991721973146515, "loss": 1.0361, "step": 281 }, { "epoch": 0.06545201346176163, "grad_norm": 0.7689234614372253, "learning_rate": 0.00019991662534368893, "loss": 1.0458, "step": 282 }, { "epoch": 0.06568411280027851, "grad_norm": 0.9421409964561462, "learning_rate": 0.00019991602883048616, "loss": 1.0953, "step": 283 }, { "epoch": 0.06591621213879541, "grad_norm": 1.0836259126663208, "learning_rate": 0.00019991543019186954, "loss": 1.0163, "step": 284 }, { "epoch": 0.06614831147731229, "grad_norm": 0.8633719682693481, "learning_rate": 0.00019991482942785177, "loss": 1.1438, "step": 285 }, { "epoch": 0.06638041081582917, "grad_norm": 0.9521791934967041, "learning_rate": 0.00019991422653844562, "loss": 1.1117, "step": 286 }, { "epoch": 0.06661251015434606, "grad_norm": 0.9649552702903748, "learning_rate": 0.00019991362152366394, "loss": 1.1237, "step": 287 }, { "epoch": 0.06684460949286294, "grad_norm": 1.0226740837097168, "learning_rate": 0.0001999130143835196, "loss": 1.1058, "step": 288 }, { "epoch": 0.06707670883137982, "grad_norm": 1.0051816701889038, "learning_rate": 0.0001999124051180255, "loss": 1.1001, "step": 289 }, { "epoch": 0.06730880816989672, "grad_norm": 0.9007184505462646, "learning_rate": 0.00019991179372719458, "loss": 1.0847, "step": 290 }, { "epoch": 0.0675409075084136, 
"grad_norm": 0.983424961566925, "learning_rate": 0.00019991118021103993, "loss": 1.0998, "step": 291 }, { "epoch": 0.06777300684693048, "grad_norm": 0.9023692011833191, "learning_rate": 0.00019991056456957452, "loss": 1.1104, "step": 292 }, { "epoch": 0.06800510618544738, "grad_norm": 0.9888734817504883, "learning_rate": 0.00019990994680281149, "loss": 1.065, "step": 293 }, { "epoch": 0.06823720552396426, "grad_norm": 0.8546025156974792, "learning_rate": 0.00019990932691076393, "loss": 1.0255, "step": 294 }, { "epoch": 0.06846930486248114, "grad_norm": 0.847175657749176, "learning_rate": 0.00019990870489344507, "loss": 1.0758, "step": 295 }, { "epoch": 0.06870140420099803, "grad_norm": 0.7871413826942444, "learning_rate": 0.00019990808075086815, "loss": 1.1277, "step": 296 }, { "epoch": 0.06893350353951491, "grad_norm": 0.7918626070022583, "learning_rate": 0.00019990745448304642, "loss": 1.1579, "step": 297 }, { "epoch": 0.06916560287803179, "grad_norm": 0.8720512986183167, "learning_rate": 0.00019990682608999317, "loss": 1.0208, "step": 298 }, { "epoch": 0.06939770221654869, "grad_norm": 0.9261143803596497, "learning_rate": 0.00019990619557172184, "loss": 1.1809, "step": 299 }, { "epoch": 0.06962980155506557, "grad_norm": 1.0741815567016602, "learning_rate": 0.0001999055629282458, "loss": 1.1193, "step": 300 }, { "epoch": 0.06986190089358245, "grad_norm": 1.069780945777893, "learning_rate": 0.00019990492815957853, "loss": 1.0774, "step": 301 }, { "epoch": 0.07009400023209934, "grad_norm": 0.9523029327392578, "learning_rate": 0.0001999042912657335, "loss": 1.1496, "step": 302 }, { "epoch": 0.07032609957061622, "grad_norm": 0.888105571269989, "learning_rate": 0.0001999036522467243, "loss": 1.1014, "step": 303 }, { "epoch": 0.0705581989091331, "grad_norm": 0.7714315056800842, "learning_rate": 0.00019990301110256448, "loss": 1.0611, "step": 304 }, { "epoch": 0.07079029824765, "grad_norm": 1.0600584745407104, "learning_rate": 0.0001999023678332677, "loss": 1.0367, 
"step": 305 }, { "epoch": 0.07102239758616688, "grad_norm": 0.7578359246253967, "learning_rate": 0.00019990172243884767, "loss": 1.0786, "step": 306 }, { "epoch": 0.07125449692468376, "grad_norm": 0.9650562405586243, "learning_rate": 0.00019990107491931803, "loss": 1.0913, "step": 307 }, { "epoch": 0.07148659626320066, "grad_norm": 0.7320677042007446, "learning_rate": 0.0001999004252746927, "loss": 1.1001, "step": 308 }, { "epoch": 0.07171869560171754, "grad_norm": 0.7178481817245483, "learning_rate": 0.00019989977350498531, "loss": 1.0777, "step": 309 }, { "epoch": 0.07195079494023442, "grad_norm": 0.9243410229682922, "learning_rate": 0.0001998991196102099, "loss": 1.1314, "step": 310 }, { "epoch": 0.07218289427875131, "grad_norm": 0.7747706174850464, "learning_rate": 0.0001998984635903803, "loss": 1.1302, "step": 311 }, { "epoch": 0.07241499361726819, "grad_norm": 0.9819523692131042, "learning_rate": 0.00019989780544551046, "loss": 1.0798, "step": 312 }, { "epoch": 0.07264709295578507, "grad_norm": 0.8312498331069946, "learning_rate": 0.0001998971451756144, "loss": 1.0375, "step": 313 }, { "epoch": 0.07287919229430197, "grad_norm": 0.9729771614074707, "learning_rate": 0.00019989648278070617, "loss": 1.034, "step": 314 }, { "epoch": 0.07311129163281885, "grad_norm": 1.0454977750778198, "learning_rate": 0.0001998958182607998, "loss": 1.1192, "step": 315 }, { "epoch": 0.07334339097133573, "grad_norm": 0.7825678586959839, "learning_rate": 0.00019989515161590956, "loss": 1.1535, "step": 316 }, { "epoch": 0.07357549030985262, "grad_norm": 0.8148214221000671, "learning_rate": 0.0001998944828460495, "loss": 1.0893, "step": 317 }, { "epoch": 0.0738075896483695, "grad_norm": 0.9481724500656128, "learning_rate": 0.00019989381195123387, "loss": 1.114, "step": 318 }, { "epoch": 0.07403968898688638, "grad_norm": 0.9884357452392578, "learning_rate": 0.00019989313893147695, "loss": 1.0488, "step": 319 }, { "epoch": 0.07427178832540328, "grad_norm": 0.7910088896751404, 
"learning_rate": 0.0001998924637867931, "loss": 1.0774, "step": 320 }, { "epoch": 0.07450388766392016, "grad_norm": 0.9351299405097961, "learning_rate": 0.00019989178651719667, "loss": 1.1089, "step": 321 }, { "epoch": 0.07473598700243704, "grad_norm": 0.8399198055267334, "learning_rate": 0.000199891107122702, "loss": 1.1101, "step": 322 }, { "epoch": 0.07496808634095393, "grad_norm": 0.875951886177063, "learning_rate": 0.00019989042560332365, "loss": 1.0779, "step": 323 }, { "epoch": 0.07520018567947082, "grad_norm": 1.0044878721237183, "learning_rate": 0.00019988974195907602, "loss": 1.0856, "step": 324 }, { "epoch": 0.0754322850179877, "grad_norm": 1.1838111877441406, "learning_rate": 0.00019988905618997368, "loss": 1.0955, "step": 325 }, { "epoch": 0.07566438435650458, "grad_norm": 0.7841762900352478, "learning_rate": 0.00019988836829603127, "loss": 1.1273, "step": 326 }, { "epoch": 0.07589648369502147, "grad_norm": 1.2381926774978638, "learning_rate": 0.0001998876782772633, "loss": 1.0913, "step": 327 }, { "epoch": 0.07612858303353835, "grad_norm": 0.7921507954597473, "learning_rate": 0.0001998869861336846, "loss": 1.1366, "step": 328 }, { "epoch": 0.07636068237205523, "grad_norm": 1.0130703449249268, "learning_rate": 0.0001998862918653098, "loss": 1.105, "step": 329 }, { "epoch": 0.07659278171057213, "grad_norm": 0.8626142740249634, "learning_rate": 0.0001998855954721537, "loss": 1.0661, "step": 330 }, { "epoch": 0.07682488104908901, "grad_norm": 1.1977587938308716, "learning_rate": 0.0001998848969542311, "loss": 1.0798, "step": 331 }, { "epoch": 0.07705698038760589, "grad_norm": 0.8752743005752563, "learning_rate": 0.00019988419631155683, "loss": 1.0306, "step": 332 }, { "epoch": 0.07728907972612278, "grad_norm": 1.1087294816970825, "learning_rate": 0.00019988349354414586, "loss": 1.0671, "step": 333 }, { "epoch": 0.07752117906463966, "grad_norm": 0.9223496317863464, "learning_rate": 0.0001998827886520131, "loss": 1.1012, "step": 334 }, { "epoch": 
0.07775327840315654, "grad_norm": 1.157308578491211, "learning_rate": 0.00019988208163517358, "loss": 1.124, "step": 335 }, { "epoch": 0.07798537774167344, "grad_norm": 0.8575731515884399, "learning_rate": 0.00019988137249364227, "loss": 1.0899, "step": 336 }, { "epoch": 0.07821747708019032, "grad_norm": 1.0061554908752441, "learning_rate": 0.0001998806612274343, "loss": 1.0718, "step": 337 }, { "epoch": 0.0784495764187072, "grad_norm": 0.9622625112533569, "learning_rate": 0.0001998799478365648, "loss": 1.0658, "step": 338 }, { "epoch": 0.0786816757572241, "grad_norm": 0.7512287497520447, "learning_rate": 0.00019987923232104896, "loss": 1.1, "step": 339 }, { "epoch": 0.07891377509574098, "grad_norm": 0.900329053401947, "learning_rate": 0.00019987851468090194, "loss": 1.0983, "step": 340 }, { "epoch": 0.07914587443425786, "grad_norm": 0.8396482467651367, "learning_rate": 0.00019987779491613907, "loss": 1.1145, "step": 341 }, { "epoch": 0.07937797377277475, "grad_norm": 0.9162524938583374, "learning_rate": 0.00019987707302677562, "loss": 1.0819, "step": 342 }, { "epoch": 0.07961007311129163, "grad_norm": 0.812610387802124, "learning_rate": 0.000199876349012827, "loss": 1.1357, "step": 343 }, { "epoch": 0.07984217244980851, "grad_norm": 0.9251707792282104, "learning_rate": 0.00019987562287430855, "loss": 1.0337, "step": 344 }, { "epoch": 0.08007427178832541, "grad_norm": 0.7728751301765442, "learning_rate": 0.00019987489461123574, "loss": 1.1394, "step": 345 }, { "epoch": 0.08030637112684229, "grad_norm": 0.9750840067863464, "learning_rate": 0.00019987416422362407, "loss": 1.1319, "step": 346 }, { "epoch": 0.08053847046535917, "grad_norm": 0.768337607383728, "learning_rate": 0.00019987343171148906, "loss": 1.0523, "step": 347 }, { "epoch": 0.08077056980387606, "grad_norm": 0.9472404718399048, "learning_rate": 0.0001998726970748463, "loss": 1.1062, "step": 348 }, { "epoch": 0.08100266914239294, "grad_norm": 0.8234665989875793, "learning_rate": 0.0001998719603137114, 
"loss": 1.0147, "step": 349 }, { "epoch": 0.08123476848090982, "grad_norm": 1.0279309749603271, "learning_rate": 0.0001998712214281001, "loss": 1.0829, "step": 350 }, { "epoch": 0.08146686781942672, "grad_norm": 0.9161117076873779, "learning_rate": 0.00019987048041802803, "loss": 1.0351, "step": 351 }, { "epoch": 0.0816989671579436, "grad_norm": 0.8823438286781311, "learning_rate": 0.000199869737283511, "loss": 1.0658, "step": 352 }, { "epoch": 0.08193106649646048, "grad_norm": 0.7851248979568481, "learning_rate": 0.0001998689920245648, "loss": 1.0437, "step": 353 }, { "epoch": 0.08216316583497738, "grad_norm": 0.8633283972740173, "learning_rate": 0.00019986824464120532, "loss": 1.1365, "step": 354 }, { "epoch": 0.08239526517349426, "grad_norm": 0.7831545472145081, "learning_rate": 0.00019986749513344843, "loss": 1.093, "step": 355 }, { "epoch": 0.08262736451201114, "grad_norm": 0.7985716462135315, "learning_rate": 0.00019986674350131004, "loss": 1.1344, "step": 356 }, { "epoch": 0.08285946385052803, "grad_norm": 0.8980196118354797, "learning_rate": 0.0001998659897448062, "loss": 1.1431, "step": 357 }, { "epoch": 0.08309156318904491, "grad_norm": 0.84169602394104, "learning_rate": 0.00019986523386395294, "loss": 1.0389, "step": 358 }, { "epoch": 0.08332366252756179, "grad_norm": 0.9104847311973572, "learning_rate": 0.00019986447585876627, "loss": 1.0608, "step": 359 }, { "epoch": 0.08355576186607869, "grad_norm": 0.7837865352630615, "learning_rate": 0.00019986371572926234, "loss": 1.0523, "step": 360 }, { "epoch": 0.08378786120459557, "grad_norm": 0.7863414883613586, "learning_rate": 0.0001998629534754574, "loss": 1.0007, "step": 361 }, { "epoch": 0.08401996054311245, "grad_norm": 1.1823731660842896, "learning_rate": 0.00019986218909736757, "loss": 1.0897, "step": 362 }, { "epoch": 0.08425205988162934, "grad_norm": 0.9312179684638977, "learning_rate": 0.00019986142259500916, "loss": 1.1254, "step": 363 }, { "epoch": 0.08448415922014622, "grad_norm": 
0.8568184971809387, "learning_rate": 0.00019986065396839842, "loss": 1.1636, "step": 364 }, { "epoch": 0.0847162585586631, "grad_norm": 0.9058393239974976, "learning_rate": 0.00019985988321755181, "loss": 1.0921, "step": 365 }, { "epoch": 0.08494835789718, "grad_norm": 0.876783549785614, "learning_rate": 0.00019985911034248562, "loss": 1.0986, "step": 366 }, { "epoch": 0.08518045723569688, "grad_norm": 1.2825336456298828, "learning_rate": 0.00019985833534321633, "loss": 1.0376, "step": 367 }, { "epoch": 0.08541255657421376, "grad_norm": 0.8188105821609497, "learning_rate": 0.00019985755821976038, "loss": 1.1298, "step": 368 }, { "epoch": 0.08564465591273066, "grad_norm": 0.9878905415534973, "learning_rate": 0.0001998567789721344, "loss": 1.0403, "step": 369 }, { "epoch": 0.08587675525124754, "grad_norm": 0.971174955368042, "learning_rate": 0.00019985599760035487, "loss": 1.0586, "step": 370 }, { "epoch": 0.08610885458976442, "grad_norm": 0.9827623963356018, "learning_rate": 0.00019985521410443845, "loss": 1.1211, "step": 371 }, { "epoch": 0.08634095392828131, "grad_norm": 1.051499366760254, "learning_rate": 0.00019985442848440184, "loss": 1.0266, "step": 372 }, { "epoch": 0.08657305326679819, "grad_norm": 1.1268165111541748, "learning_rate": 0.00019985364074026168, "loss": 1.0343, "step": 373 }, { "epoch": 0.08680515260531507, "grad_norm": 0.9337987303733826, "learning_rate": 0.00019985285087203481, "loss": 1.0685, "step": 374 }, { "epoch": 0.08703725194383197, "grad_norm": 0.9367212653160095, "learning_rate": 0.00019985205887973796, "loss": 1.0273, "step": 375 }, { "epoch": 0.08726935128234885, "grad_norm": 0.9873326420783997, "learning_rate": 0.000199851264763388, "loss": 1.0629, "step": 376 }, { "epoch": 0.08750145062086573, "grad_norm": 0.7622995972633362, "learning_rate": 0.00019985046852300181, "loss": 1.0502, "step": 377 }, { "epoch": 0.08773354995938261, "grad_norm": 0.9594647288322449, "learning_rate": 0.00019984967015859636, "loss": 1.0405, "step": 378 }, 
{ "epoch": 0.0879656492978995, "grad_norm": 0.804670512676239, "learning_rate": 0.00019984886967018862, "loss": 1.0284, "step": 379 }, { "epoch": 0.08819774863641638, "grad_norm": 1.0320247411727905, "learning_rate": 0.00019984806705779562, "loss": 1.0647, "step": 380 }, { "epoch": 0.08842984797493326, "grad_norm": 0.8569663166999817, "learning_rate": 0.00019984726232143442, "loss": 1.07, "step": 381 }, { "epoch": 0.08866194731345016, "grad_norm": 0.9463668465614319, "learning_rate": 0.00019984645546112216, "loss": 1.0617, "step": 382 }, { "epoch": 0.08889404665196704, "grad_norm": 0.8338714838027954, "learning_rate": 0.00019984564647687595, "loss": 1.0887, "step": 383 }, { "epoch": 0.08912614599048392, "grad_norm": 0.8550117611885071, "learning_rate": 0.0001998448353687131, "loss": 1.0531, "step": 384 }, { "epoch": 0.08935824532900082, "grad_norm": 1.1526165008544922, "learning_rate": 0.00019984402213665077, "loss": 1.0491, "step": 385 }, { "epoch": 0.0895903446675177, "grad_norm": 0.8583720922470093, "learning_rate": 0.0001998432067807063, "loss": 1.0327, "step": 386 }, { "epoch": 0.08982244400603458, "grad_norm": 0.8606504797935486, "learning_rate": 0.000199842389300897, "loss": 1.1064, "step": 387 }, { "epoch": 0.09005454334455147, "grad_norm": 0.8435392379760742, "learning_rate": 0.0001998415696972403, "loss": 1.0241, "step": 388 }, { "epoch": 0.09028664268306835, "grad_norm": 0.9908517003059387, "learning_rate": 0.00019984074796975364, "loss": 1.0158, "step": 389 }, { "epoch": 0.09051874202158523, "grad_norm": 0.8418910503387451, "learning_rate": 0.00019983992411845444, "loss": 1.0712, "step": 390 }, { "epoch": 0.09075084136010213, "grad_norm": 0.702828049659729, "learning_rate": 0.0001998390981433603, "loss": 1.0775, "step": 391 }, { "epoch": 0.09098294069861901, "grad_norm": 0.7957027554512024, "learning_rate": 0.00019983827004448873, "loss": 1.0957, "step": 392 }, { "epoch": 0.09121504003713589, "grad_norm": 0.8195531964302063, "learning_rate": 
0.00019983743982185742, "loss": 1.0664, "step": 393 }, { "epoch": 0.09144713937565278, "grad_norm": 0.8747407793998718, "learning_rate": 0.00019983660747548393, "loss": 1.0663, "step": 394 }, { "epoch": 0.09167923871416966, "grad_norm": 0.8397645950317383, "learning_rate": 0.00019983577300538603, "loss": 1.0517, "step": 395 }, { "epoch": 0.09191133805268654, "grad_norm": 0.9320644736289978, "learning_rate": 0.00019983493641158147, "loss": 1.035, "step": 396 }, { "epoch": 0.09214343739120344, "grad_norm": 0.7503117322921753, "learning_rate": 0.000199834097694088, "loss": 1.0824, "step": 397 }, { "epoch": 0.09237553672972032, "grad_norm": 0.8999665975570679, "learning_rate": 0.00019983325685292354, "loss": 1.1093, "step": 398 }, { "epoch": 0.0926076360682372, "grad_norm": 0.8580470681190491, "learning_rate": 0.00019983241388810588, "loss": 1.0237, "step": 399 }, { "epoch": 0.0928397354067541, "grad_norm": 0.9881505370140076, "learning_rate": 0.00019983156879965303, "loss": 1.0305, "step": 400 }, { "epoch": 0.09307183474527098, "grad_norm": 0.8159308433532715, "learning_rate": 0.00019983072158758294, "loss": 1.0454, "step": 401 }, { "epoch": 0.09330393408378786, "grad_norm": 0.9615257382392883, "learning_rate": 0.00019982987225191363, "loss": 1.1241, "step": 402 }, { "epoch": 0.09353603342230475, "grad_norm": 0.9637776017189026, "learning_rate": 0.0001998290207926632, "loss": 1.0607, "step": 403 }, { "epoch": 0.09376813276082163, "grad_norm": 0.8008259534835815, "learning_rate": 0.00019982816720984968, "loss": 0.9958, "step": 404 }, { "epoch": 0.09400023209933851, "grad_norm": 1.0477102994918823, "learning_rate": 0.0001998273115034913, "loss": 1.056, "step": 405 }, { "epoch": 0.09423233143785541, "grad_norm": 0.8229115605354309, "learning_rate": 0.00019982645367360623, "loss": 1.0359, "step": 406 }, { "epoch": 0.09446443077637229, "grad_norm": 0.909223735332489, "learning_rate": 0.00019982559372021272, "loss": 1.0539, "step": 407 }, { "epoch": 0.09469653011488917, 
"grad_norm": 0.8627819418907166, "learning_rate": 0.0001998247316433291, "loss": 1.1054, "step": 408 }, { "epoch": 0.09492862945340606, "grad_norm": 0.6753279566764832, "learning_rate": 0.00019982386744297367, "loss": 1.0653, "step": 409 }, { "epoch": 0.09516072879192294, "grad_norm": 1.0438196659088135, "learning_rate": 0.0001998230011191648, "loss": 1.1184, "step": 410 }, { "epoch": 0.09539282813043982, "grad_norm": 0.8014771342277527, "learning_rate": 0.00019982213267192095, "loss": 1.1164, "step": 411 }, { "epoch": 0.09562492746895672, "grad_norm": 1.1247109174728394, "learning_rate": 0.0001998212621012606, "loss": 1.0765, "step": 412 }, { "epoch": 0.0958570268074736, "grad_norm": 1.100355625152588, "learning_rate": 0.00019982038940720225, "loss": 1.1078, "step": 413 }, { "epoch": 0.09608912614599048, "grad_norm": 1.0231127738952637, "learning_rate": 0.00019981951458976447, "loss": 1.1127, "step": 414 }, { "epoch": 0.09632122548450738, "grad_norm": 1.0545341968536377, "learning_rate": 0.00019981863764896585, "loss": 1.0445, "step": 415 }, { "epoch": 0.09655332482302426, "grad_norm": 0.9659501910209656, "learning_rate": 0.00019981775858482506, "loss": 1.0565, "step": 416 }, { "epoch": 0.09678542416154114, "grad_norm": 0.9455957412719727, "learning_rate": 0.0001998168773973608, "loss": 1.043, "step": 417 }, { "epoch": 0.09701752350005803, "grad_norm": 0.8940414190292358, "learning_rate": 0.00019981599408659185, "loss": 1.1264, "step": 418 }, { "epoch": 0.09724962283857491, "grad_norm": 0.8081499338150024, "learning_rate": 0.00019981510865253692, "loss": 1.0263, "step": 419 }, { "epoch": 0.09748172217709179, "grad_norm": 0.9429773092269897, "learning_rate": 0.00019981422109521493, "loss": 1.0447, "step": 420 }, { "epoch": 0.09771382151560869, "grad_norm": 0.8503597974777222, "learning_rate": 0.0001998133314146447, "loss": 1.094, "step": 421 }, { "epoch": 0.09794592085412557, "grad_norm": 0.8415423035621643, "learning_rate": 0.00019981243961084515, "loss": 1.0538, 
"step": 422 }, { "epoch": 0.09817802019264245, "grad_norm": 0.6833095550537109, "learning_rate": 0.0001998115456838353, "loss": 1.111, "step": 423 }, { "epoch": 0.09841011953115934, "grad_norm": 0.9266495704650879, "learning_rate": 0.00019981064963363413, "loss": 1.0523, "step": 424 }, { "epoch": 0.09864221886967622, "grad_norm": 0.7533798217773438, "learning_rate": 0.00019980975146026073, "loss": 1.0612, "step": 425 }, { "epoch": 0.0988743182081931, "grad_norm": 1.027948021888733, "learning_rate": 0.00019980885116373416, "loss": 1.0405, "step": 426 }, { "epoch": 0.09910641754670999, "grad_norm": 0.7134813070297241, "learning_rate": 0.00019980794874407363, "loss": 1.0893, "step": 427 }, { "epoch": 0.09933851688522688, "grad_norm": 0.9721697568893433, "learning_rate": 0.0001998070442012983, "loss": 1.033, "step": 428 }, { "epoch": 0.09957061622374376, "grad_norm": 0.8393346071243286, "learning_rate": 0.00019980613753542737, "loss": 1.076, "step": 429 }, { "epoch": 0.09980271556226064, "grad_norm": 0.9879680275917053, "learning_rate": 0.00019980522874648023, "loss": 1.1245, "step": 430 }, { "epoch": 0.10003481490077754, "grad_norm": 0.7959268093109131, "learning_rate": 0.00019980431783447612, "loss": 1.1097, "step": 431 }, { "epoch": 0.10026691423929442, "grad_norm": 0.878797173500061, "learning_rate": 0.00019980340479943448, "loss": 1.1481, "step": 432 }, { "epoch": 0.1004990135778113, "grad_norm": 0.7361109256744385, "learning_rate": 0.00019980248964137468, "loss": 1.0949, "step": 433 }, { "epoch": 0.10073111291632819, "grad_norm": 0.788895308971405, "learning_rate": 0.00019980157236031625, "loss": 1.0749, "step": 434 }, { "epoch": 0.10096321225484507, "grad_norm": 0.7798685431480408, "learning_rate": 0.00019980065295627862, "loss": 1.0663, "step": 435 }, { "epoch": 0.10119531159336195, "grad_norm": 0.8533051609992981, "learning_rate": 0.0001997997314292814, "loss": 1.0562, "step": 436 }, { "epoch": 0.10142741093187885, "grad_norm": 0.8581674695014954, 
"learning_rate": 0.0001997988077793442, "loss": 1.0323, "step": 437 }, { "epoch": 0.10165951027039573, "grad_norm": 0.9123759865760803, "learning_rate": 0.0001997978820064866, "loss": 1.0733, "step": 438 }, { "epoch": 0.10189160960891261, "grad_norm": 0.828527569770813, "learning_rate": 0.00019979695411072844, "loss": 1.0709, "step": 439 }, { "epoch": 0.1021237089474295, "grad_norm": 0.7270798087120056, "learning_rate": 0.0001997960240920893, "loss": 1.0874, "step": 440 }, { "epoch": 0.10235580828594638, "grad_norm": 0.8241468667984009, "learning_rate": 0.000199795091950589, "loss": 1.0075, "step": 441 }, { "epoch": 0.10258790762446326, "grad_norm": 0.8153014779090881, "learning_rate": 0.00019979415768624743, "loss": 1.1094, "step": 442 }, { "epoch": 0.10282000696298016, "grad_norm": 0.8595038652420044, "learning_rate": 0.00019979322129908445, "loss": 1.1032, "step": 443 }, { "epoch": 0.10305210630149704, "grad_norm": 0.8262979984283447, "learning_rate": 0.00019979228278911993, "loss": 1.0332, "step": 444 }, { "epoch": 0.10328420564001392, "grad_norm": 0.8706637024879456, "learning_rate": 0.00019979134215637387, "loss": 1.1019, "step": 445 }, { "epoch": 0.10351630497853082, "grad_norm": 0.9604765176773071, "learning_rate": 0.00019979039940086626, "loss": 1.0411, "step": 446 }, { "epoch": 0.1037484043170477, "grad_norm": 0.900208055973053, "learning_rate": 0.00019978945452261718, "loss": 1.0626, "step": 447 }, { "epoch": 0.10398050365556458, "grad_norm": 0.921674370765686, "learning_rate": 0.0001997885075216467, "loss": 1.071, "step": 448 }, { "epoch": 0.10421260299408147, "grad_norm": 0.7033634185791016, "learning_rate": 0.00019978755839797496, "loss": 0.9994, "step": 449 }, { "epoch": 0.10444470233259835, "grad_norm": 0.7881147861480713, "learning_rate": 0.00019978660715162222, "loss": 1.0147, "step": 450 }, { "epoch": 0.10467680167111523, "grad_norm": 0.9307485818862915, "learning_rate": 0.0001997856537826086, "loss": 1.0758, "step": 451 }, { "epoch": 
0.10490890100963213, "grad_norm": 0.6880293488502502, "learning_rate": 0.0001997846982909545, "loss": 1.0575, "step": 452 }, { "epoch": 0.10514100034814901, "grad_norm": 0.9519034028053284, "learning_rate": 0.00019978374067668018, "loss": 1.0767, "step": 453 }, { "epoch": 0.10537309968666589, "grad_norm": 0.8484275341033936, "learning_rate": 0.00019978278093980598, "loss": 1.0394, "step": 454 }, { "epoch": 0.10560519902518278, "grad_norm": 0.9416453242301941, "learning_rate": 0.00019978181908035238, "loss": 1.0866, "step": 455 }, { "epoch": 0.10583729836369966, "grad_norm": 0.7314450144767761, "learning_rate": 0.0001997808550983398, "loss": 1.0383, "step": 456 }, { "epoch": 0.10606939770221654, "grad_norm": 0.9439957737922668, "learning_rate": 0.0001997798889937888, "loss": 1.0547, "step": 457 }, { "epoch": 0.10630149704073344, "grad_norm": 0.7313393950462341, "learning_rate": 0.00019977892076671988, "loss": 1.1297, "step": 458 }, { "epoch": 0.10653359637925032, "grad_norm": 0.8427722454071045, "learning_rate": 0.0001997779504171536, "loss": 1.0227, "step": 459 }, { "epoch": 0.1067656957177672, "grad_norm": 0.7954211235046387, "learning_rate": 0.00019977697794511072, "loss": 1.0273, "step": 460 }, { "epoch": 0.1069977950562841, "grad_norm": 0.6243712902069092, "learning_rate": 0.00019977600335061181, "loss": 1.0064, "step": 461 }, { "epoch": 0.10722989439480098, "grad_norm": 0.8314496874809265, "learning_rate": 0.00019977502663367768, "loss": 1.0769, "step": 462 }, { "epoch": 0.10746199373331786, "grad_norm": 0.7590845823287964, "learning_rate": 0.00019977404779432902, "loss": 1.027, "step": 463 }, { "epoch": 0.10769409307183475, "grad_norm": 0.7570692300796509, "learning_rate": 0.00019977306683258673, "loss": 1.0609, "step": 464 }, { "epoch": 0.10792619241035163, "grad_norm": 0.859194278717041, "learning_rate": 0.0001997720837484717, "loss": 1.0865, "step": 465 }, { "epoch": 0.10815829174886851, "grad_norm": 0.6915069818496704, "learning_rate": 
0.00019977109854200475, "loss": 1.0771, "step": 466 }, { "epoch": 0.10839039108738541, "grad_norm": 0.7773957252502441, "learning_rate": 0.00019977011121320688, "loss": 1.0438, "step": 467 }, { "epoch": 0.10862249042590229, "grad_norm": 0.7903149724006653, "learning_rate": 0.00019976912176209913, "loss": 1.045, "step": 468 }, { "epoch": 0.10885458976441917, "grad_norm": 0.9494195580482483, "learning_rate": 0.00019976813018870247, "loss": 1.0373, "step": 469 }, { "epoch": 0.10908668910293606, "grad_norm": 0.8904356956481934, "learning_rate": 0.00019976713649303806, "loss": 1.0206, "step": 470 }, { "epoch": 0.10931878844145294, "grad_norm": 0.7504216432571411, "learning_rate": 0.000199766140675127, "loss": 1.0652, "step": 471 }, { "epoch": 0.10955088777996982, "grad_norm": 0.7103638648986816, "learning_rate": 0.00019976514273499046, "loss": 1.0921, "step": 472 }, { "epoch": 0.10978298711848672, "grad_norm": 0.8586751818656921, "learning_rate": 0.00019976414267264974, "loss": 1.0235, "step": 473 }, { "epoch": 0.1100150864570036, "grad_norm": 0.741524338722229, "learning_rate": 0.0001997631404881261, "loss": 1.0466, "step": 474 }, { "epoch": 0.11024718579552048, "grad_norm": 0.7192615270614624, "learning_rate": 0.00019976213618144075, "loss": 1.0002, "step": 475 }, { "epoch": 0.11047928513403736, "grad_norm": 0.6912160515785217, "learning_rate": 0.00019976112975261516, "loss": 1.0371, "step": 476 }, { "epoch": 0.11071138447255426, "grad_norm": 0.928968608379364, "learning_rate": 0.00019976012120167071, "loss": 1.0653, "step": 477 }, { "epoch": 0.11094348381107114, "grad_norm": 0.6723461747169495, "learning_rate": 0.00019975911052862885, "loss": 1.0238, "step": 478 }, { "epoch": 0.11117558314958802, "grad_norm": 0.7536126971244812, "learning_rate": 0.00019975809773351108, "loss": 1.0684, "step": 479 }, { "epoch": 0.11140768248810491, "grad_norm": 0.7266784310340881, "learning_rate": 0.00019975708281633897, "loss": 1.0383, "step": 480 }, { "epoch": 0.11163978182662179, 
"grad_norm": 0.7533577084541321, "learning_rate": 0.00019975606577713405, "loss": 1.0439, "step": 481 }, { "epoch": 0.11187188116513867, "grad_norm": 0.866611123085022, "learning_rate": 0.000199755046615918, "loss": 1.0313, "step": 482 }, { "epoch": 0.11210398050365557, "grad_norm": 0.8491448163986206, "learning_rate": 0.0001997540253327125, "loss": 1.023, "step": 483 }, { "epoch": 0.11233607984217245, "grad_norm": 0.7805542945861816, "learning_rate": 0.00019975300192753926, "loss": 1.0317, "step": 484 }, { "epoch": 0.11256817918068933, "grad_norm": 0.8965733051300049, "learning_rate": 0.00019975197640042004, "loss": 1.0074, "step": 485 }, { "epoch": 0.11280027851920622, "grad_norm": 0.86135333776474, "learning_rate": 0.00019975094875137667, "loss": 1.0239, "step": 486 }, { "epoch": 0.1130323778577231, "grad_norm": 0.9294856786727905, "learning_rate": 0.00019974991898043102, "loss": 1.0741, "step": 487 }, { "epoch": 0.11326447719623998, "grad_norm": 0.7671636939048767, "learning_rate": 0.00019974888708760498, "loss": 1.0614, "step": 488 }, { "epoch": 0.11349657653475688, "grad_norm": 0.9263598322868347, "learning_rate": 0.00019974785307292053, "loss": 1.1056, "step": 489 }, { "epoch": 0.11372867587327376, "grad_norm": 0.9056907296180725, "learning_rate": 0.00019974681693639957, "loss": 0.9894, "step": 490 }, { "epoch": 0.11396077521179064, "grad_norm": 0.9194972515106201, "learning_rate": 0.00019974577867806428, "loss": 1.0151, "step": 491 }, { "epoch": 0.11419287455030754, "grad_norm": 0.8098393082618713, "learning_rate": 0.00019974473829793667, "loss": 1.0026, "step": 492 }, { "epoch": 0.11442497388882442, "grad_norm": 0.8670751452445984, "learning_rate": 0.00019974369579603884, "loss": 1.054, "step": 493 }, { "epoch": 0.1146570732273413, "grad_norm": 0.7589981555938721, "learning_rate": 0.00019974265117239304, "loss": 0.9766, "step": 494 }, { "epoch": 0.11488917256585819, "grad_norm": 0.753220796585083, "learning_rate": 0.00019974160442702142, "loss": 1.0994, 
"step": 495 }, { "epoch": 0.11512127190437507, "grad_norm": 0.912337601184845, "learning_rate": 0.0001997405555599463, "loss": 1.1217, "step": 496 }, { "epoch": 0.11535337124289195, "grad_norm": 0.7215189337730408, "learning_rate": 0.00019973950457119, "loss": 1.0198, "step": 497 }, { "epoch": 0.11558547058140885, "grad_norm": 1.0049920082092285, "learning_rate": 0.0001997384514607748, "loss": 1.0263, "step": 498 }, { "epoch": 0.11581756991992573, "grad_norm": 0.7916737198829651, "learning_rate": 0.0001997373962287232, "loss": 1.0401, "step": 499 }, { "epoch": 0.11604966925844261, "grad_norm": 0.8183894157409668, "learning_rate": 0.00019973633887505757, "loss": 1.0352, "step": 500 }, { "epoch": 0.1162817685969595, "grad_norm": 0.9618611335754395, "learning_rate": 0.0001997352793998004, "loss": 1.0462, "step": 501 }, { "epoch": 0.11651386793547638, "grad_norm": 0.8983595967292786, "learning_rate": 0.00019973421780297433, "loss": 1.0485, "step": 502 }, { "epoch": 0.11674596727399326, "grad_norm": 1.0975823402404785, "learning_rate": 0.00019973315408460183, "loss": 1.0312, "step": 503 }, { "epoch": 0.11697806661251016, "grad_norm": 0.6729968190193176, "learning_rate": 0.00019973208824470556, "loss": 1.0218, "step": 504 }, { "epoch": 0.11721016595102704, "grad_norm": 0.9417785406112671, "learning_rate": 0.0001997310202833082, "loss": 1.0727, "step": 505 }, { "epoch": 0.11744226528954392, "grad_norm": 0.751654326915741, "learning_rate": 0.00019972995020043247, "loss": 1.0052, "step": 506 }, { "epoch": 0.11767436462806082, "grad_norm": 0.9985359907150269, "learning_rate": 0.00019972887799610113, "loss": 1.0094, "step": 507 }, { "epoch": 0.1179064639665777, "grad_norm": 0.9009521007537842, "learning_rate": 0.00019972780367033698, "loss": 1.04, "step": 508 }, { "epoch": 0.11813856330509458, "grad_norm": 1.0276734828948975, "learning_rate": 0.00019972672722316293, "loss": 1.0499, "step": 509 }, { "epoch": 0.11837066264361147, "grad_norm": 0.6524766683578491, 
"learning_rate": 0.00019972564865460178, "loss": 1.0549, "step": 510 }, { "epoch": 0.11860276198212835, "grad_norm": 0.7956752777099609, "learning_rate": 0.00019972456796467654, "loss": 1.0764, "step": 511 }, { "epoch": 0.11883486132064523, "grad_norm": 0.7724266052246094, "learning_rate": 0.00019972348515341016, "loss": 1.0784, "step": 512 }, { "epoch": 0.11906696065916213, "grad_norm": 0.8163689374923706, "learning_rate": 0.00019972240022082574, "loss": 0.992, "step": 513 }, { "epoch": 0.11929905999767901, "grad_norm": 0.7198384404182434, "learning_rate": 0.00019972131316694627, "loss": 1.057, "step": 514 }, { "epoch": 0.11953115933619589, "grad_norm": 0.9914113283157349, "learning_rate": 0.00019972022399179494, "loss": 0.993, "step": 515 }, { "epoch": 0.11976325867471278, "grad_norm": 0.820415198802948, "learning_rate": 0.00019971913269539488, "loss": 1.0742, "step": 516 }, { "epoch": 0.11999535801322966, "grad_norm": 0.8389418721199036, "learning_rate": 0.00019971803927776934, "loss": 1.0589, "step": 517 }, { "epoch": 0.12022745735174654, "grad_norm": 0.7351484298706055, "learning_rate": 0.00019971694373894152, "loss": 1.0027, "step": 518 }, { "epoch": 0.12045955669026344, "grad_norm": 0.8111717700958252, "learning_rate": 0.0001997158460789348, "loss": 1.0048, "step": 519 }, { "epoch": 0.12069165602878032, "grad_norm": 0.7949929237365723, "learning_rate": 0.00019971474629777247, "loss": 1.0227, "step": 520 }, { "epoch": 0.1209237553672972, "grad_norm": 0.8246322870254517, "learning_rate": 0.00019971364439547794, "loss": 1.0315, "step": 521 }, { "epoch": 0.1211558547058141, "grad_norm": 0.7811806201934814, "learning_rate": 0.0001997125403720747, "loss": 1.0644, "step": 522 }, { "epoch": 0.12138795404433098, "grad_norm": 0.810202956199646, "learning_rate": 0.0001997114342275862, "loss": 1.025, "step": 523 }, { "epoch": 0.12162005338284786, "grad_norm": 0.7108595371246338, "learning_rate": 0.00019971032596203592, "loss": 1.0799, "step": 524 }, { "epoch": 
0.12185215272136475, "grad_norm": 0.68560791015625, "learning_rate": 0.00019970921557544745, "loss": 1.0393, "step": 525 }, { "epoch": 0.12208425205988163, "grad_norm": 0.8568196296691895, "learning_rate": 0.00019970810306784448, "loss": 1.0955, "step": 526 }, { "epoch": 0.12231635139839851, "grad_norm": 0.747940719127655, "learning_rate": 0.00019970698843925063, "loss": 1.0293, "step": 527 }, { "epoch": 0.12254845073691539, "grad_norm": 0.9994249939918518, "learning_rate": 0.00019970587168968961, "loss": 1.0211, "step": 528 }, { "epoch": 0.12278055007543229, "grad_norm": 0.72423255443573, "learning_rate": 0.0001997047528191852, "loss": 1.0724, "step": 529 }, { "epoch": 0.12301264941394917, "grad_norm": 1.0344938039779663, "learning_rate": 0.00019970363182776114, "loss": 0.9632, "step": 530 }, { "epoch": 0.12324474875246605, "grad_norm": 0.8274568319320679, "learning_rate": 0.00019970250871544133, "loss": 1.0784, "step": 531 }, { "epoch": 0.12347684809098294, "grad_norm": 1.0104786157608032, "learning_rate": 0.00019970138348224964, "loss": 0.9882, "step": 532 }, { "epoch": 0.12370894742949982, "grad_norm": 0.8115677833557129, "learning_rate": 0.00019970025612821, "loss": 1.0268, "step": 533 }, { "epoch": 0.1239410467680167, "grad_norm": 0.7791486382484436, "learning_rate": 0.00019969912665334643, "loss": 1.0192, "step": 534 }, { "epoch": 0.1241731461065336, "grad_norm": 0.8426869511604309, "learning_rate": 0.00019969799505768293, "loss": 1.0462, "step": 535 }, { "epoch": 0.12440524544505048, "grad_norm": 0.6992729902267456, "learning_rate": 0.00019969686134124358, "loss": 1.0464, "step": 536 }, { "epoch": 0.12463734478356736, "grad_norm": 0.8115811944007874, "learning_rate": 0.00019969572550405246, "loss": 0.9713, "step": 537 }, { "epoch": 0.12486944412208426, "grad_norm": 0.8914007544517517, "learning_rate": 0.00019969458754613375, "loss": 1.0111, "step": 538 }, { "epoch": 0.12510154346060115, "grad_norm": 1.0643993616104126, "learning_rate": 
0.0001996934474675117, "loss": 1.0044, "step": 539 }, { "epoch": 0.12533364279911802, "grad_norm": 0.8111059665679932, "learning_rate": 0.0001996923052682105, "loss": 1.08, "step": 540 }, { "epoch": 0.1255657421376349, "grad_norm": 0.9008238315582275, "learning_rate": 0.00019969116094825448, "loss": 1.0355, "step": 541 }, { "epoch": 0.1257978414761518, "grad_norm": 0.7825167775154114, "learning_rate": 0.00019969001450766794, "loss": 1.0507, "step": 542 }, { "epoch": 0.12602994081466867, "grad_norm": 0.7800542116165161, "learning_rate": 0.00019968886594647534, "loss": 1.0177, "step": 543 }, { "epoch": 0.12626204015318557, "grad_norm": 0.663352906703949, "learning_rate": 0.00019968771526470108, "loss": 1.0565, "step": 544 }, { "epoch": 0.12649413949170246, "grad_norm": 0.8352718949317932, "learning_rate": 0.00019968656246236962, "loss": 1.1133, "step": 545 }, { "epoch": 0.12672623883021933, "grad_norm": 0.7332931160926819, "learning_rate": 0.0001996854075395055, "loss": 0.9989, "step": 546 }, { "epoch": 0.12695833816873622, "grad_norm": 0.6717795133590698, "learning_rate": 0.00019968425049613327, "loss": 1.0443, "step": 547 }, { "epoch": 0.12719043750725312, "grad_norm": 1.0395334959030151, "learning_rate": 0.00019968309133227755, "loss": 1.1153, "step": 548 }, { "epoch": 0.12742253684576998, "grad_norm": 0.8919034600257874, "learning_rate": 0.00019968193004796302, "loss": 1.0417, "step": 549 }, { "epoch": 0.12765463618428688, "grad_norm": 0.8023985028266907, "learning_rate": 0.00019968076664321436, "loss": 1.0208, "step": 550 }, { "epoch": 0.12788673552280377, "grad_norm": 0.8431861400604248, "learning_rate": 0.00019967960111805632, "loss": 1.0642, "step": 551 }, { "epoch": 0.12811883486132064, "grad_norm": 0.9393913149833679, "learning_rate": 0.0001996784334725137, "loss": 1.0628, "step": 552 }, { "epoch": 0.12835093419983754, "grad_norm": 0.762643039226532, "learning_rate": 0.00019967726370661134, "loss": 1.0361, "step": 553 }, { "epoch": 0.1285830335383544, 
"grad_norm": 0.7927071452140808, "learning_rate": 0.0001996760918203741, "loss": 1.0381, "step": 554 }, { "epoch": 0.1288151328768713, "grad_norm": 0.650922954082489, "learning_rate": 0.0001996749178138269, "loss": 1.0554, "step": 555 }, { "epoch": 0.1290472322153882, "grad_norm": 0.7996553182601929, "learning_rate": 0.0001996737416869948, "loss": 1.046, "step": 556 }, { "epoch": 0.12927933155390506, "grad_norm": 0.752427875995636, "learning_rate": 0.00019967256343990272, "loss": 1.0351, "step": 557 }, { "epoch": 0.12951143089242195, "grad_norm": 0.7776615023612976, "learning_rate": 0.00019967138307257578, "loss": 1.0377, "step": 558 }, { "epoch": 0.12974353023093885, "grad_norm": 0.9636134505271912, "learning_rate": 0.00019967020058503905, "loss": 0.9234, "step": 559 }, { "epoch": 0.12997562956945571, "grad_norm": 0.9309984445571899, "learning_rate": 0.0001996690159773177, "loss": 1.0302, "step": 560 }, { "epoch": 0.1302077289079726, "grad_norm": 1.1272213459014893, "learning_rate": 0.00019966782924943694, "loss": 1.0697, "step": 561 }, { "epoch": 0.1304398282464895, "grad_norm": 0.7249235510826111, "learning_rate": 0.00019966664040142203, "loss": 1.017, "step": 562 }, { "epoch": 0.13067192758500637, "grad_norm": 0.8726166486740112, "learning_rate": 0.00019966544943329822, "loss": 1.0402, "step": 563 }, { "epoch": 0.13090402692352326, "grad_norm": 0.8584849238395691, "learning_rate": 0.00019966425634509088, "loss": 1.0314, "step": 564 }, { "epoch": 0.13113612626204016, "grad_norm": 0.7811364531517029, "learning_rate": 0.0001996630611368253, "loss": 1.0376, "step": 565 }, { "epoch": 0.13136822560055703, "grad_norm": 0.8528879284858704, "learning_rate": 0.00019966186380852705, "loss": 1.0082, "step": 566 }, { "epoch": 0.13160032493907392, "grad_norm": 0.7160260081291199, "learning_rate": 0.0001996606643602215, "loss": 1.0884, "step": 567 }, { "epoch": 0.13183242427759082, "grad_norm": 0.9725378155708313, "learning_rate": 0.0001996594627919342, "loss": 1.0074, 
"step": 568 }, { "epoch": 0.13206452361610768, "grad_norm": 0.9064332246780396, "learning_rate": 0.00019965825910369067, "loss": 1.0687, "step": 569 }, { "epoch": 0.13229662295462458, "grad_norm": 0.9751808643341064, "learning_rate": 0.00019965705329551655, "loss": 1.0103, "step": 570 }, { "epoch": 0.13252872229314147, "grad_norm": 0.7790144681930542, "learning_rate": 0.00019965584536743748, "loss": 1.0633, "step": 571 }, { "epoch": 0.13276082163165834, "grad_norm": 0.8400565981864929, "learning_rate": 0.0001996546353194792, "loss": 1.0234, "step": 572 }, { "epoch": 0.13299292097017523, "grad_norm": 0.9255087375640869, "learning_rate": 0.00019965342315166737, "loss": 1.0015, "step": 573 }, { "epoch": 0.13322502030869213, "grad_norm": 0.6984609365463257, "learning_rate": 0.00019965220886402783, "loss": 0.9747, "step": 574 }, { "epoch": 0.133457119647209, "grad_norm": 0.8719913959503174, "learning_rate": 0.00019965099245658637, "loss": 1.0276, "step": 575 }, { "epoch": 0.1336892189857259, "grad_norm": 0.7916336059570312, "learning_rate": 0.00019964977392936893, "loss": 1.0058, "step": 576 }, { "epoch": 0.13392131832424278, "grad_norm": 0.8974113464355469, "learning_rate": 0.00019964855328240134, "loss": 1.034, "step": 577 }, { "epoch": 0.13415341766275965, "grad_norm": 0.8398187160491943, "learning_rate": 0.00019964733051570964, "loss": 0.9834, "step": 578 }, { "epoch": 0.13438551700127654, "grad_norm": 0.9548221230506897, "learning_rate": 0.00019964610562931982, "loss": 0.9803, "step": 579 }, { "epoch": 0.13461761633979344, "grad_norm": 3.2445390224456787, "learning_rate": 0.0001996448786232579, "loss": 1.1462, "step": 580 }, { "epoch": 0.1348497156783103, "grad_norm": 1.1247425079345703, "learning_rate": 0.0001996436494975501, "loss": 1.0649, "step": 581 }, { "epoch": 0.1350818150168272, "grad_norm": 0.8128942251205444, "learning_rate": 0.0001996424182522224, "loss": 1.0642, "step": 582 }, { "epoch": 0.1353139143553441, "grad_norm": 0.9496175646781921, 
"learning_rate": 0.00019964118488730107, "loss": 1.0485, "step": 583 }, { "epoch": 0.13554601369386096, "grad_norm": 0.8619377613067627, "learning_rate": 0.00019963994940281235, "loss": 1.0582, "step": 584 }, { "epoch": 0.13577811303237786, "grad_norm": 1.0637335777282715, "learning_rate": 0.00019963871179878254, "loss": 1.038, "step": 585 }, { "epoch": 0.13601021237089475, "grad_norm": 0.9543029069900513, "learning_rate": 0.00019963747207523796, "loss": 1.0213, "step": 586 }, { "epoch": 0.13624231170941162, "grad_norm": 0.8079410195350647, "learning_rate": 0.00019963623023220493, "loss": 1.0732, "step": 587 }, { "epoch": 0.1364744110479285, "grad_norm": 1.0276994705200195, "learning_rate": 0.0001996349862697099, "loss": 1.0812, "step": 588 }, { "epoch": 0.1367065103864454, "grad_norm": 0.8158445358276367, "learning_rate": 0.00019963374018777935, "loss": 0.951, "step": 589 }, { "epoch": 0.13693860972496227, "grad_norm": 1.2105224132537842, "learning_rate": 0.00019963249198643975, "loss": 1.0508, "step": 590 }, { "epoch": 0.13717070906347917, "grad_norm": 1.1305785179138184, "learning_rate": 0.00019963124166571768, "loss": 1.0639, "step": 591 }, { "epoch": 0.13740280840199606, "grad_norm": 0.8962584733963013, "learning_rate": 0.00019962998922563977, "loss": 1.1189, "step": 592 }, { "epoch": 0.13763490774051293, "grad_norm": 0.9282813668251038, "learning_rate": 0.0001996287346662326, "loss": 1.0292, "step": 593 }, { "epoch": 0.13786700707902982, "grad_norm": 0.8880316019058228, "learning_rate": 0.00019962747798752282, "loss": 1.0914, "step": 594 }, { "epoch": 0.13809910641754672, "grad_norm": 1.0125417709350586, "learning_rate": 0.0001996262191895373, "loss": 1.0019, "step": 595 }, { "epoch": 0.13833120575606359, "grad_norm": 0.9111359119415283, "learning_rate": 0.00019962495827230265, "loss": 0.972, "step": 596 }, { "epoch": 0.13856330509458048, "grad_norm": 0.8676003217697144, "learning_rate": 0.00019962369523584585, "loss": 1.1151, "step": 597 }, { "epoch": 
0.13879540443309737, "grad_norm": 0.8413462042808533, "learning_rate": 0.00019962243008019365, "loss": 1.0346, "step": 598 }, { "epoch": 0.13902750377161424, "grad_norm": 0.834363579750061, "learning_rate": 0.00019962116280537303, "loss": 1.0723, "step": 599 }, { "epoch": 0.13925960311013114, "grad_norm": 0.769131064414978, "learning_rate": 0.00019961989341141093, "loss": 1.0153, "step": 600 }, { "epoch": 0.13949170244864803, "grad_norm": 0.8790700435638428, "learning_rate": 0.00019961862189833433, "loss": 0.9923, "step": 601 }, { "epoch": 0.1397238017871649, "grad_norm": 0.9028897881507874, "learning_rate": 0.00019961734826617035, "loss": 1.065, "step": 602 }, { "epoch": 0.1399559011256818, "grad_norm": 0.8318933248519897, "learning_rate": 0.00019961607251494593, "loss": 1.0097, "step": 603 }, { "epoch": 0.1401880004641987, "grad_norm": 0.786912739276886, "learning_rate": 0.00019961479464468838, "loss": 1.0298, "step": 604 }, { "epoch": 0.14042009980271555, "grad_norm": 0.9217137098312378, "learning_rate": 0.0001996135146554248, "loss": 1.0324, "step": 605 }, { "epoch": 0.14065219914123245, "grad_norm": 0.7953110337257385, "learning_rate": 0.0001996122325471824, "loss": 0.9879, "step": 606 }, { "epoch": 0.14088429847974934, "grad_norm": 0.7856104373931885, "learning_rate": 0.00019961094831998853, "loss": 1.0561, "step": 607 }, { "epoch": 0.1411163978182662, "grad_norm": 0.7287328243255615, "learning_rate": 0.00019960966197387042, "loss": 1.0136, "step": 608 }, { "epoch": 0.1413484971567831, "grad_norm": 0.7670353055000305, "learning_rate": 0.00019960837350885545, "loss": 1.0537, "step": 609 }, { "epoch": 0.1415805964953, "grad_norm": 0.8179692625999451, "learning_rate": 0.0001996070829249711, "loss": 1.0168, "step": 610 }, { "epoch": 0.14181269583381687, "grad_norm": 0.7314320206642151, "learning_rate": 0.00019960579022224476, "loss": 1.0286, "step": 611 }, { "epoch": 0.14204479517233376, "grad_norm": 0.7815575003623962, "learning_rate": 0.00019960449540070392, 
"loss": 1.0329, "step": 612 }, { "epoch": 0.14227689451085065, "grad_norm": 0.6073306798934937, "learning_rate": 0.00019960319846037616, "loss": 1.0252, "step": 613 }, { "epoch": 0.14250899384936752, "grad_norm": 0.7036458253860474, "learning_rate": 0.00019960189940128906, "loss": 1.0349, "step": 614 }, { "epoch": 0.14274109318788442, "grad_norm": 0.6957321166992188, "learning_rate": 0.00019960059822347025, "loss": 0.9941, "step": 615 }, { "epoch": 0.1429731925264013, "grad_norm": 0.6608189940452576, "learning_rate": 0.00019959929492694735, "loss": 1.0285, "step": 616 }, { "epoch": 0.14320529186491818, "grad_norm": 0.6584436893463135, "learning_rate": 0.0001995979895117482, "loss": 1.0006, "step": 617 }, { "epoch": 0.14343739120343507, "grad_norm": 0.7804873585700989, "learning_rate": 0.0001995966819779005, "loss": 1.0499, "step": 618 }, { "epoch": 0.14366949054195197, "grad_norm": 0.670741617679596, "learning_rate": 0.00019959537232543207, "loss": 0.9996, "step": 619 }, { "epoch": 0.14390158988046883, "grad_norm": 0.7694559693336487, "learning_rate": 0.00019959406055437072, "loss": 1.0705, "step": 620 }, { "epoch": 0.14413368921898573, "grad_norm": 0.7293174266815186, "learning_rate": 0.00019959274666474447, "loss": 0.965, "step": 621 }, { "epoch": 0.14436578855750262, "grad_norm": 0.7596963047981262, "learning_rate": 0.0001995914306565812, "loss": 0.9926, "step": 622 }, { "epoch": 0.1445978878960195, "grad_norm": 0.7808328866958618, "learning_rate": 0.00019959011252990887, "loss": 1.0062, "step": 623 }, { "epoch": 0.14482998723453638, "grad_norm": 0.7159825563430786, "learning_rate": 0.00019958879228475557, "loss": 1.0119, "step": 624 }, { "epoch": 0.14506208657305328, "grad_norm": 0.79645836353302, "learning_rate": 0.0001995874699211494, "loss": 1.0413, "step": 625 }, { "epoch": 0.14529418591157015, "grad_norm": 0.7398644685745239, "learning_rate": 0.00019958614543911843, "loss": 0.9791, "step": 626 }, { "epoch": 0.14552628525008704, "grad_norm": 
0.8200693130493164, "learning_rate": 0.00019958481883869085, "loss": 0.9755, "step": 627 }, { "epoch": 0.14575838458860393, "grad_norm": 0.7564805150032043, "learning_rate": 0.00019958349011989492, "loss": 0.9867, "step": 628 }, { "epoch": 0.1459904839271208, "grad_norm": 0.9714164137840271, "learning_rate": 0.00019958215928275887, "loss": 1.0305, "step": 629 }, { "epoch": 0.1462225832656377, "grad_norm": 0.7759577035903931, "learning_rate": 0.00019958082632731103, "loss": 1.0055, "step": 630 }, { "epoch": 0.1464546826041546, "grad_norm": 0.8790760636329651, "learning_rate": 0.00019957949125357975, "loss": 0.9744, "step": 631 }, { "epoch": 0.14668678194267146, "grad_norm": 0.7593209743499756, "learning_rate": 0.0001995781540615934, "loss": 1.0443, "step": 632 }, { "epoch": 0.14691888128118835, "grad_norm": 0.7465614080429077, "learning_rate": 0.00019957681475138049, "loss": 1.0446, "step": 633 }, { "epoch": 0.14715098061970525, "grad_norm": 0.8966584205627441, "learning_rate": 0.00019957547332296943, "loss": 0.9835, "step": 634 }, { "epoch": 0.1473830799582221, "grad_norm": 0.7770743370056152, "learning_rate": 0.0001995741297763888, "loss": 1.0294, "step": 635 }, { "epoch": 0.147615179296739, "grad_norm": 0.9469747543334961, "learning_rate": 0.00019957278411166716, "loss": 0.9779, "step": 636 }, { "epoch": 0.1478472786352559, "grad_norm": 0.7389424443244934, "learning_rate": 0.00019957143632883315, "loss": 1.0332, "step": 637 }, { "epoch": 0.14807937797377277, "grad_norm": 0.8307377696037292, "learning_rate": 0.00019957008642791543, "loss": 0.9814, "step": 638 }, { "epoch": 0.14831147731228966, "grad_norm": 0.6925795078277588, "learning_rate": 0.00019956873440894274, "loss": 1.0375, "step": 639 }, { "epoch": 0.14854357665080656, "grad_norm": 0.8945396542549133, "learning_rate": 0.00019956738027194384, "loss": 0.9728, "step": 640 }, { "epoch": 0.14877567598932342, "grad_norm": 0.7660860419273376, "learning_rate": 0.0001995660240169475, "loss": 1.0139, "step": 641 }, 
{ "epoch": 0.14900777532784032, "grad_norm": 0.8752306699752808, "learning_rate": 0.0001995646656439826, "loss": 0.9991, "step": 642 }, { "epoch": 0.14923987466635721, "grad_norm": 0.7751394510269165, "learning_rate": 0.000199563305153078, "loss": 1.0148, "step": 643 }, { "epoch": 0.14947197400487408, "grad_norm": 0.9648170471191406, "learning_rate": 0.00019956194254426265, "loss": 1.0458, "step": 644 }, { "epoch": 0.14970407334339098, "grad_norm": 0.8465046882629395, "learning_rate": 0.0001995605778175656, "loss": 1.0359, "step": 645 }, { "epoch": 0.14993617268190787, "grad_norm": 0.9437295198440552, "learning_rate": 0.00019955921097301578, "loss": 1.0117, "step": 646 }, { "epoch": 0.15016827202042474, "grad_norm": 0.6958244442939758, "learning_rate": 0.00019955784201064236, "loss": 1.0313, "step": 647 }, { "epoch": 0.15040037135894163, "grad_norm": 0.9571789503097534, "learning_rate": 0.00019955647093047437, "loss": 0.9816, "step": 648 }, { "epoch": 0.15063247069745853, "grad_norm": 0.7870465517044067, "learning_rate": 0.00019955509773254105, "loss": 0.9797, "step": 649 }, { "epoch": 0.1508645700359754, "grad_norm": 0.8897795677185059, "learning_rate": 0.00019955372241687157, "loss": 1.0094, "step": 650 }, { "epoch": 0.1510966693744923, "grad_norm": 0.7716643810272217, "learning_rate": 0.00019955234498349522, "loss": 1.0009, "step": 651 }, { "epoch": 0.15132876871300915, "grad_norm": 0.7610432505607605, "learning_rate": 0.00019955096543244127, "loss": 0.9835, "step": 652 }, { "epoch": 0.15156086805152605, "grad_norm": 0.8721607327461243, "learning_rate": 0.00019954958376373907, "loss": 1.0677, "step": 653 }, { "epoch": 0.15179296739004294, "grad_norm": 0.7674221396446228, "learning_rate": 0.00019954819997741797, "loss": 1.0648, "step": 654 }, { "epoch": 0.1520250667285598, "grad_norm": 0.6242952942848206, "learning_rate": 0.00019954681407350751, "loss": 1.1395, "step": 655 }, { "epoch": 0.1522571660670767, "grad_norm": 0.8117160201072693, "learning_rate": 
0.0001995454260520371, "loss": 0.9323, "step": 656 }, { "epoch": 0.1524892654055936, "grad_norm": 0.6578777432441711, "learning_rate": 0.00019954403591303628, "loss": 0.937, "step": 657 }, { "epoch": 0.15272136474411047, "grad_norm": 0.6872897148132324, "learning_rate": 0.0001995426436565346, "loss": 0.9907, "step": 658 }, { "epoch": 0.15295346408262736, "grad_norm": 0.7111395001411438, "learning_rate": 0.00019954124928256173, "loss": 1.0997, "step": 659 }, { "epoch": 0.15318556342114426, "grad_norm": 0.7976904511451721, "learning_rate": 0.00019953985279114726, "loss": 1.0271, "step": 660 }, { "epoch": 0.15341766275966112, "grad_norm": 0.6194788217544556, "learning_rate": 0.00019953845418232096, "loss": 0.9538, "step": 661 }, { "epoch": 0.15364976209817802, "grad_norm": 0.8761386871337891, "learning_rate": 0.0001995370534561125, "loss": 0.994, "step": 662 }, { "epoch": 0.1538818614366949, "grad_norm": 0.5725529193878174, "learning_rate": 0.00019953565061255177, "loss": 0.9835, "step": 663 }, { "epoch": 0.15411396077521178, "grad_norm": 0.8013498187065125, "learning_rate": 0.0001995342456516686, "loss": 1.0289, "step": 664 }, { "epoch": 0.15434606011372867, "grad_norm": 0.716290295124054, "learning_rate": 0.0001995328385734928, "loss": 1.0133, "step": 665 }, { "epoch": 0.15457815945224557, "grad_norm": 0.8954859972000122, "learning_rate": 0.00019953142937805435, "loss": 1.0206, "step": 666 }, { "epoch": 0.15481025879076243, "grad_norm": 0.8911016583442688, "learning_rate": 0.00019953001806538326, "loss": 0.9896, "step": 667 }, { "epoch": 0.15504235812927933, "grad_norm": 0.8333848714828491, "learning_rate": 0.0001995286046355095, "loss": 0.9798, "step": 668 }, { "epoch": 0.15527445746779622, "grad_norm": 0.730487585067749, "learning_rate": 0.00019952718908846314, "loss": 1.0891, "step": 669 }, { "epoch": 0.1555065568063131, "grad_norm": 0.7405540347099304, "learning_rate": 0.00019952577142427432, "loss": 0.9799, "step": 670 }, { "epoch": 0.15573865614482998, 
"grad_norm": 0.7609966993331909, "learning_rate": 0.00019952435164297317, "loss": 0.971, "step": 671 }, { "epoch": 0.15597075548334688, "grad_norm": 0.6858656406402588, "learning_rate": 0.00019952292974458993, "loss": 0.9751, "step": 672 }, { "epoch": 0.15620285482186375, "grad_norm": 0.7276650667190552, "learning_rate": 0.0001995215057291548, "loss": 1.0247, "step": 673 }, { "epoch": 0.15643495416038064, "grad_norm": 0.7341631054878235, "learning_rate": 0.0001995200795966981, "loss": 1.0599, "step": 674 }, { "epoch": 0.15666705349889753, "grad_norm": 0.8044571876525879, "learning_rate": 0.00019951865134725015, "loss": 1.0107, "step": 675 }, { "epoch": 0.1568991528374144, "grad_norm": 0.6420938372612, "learning_rate": 0.0001995172209808413, "loss": 0.9731, "step": 676 }, { "epoch": 0.1571312521759313, "grad_norm": 0.7621270418167114, "learning_rate": 0.0001995157884975021, "loss": 0.9787, "step": 677 }, { "epoch": 0.1573633515144482, "grad_norm": 0.6882710456848145, "learning_rate": 0.00019951435389726284, "loss": 1.0604, "step": 678 }, { "epoch": 0.15759545085296506, "grad_norm": 0.8514811992645264, "learning_rate": 0.0001995129171801542, "loss": 0.989, "step": 679 }, { "epoch": 0.15782755019148195, "grad_norm": 0.7716861963272095, "learning_rate": 0.00019951147834620667, "loss": 1.018, "step": 680 }, { "epoch": 0.15805964952999885, "grad_norm": 0.7619121074676514, "learning_rate": 0.00019951003739545088, "loss": 1.0205, "step": 681 }, { "epoch": 0.1582917488685157, "grad_norm": 0.7319327592849731, "learning_rate": 0.0001995085943279174, "loss": 1.0151, "step": 682 }, { "epoch": 0.1585238482070326, "grad_norm": 0.7430733442306519, "learning_rate": 0.00019950714914363706, "loss": 1.0211, "step": 683 }, { "epoch": 0.1587559475455495, "grad_norm": 0.839922308921814, "learning_rate": 0.0001995057018426405, "loss": 0.9658, "step": 684 }, { "epoch": 0.15898804688406637, "grad_norm": 0.7353138327598572, "learning_rate": 0.00019950425242495855, "loss": 0.9939, "step": 685 
}, { "epoch": 0.15922014622258326, "grad_norm": 0.7491339445114136, "learning_rate": 0.00019950280089062203, "loss": 0.9888, "step": 686 }, { "epoch": 0.15945224556110016, "grad_norm": 0.7883397340774536, "learning_rate": 0.00019950134723966184, "loss": 1.0385, "step": 687 }, { "epoch": 0.15968434489961703, "grad_norm": 0.7607892155647278, "learning_rate": 0.00019949989147210887, "loss": 1.0246, "step": 688 }, { "epoch": 0.15991644423813392, "grad_norm": 0.6952542662620544, "learning_rate": 0.0001994984335879941, "loss": 1.0327, "step": 689 }, { "epoch": 0.16014854357665081, "grad_norm": 0.7602426409721375, "learning_rate": 0.0001994969735873485, "loss": 1.0084, "step": 690 }, { "epoch": 0.16038064291516768, "grad_norm": 0.7466674447059631, "learning_rate": 0.0001994955114702032, "loss": 1.038, "step": 691 }, { "epoch": 0.16061274225368458, "grad_norm": 0.8651823401451111, "learning_rate": 0.0001994940472365893, "loss": 0.995, "step": 692 }, { "epoch": 0.16084484159220147, "grad_norm": 0.7051277756690979, "learning_rate": 0.0001994925808865379, "loss": 1.0037, "step": 693 }, { "epoch": 0.16107694093071834, "grad_norm": 0.8252283334732056, "learning_rate": 0.0001994911124200802, "loss": 0.9939, "step": 694 }, { "epoch": 0.16130904026923523, "grad_norm": 0.9004282355308533, "learning_rate": 0.00019948964183724745, "loss": 0.9826, "step": 695 }, { "epoch": 0.16154113960775213, "grad_norm": 0.7449021935462952, "learning_rate": 0.00019948816913807095, "loss": 1.0134, "step": 696 }, { "epoch": 0.161773238946269, "grad_norm": 0.7929180264472961, "learning_rate": 0.00019948669432258198, "loss": 1.0291, "step": 697 }, { "epoch": 0.1620053382847859, "grad_norm": 0.6865376830101013, "learning_rate": 0.00019948521739081193, "loss": 1.0093, "step": 698 }, { "epoch": 0.16223743762330278, "grad_norm": 0.8430371880531311, "learning_rate": 0.0001994837383427923, "loss": 1.0313, "step": 699 }, { "epoch": 0.16246953696181965, "grad_norm": 0.672202467918396, "learning_rate": 
0.0001994822571785544, "loss": 0.9895, "step": 700 }, { "epoch": 0.16270163630033654, "grad_norm": 0.7533789277076721, "learning_rate": 0.00019948077389812983, "loss": 1.0251, "step": 701 }, { "epoch": 0.16293373563885344, "grad_norm": 0.7379886507987976, "learning_rate": 0.00019947928850155014, "loss": 0.9902, "step": 702 }, { "epoch": 0.1631658349773703, "grad_norm": 0.6314092874526978, "learning_rate": 0.0001994778009888469, "loss": 0.9967, "step": 703 }, { "epoch": 0.1633979343158872, "grad_norm": 0.7838600277900696, "learning_rate": 0.0001994763113600518, "loss": 0.9498, "step": 704 }, { "epoch": 0.1636300336544041, "grad_norm": 0.6348045468330383, "learning_rate": 0.0001994748196151965, "loss": 1.0185, "step": 705 }, { "epoch": 0.16386213299292096, "grad_norm": 0.6709309816360474, "learning_rate": 0.0001994733257543127, "loss": 0.9949, "step": 706 }, { "epoch": 0.16409423233143786, "grad_norm": 0.5771185755729675, "learning_rate": 0.0001994718297774322, "loss": 1.0176, "step": 707 }, { "epoch": 0.16432633166995475, "grad_norm": 0.7808734774589539, "learning_rate": 0.00019947033168458687, "loss": 1.0334, "step": 708 }, { "epoch": 0.16455843100847162, "grad_norm": 0.6964454054832458, "learning_rate": 0.00019946883147580852, "loss": 1.0043, "step": 709 }, { "epoch": 0.1647905303469885, "grad_norm": 0.748284101486206, "learning_rate": 0.00019946732915112903, "loss": 1.0112, "step": 710 }, { "epoch": 0.1650226296855054, "grad_norm": 0.7442067265510559, "learning_rate": 0.00019946582471058047, "loss": 0.9902, "step": 711 }, { "epoch": 0.16525472902402227, "grad_norm": 0.7463036775588989, "learning_rate": 0.00019946431815419472, "loss": 1.0587, "step": 712 }, { "epoch": 0.16548682836253917, "grad_norm": 0.6683975458145142, "learning_rate": 0.0001994628094820039, "loss": 1.0124, "step": 713 }, { "epoch": 0.16571892770105606, "grad_norm": 0.7835921049118042, "learning_rate": 0.0001994612986940401, "loss": 1.0394, "step": 714 }, { "epoch": 0.16595102703957293, 
"grad_norm": 0.7277269959449768, "learning_rate": 0.00019945978579033547, "loss": 0.9857, "step": 715 }, { "epoch": 0.16618312637808982, "grad_norm": 0.9423496127128601, "learning_rate": 0.00019945827077092214, "loss": 1.0112, "step": 716 }, { "epoch": 0.16641522571660672, "grad_norm": 0.68646639585495, "learning_rate": 0.00019945675363583235, "loss": 0.993, "step": 717 }, { "epoch": 0.16664732505512359, "grad_norm": 0.7243557572364807, "learning_rate": 0.00019945523438509843, "loss": 1.0294, "step": 718 }, { "epoch": 0.16687942439364048, "grad_norm": 0.8303258419036865, "learning_rate": 0.00019945371301875265, "loss": 1.051, "step": 719 }, { "epoch": 0.16711152373215737, "grad_norm": 0.6014471054077148, "learning_rate": 0.00019945218953682734, "loss": 0.956, "step": 720 }, { "epoch": 0.16734362307067424, "grad_norm": 0.7443534731864929, "learning_rate": 0.00019945066393935496, "loss": 1.0143, "step": 721 }, { "epoch": 0.16757572240919114, "grad_norm": 0.6502603888511658, "learning_rate": 0.00019944913622636795, "loss": 1.0086, "step": 722 }, { "epoch": 0.16780782174770803, "grad_norm": 0.6778891086578369, "learning_rate": 0.0001994476063978988, "loss": 0.9976, "step": 723 }, { "epoch": 0.1680399210862249, "grad_norm": 0.7750787734985352, "learning_rate": 0.0001994460744539801, "loss": 1.0168, "step": 724 }, { "epoch": 0.1682720204247418, "grad_norm": 0.7372100949287415, "learning_rate": 0.00019944454039464435, "loss": 1.0844, "step": 725 }, { "epoch": 0.1685041197632587, "grad_norm": 0.7257523536682129, "learning_rate": 0.00019944300421992424, "loss": 0.9978, "step": 726 }, { "epoch": 0.16873621910177555, "grad_norm": 0.7090028524398804, "learning_rate": 0.00019944146592985242, "loss": 1.0147, "step": 727 }, { "epoch": 0.16896831844029245, "grad_norm": 1.1804558038711548, "learning_rate": 0.00019943992552446166, "loss": 1.0049, "step": 728 }, { "epoch": 0.16920041777880934, "grad_norm": 0.7015290260314941, "learning_rate": 0.00019943838300378466, "loss": 1.0144, 
"step": 729 }, { "epoch": 0.1694325171173262, "grad_norm": 0.6722859740257263, "learning_rate": 0.0001994368383678543, "loss": 1.0107, "step": 730 }, { "epoch": 0.1696646164558431, "grad_norm": 0.7584577798843384, "learning_rate": 0.0001994352916167034, "loss": 0.9795, "step": 731 }, { "epoch": 0.16989671579436, "grad_norm": 0.6277761459350586, "learning_rate": 0.00019943374275036485, "loss": 1.0452, "step": 732 }, { "epoch": 0.17012881513287686, "grad_norm": 0.7431066036224365, "learning_rate": 0.00019943219176887163, "loss": 0.9953, "step": 733 }, { "epoch": 0.17036091447139376, "grad_norm": 0.7466265559196472, "learning_rate": 0.0001994306386722567, "loss": 1.0039, "step": 734 }, { "epoch": 0.17059301380991065, "grad_norm": 0.7028168439865112, "learning_rate": 0.00019942908346055313, "loss": 1.0334, "step": 735 }, { "epoch": 0.17082511314842752, "grad_norm": 0.6532999277114868, "learning_rate": 0.000199427526133794, "loss": 1.0085, "step": 736 }, { "epoch": 0.17105721248694442, "grad_norm": 0.9394673705101013, "learning_rate": 0.0001994259666920124, "loss": 0.9505, "step": 737 }, { "epoch": 0.1712893118254613, "grad_norm": 0.6865752339363098, "learning_rate": 0.0001994244051352415, "loss": 0.9914, "step": 738 }, { "epoch": 0.17152141116397818, "grad_norm": 0.8739460706710815, "learning_rate": 0.0001994228414635146, "loss": 1.0118, "step": 739 }, { "epoch": 0.17175351050249507, "grad_norm": 0.6867471933364868, "learning_rate": 0.00019942127567686488, "loss": 0.9922, "step": 740 }, { "epoch": 0.17198560984101197, "grad_norm": 0.8083595037460327, "learning_rate": 0.0001994197077753257, "loss": 0.987, "step": 741 }, { "epoch": 0.17221770917952883, "grad_norm": 0.6917040348052979, "learning_rate": 0.00019941813775893036, "loss": 1.002, "step": 742 }, { "epoch": 0.17244980851804573, "grad_norm": 0.6717849969863892, "learning_rate": 0.00019941656562771231, "loss": 1.0277, "step": 743 }, { "epoch": 0.17268190785656262, "grad_norm": 0.692537784576416, "learning_rate": 
0.00019941499138170494, "loss": 1.0022, "step": 744 }, { "epoch": 0.1729140071950795, "grad_norm": 0.7512099742889404, "learning_rate": 0.00019941341502094178, "loss": 0.9731, "step": 745 }, { "epoch": 0.17314610653359638, "grad_norm": 0.7096254825592041, "learning_rate": 0.00019941183654545634, "loss": 1.0507, "step": 746 }, { "epoch": 0.17337820587211328, "grad_norm": 0.6329427361488342, "learning_rate": 0.00019941025595528221, "loss": 0.9913, "step": 747 }, { "epoch": 0.17361030521063014, "grad_norm": 0.7236711382865906, "learning_rate": 0.00019940867325045304, "loss": 0.9639, "step": 748 }, { "epoch": 0.17384240454914704, "grad_norm": 0.6646637916564941, "learning_rate": 0.00019940708843100243, "loss": 0.9552, "step": 749 }, { "epoch": 0.17407450388766393, "grad_norm": 0.7680178284645081, "learning_rate": 0.00019940550149696412, "loss": 1.0324, "step": 750 }, { "epoch": 0.1743066032261808, "grad_norm": 0.9164337515830994, "learning_rate": 0.00019940391244837184, "loss": 0.973, "step": 751 }, { "epoch": 0.1745387025646977, "grad_norm": 0.6420217752456665, "learning_rate": 0.0001994023212852595, "loss": 0.9785, "step": 752 }, { "epoch": 0.17477080190321456, "grad_norm": 0.8492144346237183, "learning_rate": 0.0001994007280076608, "loss": 1.0403, "step": 753 }, { "epoch": 0.17500290124173146, "grad_norm": 1.0029014348983765, "learning_rate": 0.00019939913261560977, "loss": 1.0168, "step": 754 }, { "epoch": 0.17523500058024835, "grad_norm": 0.7424417734146118, "learning_rate": 0.00019939753510914024, "loss": 1.0344, "step": 755 }, { "epoch": 0.17546709991876522, "grad_norm": 0.9073155522346497, "learning_rate": 0.00019939593548828625, "loss": 1.0125, "step": 756 }, { "epoch": 0.1756991992572821, "grad_norm": 0.7877057790756226, "learning_rate": 0.00019939433375308182, "loss": 0.9844, "step": 757 }, { "epoch": 0.175931298595799, "grad_norm": 0.7347944378852844, "learning_rate": 0.000199392729903561, "loss": 0.9707, "step": 758 }, { "epoch": 0.17616339793431587, 
"grad_norm": 0.7492951154708862, "learning_rate": 0.0001993911239397579, "loss": 1.0364, "step": 759 }, { "epoch": 0.17639549727283277, "grad_norm": 0.6598828434944153, "learning_rate": 0.00019938951586170674, "loss": 0.9801, "step": 760 }, { "epoch": 0.17662759661134966, "grad_norm": 0.8709242343902588, "learning_rate": 0.00019938790566944166, "loss": 0.9714, "step": 761 }, { "epoch": 0.17685969594986653, "grad_norm": 0.7231050729751587, "learning_rate": 0.00019938629336299697, "loss": 0.9949, "step": 762 }, { "epoch": 0.17709179528838342, "grad_norm": 0.7318840026855469, "learning_rate": 0.00019938467894240692, "loss": 1.0276, "step": 763 }, { "epoch": 0.17732389462690032, "grad_norm": 0.6909183859825134, "learning_rate": 0.00019938306240770587, "loss": 0.9836, "step": 764 }, { "epoch": 0.17755599396541719, "grad_norm": 0.6637578010559082, "learning_rate": 0.00019938144375892822, "loss": 1.0143, "step": 765 }, { "epoch": 0.17778809330393408, "grad_norm": 0.9036746025085449, "learning_rate": 0.00019937982299610837, "loss": 1.0156, "step": 766 }, { "epoch": 0.17802019264245098, "grad_norm": 0.6576951146125793, "learning_rate": 0.00019937820011928085, "loss": 0.9743, "step": 767 }, { "epoch": 0.17825229198096784, "grad_norm": 0.8088716268539429, "learning_rate": 0.00019937657512848013, "loss": 0.9602, "step": 768 }, { "epoch": 0.17848439131948474, "grad_norm": 0.6778093576431274, "learning_rate": 0.0001993749480237408, "loss": 0.9858, "step": 769 }, { "epoch": 0.17871649065800163, "grad_norm": 0.7673980593681335, "learning_rate": 0.00019937331880509745, "loss": 1.011, "step": 770 }, { "epoch": 0.1789485899965185, "grad_norm": 0.843537449836731, "learning_rate": 0.00019937168747258475, "loss": 0.9976, "step": 771 }, { "epoch": 0.1791806893350354, "grad_norm": 0.7212751507759094, "learning_rate": 0.0001993700540262374, "loss": 0.983, "step": 772 }, { "epoch": 0.1794127886735523, "grad_norm": 0.8316361308097839, "learning_rate": 0.00019936841846609018, "loss": 0.9976, 
"step": 773 }, { "epoch": 0.17964488801206915, "grad_norm": 0.8775364756584167, "learning_rate": 0.00019936678079217785, "loss": 1.0477, "step": 774 }, { "epoch": 0.17987698735058605, "grad_norm": 0.7859828472137451, "learning_rate": 0.00019936514100453524, "loss": 1.0555, "step": 775 }, { "epoch": 0.18010908668910294, "grad_norm": 0.7875041365623474, "learning_rate": 0.00019936349910319726, "loss": 1.039, "step": 776 }, { "epoch": 0.1803411860276198, "grad_norm": 0.988399863243103, "learning_rate": 0.0001993618550881988, "loss": 0.9416, "step": 777 }, { "epoch": 0.1805732853661367, "grad_norm": 0.6127263307571411, "learning_rate": 0.00019936020895957484, "loss": 0.9925, "step": 778 }, { "epoch": 0.1808053847046536, "grad_norm": 0.9991029500961304, "learning_rate": 0.00019935856071736044, "loss": 1.0396, "step": 779 }, { "epoch": 0.18103748404317047, "grad_norm": 0.7750523090362549, "learning_rate": 0.00019935691036159062, "loss": 1.0261, "step": 780 }, { "epoch": 0.18126958338168736, "grad_norm": 0.7655877470970154, "learning_rate": 0.00019935525789230044, "loss": 1.0141, "step": 781 }, { "epoch": 0.18150168272020425, "grad_norm": 0.8542660474777222, "learning_rate": 0.00019935360330952518, "loss": 1.0348, "step": 782 }, { "epoch": 0.18173378205872112, "grad_norm": 0.6947060227394104, "learning_rate": 0.0001993519466132999, "loss": 0.947, "step": 783 }, { "epoch": 0.18196588139723802, "grad_norm": 0.8238322138786316, "learning_rate": 0.00019935028780365994, "loss": 0.9432, "step": 784 }, { "epoch": 0.1821979807357549, "grad_norm": 0.6070980429649353, "learning_rate": 0.00019934862688064054, "loss": 1.0033, "step": 785 }, { "epoch": 0.18243008007427178, "grad_norm": 0.8402527570724487, "learning_rate": 0.00019934696384427705, "loss": 1.0295, "step": 786 }, { "epoch": 0.18266217941278867, "grad_norm": 0.7866340279579163, "learning_rate": 0.0001993452986946048, "loss": 1.0169, "step": 787 }, { "epoch": 0.18289427875130557, "grad_norm": 0.7608495950698853, 
"learning_rate": 0.00019934363143165928, "loss": 1.056, "step": 788 }, { "epoch": 0.18312637808982243, "grad_norm": 0.8008711338043213, "learning_rate": 0.0001993419620554759, "loss": 1.0361, "step": 789 }, { "epoch": 0.18335847742833933, "grad_norm": 0.704158365726471, "learning_rate": 0.00019934029056609023, "loss": 1.0054, "step": 790 }, { "epoch": 0.18359057676685622, "grad_norm": 0.727641761302948, "learning_rate": 0.00019933861696353776, "loss": 0.999, "step": 791 }, { "epoch": 0.1838226761053731, "grad_norm": 0.7447011470794678, "learning_rate": 0.00019933694124785417, "loss": 0.9943, "step": 792 }, { "epoch": 0.18405477544388998, "grad_norm": 0.6190704107284546, "learning_rate": 0.000199335263419075, "loss": 0.9626, "step": 793 }, { "epoch": 0.18428687478240688, "grad_norm": 0.8359029293060303, "learning_rate": 0.000199333583477236, "loss": 1.0255, "step": 794 }, { "epoch": 0.18451897412092375, "grad_norm": 0.745926022529602, "learning_rate": 0.00019933190142237294, "loss": 1.051, "step": 795 }, { "epoch": 0.18475107345944064, "grad_norm": 0.719132661819458, "learning_rate": 0.00019933021725452153, "loss": 1.0048, "step": 796 }, { "epoch": 0.18498317279795753, "grad_norm": 0.7354856133460999, "learning_rate": 0.00019932853097371763, "loss": 0.9635, "step": 797 }, { "epoch": 0.1852152721364744, "grad_norm": 0.6940884590148926, "learning_rate": 0.00019932684257999714, "loss": 1.0194, "step": 798 }, { "epoch": 0.1854473714749913, "grad_norm": 0.8320935368537903, "learning_rate": 0.00019932515207339593, "loss": 0.9505, "step": 799 }, { "epoch": 0.1856794708135082, "grad_norm": 0.7456812262535095, "learning_rate": 0.00019932345945394996, "loss": 1.0271, "step": 800 }, { "epoch": 0.18591157015202506, "grad_norm": 0.7851873636245728, "learning_rate": 0.00019932176472169528, "loss": 0.9929, "step": 801 }, { "epoch": 0.18614366949054195, "grad_norm": 0.7568420171737671, "learning_rate": 0.0001993200678766679, "loss": 1.0438, "step": 802 }, { "epoch": 
0.18637576882905885, "grad_norm": 0.9683287143707275, "learning_rate": 0.00019931836891890395, "loss": 0.9719, "step": 803 }, { "epoch": 0.1866078681675757, "grad_norm": 0.7195144891738892, "learning_rate": 0.0001993166678484395, "loss": 0.9902, "step": 804 }, { "epoch": 0.1868399675060926, "grad_norm": 0.8277847170829773, "learning_rate": 0.00019931496466531082, "loss": 0.988, "step": 805 }, { "epoch": 0.1870720668446095, "grad_norm": 0.7291855812072754, "learning_rate": 0.0001993132593695541, "loss": 0.9986, "step": 806 }, { "epoch": 0.18730416618312637, "grad_norm": 0.6581141948699951, "learning_rate": 0.00019931155196120559, "loss": 1.052, "step": 807 }, { "epoch": 0.18753626552164326, "grad_norm": 0.6882954835891724, "learning_rate": 0.00019930984244030165, "loss": 1.0077, "step": 808 }, { "epoch": 0.18776836486016016, "grad_norm": 0.7601907253265381, "learning_rate": 0.00019930813080687866, "loss": 1.0386, "step": 809 }, { "epoch": 0.18800046419867703, "grad_norm": 0.7828986644744873, "learning_rate": 0.00019930641706097295, "loss": 0.9695, "step": 810 }, { "epoch": 0.18823256353719392, "grad_norm": 0.8075627088546753, "learning_rate": 0.00019930470120262106, "loss": 1.0189, "step": 811 }, { "epoch": 0.18846466287571081, "grad_norm": 0.7714138627052307, "learning_rate": 0.00019930298323185945, "loss": 0.9854, "step": 812 }, { "epoch": 0.18869676221422768, "grad_norm": 0.6927416324615479, "learning_rate": 0.0001993012631487247, "loss": 1.0177, "step": 813 }, { "epoch": 0.18892886155274458, "grad_norm": 0.7472324371337891, "learning_rate": 0.00019929954095325333, "loss": 1.0303, "step": 814 }, { "epoch": 0.18916096089126147, "grad_norm": 0.6679052710533142, "learning_rate": 0.00019929781664548206, "loss": 1.0325, "step": 815 }, { "epoch": 0.18939306022977834, "grad_norm": 0.6790755391120911, "learning_rate": 0.00019929609022544749, "loss": 1.0063, "step": 816 }, { "epoch": 0.18962515956829523, "grad_norm": 0.5949795842170715, "learning_rate": 
0.0001992943616931864, "loss": 0.9856, "step": 817 }, { "epoch": 0.18985725890681213, "grad_norm": 0.6851198673248291, "learning_rate": 0.0001992926310487355, "loss": 0.9911, "step": 818 }, { "epoch": 0.190089358245329, "grad_norm": 0.6633700728416443, "learning_rate": 0.00019929089829213168, "loss": 0.9673, "step": 819 }, { "epoch": 0.1903214575838459, "grad_norm": 0.7310757040977478, "learning_rate": 0.00019928916342341178, "loss": 0.9645, "step": 820 }, { "epoch": 0.19055355692236278, "grad_norm": 0.6800209879875183, "learning_rate": 0.00019928742644261265, "loss": 1.042, "step": 821 }, { "epoch": 0.19078565626087965, "grad_norm": 0.6571696996688843, "learning_rate": 0.00019928568734977128, "loss": 1.0087, "step": 822 }, { "epoch": 0.19101775559939654, "grad_norm": 0.6280850172042847, "learning_rate": 0.0001992839461449247, "loss": 1.1051, "step": 823 }, { "epoch": 0.19124985493791344, "grad_norm": 0.6647082567214966, "learning_rate": 0.00019928220282810987, "loss": 1.0006, "step": 824 }, { "epoch": 0.1914819542764303, "grad_norm": 0.6608778834342957, "learning_rate": 0.00019928045739936388, "loss": 1.0314, "step": 825 }, { "epoch": 0.1917140536149472, "grad_norm": 0.6483500599861145, "learning_rate": 0.00019927870985872397, "loss": 1.0503, "step": 826 }, { "epoch": 0.1919461529534641, "grad_norm": 0.5875803828239441, "learning_rate": 0.00019927696020622716, "loss": 0.9419, "step": 827 }, { "epoch": 0.19217825229198096, "grad_norm": 0.7076141238212585, "learning_rate": 0.00019927520844191082, "loss": 0.9563, "step": 828 }, { "epoch": 0.19241035163049786, "grad_norm": 0.6014459729194641, "learning_rate": 0.00019927345456581208, "loss": 0.9508, "step": 829 }, { "epoch": 0.19264245096901475, "grad_norm": 0.6280800700187683, "learning_rate": 0.00019927169857796834, "loss": 1.003, "step": 830 }, { "epoch": 0.19287455030753162, "grad_norm": 0.6131556034088135, "learning_rate": 0.0001992699404784169, "loss": 1.0001, "step": 831 }, { "epoch": 0.1931066496460485, 
"grad_norm": 0.7052152752876282, "learning_rate": 0.0001992681802671952, "loss": 0.9366, "step": 832 }, { "epoch": 0.1933387489845654, "grad_norm": 0.6523432731628418, "learning_rate": 0.0001992664179443406, "loss": 0.9517, "step": 833 }, { "epoch": 0.19357084832308227, "grad_norm": 0.7725576758384705, "learning_rate": 0.0001992646535098907, "loss": 1.0019, "step": 834 }, { "epoch": 0.19380294766159917, "grad_norm": 0.6948834657669067, "learning_rate": 0.00019926288696388297, "loss": 1.0611, "step": 835 }, { "epoch": 0.19403504700011606, "grad_norm": 0.6892808079719543, "learning_rate": 0.00019926111830635498, "loss": 1.0339, "step": 836 }, { "epoch": 0.19426714633863293, "grad_norm": 0.5884084105491638, "learning_rate": 0.00019925934753734441, "loss": 0.9571, "step": 837 }, { "epoch": 0.19449924567714982, "grad_norm": 0.7471634745597839, "learning_rate": 0.00019925757465688888, "loss": 0.9521, "step": 838 }, { "epoch": 0.19473134501566672, "grad_norm": 0.6766919493675232, "learning_rate": 0.00019925579966502613, "loss": 1.006, "step": 839 }, { "epoch": 0.19496344435418358, "grad_norm": 0.6207334399223328, "learning_rate": 0.00019925402256179385, "loss": 1.0306, "step": 840 }, { "epoch": 0.19519554369270048, "grad_norm": 0.648978590965271, "learning_rate": 0.00019925224334722992, "loss": 1.0368, "step": 841 }, { "epoch": 0.19542764303121737, "grad_norm": 0.7452328205108643, "learning_rate": 0.00019925046202137216, "loss": 0.9544, "step": 842 }, { "epoch": 0.19565974236973424, "grad_norm": 0.690368115901947, "learning_rate": 0.0001992486785842585, "loss": 1.0268, "step": 843 }, { "epoch": 0.19589184170825114, "grad_norm": 0.8583188652992249, "learning_rate": 0.00019924689303592677, "loss": 0.9654, "step": 844 }, { "epoch": 0.19612394104676803, "grad_norm": 0.606836199760437, "learning_rate": 0.00019924510537641505, "loss": 1.0037, "step": 845 }, { "epoch": 0.1963560403852849, "grad_norm": 0.79323810338974, "learning_rate": 0.00019924331560576138, "loss": 1.0066, 
"step": 846 }, { "epoch": 0.1965881397238018, "grad_norm": 0.6854238510131836, "learning_rate": 0.00019924152372400374, "loss": 1.0046, "step": 847 }, { "epoch": 0.19682023906231869, "grad_norm": 0.8182931542396545, "learning_rate": 0.00019923972973118033, "loss": 0.9748, "step": 848 }, { "epoch": 0.19705233840083555, "grad_norm": 0.6291098594665527, "learning_rate": 0.00019923793362732926, "loss": 0.9959, "step": 849 }, { "epoch": 0.19728443773935245, "grad_norm": 0.7048092484474182, "learning_rate": 0.00019923613541248875, "loss": 0.998, "step": 850 }, { "epoch": 0.19751653707786934, "grad_norm": 0.7435493469238281, "learning_rate": 0.0001992343350866971, "loss": 0.9958, "step": 851 }, { "epoch": 0.1977486364163862, "grad_norm": 0.8091257810592651, "learning_rate": 0.0001992325326499925, "loss": 1.0074, "step": 852 }, { "epoch": 0.1979807357549031, "grad_norm": 0.7977421879768372, "learning_rate": 0.0001992307281024134, "loss": 0.9288, "step": 853 }, { "epoch": 0.19821283509341997, "grad_norm": 0.7225073575973511, "learning_rate": 0.00019922892144399817, "loss": 1.0193, "step": 854 }, { "epoch": 0.19844493443193686, "grad_norm": 0.7307286858558655, "learning_rate": 0.00019922711267478517, "loss": 1.0386, "step": 855 }, { "epoch": 0.19867703377045376, "grad_norm": 0.7085416316986084, "learning_rate": 0.00019922530179481294, "loss": 0.9628, "step": 856 }, { "epoch": 0.19890913310897063, "grad_norm": 0.7864494919776917, "learning_rate": 0.00019922348880411996, "loss": 0.9805, "step": 857 }, { "epoch": 0.19914123244748752, "grad_norm": 0.7806622385978699, "learning_rate": 0.00019922167370274485, "loss": 0.9761, "step": 858 }, { "epoch": 0.19937333178600442, "grad_norm": 0.7023131847381592, "learning_rate": 0.00019921985649072615, "loss": 1.0397, "step": 859 }, { "epoch": 0.19960543112452128, "grad_norm": 0.7977806925773621, "learning_rate": 0.00019921803716810258, "loss": 1.0903, "step": 860 }, { "epoch": 0.19983753046303818, "grad_norm": 0.7352699637413025, 
"learning_rate": 0.00019921621573491282, "loss": 1.0222, "step": 861 }, { "epoch": 0.20006962980155507, "grad_norm": 0.7725037336349487, "learning_rate": 0.0001992143921911956, "loss": 1.017, "step": 862 }, { "epoch": 0.20030172914007194, "grad_norm": 0.6731856465339661, "learning_rate": 0.00019921256653698973, "loss": 1.0022, "step": 863 }, { "epoch": 0.20053382847858883, "grad_norm": 0.6586350798606873, "learning_rate": 0.000199210738772334, "loss": 0.9543, "step": 864 }, { "epoch": 0.20076592781710573, "grad_norm": 0.7590776085853577, "learning_rate": 0.00019920890889726738, "loss": 1.032, "step": 865 }, { "epoch": 0.2009980271556226, "grad_norm": 0.8955305218696594, "learning_rate": 0.0001992070769118287, "loss": 0.9899, "step": 866 }, { "epoch": 0.2012301264941395, "grad_norm": 0.7706810832023621, "learning_rate": 0.000199205242816057, "loss": 0.9419, "step": 867 }, { "epoch": 0.20146222583265638, "grad_norm": 0.8760741949081421, "learning_rate": 0.00019920340660999124, "loss": 1.0091, "step": 868 }, { "epoch": 0.20169432517117325, "grad_norm": 0.7808639407157898, "learning_rate": 0.0001992015682936705, "loss": 1.0222, "step": 869 }, { "epoch": 0.20192642450969014, "grad_norm": 0.7963709235191345, "learning_rate": 0.00019919972786713392, "loss": 0.9506, "step": 870 }, { "epoch": 0.20215852384820704, "grad_norm": 0.7936874628067017, "learning_rate": 0.0001991978853304206, "loss": 1.0578, "step": 871 }, { "epoch": 0.2023906231867239, "grad_norm": 0.749712347984314, "learning_rate": 0.00019919604068356978, "loss": 0.9937, "step": 872 }, { "epoch": 0.2026227225252408, "grad_norm": 0.7461695671081543, "learning_rate": 0.00019919419392662064, "loss": 0.9431, "step": 873 }, { "epoch": 0.2028548218637577, "grad_norm": 0.9083579778671265, "learning_rate": 0.00019919234505961253, "loss": 0.9189, "step": 874 }, { "epoch": 0.20308692120227456, "grad_norm": 0.756526529788971, "learning_rate": 0.00019919049408258474, "loss": 0.946, "step": 875 }, { "epoch": 
0.20331902054079146, "grad_norm": 0.8746988773345947, "learning_rate": 0.00019918864099557665, "loss": 0.995, "step": 876 }, { "epoch": 0.20355111987930835, "grad_norm": 0.7590621709823608, "learning_rate": 0.00019918678579862765, "loss": 0.9949, "step": 877 }, { "epoch": 0.20378321921782522, "grad_norm": 0.7832101583480835, "learning_rate": 0.00019918492849177726, "loss": 1.0101, "step": 878 }, { "epoch": 0.2040153185563421, "grad_norm": 0.668903648853302, "learning_rate": 0.00019918306907506497, "loss": 0.963, "step": 879 }, { "epoch": 0.204247417894859, "grad_norm": 0.8419638276100159, "learning_rate": 0.0001991812075485303, "loss": 0.9899, "step": 880 }, { "epoch": 0.20447951723337587, "grad_norm": 0.600913405418396, "learning_rate": 0.0001991793439122129, "loss": 0.9691, "step": 881 }, { "epoch": 0.20471161657189277, "grad_norm": 0.8120608925819397, "learning_rate": 0.00019917747816615238, "loss": 0.9852, "step": 882 }, { "epoch": 0.20494371591040966, "grad_norm": 0.7042036652565002, "learning_rate": 0.00019917561031038844, "loss": 0.967, "step": 883 }, { "epoch": 0.20517581524892653, "grad_norm": 0.6704075932502747, "learning_rate": 0.0001991737403449608, "loss": 0.9873, "step": 884 }, { "epoch": 0.20540791458744342, "grad_norm": 0.7685598731040955, "learning_rate": 0.00019917186826990923, "loss": 1.0351, "step": 885 }, { "epoch": 0.20564001392596032, "grad_norm": 0.615253746509552, "learning_rate": 0.0001991699940852736, "loss": 0.9205, "step": 886 }, { "epoch": 0.20587211326447719, "grad_norm": 0.7005866169929504, "learning_rate": 0.00019916811779109372, "loss": 0.9879, "step": 887 }, { "epoch": 0.20610421260299408, "grad_norm": 0.7386662364006042, "learning_rate": 0.00019916623938740955, "loss": 0.9295, "step": 888 }, { "epoch": 0.20633631194151097, "grad_norm": 0.7526286840438843, "learning_rate": 0.000199164358874261, "loss": 0.963, "step": 889 }, { "epoch": 0.20656841128002784, "grad_norm": 0.7055673003196716, "learning_rate": 0.00019916247625168817, 
"loss": 1.0081, "step": 890 }, { "epoch": 0.20680051061854474, "grad_norm": 0.7451292872428894, "learning_rate": 0.00019916059151973097, "loss": 0.9605, "step": 891 }, { "epoch": 0.20703260995706163, "grad_norm": 0.6615798473358154, "learning_rate": 0.00019915870467842957, "loss": 0.9882, "step": 892 }, { "epoch": 0.2072647092955785, "grad_norm": 0.7829159498214722, "learning_rate": 0.0001991568157278241, "loss": 1.0077, "step": 893 }, { "epoch": 0.2074968086340954, "grad_norm": 0.6309629082679749, "learning_rate": 0.00019915492466795472, "loss": 0.9411, "step": 894 }, { "epoch": 0.2077289079726123, "grad_norm": 0.7071045637130737, "learning_rate": 0.00019915303149886171, "loss": 1.0215, "step": 895 }, { "epoch": 0.20796100731112915, "grad_norm": 0.7164064645767212, "learning_rate": 0.0001991511362205853, "loss": 0.9381, "step": 896 }, { "epoch": 0.20819310664964605, "grad_norm": 0.6423659324645996, "learning_rate": 0.00019914923883316578, "loss": 0.9492, "step": 897 }, { "epoch": 0.20842520598816294, "grad_norm": 0.7434820532798767, "learning_rate": 0.00019914733933664355, "loss": 0.9776, "step": 898 }, { "epoch": 0.2086573053266798, "grad_norm": 0.6487737894058228, "learning_rate": 0.00019914543773105905, "loss": 1.0045, "step": 899 }, { "epoch": 0.2088894046651967, "grad_norm": 0.7158071398735046, "learning_rate": 0.00019914353401645265, "loss": 0.9371, "step": 900 }, { "epoch": 0.2091215040037136, "grad_norm": 0.6024456024169922, "learning_rate": 0.0001991416281928649, "loss": 1.0068, "step": 901 }, { "epoch": 0.20935360334223047, "grad_norm": 0.8383240103721619, "learning_rate": 0.00019913972026033632, "loss": 0.9405, "step": 902 }, { "epoch": 0.20958570268074736, "grad_norm": 0.5521221160888672, "learning_rate": 0.0001991378102189075, "loss": 0.9721, "step": 903 }, { "epoch": 0.20981780201926425, "grad_norm": 0.8866906762123108, "learning_rate": 0.0001991358980686191, "loss": 0.9602, "step": 904 }, { "epoch": 0.21004990135778112, "grad_norm": 
0.7330222129821777, "learning_rate": 0.00019913398380951174, "loss": 0.9449, "step": 905 }, { "epoch": 0.21028200069629802, "grad_norm": 0.799877405166626, "learning_rate": 0.00019913206744162615, "loss": 1.0121, "step": 906 }, { "epoch": 0.2105141000348149, "grad_norm": 0.7318835258483887, "learning_rate": 0.00019913014896500315, "loss": 0.9412, "step": 907 }, { "epoch": 0.21074619937333178, "grad_norm": 0.7632192373275757, "learning_rate": 0.0001991282283796835, "loss": 0.9556, "step": 908 }, { "epoch": 0.21097829871184867, "grad_norm": 0.7292538285255432, "learning_rate": 0.00019912630568570807, "loss": 0.9464, "step": 909 }, { "epoch": 0.21121039805036557, "grad_norm": 0.789539098739624, "learning_rate": 0.00019912438088311776, "loss": 0.9552, "step": 910 }, { "epoch": 0.21144249738888243, "grad_norm": 0.6972430944442749, "learning_rate": 0.0001991224539719535, "loss": 1.0022, "step": 911 }, { "epoch": 0.21167459672739933, "grad_norm": 0.6916122436523438, "learning_rate": 0.0001991205249522563, "loss": 0.9562, "step": 912 }, { "epoch": 0.21190669606591622, "grad_norm": 0.5755712389945984, "learning_rate": 0.00019911859382406718, "loss": 0.9794, "step": 913 }, { "epoch": 0.2121387954044331, "grad_norm": 0.5981063842773438, "learning_rate": 0.0001991166605874272, "loss": 0.9382, "step": 914 }, { "epoch": 0.21237089474294998, "grad_norm": 0.7757181525230408, "learning_rate": 0.00019911472524237756, "loss": 0.9514, "step": 915 }, { "epoch": 0.21260299408146688, "grad_norm": 0.6511759757995605, "learning_rate": 0.00019911278778895932, "loss": 0.947, "step": 916 }, { "epoch": 0.21283509341998375, "grad_norm": 0.6893859505653381, "learning_rate": 0.0001991108482272138, "loss": 1.0322, "step": 917 }, { "epoch": 0.21306719275850064, "grad_norm": 0.6665262579917908, "learning_rate": 0.00019910890655718214, "loss": 1.0298, "step": 918 }, { "epoch": 0.21329929209701753, "grad_norm": 0.6562342047691345, "learning_rate": 0.00019910696277890578, "loss": 0.9559, "step": 919 }, 
{ "epoch": 0.2135313914355344, "grad_norm": 0.7250169515609741, "learning_rate": 0.00019910501689242597, "loss": 0.9795, "step": 920 }, { "epoch": 0.2137634907740513, "grad_norm": 0.6263486742973328, "learning_rate": 0.00019910306889778413, "loss": 0.9628, "step": 921 }, { "epoch": 0.2139955901125682, "grad_norm": 0.7502377033233643, "learning_rate": 0.0001991011187950217, "loss": 1.0117, "step": 922 }, { "epoch": 0.21422768945108506, "grad_norm": 0.6383588314056396, "learning_rate": 0.0001990991665841802, "loss": 1.0164, "step": 923 }, { "epoch": 0.21445978878960195, "grad_norm": 0.6259427070617676, "learning_rate": 0.00019909721226530106, "loss": 1.0592, "step": 924 }, { "epoch": 0.21469188812811885, "grad_norm": 0.5734742879867554, "learning_rate": 0.00019909525583842596, "loss": 0.989, "step": 925 }, { "epoch": 0.2149239874666357, "grad_norm": 0.7068042159080505, "learning_rate": 0.00019909329730359644, "loss": 0.9773, "step": 926 }, { "epoch": 0.2151560868051526, "grad_norm": 0.5663431882858276, "learning_rate": 0.0001990913366608542, "loss": 0.9969, "step": 927 }, { "epoch": 0.2153881861436695, "grad_norm": 0.682887077331543, "learning_rate": 0.00019908937391024094, "loss": 0.9905, "step": 928 }, { "epoch": 0.21562028548218637, "grad_norm": 0.7273259162902832, "learning_rate": 0.0001990874090517984, "loss": 0.9374, "step": 929 }, { "epoch": 0.21585238482070326, "grad_norm": 0.6849899291992188, "learning_rate": 0.0001990854420855684, "loss": 0.9452, "step": 930 }, { "epoch": 0.21608448415922016, "grad_norm": 0.814423143863678, "learning_rate": 0.00019908347301159278, "loss": 0.9178, "step": 931 }, { "epoch": 0.21631658349773702, "grad_norm": 0.6402668356895447, "learning_rate": 0.00019908150182991339, "loss": 0.9589, "step": 932 }, { "epoch": 0.21654868283625392, "grad_norm": 0.8134530186653137, "learning_rate": 0.00019907952854057216, "loss": 0.9783, "step": 933 }, { "epoch": 0.21678078217477081, "grad_norm": 0.6399134993553162, "learning_rate": 
0.00019907755314361113, "loss": 0.9891, "step": 934 }, { "epoch": 0.21701288151328768, "grad_norm": 0.6813836097717285, "learning_rate": 0.0001990755756390723, "loss": 0.9976, "step": 935 }, { "epoch": 0.21724498085180458, "grad_norm": 0.6623417139053345, "learning_rate": 0.00019907359602699766, "loss": 0.948, "step": 936 }, { "epoch": 0.21747708019032147, "grad_norm": 0.8633440136909485, "learning_rate": 0.00019907161430742943, "loss": 0.941, "step": 937 }, { "epoch": 0.21770917952883834, "grad_norm": 0.6960586905479431, "learning_rate": 0.00019906963048040968, "loss": 0.9647, "step": 938 }, { "epoch": 0.21794127886735523, "grad_norm": 0.6707686185836792, "learning_rate": 0.00019906764454598064, "loss": 0.9821, "step": 939 }, { "epoch": 0.21817337820587213, "grad_norm": 0.632754921913147, "learning_rate": 0.00019906565650418457, "loss": 1.0185, "step": 940 }, { "epoch": 0.218405477544389, "grad_norm": 0.5785548686981201, "learning_rate": 0.00019906366635506374, "loss": 0.9784, "step": 941 }, { "epoch": 0.2186375768829059, "grad_norm": 0.63602215051651, "learning_rate": 0.0001990616740986605, "loss": 0.9633, "step": 942 }, { "epoch": 0.21886967622142278, "grad_norm": 0.661085844039917, "learning_rate": 0.0001990596797350172, "loss": 0.9938, "step": 943 }, { "epoch": 0.21910177555993965, "grad_norm": 0.6743558049201965, "learning_rate": 0.0001990576832641763, "loss": 0.9232, "step": 944 }, { "epoch": 0.21933387489845654, "grad_norm": 0.5749072432518005, "learning_rate": 0.00019905568468618024, "loss": 0.954, "step": 945 }, { "epoch": 0.21956597423697344, "grad_norm": 0.6103346347808838, "learning_rate": 0.00019905368400107156, "loss": 0.9689, "step": 946 }, { "epoch": 0.2197980735754903, "grad_norm": 0.7214609384536743, "learning_rate": 0.0001990516812088928, "loss": 0.9671, "step": 947 }, { "epoch": 0.2200301729140072, "grad_norm": 0.6307926774024963, "learning_rate": 0.00019904967630968656, "loss": 0.949, "step": 948 }, { "epoch": 0.2202622722525241, "grad_norm": 
0.62674880027771, "learning_rate": 0.0001990476693034955, "loss": 0.9397, "step": 949 }, { "epoch": 0.22049437159104096, "grad_norm": 0.8015561103820801, "learning_rate": 0.00019904566019036233, "loss": 0.9714, "step": 950 }, { "epoch": 0.22072647092955786, "grad_norm": 0.7245792150497437, "learning_rate": 0.00019904364897032974, "loss": 0.995, "step": 951 }, { "epoch": 0.22095857026807472, "grad_norm": 0.6479692459106445, "learning_rate": 0.0001990416356434406, "loss": 0.999, "step": 952 }, { "epoch": 0.22119066960659162, "grad_norm": 0.8003178238868713, "learning_rate": 0.0001990396202097376, "loss": 0.9529, "step": 953 }, { "epoch": 0.2214227689451085, "grad_norm": 0.5936675071716309, "learning_rate": 0.00019903760266926375, "loss": 0.9111, "step": 954 }, { "epoch": 0.22165486828362538, "grad_norm": 0.6162198781967163, "learning_rate": 0.00019903558302206186, "loss": 0.9571, "step": 955 }, { "epoch": 0.22188696762214227, "grad_norm": 0.6715914607048035, "learning_rate": 0.00019903356126817497, "loss": 1.0165, "step": 956 }, { "epoch": 0.22211906696065917, "grad_norm": 0.682853639125824, "learning_rate": 0.00019903153740764604, "loss": 0.9294, "step": 957 }, { "epoch": 0.22235116629917603, "grad_norm": 0.5687209963798523, "learning_rate": 0.00019902951144051815, "loss": 0.9713, "step": 958 }, { "epoch": 0.22258326563769293, "grad_norm": 0.6776739358901978, "learning_rate": 0.00019902748336683436, "loss": 0.9951, "step": 959 }, { "epoch": 0.22281536497620982, "grad_norm": 0.6323391199111938, "learning_rate": 0.00019902545318663787, "loss": 1.0293, "step": 960 }, { "epoch": 0.2230474643147267, "grad_norm": 0.5990196466445923, "learning_rate": 0.00019902342089997183, "loss": 0.9506, "step": 961 }, { "epoch": 0.22327956365324358, "grad_norm": 0.6580356359481812, "learning_rate": 0.00019902138650687942, "loss": 1.0176, "step": 962 }, { "epoch": 0.22351166299176048, "grad_norm": 0.5820430517196655, "learning_rate": 0.00019901935000740401, "loss": 0.9986, "step": 963 }, 
{ "epoch": 0.22374376233027735, "grad_norm": 0.6097736358642578, "learning_rate": 0.00019901731140158888, "loss": 1.0116, "step": 964 }, { "epoch": 0.22397586166879424, "grad_norm": 0.5930247902870178, "learning_rate": 0.0001990152706894774, "loss": 0.9513, "step": 965 }, { "epoch": 0.22420796100731114, "grad_norm": 0.6531046032905579, "learning_rate": 0.00019901322787111293, "loss": 0.9795, "step": 966 }, { "epoch": 0.224440060345828, "grad_norm": 0.6781020760536194, "learning_rate": 0.00019901118294653902, "loss": 1.0518, "step": 967 }, { "epoch": 0.2246721596843449, "grad_norm": 0.6298601031303406, "learning_rate": 0.00019900913591579911, "loss": 0.9672, "step": 968 }, { "epoch": 0.2249042590228618, "grad_norm": 0.8714528679847717, "learning_rate": 0.00019900708677893673, "loss": 0.9571, "step": 969 }, { "epoch": 0.22513635836137866, "grad_norm": 0.6770628690719604, "learning_rate": 0.0001990050355359955, "loss": 0.9926, "step": 970 }, { "epoch": 0.22536845769989555, "grad_norm": 0.6022521257400513, "learning_rate": 0.00019900298218701904, "loss": 0.989, "step": 971 }, { "epoch": 0.22560055703841245, "grad_norm": 0.7107973694801331, "learning_rate": 0.00019900092673205103, "loss": 0.9816, "step": 972 }, { "epoch": 0.2258326563769293, "grad_norm": 0.7493491768836975, "learning_rate": 0.00019899886917113522, "loss": 0.9906, "step": 973 }, { "epoch": 0.2260647557154462, "grad_norm": 0.6940585374832153, "learning_rate": 0.00019899680950431534, "loss": 0.9894, "step": 974 }, { "epoch": 0.2262968550539631, "grad_norm": 0.7166247963905334, "learning_rate": 0.00019899474773163523, "loss": 1.0296, "step": 975 }, { "epoch": 0.22652895439247997, "grad_norm": 0.7318999767303467, "learning_rate": 0.00019899268385313872, "loss": 0.9752, "step": 976 }, { "epoch": 0.22676105373099686, "grad_norm": 0.6513804197311401, "learning_rate": 0.00019899061786886977, "loss": 1.0166, "step": 977 }, { "epoch": 0.22699315306951376, "grad_norm": 0.7025654911994934, "learning_rate": 
0.00019898854977887223, "loss": 1.003, "step": 978 }, { "epoch": 0.22722525240803063, "grad_norm": 0.6484196782112122, "learning_rate": 0.0001989864795831902, "loss": 0.9358, "step": 979 }, { "epoch": 0.22745735174654752, "grad_norm": 0.8256930112838745, "learning_rate": 0.00019898440728186766, "loss": 0.9236, "step": 980 }, { "epoch": 0.22768945108506441, "grad_norm": 0.6069899201393127, "learning_rate": 0.00019898233287494866, "loss": 0.9746, "step": 981 }, { "epoch": 0.22792155042358128, "grad_norm": 0.6900044083595276, "learning_rate": 0.0001989802563624774, "loss": 1.0056, "step": 982 }, { "epoch": 0.22815364976209818, "grad_norm": 0.5382182598114014, "learning_rate": 0.00019897817774449804, "loss": 0.9948, "step": 983 }, { "epoch": 0.22838574910061507, "grad_norm": 0.7674662470817566, "learning_rate": 0.00019897609702105475, "loss": 1.0189, "step": 984 }, { "epoch": 0.22861784843913194, "grad_norm": 0.6343722939491272, "learning_rate": 0.0001989740141921918, "loss": 0.9918, "step": 985 }, { "epoch": 0.22884994777764883, "grad_norm": 0.7178065776824951, "learning_rate": 0.00019897192925795353, "loss": 0.9625, "step": 986 }, { "epoch": 0.22908204711616573, "grad_norm": 0.7420802116394043, "learning_rate": 0.00019896984221838427, "loss": 0.9612, "step": 987 }, { "epoch": 0.2293141464546826, "grad_norm": 0.655222475528717, "learning_rate": 0.00019896775307352843, "loss": 1.013, "step": 988 }, { "epoch": 0.2295462457931995, "grad_norm": 0.8229613900184631, "learning_rate": 0.0001989656618234304, "loss": 0.9527, "step": 989 }, { "epoch": 0.22977834513171638, "grad_norm": 0.6890894174575806, "learning_rate": 0.00019896356846813473, "loss": 0.9685, "step": 990 }, { "epoch": 0.23001044447023325, "grad_norm": 0.9900982975959778, "learning_rate": 0.00019896147300768594, "loss": 0.9336, "step": 991 }, { "epoch": 0.23024254380875014, "grad_norm": 0.6395576000213623, "learning_rate": 0.00019895937544212858, "loss": 0.9866, "step": 992 }, { "epoch": 0.23047464314726704, 
"grad_norm": 0.8336830735206604, "learning_rate": 0.00019895727577150723, "loss": 0.9771, "step": 993 }, { "epoch": 0.2307067424857839, "grad_norm": 0.7252468466758728, "learning_rate": 0.00019895517399586666, "loss": 0.9782, "step": 994 }, { "epoch": 0.2309388418243008, "grad_norm": 0.7857797741889954, "learning_rate": 0.00019895307011525147, "loss": 0.9817, "step": 995 }, { "epoch": 0.2311709411628177, "grad_norm": 0.6648166179656982, "learning_rate": 0.0001989509641297065, "loss": 1.0091, "step": 996 }, { "epoch": 0.23140304050133456, "grad_norm": 0.847679078578949, "learning_rate": 0.0001989488560392765, "loss": 0.9396, "step": 997 }, { "epoch": 0.23163513983985146, "grad_norm": 0.7599864602088928, "learning_rate": 0.00019894674584400635, "loss": 0.974, "step": 998 }, { "epoch": 0.23186723917836835, "grad_norm": 0.7556748390197754, "learning_rate": 0.00019894463354394089, "loss": 0.9915, "step": 999 }, { "epoch": 0.23209933851688522, "grad_norm": 0.7051100134849548, "learning_rate": 0.00019894251913912507, "loss": 0.9921, "step": 1000 }, { "epoch": 0.2323314378554021, "grad_norm": 0.6394804120063782, "learning_rate": 0.00019894040262960388, "loss": 1.0131, "step": 1001 }, { "epoch": 0.232563537193919, "grad_norm": 0.8215779066085815, "learning_rate": 0.00019893828401542236, "loss": 0.9695, "step": 1002 }, { "epoch": 0.23279563653243587, "grad_norm": 0.5684072971343994, "learning_rate": 0.00019893616329662552, "loss": 0.9851, "step": 1003 }, { "epoch": 0.23302773587095277, "grad_norm": 0.827438473701477, "learning_rate": 0.0001989340404732585, "loss": 0.9558, "step": 1004 }, { "epoch": 0.23325983520946966, "grad_norm": 0.6697958111763, "learning_rate": 0.00019893191554536653, "loss": 0.9794, "step": 1005 }, { "epoch": 0.23349193454798653, "grad_norm": 0.6309450268745422, "learning_rate": 0.00019892978851299468, "loss": 1.014, "step": 1006 }, { "epoch": 0.23372403388650342, "grad_norm": 0.639557957649231, "learning_rate": 0.00019892765937618828, "loss": 1.0368, 
"step": 1007 }, { "epoch": 0.23395613322502032, "grad_norm": 0.5444445610046387, "learning_rate": 0.0001989255281349926, "loss": 0.9375, "step": 1008 }, { "epoch": 0.23418823256353719, "grad_norm": 0.7191767692565918, "learning_rate": 0.000198923394789453, "loss": 0.9858, "step": 1009 }, { "epoch": 0.23442033190205408, "grad_norm": 0.5967510938644409, "learning_rate": 0.0001989212593396148, "loss": 0.9173, "step": 1010 }, { "epoch": 0.23465243124057097, "grad_norm": 0.7227782011032104, "learning_rate": 0.0001989191217855235, "loss": 0.9418, "step": 1011 }, { "epoch": 0.23488453057908784, "grad_norm": 0.843644380569458, "learning_rate": 0.00019891698212722448, "loss": 0.9963, "step": 1012 }, { "epoch": 0.23511662991760474, "grad_norm": 0.5615436434745789, "learning_rate": 0.00019891484036476333, "loss": 0.9336, "step": 1013 }, { "epoch": 0.23534872925612163, "grad_norm": 0.7177525162696838, "learning_rate": 0.0001989126964981856, "loss": 1.0134, "step": 1014 }, { "epoch": 0.2355808285946385, "grad_norm": 0.5945267081260681, "learning_rate": 0.00019891055052753692, "loss": 0.9555, "step": 1015 }, { "epoch": 0.2358129279331554, "grad_norm": 0.6719576716423035, "learning_rate": 0.0001989084024528628, "loss": 0.9956, "step": 1016 }, { "epoch": 0.2360450272716723, "grad_norm": 0.5902034640312195, "learning_rate": 0.00019890625227420911, "loss": 1.0169, "step": 1017 }, { "epoch": 0.23627712661018915, "grad_norm": 0.6672709584236145, "learning_rate": 0.00019890409999162153, "loss": 0.9642, "step": 1018 }, { "epoch": 0.23650922594870605, "grad_norm": 0.6383097767829895, "learning_rate": 0.00019890194560514578, "loss": 0.9675, "step": 1019 }, { "epoch": 0.23674132528722294, "grad_norm": 0.5565131306648254, "learning_rate": 0.00019889978911482773, "loss": 0.959, "step": 1020 }, { "epoch": 0.2369734246257398, "grad_norm": 0.6134277582168579, "learning_rate": 0.0001988976305207133, "loss": 0.9481, "step": 1021 }, { "epoch": 0.2372055239642567, "grad_norm": 0.6967747211456299, 
"learning_rate": 0.00019889546982284834, "loss": 1.0659, "step": 1022 }, { "epoch": 0.2374376233027736, "grad_norm": 0.6515464186668396, "learning_rate": 0.00019889330702127885, "loss": 0.9341, "step": 1023 }, { "epoch": 0.23766972264129047, "grad_norm": 0.6849958300590515, "learning_rate": 0.00019889114211605078, "loss": 1.041, "step": 1024 }, { "epoch": 0.23790182197980736, "grad_norm": 0.6949160695075989, "learning_rate": 0.00019888897510721027, "loss": 0.9079, "step": 1025 }, { "epoch": 0.23813392131832425, "grad_norm": 0.6973737478256226, "learning_rate": 0.00019888680599480338, "loss": 0.9764, "step": 1026 }, { "epoch": 0.23836602065684112, "grad_norm": 0.7197220325469971, "learning_rate": 0.0001988846347788762, "loss": 0.9658, "step": 1027 }, { "epoch": 0.23859811999535802, "grad_norm": 0.5621665120124817, "learning_rate": 0.000198882461459475, "loss": 0.9886, "step": 1028 }, { "epoch": 0.2388302193338749, "grad_norm": 0.7529211640357971, "learning_rate": 0.00019888028603664597, "loss": 0.9721, "step": 1029 }, { "epoch": 0.23906231867239178, "grad_norm": 0.6316525936126709, "learning_rate": 0.00019887810851043538, "loss": 1.033, "step": 1030 }, { "epoch": 0.23929441801090867, "grad_norm": null, "learning_rate": 0.00019887810851043538, "loss": 0.9393, "step": 1031 }, { "epoch": 0.23952651734942557, "grad_norm": 0.652607262134552, "learning_rate": 0.00019887592888088955, "loss": 0.9157, "step": 1032 }, { "epoch": 0.23975861668794243, "grad_norm": 0.6957564353942871, "learning_rate": 0.00019887374714805484, "loss": 1.0524, "step": 1033 }, { "epoch": 0.23999071602645933, "grad_norm": 0.6793141961097717, "learning_rate": 0.00019887156331197773, "loss": 0.9423, "step": 1034 }, { "epoch": 0.24022281536497622, "grad_norm": 0.7398769855499268, "learning_rate": 0.00019886937737270455, "loss": 0.9802, "step": 1035 }, { "epoch": 0.2404549147034931, "grad_norm": 0.6323033571243286, "learning_rate": 0.0001988671893302819, "loss": 0.9659, "step": 1036 }, { "epoch": 
0.24068701404200998, "grad_norm": 0.6376298069953918, "learning_rate": 0.00019886499918475627, "loss": 0.949, "step": 1037 }, { "epoch": 0.24091911338052688, "grad_norm": 0.8286488652229309, "learning_rate": 0.0001988628069361743, "loss": 0.95, "step": 1038 }, { "epoch": 0.24115121271904374, "grad_norm": 0.6404109001159668, "learning_rate": 0.00019886061258458254, "loss": 0.9709, "step": 1039 }, { "epoch": 0.24138331205756064, "grad_norm": 0.7084428071975708, "learning_rate": 0.00019885841613002776, "loss": 0.9499, "step": 1040 }, { "epoch": 0.24161541139607753, "grad_norm": 0.6747325658798218, "learning_rate": 0.00019885621757255665, "loss": 0.9277, "step": 1041 }, { "epoch": 0.2418475107345944, "grad_norm": 0.6257612109184265, "learning_rate": 0.00019885401691221595, "loss": 0.9796, "step": 1042 }, { "epoch": 0.2420796100731113, "grad_norm": 0.7052178978919983, "learning_rate": 0.0001988518141490525, "loss": 0.9269, "step": 1043 }, { "epoch": 0.2423117094116282, "grad_norm": 0.7508018016815186, "learning_rate": 0.00019884960928311319, "loss": 0.9804, "step": 1044 }, { "epoch": 0.24254380875014506, "grad_norm": 0.6753238439559937, "learning_rate": 0.00019884740231444482, "loss": 1.0061, "step": 1045 }, { "epoch": 0.24277590808866195, "grad_norm": 0.6773577332496643, "learning_rate": 0.00019884519324309442, "loss": 0.9664, "step": 1046 }, { "epoch": 0.24300800742717885, "grad_norm": 0.7419878244400024, "learning_rate": 0.000198842982069109, "loss": 0.9677, "step": 1047 }, { "epoch": 0.2432401067656957, "grad_norm": 0.8212810754776001, "learning_rate": 0.0001988407687925355, "loss": 0.9977, "step": 1048 }, { "epoch": 0.2434722061042126, "grad_norm": 0.7218479514122009, "learning_rate": 0.00019883855341342108, "loss": 0.97, "step": 1049 }, { "epoch": 0.2437043054427295, "grad_norm": 0.5721947550773621, "learning_rate": 0.00019883633593181286, "loss": 0.9955, "step": 1050 }, { "epoch": 0.24393640478124637, "grad_norm": 0.7010205388069153, "learning_rate": 
0.00019883411634775798, "loss": 0.9491, "step": 1051 }, { "epoch": 0.24416850411976326, "grad_norm": 0.6217652559280396, "learning_rate": 0.00019883189466130365, "loss": 0.9952, "step": 1052 }, { "epoch": 0.24440060345828013, "grad_norm": 0.5644096732139587, "learning_rate": 0.00019882967087249718, "loss": 1.0063, "step": 1053 }, { "epoch": 0.24463270279679702, "grad_norm": 0.8879632949829102, "learning_rate": 0.00019882744498138583, "loss": 0.9363, "step": 1054 }, { "epoch": 0.24486480213531392, "grad_norm": 0.7017568349838257, "learning_rate": 0.00019882521698801696, "loss": 0.9515, "step": 1055 }, { "epoch": 0.24509690147383079, "grad_norm": 0.7804952263832092, "learning_rate": 0.00019882298689243797, "loss": 0.9789, "step": 1056 }, { "epoch": 0.24532900081234768, "grad_norm": 0.7065613865852356, "learning_rate": 0.0001988207546946963, "loss": 1.0243, "step": 1057 }, { "epoch": 0.24556110015086458, "grad_norm": 0.7654082179069519, "learning_rate": 0.00019881852039483944, "loss": 1.0577, "step": 1058 }, { "epoch": 0.24579319948938144, "grad_norm": 0.6900961995124817, "learning_rate": 0.00019881628399291487, "loss": 0.937, "step": 1059 }, { "epoch": 0.24602529882789834, "grad_norm": 0.6072416305541992, "learning_rate": 0.0001988140454889702, "loss": 1.0117, "step": 1060 }, { "epoch": 0.24625739816641523, "grad_norm": 0.7517485022544861, "learning_rate": 0.00019881180488305307, "loss": 0.9611, "step": 1061 }, { "epoch": 0.2464894975049321, "grad_norm": 0.6430591344833374, "learning_rate": 0.00019880956217521113, "loss": 0.9492, "step": 1062 }, { "epoch": 0.246721596843449, "grad_norm": 0.6278581619262695, "learning_rate": 0.00019880731736549202, "loss": 0.9409, "step": 1063 }, { "epoch": 0.2469536961819659, "grad_norm": 0.6350873112678528, "learning_rate": 0.0001988050704539436, "loss": 0.9854, "step": 1064 }, { "epoch": 0.24718579552048275, "grad_norm": 0.5931549072265625, "learning_rate": 0.0001988028214406136, "loss": 0.9485, "step": 1065 }, { "epoch": 
0.24741789485899965, "grad_norm": 0.5988550782203674, "learning_rate": 0.00019880057032554987, "loss": 0.9758, "step": 1066 }, { "epoch": 0.24764999419751654, "grad_norm": 0.6799753308296204, "learning_rate": 0.0001987983171088003, "loss": 1.0409, "step": 1067 }, { "epoch": 0.2478820935360334, "grad_norm": 0.6968798637390137, "learning_rate": 0.00019879606179041282, "loss": 0.9125, "step": 1068 }, { "epoch": 0.2481141928745503, "grad_norm": 0.7556788325309753, "learning_rate": 0.0001987938043704354, "loss": 0.9847, "step": 1069 }, { "epoch": 0.2483462922130672, "grad_norm": 0.6871677041053772, "learning_rate": 0.00019879154484891607, "loss": 0.9301, "step": 1070 }, { "epoch": 0.24857839155158407, "grad_norm": 0.6313888430595398, "learning_rate": 0.00019878928322590288, "loss": 0.9158, "step": 1071 }, { "epoch": 0.24881049089010096, "grad_norm": 0.6888328194618225, "learning_rate": 0.000198787019501444, "loss": 0.9737, "step": 1072 }, { "epoch": 0.24904259022861785, "grad_norm": 0.5745333433151245, "learning_rate": 0.00019878475367558746, "loss": 0.9385, "step": 1073 }, { "epoch": 0.24927468956713472, "grad_norm": 0.6683965921401978, "learning_rate": 0.00019878248574838156, "loss": 0.9394, "step": 1074 }, { "epoch": 0.24950678890565162, "grad_norm": 0.7506693601608276, "learning_rate": 0.00019878021571987452, "loss": 0.9648, "step": 1075 }, { "epoch": 0.2497388882441685, "grad_norm": 0.6436938643455505, "learning_rate": 0.00019877794359011462, "loss": 0.9979, "step": 1076 }, { "epoch": 0.24997098758268538, "grad_norm": 0.695239245891571, "learning_rate": 0.00019877566935915019, "loss": 0.988, "step": 1077 }, { "epoch": 0.2502030869212023, "grad_norm": 0.5453709959983826, "learning_rate": 0.00019877339302702964, "loss": 0.9258, "step": 1078 }, { "epoch": 0.25043518625971917, "grad_norm": 0.5463337898254395, "learning_rate": 0.00019877111459380134, "loss": 0.9324, "step": 1079 }, { "epoch": 0.25066728559823603, "grad_norm": 0.6460524201393127, "learning_rate": 
0.00019876883405951377, "loss": 0.9773, "step": 1080 }, { "epoch": 0.25089938493675296, "grad_norm": 0.5818413496017456, "learning_rate": 0.0001987665514242155, "loss": 0.9708, "step": 1081 }, { "epoch": 0.2511314842752698, "grad_norm": 0.6356840133666992, "learning_rate": 0.00019876426668795501, "loss": 0.9405, "step": 1082 }, { "epoch": 0.2513635836137867, "grad_norm": 0.6444448828697205, "learning_rate": 0.0001987619798507809, "loss": 1.0064, "step": 1083 }, { "epoch": 0.2515956829523036, "grad_norm": 0.6716335415840149, "learning_rate": 0.00019875969091274188, "loss": 0.9881, "step": 1084 }, { "epoch": 0.2518277822908205, "grad_norm": 0.5971190929412842, "learning_rate": 0.00019875739987388662, "loss": 0.9405, "step": 1085 }, { "epoch": 0.25205988162933735, "grad_norm": 0.7543767690658569, "learning_rate": 0.00019875510673426382, "loss": 0.9812, "step": 1086 }, { "epoch": 0.25229198096785427, "grad_norm": 0.6596341133117676, "learning_rate": 0.00019875281149392228, "loss": 0.9904, "step": 1087 }, { "epoch": 0.25252408030637113, "grad_norm": 0.6647107601165771, "learning_rate": 0.00019875051415291087, "loss": 0.9488, "step": 1088 }, { "epoch": 0.252756179644888, "grad_norm": 0.7366990447044373, "learning_rate": 0.00019874821471127835, "loss": 1.0672, "step": 1089 }, { "epoch": 0.2529882789834049, "grad_norm": 0.619855523109436, "learning_rate": 0.00019874591316907373, "loss": 0.9403, "step": 1090 }, { "epoch": 0.2532203783219218, "grad_norm": 0.8843204975128174, "learning_rate": 0.00019874360952634593, "loss": 0.9866, "step": 1091 }, { "epoch": 0.25345247766043866, "grad_norm": 0.7842957973480225, "learning_rate": 0.00019874130378314396, "loss": 0.9539, "step": 1092 }, { "epoch": 0.2536845769989556, "grad_norm": 0.735531747341156, "learning_rate": 0.00019873899593951687, "loss": 0.9879, "step": 1093 }, { "epoch": 0.25391667633747245, "grad_norm": 0.7946038842201233, "learning_rate": 0.00019873668599551376, "loss": 0.934, "step": 1094 }, { "epoch": 
0.2541487756759893, "grad_norm": 0.673864483833313, "learning_rate": 0.00019873437395118374, "loss": 0.9266, "step": 1095 }, { "epoch": 0.25438087501450624, "grad_norm": 0.9342595338821411, "learning_rate": 0.000198732059806576, "loss": 0.9637, "step": 1096 }, { "epoch": 0.2546129743530231, "grad_norm": 0.7086910605430603, "learning_rate": 0.00019872974356173982, "loss": 1.0111, "step": 1097 }, { "epoch": 0.25484507369153997, "grad_norm": 0.6734259724617004, "learning_rate": 0.00019872742521672438, "loss": 0.9671, "step": 1098 }, { "epoch": 0.2550771730300569, "grad_norm": 0.6706153154373169, "learning_rate": 0.00019872510477157907, "loss": 1.0581, "step": 1099 }, { "epoch": 0.25530927236857376, "grad_norm": 0.6654695868492126, "learning_rate": 0.00019872278222635324, "loss": 0.9352, "step": 1100 }, { "epoch": 0.2555413717070906, "grad_norm": 0.5473498106002808, "learning_rate": 0.00019872045758109622, "loss": 0.9617, "step": 1101 }, { "epoch": 0.25577347104560755, "grad_norm": 0.6722119450569153, "learning_rate": 0.00019871813083585757, "loss": 0.9577, "step": 1102 }, { "epoch": 0.2560055703841244, "grad_norm": 0.6397858262062073, "learning_rate": 0.00019871580199068672, "loss": 1.05, "step": 1103 }, { "epoch": 0.2562376697226413, "grad_norm": 0.642416775226593, "learning_rate": 0.00019871347104563324, "loss": 0.9177, "step": 1104 }, { "epoch": 0.25646976906115815, "grad_norm": 0.6837118268013, "learning_rate": 0.00019871113800074664, "loss": 1.0059, "step": 1105 }, { "epoch": 0.25670186839967507, "grad_norm": 0.7533993721008301, "learning_rate": 0.00019870880285607665, "loss": 0.9712, "step": 1106 }, { "epoch": 0.25693396773819194, "grad_norm": 0.6660096645355225, "learning_rate": 0.00019870646561167288, "loss": 0.9801, "step": 1107 }, { "epoch": 0.2571660670767088, "grad_norm": 0.7277395725250244, "learning_rate": 0.00019870412626758505, "loss": 0.9265, "step": 1108 }, { "epoch": 0.2573981664152257, "grad_norm": 0.6277183890342712, "learning_rate": 
0.00019870178482386296, "loss": 1.0197, "step": 1109 }, { "epoch": 0.2576302657537426, "grad_norm": 0.7515749335289001, "learning_rate": 0.0001986994412805564, "loss": 0.9574, "step": 1110 }, { "epoch": 0.25786236509225946, "grad_norm": 0.7869638800621033, "learning_rate": 0.0001986970956377152, "loss": 0.9487, "step": 1111 }, { "epoch": 0.2580944644307764, "grad_norm": 0.5892865061759949, "learning_rate": 0.0001986947478953893, "loss": 0.9512, "step": 1112 }, { "epoch": 0.25832656376929325, "grad_norm": 0.6904581189155579, "learning_rate": 0.0001986923980536286, "loss": 0.9086, "step": 1113 }, { "epoch": 0.2585586631078101, "grad_norm": 0.5810085535049438, "learning_rate": 0.0001986900461124831, "loss": 1.0037, "step": 1114 }, { "epoch": 0.25879076244632704, "grad_norm": 0.5691549181938171, "learning_rate": 0.00019868769207200283, "loss": 0.951, "step": 1115 }, { "epoch": 0.2590228617848439, "grad_norm": 0.5185096263885498, "learning_rate": 0.00019868533593223785, "loss": 1.0126, "step": 1116 }, { "epoch": 0.25925496112336077, "grad_norm": 0.5624130964279175, "learning_rate": 0.00019868297769323832, "loss": 0.9642, "step": 1117 }, { "epoch": 0.2594870604618777, "grad_norm": 0.6319359540939331, "learning_rate": 0.00019868061735505435, "loss": 1.0275, "step": 1118 }, { "epoch": 0.25971915980039456, "grad_norm": 0.588772714138031, "learning_rate": 0.0001986782549177362, "loss": 1.046, "step": 1119 }, { "epoch": 0.25995125913891143, "grad_norm": 0.6646950244903564, "learning_rate": 0.0001986758903813341, "loss": 0.9387, "step": 1120 }, { "epoch": 0.26018335847742835, "grad_norm": 0.6222002506256104, "learning_rate": 0.0001986735237458984, "loss": 0.9652, "step": 1121 }, { "epoch": 0.2604154578159452, "grad_norm": 0.6733331680297852, "learning_rate": 0.00019867115501147936, "loss": 0.9938, "step": 1122 }, { "epoch": 0.2606475571544621, "grad_norm": 0.6960176229476929, "learning_rate": 0.00019866878417812734, "loss": 0.9798, "step": 1123 }, { "epoch": 0.260879656492979, 
"grad_norm": 0.5923687219619751, "learning_rate": 0.0001986664112458929, "loss": 0.9794, "step": 1124 }, { "epoch": 0.2611117558314959, "grad_norm": 0.6249006986618042, "learning_rate": 0.00019866403621482645, "loss": 1.0619, "step": 1125 }, { "epoch": 0.26134385517001274, "grad_norm": 0.6020728349685669, "learning_rate": 0.0001986616590849785, "loss": 0.9711, "step": 1126 }, { "epoch": 0.26157595450852966, "grad_norm": 0.5362594127655029, "learning_rate": 0.00019865927985639962, "loss": 1.0011, "step": 1127 }, { "epoch": 0.26180805384704653, "grad_norm": 0.6688573360443115, "learning_rate": 0.00019865689852914046, "loss": 0.9712, "step": 1128 }, { "epoch": 0.2620401531855634, "grad_norm": 0.5580465793609619, "learning_rate": 0.00019865451510325165, "loss": 0.896, "step": 1129 }, { "epoch": 0.2622722525240803, "grad_norm": 0.5877300500869751, "learning_rate": 0.00019865212957878385, "loss": 0.9435, "step": 1130 }, { "epoch": 0.2625043518625972, "grad_norm": 0.5770688652992249, "learning_rate": 0.00019864974195578787, "loss": 1.0081, "step": 1131 }, { "epoch": 0.26273645120111405, "grad_norm": 0.6047990322113037, "learning_rate": 0.00019864735223431445, "loss": 0.9476, "step": 1132 }, { "epoch": 0.262968550539631, "grad_norm": 0.6056119203567505, "learning_rate": 0.00019864496041441444, "loss": 0.9199, "step": 1133 }, { "epoch": 0.26320064987814784, "grad_norm": 0.6231523752212524, "learning_rate": 0.00019864256649613875, "loss": 0.9703, "step": 1134 }, { "epoch": 0.2634327492166647, "grad_norm": 0.6072515249252319, "learning_rate": 0.00019864017047953826, "loss": 0.9618, "step": 1135 }, { "epoch": 0.26366484855518163, "grad_norm": 0.539057195186615, "learning_rate": 0.00019863777236466397, "loss": 0.9138, "step": 1136 }, { "epoch": 0.2638969478936985, "grad_norm": 0.5421254634857178, "learning_rate": 0.00019863537215156687, "loss": 0.9337, "step": 1137 }, { "epoch": 0.26412904723221536, "grad_norm": 0.6480298638343811, "learning_rate": 0.00019863296984029801, 
"loss": 0.9501, "step": 1138 }, { "epoch": 0.2643611465707323, "grad_norm": 0.6218541264533997, "learning_rate": 0.00019863056543090854, "loss": 0.9575, "step": 1139 }, { "epoch": 0.26459324590924915, "grad_norm": 0.5380146503448486, "learning_rate": 0.00019862815892344955, "loss": 0.9577, "step": 1140 }, { "epoch": 0.264825345247766, "grad_norm": 0.5878599882125854, "learning_rate": 0.00019862575031797227, "loss": 1.0325, "step": 1141 }, { "epoch": 0.26505744458628294, "grad_norm": 0.6298499703407288, "learning_rate": 0.00019862333961452792, "loss": 0.9459, "step": 1142 }, { "epoch": 0.2652895439247998, "grad_norm": 0.6419205069541931, "learning_rate": 0.00019862092681316776, "loss": 0.9475, "step": 1143 }, { "epoch": 0.2655216432633167, "grad_norm": 0.5695630311965942, "learning_rate": 0.00019861851191394316, "loss": 0.9411, "step": 1144 }, { "epoch": 0.2657537426018336, "grad_norm": 0.6880519390106201, "learning_rate": 0.00019861609491690545, "loss": 0.9827, "step": 1145 }, { "epoch": 0.26598584194035046, "grad_norm": 0.6248353719711304, "learning_rate": 0.00019861367582210609, "loss": 1.0022, "step": 1146 }, { "epoch": 0.26621794127886733, "grad_norm": 0.6777887344360352, "learning_rate": 0.0001986112546295965, "loss": 0.8936, "step": 1147 }, { "epoch": 0.26645004061738425, "grad_norm": 0.5304838418960571, "learning_rate": 0.00019860883133942817, "loss": 0.9234, "step": 1148 }, { "epoch": 0.2666821399559011, "grad_norm": 0.6150823831558228, "learning_rate": 0.00019860640595165268, "loss": 0.9661, "step": 1149 }, { "epoch": 0.266914239294418, "grad_norm": 0.6510245203971863, "learning_rate": 0.0001986039784663216, "loss": 1.0116, "step": 1150 }, { "epoch": 0.2671463386329349, "grad_norm": 0.5424444079399109, "learning_rate": 0.0001986015488834866, "loss": 0.9693, "step": 1151 }, { "epoch": 0.2673784379714518, "grad_norm": 0.5880402326583862, "learning_rate": 0.00019859911720319934, "loss": 0.9726, "step": 1152 }, { "epoch": 0.26761053730996864, "grad_norm": 
0.6056056618690491, "learning_rate": 0.00019859668342551157, "loss": 0.967, "step": 1153 }, { "epoch": 0.26784263664848557, "grad_norm": 0.6275759339332581, "learning_rate": 0.000198594247550475, "loss": 0.9873, "step": 1154 }, { "epoch": 0.26807473598700243, "grad_norm": 0.6147432327270508, "learning_rate": 0.00019859180957814153, "loss": 1.0087, "step": 1155 }, { "epoch": 0.2683068353255193, "grad_norm": 0.6617740988731384, "learning_rate": 0.00019858936950856298, "loss": 0.9666, "step": 1156 }, { "epoch": 0.2685389346640362, "grad_norm": 0.775313675403595, "learning_rate": 0.00019858692734179125, "loss": 1.0275, "step": 1157 }, { "epoch": 0.2687710340025531, "grad_norm": 0.5583872199058533, "learning_rate": 0.00019858448307787826, "loss": 0.9239, "step": 1158 }, { "epoch": 0.26900313334106996, "grad_norm": 0.6754629015922546, "learning_rate": 0.00019858203671687608, "loss": 0.9278, "step": 1159 }, { "epoch": 0.2692352326795869, "grad_norm": 0.6704349517822266, "learning_rate": 0.00019857958825883666, "loss": 0.9249, "step": 1160 }, { "epoch": 0.26946733201810374, "grad_norm": 0.6635943651199341, "learning_rate": 0.00019857713770381216, "loss": 0.9693, "step": 1161 }, { "epoch": 0.2696994313566206, "grad_norm": 0.6549606323242188, "learning_rate": 0.00019857468505185468, "loss": 0.9435, "step": 1162 }, { "epoch": 0.26993153069513753, "grad_norm": 0.655264139175415, "learning_rate": 0.0001985722303030164, "loss": 0.9922, "step": 1163 }, { "epoch": 0.2701636300336544, "grad_norm": 0.7897664904594421, "learning_rate": 0.0001985697734573495, "loss": 0.9675, "step": 1164 }, { "epoch": 0.27039572937217127, "grad_norm": 0.6189572811126709, "learning_rate": 0.00019856731451490628, "loss": 0.9682, "step": 1165 }, { "epoch": 0.2706278287106882, "grad_norm": 0.6735862493515015, "learning_rate": 0.0001985648534757391, "loss": 0.9945, "step": 1166 }, { "epoch": 0.27085992804920506, "grad_norm": 0.6127110123634338, "learning_rate": 0.00019856239033990016, "loss": 0.9198, 
"step": 1167 }, { "epoch": 0.2710920273877219, "grad_norm": 0.5929283499717712, "learning_rate": 0.00019855992510744197, "loss": 0.9529, "step": 1168 }, { "epoch": 0.27132412672623885, "grad_norm": 0.8252766132354736, "learning_rate": 0.00019855745777841696, "loss": 0.9075, "step": 1169 }, { "epoch": 0.2715562260647557, "grad_norm": 0.5320939421653748, "learning_rate": 0.00019855498835287758, "loss": 0.9307, "step": 1170 }, { "epoch": 0.2717883254032726, "grad_norm": 0.6958411931991577, "learning_rate": 0.00019855251683087641, "loss": 0.9914, "step": 1171 }, { "epoch": 0.2720204247417895, "grad_norm": 0.6114237904548645, "learning_rate": 0.00019855004321246597, "loss": 0.924, "step": 1172 }, { "epoch": 0.27225252408030637, "grad_norm": 0.6043612957000732, "learning_rate": 0.0001985475674976989, "loss": 1.0149, "step": 1173 }, { "epoch": 0.27248462341882324, "grad_norm": 0.621781051158905, "learning_rate": 0.0001985450896866279, "loss": 1.0086, "step": 1174 }, { "epoch": 0.27271672275734016, "grad_norm": 0.6235195398330688, "learning_rate": 0.00019854260977930562, "loss": 0.9812, "step": 1175 }, { "epoch": 0.272948822095857, "grad_norm": 0.5985909104347229, "learning_rate": 0.00019854012777578483, "loss": 0.9756, "step": 1176 }, { "epoch": 0.2731809214343739, "grad_norm": 0.5930039882659912, "learning_rate": 0.00019853764367611833, "loss": 0.9747, "step": 1177 }, { "epoch": 0.2734130207728908, "grad_norm": 0.721496045589447, "learning_rate": 0.00019853515748035898, "loss": 1.0291, "step": 1178 }, { "epoch": 0.2736451201114077, "grad_norm": 0.6263835430145264, "learning_rate": 0.00019853266918855967, "loss": 1.0029, "step": 1179 }, { "epoch": 0.27387721944992455, "grad_norm": 0.5983469486236572, "learning_rate": 0.00019853017880077327, "loss": 0.9634, "step": 1180 }, { "epoch": 0.27410931878844147, "grad_norm": 0.60430908203125, "learning_rate": 0.00019852768631705282, "loss": 0.9656, "step": 1181 }, { "epoch": 0.27434141812695834, "grad_norm": 0.6196134686470032, 
"learning_rate": 0.00019852519173745132, "loss": 0.8979, "step": 1182 }, { "epoch": 0.2745735174654752, "grad_norm": 0.5695511698722839, "learning_rate": 0.00019852269506202185, "loss": 0.9423, "step": 1183 }, { "epoch": 0.2748056168039921, "grad_norm": 0.6204293966293335, "learning_rate": 0.0001985201962908175, "loss": 0.9904, "step": 1184 }, { "epoch": 0.275037716142509, "grad_norm": 0.6277987360954285, "learning_rate": 0.00019851769542389138, "loss": 1.0005, "step": 1185 }, { "epoch": 0.27526981548102586, "grad_norm": 0.5877930521965027, "learning_rate": 0.00019851519246129678, "loss": 1.0148, "step": 1186 }, { "epoch": 0.2755019148195428, "grad_norm": 0.6406680941581726, "learning_rate": 0.00019851268740308687, "loss": 1.0062, "step": 1187 }, { "epoch": 0.27573401415805965, "grad_norm": 0.5724177360534668, "learning_rate": 0.000198510180249315, "loss": 0.9494, "step": 1188 }, { "epoch": 0.2759661134965765, "grad_norm": 0.5927779078483582, "learning_rate": 0.00019850767100003445, "loss": 0.9568, "step": 1189 }, { "epoch": 0.27619821283509344, "grad_norm": 0.6121124029159546, "learning_rate": 0.0001985051596552986, "loss": 0.9539, "step": 1190 }, { "epoch": 0.2764303121736103, "grad_norm": 0.710793673992157, "learning_rate": 0.0001985026462151609, "loss": 0.9725, "step": 1191 }, { "epoch": 0.27666241151212717, "grad_norm": 0.7078494429588318, "learning_rate": 0.00019850013067967482, "loss": 0.9903, "step": 1192 }, { "epoch": 0.2768945108506441, "grad_norm": 0.696902871131897, "learning_rate": 0.00019849761304889383, "loss": 0.961, "step": 1193 }, { "epoch": 0.27712661018916096, "grad_norm": 0.7753614783287048, "learning_rate": 0.00019849509332287151, "loss": 0.9461, "step": 1194 }, { "epoch": 0.2773587095276778, "grad_norm": 0.6889293193817139, "learning_rate": 0.00019849257150166146, "loss": 0.9168, "step": 1195 }, { "epoch": 0.27759080886619475, "grad_norm": 0.6924726963043213, "learning_rate": 0.0001984900475853173, "loss": 0.9567, "step": 1196 }, { "epoch": 
0.2778229082047116, "grad_norm": 0.619976818561554, "learning_rate": 0.00019848752157389276, "loss": 0.9286, "step": 1197 }, { "epoch": 0.2780550075432285, "grad_norm": 0.6130307912826538, "learning_rate": 0.00019848499346744155, "loss": 0.9645, "step": 1198 }, { "epoch": 0.2782871068817454, "grad_norm": 0.6702805757522583, "learning_rate": 0.00019848246326601747, "loss": 0.9326, "step": 1199 }, { "epoch": 0.27851920622026227, "grad_norm": 0.6605530381202698, "learning_rate": 0.0001984799309696743, "loss": 0.9865, "step": 1200 }, { "epoch": 0.27875130555877914, "grad_norm": 0.7218630313873291, "learning_rate": 0.00019847739657846594, "loss": 0.9243, "step": 1201 }, { "epoch": 0.27898340489729606, "grad_norm": 0.5895372033119202, "learning_rate": 0.0001984748600924463, "loss": 0.9249, "step": 1202 }, { "epoch": 0.27921550423581293, "grad_norm": 0.6202947497367859, "learning_rate": 0.0001984723215116693, "loss": 0.967, "step": 1203 }, { "epoch": 0.2794476035743298, "grad_norm": 0.573955237865448, "learning_rate": 0.000198469780836189, "loss": 0.9575, "step": 1204 }, { "epoch": 0.2796797029128467, "grad_norm": 0.6376585960388184, "learning_rate": 0.0001984672380660594, "loss": 0.9622, "step": 1205 }, { "epoch": 0.2799118022513636, "grad_norm": 0.6243661046028137, "learning_rate": 0.00019846469320133458, "loss": 0.934, "step": 1206 }, { "epoch": 0.28014390158988045, "grad_norm": 0.5604823231697083, "learning_rate": 0.00019846214624206875, "loss": 0.9375, "step": 1207 }, { "epoch": 0.2803760009283974, "grad_norm": 0.6892914175987244, "learning_rate": 0.000198459597188316, "loss": 0.901, "step": 1208 }, { "epoch": 0.28060810026691424, "grad_norm": 0.5592294335365295, "learning_rate": 0.00019845704604013058, "loss": 0.9299, "step": 1209 }, { "epoch": 0.2808401996054311, "grad_norm": 0.5624305009841919, "learning_rate": 0.0001984544927975668, "loss": 0.954, "step": 1210 }, { "epoch": 0.28107229894394803, "grad_norm": 0.6918671131134033, "learning_rate": 
0.00019845193746067895, "loss": 0.8607, "step": 1211 }, { "epoch": 0.2813043982824649, "grad_norm": 0.6283919811248779, "learning_rate": 0.00019844938002952134, "loss": 0.8982, "step": 1212 }, { "epoch": 0.28153649762098176, "grad_norm": 0.6212047934532166, "learning_rate": 0.00019844682050414845, "loss": 0.9713, "step": 1213 }, { "epoch": 0.2817685969594987, "grad_norm": 0.5790887475013733, "learning_rate": 0.00019844425888461466, "loss": 0.907, "step": 1214 }, { "epoch": 0.28200069629801555, "grad_norm": 0.6024935245513916, "learning_rate": 0.0001984416951709745, "loss": 0.9507, "step": 1215 }, { "epoch": 0.2822327956365324, "grad_norm": 0.5913442373275757, "learning_rate": 0.00019843912936328247, "loss": 0.9736, "step": 1216 }, { "epoch": 0.28246489497504934, "grad_norm": 0.5638153553009033, "learning_rate": 0.0001984365614615932, "loss": 0.9052, "step": 1217 }, { "epoch": 0.2826969943135662, "grad_norm": 0.5622501969337463, "learning_rate": 0.00019843399146596125, "loss": 0.9813, "step": 1218 }, { "epoch": 0.2829290936520831, "grad_norm": 0.6750192046165466, "learning_rate": 0.00019843141937644134, "loss": 0.9111, "step": 1219 }, { "epoch": 0.2831611929906, "grad_norm": 0.6033793687820435, "learning_rate": 0.0001984288451930882, "loss": 0.9504, "step": 1220 }, { "epoch": 0.28339329232911686, "grad_norm": 0.5745095014572144, "learning_rate": 0.00019842626891595653, "loss": 0.9646, "step": 1221 }, { "epoch": 0.28362539166763373, "grad_norm": 0.6014915108680725, "learning_rate": 0.00019842369054510117, "loss": 0.953, "step": 1222 }, { "epoch": 0.28385749100615065, "grad_norm": 0.5896861553192139, "learning_rate": 0.00019842111008057693, "loss": 0.9664, "step": 1223 }, { "epoch": 0.2840895903446675, "grad_norm": 0.5923585891723633, "learning_rate": 0.00019841852752243877, "loss": 0.9443, "step": 1224 }, { "epoch": 0.2843216896831844, "grad_norm": null, "learning_rate": 0.00019841852752243877, "loss": 0.9298, "step": 1225 }, { "epoch": 0.2845537890217013, 
"grad_norm": 0.6662050485610962, "learning_rate": 0.00019841594287074157, "loss": 0.9536, "step": 1226 }, { "epoch": 0.2847858883602182, "grad_norm": 0.5837487578392029, "learning_rate": 0.0001984133561255403, "loss": 0.9569, "step": 1227 }, { "epoch": 0.28501798769873504, "grad_norm": 0.5826741456985474, "learning_rate": 0.00019841076728689005, "loss": 1.0053, "step": 1228 }, { "epoch": 0.28525008703725196, "grad_norm": 0.6345201730728149, "learning_rate": 0.00019840817635484584, "loss": 0.8799, "step": 1229 }, { "epoch": 0.28548218637576883, "grad_norm": 0.6240464448928833, "learning_rate": 0.00019840558332946277, "loss": 0.9507, "step": 1230 }, { "epoch": 0.2857142857142857, "grad_norm": 0.6892589926719666, "learning_rate": 0.00019840298821079605, "loss": 0.9509, "step": 1231 }, { "epoch": 0.2859463850528026, "grad_norm": 0.6426685452461243, "learning_rate": 0.00019840039099890087, "loss": 0.9316, "step": 1232 }, { "epoch": 0.2861784843913195, "grad_norm": 0.6755335927009583, "learning_rate": 0.00019839779169383243, "loss": 0.958, "step": 1233 }, { "epoch": 0.28641058372983635, "grad_norm": 0.5647304058074951, "learning_rate": 0.00019839519029564605, "loss": 0.9826, "step": 1234 }, { "epoch": 0.2866426830683533, "grad_norm": 0.6760590672492981, "learning_rate": 0.00019839258680439713, "loss": 0.9093, "step": 1235 }, { "epoch": 0.28687478240687014, "grad_norm": 0.5457693338394165, "learning_rate": 0.00019838998122014093, "loss": 0.9479, "step": 1236 }, { "epoch": 0.287106881745387, "grad_norm": 0.6709065437316895, "learning_rate": 0.00019838737354293298, "loss": 0.9609, "step": 1237 }, { "epoch": 0.28733898108390393, "grad_norm": 0.635703444480896, "learning_rate": 0.00019838476377282872, "loss": 0.9193, "step": 1238 }, { "epoch": 0.2875710804224208, "grad_norm": 0.6054381728172302, "learning_rate": 0.00019838215190988364, "loss": 0.9806, "step": 1239 }, { "epoch": 0.28780317976093767, "grad_norm": 0.7244990468025208, "learning_rate": 0.0001983795379541533, 
"loss": 0.9941, "step": 1240 }, { "epoch": 0.2880352790994546, "grad_norm": 0.5459023118019104, "learning_rate": 0.00019837692190569333, "loss": 0.9266, "step": 1241 }, { "epoch": 0.28826737843797146, "grad_norm": 0.6688336730003357, "learning_rate": 0.0001983743037645594, "loss": 0.95, "step": 1242 }, { "epoch": 0.2884994777764883, "grad_norm": 0.6450549364089966, "learning_rate": 0.00019837168353080715, "loss": 0.9319, "step": 1243 }, { "epoch": 0.28873157711500524, "grad_norm": 0.6404283046722412, "learning_rate": 0.00019836906120449232, "loss": 0.9482, "step": 1244 }, { "epoch": 0.2889636764535221, "grad_norm": 0.6874573826789856, "learning_rate": 0.00019836643678567076, "loss": 1.006, "step": 1245 }, { "epoch": 0.289195775792039, "grad_norm": 0.6311227083206177, "learning_rate": 0.00019836381027439818, "loss": 0.9438, "step": 1246 }, { "epoch": 0.2894278751305559, "grad_norm": 0.6493483185768127, "learning_rate": 0.00019836118167073058, "loss": 0.9577, "step": 1247 }, { "epoch": 0.28965997446907277, "grad_norm": 0.6660622954368591, "learning_rate": 0.0001983585509747238, "loss": 0.9307, "step": 1248 }, { "epoch": 0.28989207380758963, "grad_norm": 0.706911563873291, "learning_rate": 0.0001983559181864338, "loss": 0.9737, "step": 1249 }, { "epoch": 0.29012417314610656, "grad_norm": 0.6599269509315491, "learning_rate": 0.00019835328330591664, "loss": 0.9455, "step": 1250 }, { "epoch": 0.2903562724846234, "grad_norm": 0.6264684796333313, "learning_rate": 0.00019835064633322828, "loss": 0.9705, "step": 1251 }, { "epoch": 0.2905883718231403, "grad_norm": 0.6962366104125977, "learning_rate": 0.0001983480072684249, "loss": 0.9755, "step": 1252 }, { "epoch": 0.2908204711616572, "grad_norm": 0.6037434935569763, "learning_rate": 0.0001983453661115626, "loss": 0.9538, "step": 1253 }, { "epoch": 0.2910525705001741, "grad_norm": 0.6135785579681396, "learning_rate": 0.00019834272286269755, "loss": 0.9089, "step": 1254 }, { "epoch": 0.29128466983869095, "grad_norm": 
0.5442163348197937, "learning_rate": 0.000198340077521886, "loss": 0.9593, "step": 1255 }, { "epoch": 0.29151676917720787, "grad_norm": 0.5807743072509766, "learning_rate": 0.0001983374300891842, "loss": 0.9592, "step": 1256 }, { "epoch": 0.29174886851572474, "grad_norm": 0.5582982301712036, "learning_rate": 0.0001983347805646485, "loss": 0.9154, "step": 1257 }, { "epoch": 0.2919809678542416, "grad_norm": 0.6122931838035583, "learning_rate": 0.00019833212894833523, "loss": 0.972, "step": 1258 }, { "epoch": 0.2922130671927585, "grad_norm": 0.6058019995689392, "learning_rate": 0.0001983294752403008, "loss": 0.9371, "step": 1259 }, { "epoch": 0.2924451665312754, "grad_norm": 0.5534565448760986, "learning_rate": 0.00019832681944060166, "loss": 0.9455, "step": 1260 }, { "epoch": 0.29267726586979226, "grad_norm": 0.6321182250976562, "learning_rate": 0.0001983241615492943, "loss": 0.9846, "step": 1261 }, { "epoch": 0.2929093652083092, "grad_norm": 0.6094294190406799, "learning_rate": 0.0001983215015664353, "loss": 0.9254, "step": 1262 }, { "epoch": 0.29314146454682605, "grad_norm": 0.6116045117378235, "learning_rate": 0.0001983188394920812, "loss": 0.8743, "step": 1263 }, { "epoch": 0.2933735638853429, "grad_norm": 0.6186642050743103, "learning_rate": 0.00019831617532628862, "loss": 0.9595, "step": 1264 }, { "epoch": 0.29360566322385984, "grad_norm": 0.7716023921966553, "learning_rate": 0.00019831350906911427, "loss": 0.9495, "step": 1265 }, { "epoch": 0.2938377625623767, "grad_norm": 0.6053861975669861, "learning_rate": 0.00019831084072061483, "loss": 0.981, "step": 1266 }, { "epoch": 0.29406986190089357, "grad_norm": 0.7833683490753174, "learning_rate": 0.0001983081702808471, "loss": 0.9119, "step": 1267 }, { "epoch": 0.2943019612394105, "grad_norm": 0.6753665208816528, "learning_rate": 0.00019830549774986787, "loss": 0.9076, "step": 1268 }, { "epoch": 0.29453406057792736, "grad_norm": 0.6646583080291748, "learning_rate": 0.00019830282312773397, "loss": 0.9457, "step": 
1269 }, { "epoch": 0.2947661599164442, "grad_norm": 0.5755628943443298, "learning_rate": 0.00019830014641450226, "loss": 0.9115, "step": 1270 }, { "epoch": 0.29499825925496115, "grad_norm": 0.7574052214622498, "learning_rate": 0.0001982974676102298, "loss": 0.9273, "step": 1271 }, { "epoch": 0.295230358593478, "grad_norm": 0.6140890717506409, "learning_rate": 0.00019829478671497348, "loss": 0.9317, "step": 1272 }, { "epoch": 0.2954624579319949, "grad_norm": 0.735943615436554, "learning_rate": 0.00019829210372879035, "loss": 0.9549, "step": 1273 }, { "epoch": 0.2956945572705118, "grad_norm": 0.6471147537231445, "learning_rate": 0.00019828941865173748, "loss": 1.0158, "step": 1274 }, { "epoch": 0.29592665660902867, "grad_norm": 0.5931142568588257, "learning_rate": 0.00019828673148387197, "loss": 0.9303, "step": 1275 }, { "epoch": 0.29615875594754554, "grad_norm": 0.6224324107170105, "learning_rate": 0.00019828404222525103, "loss": 0.941, "step": 1276 }, { "epoch": 0.29639085528606246, "grad_norm": 0.6320950984954834, "learning_rate": 0.0001982813508759318, "loss": 0.9538, "step": 1277 }, { "epoch": 0.2966229546245793, "grad_norm": 0.6541613340377808, "learning_rate": 0.0001982786574359716, "loss": 0.988, "step": 1278 }, { "epoch": 0.2968550539630962, "grad_norm": 0.7296457886695862, "learning_rate": 0.0001982759619054277, "loss": 0.9254, "step": 1279 }, { "epoch": 0.2970871533016131, "grad_norm": 0.6065571904182434, "learning_rate": 0.0001982732642843574, "loss": 0.982, "step": 1280 }, { "epoch": 0.29731925264013, "grad_norm": 0.6203855872154236, "learning_rate": 0.0001982705645728181, "loss": 0.9887, "step": 1281 }, { "epoch": 0.29755135197864685, "grad_norm": 0.7182449698448181, "learning_rate": 0.00019826786277086727, "loss": 0.9407, "step": 1282 }, { "epoch": 0.29778345131716377, "grad_norm": 0.5926727652549744, "learning_rate": 0.00019826515887856237, "loss": 0.9667, "step": 1283 }, { "epoch": 0.29801555065568064, "grad_norm": 0.7838566303253174, 
"learning_rate": 0.0001982624528959609, "loss": 0.916, "step": 1284 }, { "epoch": 0.2982476499941975, "grad_norm": 0.6244154572486877, "learning_rate": 0.0001982597448231204, "loss": 0.9894, "step": 1285 }, { "epoch": 0.29847974933271443, "grad_norm": 0.5403871536254883, "learning_rate": 0.00019825703466009852, "loss": 0.8623, "step": 1286 }, { "epoch": 0.2987118486712313, "grad_norm": 0.7736438512802124, "learning_rate": 0.00019825432240695286, "loss": 0.9268, "step": 1287 }, { "epoch": 0.29894394800974816, "grad_norm": 0.5713629722595215, "learning_rate": 0.0001982516080637412, "loss": 0.9899, "step": 1288 }, { "epoch": 0.2991760473482651, "grad_norm": 0.7245318293571472, "learning_rate": 0.0001982488916305212, "loss": 0.9517, "step": 1289 }, { "epoch": 0.29940814668678195, "grad_norm": 0.6842908263206482, "learning_rate": 0.00019824617310735065, "loss": 0.9765, "step": 1290 }, { "epoch": 0.2996402460252988, "grad_norm": 0.6580656170845032, "learning_rate": 0.0001982434524942874, "loss": 0.9737, "step": 1291 }, { "epoch": 0.29987234536381574, "grad_norm": 0.7140244841575623, "learning_rate": 0.00019824072979138935, "loss": 0.9313, "step": 1292 }, { "epoch": 0.3001044447023326, "grad_norm": 0.586677074432373, "learning_rate": 0.00019823800499871436, "loss": 0.956, "step": 1293 }, { "epoch": 0.3003365440408495, "grad_norm": 0.6235963702201843, "learning_rate": 0.00019823527811632042, "loss": 0.9199, "step": 1294 }, { "epoch": 0.3005686433793664, "grad_norm": 0.5671932697296143, "learning_rate": 0.00019823254914426555, "loss": 0.9816, "step": 1295 }, { "epoch": 0.30080074271788326, "grad_norm": 0.7332002520561218, "learning_rate": 0.00019822981808260778, "loss": 0.9531, "step": 1296 }, { "epoch": 0.30103284205640013, "grad_norm": 0.5819656252861023, "learning_rate": 0.00019822708493140524, "loss": 0.9281, "step": 1297 }, { "epoch": 0.30126494139491705, "grad_norm": 0.546177089214325, "learning_rate": 0.000198224349690716, "loss": 0.9191, "step": 1298 }, { "epoch": 
0.3014970407334339, "grad_norm": 0.7900681495666504, "learning_rate": 0.00019822161236059832, "loss": 0.9113, "step": 1299 }, { "epoch": 0.3017291400719508, "grad_norm": 0.633230984210968, "learning_rate": 0.00019821887294111035, "loss": 0.9903, "step": 1300 }, { "epoch": 0.3019612394104677, "grad_norm": 0.6512486934661865, "learning_rate": 0.00019821613143231043, "loss": 0.9602, "step": 1301 }, { "epoch": 0.3021933387489846, "grad_norm": 0.6826757192611694, "learning_rate": 0.00019821338783425688, "loss": 0.9685, "step": 1302 }, { "epoch": 0.30242543808750144, "grad_norm": 0.6921103596687317, "learning_rate": 0.00019821064214700803, "loss": 0.9152, "step": 1303 }, { "epoch": 0.3026575374260183, "grad_norm": 0.7648707032203674, "learning_rate": 0.00019820789437062226, "loss": 0.951, "step": 1304 }, { "epoch": 0.30288963676453523, "grad_norm": 0.6280544400215149, "learning_rate": 0.00019820514450515807, "loss": 0.9402, "step": 1305 }, { "epoch": 0.3031217361030521, "grad_norm": 0.7244302034378052, "learning_rate": 0.00019820239255067393, "loss": 0.9058, "step": 1306 }, { "epoch": 0.30335383544156896, "grad_norm": 0.587648868560791, "learning_rate": 0.0001981996385072284, "loss": 0.9586, "step": 1307 }, { "epoch": 0.3035859347800859, "grad_norm": 0.6172166466712952, "learning_rate": 0.00019819688237488004, "loss": 0.9265, "step": 1308 }, { "epoch": 0.30381803411860275, "grad_norm": 0.5611734986305237, "learning_rate": 0.00019819412415368753, "loss": 0.9573, "step": 1309 }, { "epoch": 0.3040501334571196, "grad_norm": 0.6665943264961243, "learning_rate": 0.00019819136384370946, "loss": 0.9682, "step": 1310 }, { "epoch": 0.30428223279563654, "grad_norm": 0.5719144344329834, "learning_rate": 0.0001981886014450046, "loss": 0.9474, "step": 1311 }, { "epoch": 0.3045143321341534, "grad_norm": 0.6100667119026184, "learning_rate": 0.0001981858369576317, "loss": 0.8819, "step": 1312 }, { "epoch": 0.3047464314726703, "grad_norm": 0.5738622546195984, "learning_rate": 
0.0001981830703816496, "loss": 1.041, "step": 1313 }, { "epoch": 0.3049785308111872, "grad_norm": 0.5135136246681213, "learning_rate": 0.00019818030171711711, "loss": 0.9466, "step": 1314 }, { "epoch": 0.30521063014970407, "grad_norm": 0.567619264125824, "learning_rate": 0.0001981775309640931, "loss": 1.0443, "step": 1315 }, { "epoch": 0.30544272948822093, "grad_norm": 0.5346424579620361, "learning_rate": 0.00019817475812263657, "loss": 0.9558, "step": 1316 }, { "epoch": 0.30567482882673785, "grad_norm": 0.615497887134552, "learning_rate": 0.00019817198319280646, "loss": 0.9896, "step": 1317 }, { "epoch": 0.3059069281652547, "grad_norm": 0.608453094959259, "learning_rate": 0.00019816920617466184, "loss": 0.9439, "step": 1318 }, { "epoch": 0.3061390275037716, "grad_norm": 0.5326637625694275, "learning_rate": 0.00019816642706826172, "loss": 0.9449, "step": 1319 }, { "epoch": 0.3063711268422885, "grad_norm": 0.6365332007408142, "learning_rate": 0.0001981636458736653, "loss": 0.9548, "step": 1320 }, { "epoch": 0.3066032261808054, "grad_norm": 0.5700217485427856, "learning_rate": 0.00019816086259093167, "loss": 0.8934, "step": 1321 }, { "epoch": 0.30683532551932224, "grad_norm": 0.7104255557060242, "learning_rate": 0.00019815807722012006, "loss": 0.9298, "step": 1322 }, { "epoch": 0.30706742485783917, "grad_norm": 0.67552250623703, "learning_rate": 0.00019815528976128977, "loss": 0.9477, "step": 1323 }, { "epoch": 0.30729952419635603, "grad_norm": 0.657332181930542, "learning_rate": 0.00019815250021449997, "loss": 0.9496, "step": 1324 }, { "epoch": 0.3075316235348729, "grad_norm": 0.6245676875114441, "learning_rate": 0.00019814970857981015, "loss": 0.9564, "step": 1325 }, { "epoch": 0.3077637228733898, "grad_norm": 0.638247549533844, "learning_rate": 0.0001981469148572796, "loss": 1.0091, "step": 1326 }, { "epoch": 0.3079958222119067, "grad_norm": 0.8175264000892639, "learning_rate": 0.00019814411904696776, "loss": 0.9409, "step": 1327 }, { "epoch": 0.30822792155042356, 
"grad_norm": 0.5410275459289551, "learning_rate": 0.00019814132114893412, "loss": 0.9345, "step": 1328 }, { "epoch": 0.3084600208889405, "grad_norm": 0.6861386895179749, "learning_rate": 0.00019813852116323818, "loss": 1.0006, "step": 1329 }, { "epoch": 0.30869212022745735, "grad_norm": 0.7619258165359497, "learning_rate": 0.0001981357190899395, "loss": 0.9412, "step": 1330 }, { "epoch": 0.3089242195659742, "grad_norm": 0.5549468398094177, "learning_rate": 0.00019813291492909771, "loss": 0.9111, "step": 1331 }, { "epoch": 0.30915631890449113, "grad_norm": 0.6391865611076355, "learning_rate": 0.00019813010868077247, "loss": 0.9928, "step": 1332 }, { "epoch": 0.309388418243008, "grad_norm": 0.7584822773933411, "learning_rate": 0.00019812730034502344, "loss": 0.9351, "step": 1333 }, { "epoch": 0.30962051758152487, "grad_norm": 0.5710351467132568, "learning_rate": 0.00019812448992191035, "loss": 0.9106, "step": 1334 }, { "epoch": 0.3098526169200418, "grad_norm": 0.570615828037262, "learning_rate": 0.000198121677411493, "loss": 0.9622, "step": 1335 }, { "epoch": 0.31008471625855866, "grad_norm": 0.6886541247367859, "learning_rate": 0.00019811886281383124, "loss": 0.9723, "step": 1336 }, { "epoch": 0.3103168155970755, "grad_norm": 0.5683128237724304, "learning_rate": 0.0001981160461289849, "loss": 0.9394, "step": 1337 }, { "epoch": 0.31054891493559245, "grad_norm": 0.6025245785713196, "learning_rate": 0.00019811322735701392, "loss": 0.8927, "step": 1338 }, { "epoch": 0.3107810142741093, "grad_norm": 0.5691837668418884, "learning_rate": 0.00019811040649797828, "loss": 0.9122, "step": 1339 }, { "epoch": 0.3110131136126262, "grad_norm": 0.5640196204185486, "learning_rate": 0.00019810758355193797, "loss": 0.958, "step": 1340 }, { "epoch": 0.3112452129511431, "grad_norm": 0.5763377547264099, "learning_rate": 0.000198104758518953, "loss": 0.9585, "step": 1341 }, { "epoch": 0.31147731228965997, "grad_norm": 0.5440706014633179, "learning_rate": 0.00019810193139908354, "loss": 
0.9559, "step": 1342 }, { "epoch": 0.31170941162817684, "grad_norm": 0.5341168642044067, "learning_rate": 0.00019809910219238967, "loss": 0.9594, "step": 1343 }, { "epoch": 0.31194151096669376, "grad_norm": 0.591697096824646, "learning_rate": 0.00019809627089893158, "loss": 1.006, "step": 1344 }, { "epoch": 0.3121736103052106, "grad_norm": 0.5377187728881836, "learning_rate": 0.00019809343751876952, "loss": 0.9563, "step": 1345 }, { "epoch": 0.3124057096437275, "grad_norm": 0.5656700730323792, "learning_rate": 0.00019809060205196378, "loss": 0.9145, "step": 1346 }, { "epoch": 0.3126378089822444, "grad_norm": 0.5729079246520996, "learning_rate": 0.00019808776449857458, "loss": 0.9219, "step": 1347 }, { "epoch": 0.3128699083207613, "grad_norm": 0.5574474930763245, "learning_rate": 0.0001980849248586624, "loss": 0.9688, "step": 1348 }, { "epoch": 0.31310200765927815, "grad_norm": 0.6526781916618347, "learning_rate": 0.00019808208313228754, "loss": 0.9736, "step": 1349 }, { "epoch": 0.31333410699779507, "grad_norm": 0.5639700293540955, "learning_rate": 0.00019807923931951056, "loss": 0.9818, "step": 1350 }, { "epoch": 0.31356620633631194, "grad_norm": 0.6792280673980713, "learning_rate": 0.00019807639342039188, "loss": 0.9457, "step": 1351 }, { "epoch": 0.3137983056748288, "grad_norm": 0.5985791087150574, "learning_rate": 0.000198073545434992, "loss": 0.9939, "step": 1352 }, { "epoch": 0.3140304050133457, "grad_norm": 0.6136618256568909, "learning_rate": 0.00019807069536337163, "loss": 0.9061, "step": 1353 }, { "epoch": 0.3142625043518626, "grad_norm": 0.5937149524688721, "learning_rate": 0.00019806784320559127, "loss": 1.0152, "step": 1354 }, { "epoch": 0.31449460369037946, "grad_norm": 0.6598206758499146, "learning_rate": 0.00019806498896171167, "loss": 0.947, "step": 1355 }, { "epoch": 0.3147267030288964, "grad_norm": 0.6281535029411316, "learning_rate": 0.00019806213263179348, "loss": 0.9489, "step": 1356 }, { "epoch": 0.31495880236741325, "grad_norm": 
0.6873879432678223, "learning_rate": 0.00019805927421589752, "loss": 0.9092, "step": 1357 }, { "epoch": 0.3151909017059301, "grad_norm": 0.5967313051223755, "learning_rate": 0.00019805641371408456, "loss": 0.9305, "step": 1358 }, { "epoch": 0.31542300104444704, "grad_norm": 0.6507419347763062, "learning_rate": 0.00019805355112641548, "loss": 0.9371, "step": 1359 }, { "epoch": 0.3156551003829639, "grad_norm": 0.6330577731132507, "learning_rate": 0.00019805068645295114, "loss": 0.8892, "step": 1360 }, { "epoch": 0.31588719972148077, "grad_norm": 0.6029176115989685, "learning_rate": 0.00019804781969375249, "loss": 0.9891, "step": 1361 }, { "epoch": 0.3161192990599977, "grad_norm": 0.6641071438789368, "learning_rate": 0.0001980449508488805, "loss": 0.9311, "step": 1362 }, { "epoch": 0.31635139839851456, "grad_norm": 0.633445143699646, "learning_rate": 0.00019804207991839623, "loss": 0.9717, "step": 1363 }, { "epoch": 0.3165834977370314, "grad_norm": 0.6732186079025269, "learning_rate": 0.00019803920690236073, "loss": 0.9202, "step": 1364 }, { "epoch": 0.31681559707554835, "grad_norm": 0.5130820274353027, "learning_rate": 0.00019803633180083508, "loss": 0.9596, "step": 1365 }, { "epoch": 0.3170476964140652, "grad_norm": 0.6967129707336426, "learning_rate": 0.00019803345461388055, "loss": 0.9327, "step": 1366 }, { "epoch": 0.3172797957525821, "grad_norm": 0.6180625557899475, "learning_rate": 0.0001980305753415582, "loss": 0.9582, "step": 1367 }, { "epoch": 0.317511895091099, "grad_norm": 0.5545331239700317, "learning_rate": 0.00019802769398392936, "loss": 0.8793, "step": 1368 }, { "epoch": 0.3177439944296159, "grad_norm": 0.5788173675537109, "learning_rate": 0.0001980248105410553, "loss": 0.9252, "step": 1369 }, { "epoch": 0.31797609376813274, "grad_norm": 0.5864902138710022, "learning_rate": 0.00019802192501299737, "loss": 0.9182, "step": 1370 }, { "epoch": 0.31820819310664966, "grad_norm": 0.6294628977775574, "learning_rate": 0.00019801903739981693, "loss": 0.9766, 
"step": 1371 }, { "epoch": 0.31844029244516653, "grad_norm": 0.5971999168395996, "learning_rate": 0.00019801614770157544, "loss": 0.8861, "step": 1372 }, { "epoch": 0.3186723917836834, "grad_norm": 0.6040829420089722, "learning_rate": 0.00019801325591833432, "loss": 0.9121, "step": 1373 }, { "epoch": 0.3189044911222003, "grad_norm": 0.5990303754806519, "learning_rate": 0.00019801036205015514, "loss": 0.9546, "step": 1374 }, { "epoch": 0.3191365904607172, "grad_norm": 0.6420483589172363, "learning_rate": 0.0001980074660970994, "loss": 0.9339, "step": 1375 }, { "epoch": 0.31936868979923405, "grad_norm": 0.5439754128456116, "learning_rate": 0.00019800456805922878, "loss": 0.9383, "step": 1376 }, { "epoch": 0.319600789137751, "grad_norm": 0.6585529446601868, "learning_rate": 0.00019800166793660485, "loss": 0.8954, "step": 1377 }, { "epoch": 0.31983288847626784, "grad_norm": 0.5567095875740051, "learning_rate": 0.00019799876572928934, "loss": 0.9096, "step": 1378 }, { "epoch": 0.3200649878147847, "grad_norm": 0.7285929918289185, "learning_rate": 0.00019799586143734397, "loss": 0.9682, "step": 1379 }, { "epoch": 0.32029708715330163, "grad_norm": 0.6526049971580505, "learning_rate": 0.00019799295506083055, "loss": 0.9443, "step": 1380 }, { "epoch": 0.3205291864918185, "grad_norm": 0.7736508846282959, "learning_rate": 0.00019799004659981086, "loss": 0.9232, "step": 1381 }, { "epoch": 0.32076128583033536, "grad_norm": 0.728675365447998, "learning_rate": 0.0001979871360543468, "loss": 0.9249, "step": 1382 }, { "epoch": 0.3209933851688523, "grad_norm": 0.6301859617233276, "learning_rate": 0.00019798422342450026, "loss": 0.9232, "step": 1383 }, { "epoch": 0.32122548450736915, "grad_norm": 0.6275681853294373, "learning_rate": 0.00019798130871033322, "loss": 0.9635, "step": 1384 }, { "epoch": 0.321457583845886, "grad_norm": 0.610995352268219, "learning_rate": 0.0001979783919119077, "loss": 0.9493, "step": 1385 }, { "epoch": 0.32168968318440294, "grad_norm": 0.6228059530258179, 
"learning_rate": 0.0001979754730292857, "loss": 1.0168, "step": 1386 }, { "epoch": 0.3219217825229198, "grad_norm": 0.6232510209083557, "learning_rate": 0.00019797255206252933, "loss": 0.9985, "step": 1387 }, { "epoch": 0.3221538818614367, "grad_norm": 0.6092893481254578, "learning_rate": 0.0001979696290117007, "loss": 0.9189, "step": 1388 }, { "epoch": 0.3223859811999536, "grad_norm": 0.5062383413314819, "learning_rate": 0.00019796670387686204, "loss": 0.9087, "step": 1389 }, { "epoch": 0.32261808053847046, "grad_norm": 0.6024093627929688, "learning_rate": 0.00019796377665807558, "loss": 0.9491, "step": 1390 }, { "epoch": 0.32285017987698733, "grad_norm": 0.6089134812355042, "learning_rate": 0.0001979608473554035, "loss": 0.9321, "step": 1391 }, { "epoch": 0.32308227921550425, "grad_norm": 0.5878040194511414, "learning_rate": 0.0001979579159689082, "loss": 0.8861, "step": 1392 }, { "epoch": 0.3233143785540211, "grad_norm": 0.591105580329895, "learning_rate": 0.00019795498249865198, "loss": 0.9485, "step": 1393 }, { "epoch": 0.323546477892538, "grad_norm": 0.5758664011955261, "learning_rate": 0.00019795204694469728, "loss": 0.9703, "step": 1394 }, { "epoch": 0.3237785772310549, "grad_norm": 0.546970546245575, "learning_rate": 0.00019794910930710655, "loss": 0.9418, "step": 1395 }, { "epoch": 0.3240106765695718, "grad_norm": 0.5121433138847351, "learning_rate": 0.00019794616958594222, "loss": 0.9491, "step": 1396 }, { "epoch": 0.32424277590808864, "grad_norm": 0.5994260907173157, "learning_rate": 0.0001979432277812669, "loss": 0.9653, "step": 1397 }, { "epoch": 0.32447487524660557, "grad_norm": 0.6542670726776123, "learning_rate": 0.0001979402838931431, "loss": 0.9441, "step": 1398 }, { "epoch": 0.32470697458512243, "grad_norm": 0.5518794059753418, "learning_rate": 0.0001979373379216335, "loss": 0.9125, "step": 1399 }, { "epoch": 0.3249390739236393, "grad_norm": 0.5810558199882507, "learning_rate": 0.00019793438986680074, "loss": 0.9449, "step": 1400 }, { "epoch": 
0.3251711732621562, "grad_norm": 0.6392338275909424, "learning_rate": 0.00019793143972870748, "loss": 0.949, "step": 1401 }, { "epoch": 0.3254032726006731, "grad_norm": 0.5685491561889648, "learning_rate": 0.0001979284875074166, "loss": 0.9744, "step": 1402 }, { "epoch": 0.32563537193918995, "grad_norm": 0.5548221468925476, "learning_rate": 0.0001979255332029908, "loss": 0.9704, "step": 1403 }, { "epoch": 0.3258674712777069, "grad_norm": 0.6489256024360657, "learning_rate": 0.00019792257681549292, "loss": 0.8874, "step": 1404 }, { "epoch": 0.32609957061622374, "grad_norm": 0.5918115973472595, "learning_rate": 0.00019791961834498588, "loss": 0.9168, "step": 1405 }, { "epoch": 0.3263316699547406, "grad_norm": 0.6828185319900513, "learning_rate": 0.00019791665779153266, "loss": 0.9586, "step": 1406 }, { "epoch": 0.32656376929325753, "grad_norm": 0.6338929533958435, "learning_rate": 0.00019791369515519613, "loss": 0.95, "step": 1407 }, { "epoch": 0.3267958686317744, "grad_norm": 0.6227958798408508, "learning_rate": 0.00019791073043603943, "loss": 0.9518, "step": 1408 }, { "epoch": 0.32702796797029127, "grad_norm": 0.5899162292480469, "learning_rate": 0.0001979077636341255, "loss": 0.9157, "step": 1409 }, { "epoch": 0.3272600673088082, "grad_norm": 0.5423386096954346, "learning_rate": 0.00019790479474951757, "loss": 0.9315, "step": 1410 }, { "epoch": 0.32749216664732506, "grad_norm": 0.6612974405288696, "learning_rate": 0.00019790182378227872, "loss": 0.9382, "step": 1411 }, { "epoch": 0.3277242659858419, "grad_norm": 0.6576513051986694, "learning_rate": 0.00019789885073247216, "loss": 0.9361, "step": 1412 }, { "epoch": 0.32795636532435885, "grad_norm": 0.6887245178222656, "learning_rate": 0.00019789587560016116, "loss": 0.9403, "step": 1413 }, { "epoch": 0.3281884646628757, "grad_norm": 0.5801193118095398, "learning_rate": 0.00019789289838540897, "loss": 0.9556, "step": 1414 }, { "epoch": 0.3284205640013926, "grad_norm": 0.5463318824768066, "learning_rate": 
0.00019788991908827893, "loss": 0.9858, "step": 1415 }, { "epoch": 0.3286526633399095, "grad_norm": 0.5974694490432739, "learning_rate": 0.00019788693770883445, "loss": 0.9617, "step": 1416 }, { "epoch": 0.32888476267842637, "grad_norm": 0.5905255079269409, "learning_rate": 0.0001978839542471389, "loss": 0.9821, "step": 1417 }, { "epoch": 0.32911686201694323, "grad_norm": 0.5710744261741638, "learning_rate": 0.00019788096870325576, "loss": 0.956, "step": 1418 }, { "epoch": 0.32934896135546016, "grad_norm": 1.2371554374694824, "learning_rate": 0.00019787798107724858, "loss": 0.9351, "step": 1419 }, { "epoch": 0.329581060693977, "grad_norm": 0.6005064845085144, "learning_rate": 0.00019787499136918085, "loss": 0.9836, "step": 1420 }, { "epoch": 0.3298131600324939, "grad_norm": 0.6066033840179443, "learning_rate": 0.00019787199957911622, "loss": 0.9456, "step": 1421 }, { "epoch": 0.3300452593710108, "grad_norm": 0.557050347328186, "learning_rate": 0.00019786900570711828, "loss": 0.9618, "step": 1422 }, { "epoch": 0.3302773587095277, "grad_norm": 0.5723857283592224, "learning_rate": 0.00019786600975325077, "loss": 0.9608, "step": 1423 }, { "epoch": 0.33050945804804455, "grad_norm": 0.619159996509552, "learning_rate": 0.0001978630117175774, "loss": 0.9419, "step": 1424 }, { "epoch": 0.33074155738656147, "grad_norm": 0.6377588510513306, "learning_rate": 0.00019786001160016192, "loss": 0.9223, "step": 1425 }, { "epoch": 0.33097365672507834, "grad_norm": 0.6066679358482361, "learning_rate": 0.00019785700940106815, "loss": 0.8939, "step": 1426 }, { "epoch": 0.3312057560635952, "grad_norm": 0.5574718117713928, "learning_rate": 0.00019785400512036, "loss": 0.9044, "step": 1427 }, { "epoch": 0.3314378554021121, "grad_norm": 0.5707841515541077, "learning_rate": 0.00019785099875810133, "loss": 0.9157, "step": 1428 }, { "epoch": 0.331669954740629, "grad_norm": 0.5679470896720886, "learning_rate": 0.0001978479903143561, "loss": 0.9374, "step": 1429 }, { "epoch": 
0.33190205407914586, "grad_norm": 0.5583582520484924, "learning_rate": 0.0001978449797891883, "loss": 0.9311, "step": 1430 }, { "epoch": 0.3321341534176628, "grad_norm": 0.5656445622444153, "learning_rate": 0.00019784196718266204, "loss": 0.9244, "step": 1431 }, { "epoch": 0.33236625275617965, "grad_norm": 0.5760427117347717, "learning_rate": 0.00019783895249484129, "loss": 0.9482, "step": 1432 }, { "epoch": 0.3325983520946965, "grad_norm": 0.6144405007362366, "learning_rate": 0.00019783593572579027, "loss": 1.0006, "step": 1433 }, { "epoch": 0.33283045143321344, "grad_norm": 0.5438564419746399, "learning_rate": 0.00019783291687557308, "loss": 0.9378, "step": 1434 }, { "epoch": 0.3330625507717303, "grad_norm": 0.4701556861400604, "learning_rate": 0.00019782989594425401, "loss": 0.9184, "step": 1435 }, { "epoch": 0.33329465011024717, "grad_norm": 0.6254618167877197, "learning_rate": 0.00019782687293189727, "loss": 0.9922, "step": 1436 }, { "epoch": 0.3335267494487641, "grad_norm": 0.60150146484375, "learning_rate": 0.00019782384783856717, "loss": 0.9096, "step": 1437 }, { "epoch": 0.33375884878728096, "grad_norm": 0.5635362863540649, "learning_rate": 0.00019782082066432813, "loss": 0.9546, "step": 1438 }, { "epoch": 0.3339909481257978, "grad_norm": 0.5585997700691223, "learning_rate": 0.0001978177914092444, "loss": 0.9335, "step": 1439 }, { "epoch": 0.33422304746431475, "grad_norm": 0.5360312461853027, "learning_rate": 0.00019781476007338058, "loss": 0.9266, "step": 1440 }, { "epoch": 0.3344551468028316, "grad_norm": 0.5142294764518738, "learning_rate": 0.00019781172665680102, "loss": 0.9528, "step": 1441 }, { "epoch": 0.3346872461413485, "grad_norm": 0.5453790426254272, "learning_rate": 0.00019780869115957036, "loss": 0.9236, "step": 1442 }, { "epoch": 0.3349193454798654, "grad_norm": 0.5120064616203308, "learning_rate": 0.00019780565358175307, "loss": 0.9515, "step": 1443 }, { "epoch": 0.33515144481838227, "grad_norm": 0.5188871026039124, "learning_rate": 
0.00019780261392341383, "loss": 0.9401, "step": 1444 }, { "epoch": 0.33538354415689914, "grad_norm": 0.573962926864624, "learning_rate": 0.00019779957218461725, "loss": 0.946, "step": 1445 }, { "epoch": 0.33561564349541606, "grad_norm": 0.6462770104408264, "learning_rate": 0.0001977965283654281, "loss": 0.8766, "step": 1446 }, { "epoch": 0.3358477428339329, "grad_norm": 0.546744167804718, "learning_rate": 0.00019779348246591106, "loss": 0.946, "step": 1447 }, { "epoch": 0.3360798421724498, "grad_norm": 0.681845486164093, "learning_rate": 0.00019779043448613098, "loss": 0.9804, "step": 1448 }, { "epoch": 0.3363119415109667, "grad_norm": 0.574611485004425, "learning_rate": 0.0001977873844261527, "loss": 0.9569, "step": 1449 }, { "epoch": 0.3365440408494836, "grad_norm": 0.6257385611534119, "learning_rate": 0.000197784332286041, "loss": 0.9052, "step": 1450 }, { "epoch": 0.33677614018800045, "grad_norm": 0.6644338965415955, "learning_rate": 0.0001977812780658609, "loss": 0.8676, "step": 1451 }, { "epoch": 0.3370082395265174, "grad_norm": 0.7232085466384888, "learning_rate": 0.00019777822176567734, "loss": 0.9781, "step": 1452 }, { "epoch": 0.33724033886503424, "grad_norm": 0.6381409168243408, "learning_rate": 0.00019777516338555538, "loss": 0.9472, "step": 1453 }, { "epoch": 0.3374724382035511, "grad_norm": 0.6627258062362671, "learning_rate": 0.00019777210292556, "loss": 0.9211, "step": 1454 }, { "epoch": 0.33770453754206803, "grad_norm": 0.671782374382019, "learning_rate": 0.00019776904038575635, "loss": 1.0011, "step": 1455 }, { "epoch": 0.3379366368805849, "grad_norm": 0.5981830954551697, "learning_rate": 0.0001977659757662096, "loss": 0.9305, "step": 1456 }, { "epoch": 0.33816873621910176, "grad_norm": 0.5951483249664307, "learning_rate": 0.00019776290906698484, "loss": 0.9317, "step": 1457 }, { "epoch": 0.3384008355576187, "grad_norm": 0.6326116323471069, "learning_rate": 0.0001977598402881474, "loss": 0.9078, "step": 1458 }, { "epoch": 0.33863293489613555, 
"grad_norm": 0.703486979007721, "learning_rate": 0.00019775676942976252, "loss": 0.9086, "step": 1459 }, { "epoch": 0.3388650342346524, "grad_norm": 0.5593792796134949, "learning_rate": 0.00019775369649189557, "loss": 0.882, "step": 1460 }, { "epoch": 0.33909713357316934, "grad_norm": 0.7886638641357422, "learning_rate": 0.00019775062147461188, "loss": 0.9366, "step": 1461 }, { "epoch": 0.3393292329116862, "grad_norm": 0.5685400366783142, "learning_rate": 0.00019774754437797682, "loss": 0.9278, "step": 1462 }, { "epoch": 0.3395613322502031, "grad_norm": 0.5638322234153748, "learning_rate": 0.0001977444652020559, "loss": 0.9001, "step": 1463 }, { "epoch": 0.33979343158872, "grad_norm": 0.5817285776138306, "learning_rate": 0.00019774138394691463, "loss": 0.9556, "step": 1464 }, { "epoch": 0.34002553092723686, "grad_norm": 0.5542300343513489, "learning_rate": 0.00019773830061261852, "loss": 0.9101, "step": 1465 }, { "epoch": 0.34025763026575373, "grad_norm": 0.5184534788131714, "learning_rate": 0.00019773521519923318, "loss": 0.9212, "step": 1466 }, { "epoch": 0.34048972960427065, "grad_norm": 0.6495513319969177, "learning_rate": 0.00019773212770682422, "loss": 0.9642, "step": 1467 }, { "epoch": 0.3407218289427875, "grad_norm": 0.5473306775093079, "learning_rate": 0.00019772903813545736, "loss": 0.9118, "step": 1468 }, { "epoch": 0.3409539282813044, "grad_norm": 0.6399685144424438, "learning_rate": 0.0001977259464851983, "loss": 0.9341, "step": 1469 }, { "epoch": 0.3411860276198213, "grad_norm": 0.604189932346344, "learning_rate": 0.00019772285275611274, "loss": 0.9636, "step": 1470 }, { "epoch": 0.3414181269583382, "grad_norm": 0.6890191435813904, "learning_rate": 0.00019771975694826658, "loss": 0.8875, "step": 1471 }, { "epoch": 0.34165022629685504, "grad_norm": 0.545559287071228, "learning_rate": 0.00019771665906172567, "loss": 0.9345, "step": 1472 }, { "epoch": 0.34188232563537196, "grad_norm": 0.7726954221725464, "learning_rate": 0.00019771355909655583, "loss": 
0.934, "step": 1473 }, { "epoch": 0.34211442497388883, "grad_norm": 0.5746750235557556, "learning_rate": 0.0001977104570528231, "loss": 0.9833, "step": 1474 }, { "epoch": 0.3423465243124057, "grad_norm": 0.6491584181785583, "learning_rate": 0.00019770735293059342, "loss": 0.9657, "step": 1475 }, { "epoch": 0.3425786236509226, "grad_norm": 0.6179370880126953, "learning_rate": 0.0001977042467299328, "loss": 1.014, "step": 1476 }, { "epoch": 0.3428107229894395, "grad_norm": 0.5335822105407715, "learning_rate": 0.00019770113845090734, "loss": 0.9491, "step": 1477 }, { "epoch": 0.34304282232795635, "grad_norm": 0.6509091854095459, "learning_rate": 0.00019769802809358318, "loss": 0.9134, "step": 1478 }, { "epoch": 0.3432749216664733, "grad_norm": 0.6070156097412109, "learning_rate": 0.00019769491565802642, "loss": 0.9194, "step": 1479 }, { "epoch": 0.34350702100499014, "grad_norm": 0.6138854622840881, "learning_rate": 0.00019769180114430332, "loss": 0.8867, "step": 1480 }, { "epoch": 0.343739120343507, "grad_norm": 0.6262592077255249, "learning_rate": 0.00019768868455248014, "loss": 0.9439, "step": 1481 }, { "epoch": 0.34397121968202393, "grad_norm": 0.5759004354476929, "learning_rate": 0.00019768556588262314, "loss": 0.9304, "step": 1482 }, { "epoch": 0.3442033190205408, "grad_norm": 0.7102017998695374, "learning_rate": 0.00019768244513479865, "loss": 0.914, "step": 1483 }, { "epoch": 0.34443541835905767, "grad_norm": 0.5651528239250183, "learning_rate": 0.00019767932230907312, "loss": 0.9289, "step": 1484 }, { "epoch": 0.3446675176975746, "grad_norm": 0.614322304725647, "learning_rate": 0.00019767619740551293, "loss": 0.9449, "step": 1485 }, { "epoch": 0.34489961703609145, "grad_norm": 0.5952088832855225, "learning_rate": 0.00019767307042418454, "loss": 0.9553, "step": 1486 }, { "epoch": 0.3451317163746083, "grad_norm": 0.5869555473327637, "learning_rate": 0.0001976699413651545, "loss": 0.9548, "step": 1487 }, { "epoch": 0.34536381571312524, "grad_norm": 
0.5870482325553894, "learning_rate": 0.00019766681022848935, "loss": 0.8603, "step": 1488 }, { "epoch": 0.3455959150516421, "grad_norm": 0.6058305501937866, "learning_rate": 0.00019766367701425574, "loss": 0.9716, "step": 1489 }, { "epoch": 0.345828014390159, "grad_norm": 0.5507833361625671, "learning_rate": 0.00019766054172252028, "loss": 0.952, "step": 1490 }, { "epoch": 0.3460601137286759, "grad_norm": 0.66182941198349, "learning_rate": 0.00019765740435334964, "loss": 0.89, "step": 1491 }, { "epoch": 0.34629221306719277, "grad_norm": 0.6636322736740112, "learning_rate": 0.00019765426490681061, "loss": 0.8794, "step": 1492 }, { "epoch": 0.34652431240570963, "grad_norm": 0.5858044624328613, "learning_rate": 0.00019765112338296993, "loss": 0.9459, "step": 1493 }, { "epoch": 0.34675641174422656, "grad_norm": 0.6089038252830505, "learning_rate": 0.00019764797978189447, "loss": 0.9579, "step": 1494 }, { "epoch": 0.3469885110827434, "grad_norm": 0.5337439179420471, "learning_rate": 0.00019764483410365108, "loss": 0.9047, "step": 1495 }, { "epoch": 0.3472206104212603, "grad_norm": 0.623374879360199, "learning_rate": 0.00019764168634830664, "loss": 0.9355, "step": 1496 }, { "epoch": 0.3474527097597772, "grad_norm": 0.5791121125221252, "learning_rate": 0.00019763853651592815, "loss": 0.9547, "step": 1497 }, { "epoch": 0.3476848090982941, "grad_norm": 0.6008715033531189, "learning_rate": 0.00019763538460658263, "loss": 0.9718, "step": 1498 }, { "epoch": 0.34791690843681095, "grad_norm": 0.5084409117698669, "learning_rate": 0.00019763223062033706, "loss": 0.9511, "step": 1499 }, { "epoch": 0.34814900777532787, "grad_norm": 0.7017633318901062, "learning_rate": 0.00019762907455725862, "loss": 0.9365, "step": 1500 }, { "epoch": 0.34838110711384473, "grad_norm": 0.6205711364746094, "learning_rate": 0.00019762591641741436, "loss": 1.0011, "step": 1501 }, { "epoch": 0.3486132064523616, "grad_norm": 0.5541443824768066, "learning_rate": 0.0001976227562008715, "loss": 1.0046, 
"step": 1502 }, { "epoch": 0.34884530579087847, "grad_norm": 0.5386497378349304, "learning_rate": 0.00019761959390769728, "loss": 0.9561, "step": 1503 }, { "epoch": 0.3490774051293954, "grad_norm": 0.6039326190948486, "learning_rate": 0.00019761642953795895, "loss": 0.9947, "step": 1504 }, { "epoch": 0.34930950446791226, "grad_norm": 0.6043000221252441, "learning_rate": 0.00019761326309172383, "loss": 0.9203, "step": 1505 }, { "epoch": 0.3495416038064291, "grad_norm": 0.5661574602127075, "learning_rate": 0.0001976100945690593, "loss": 0.945, "step": 1506 }, { "epoch": 0.34977370314494605, "grad_norm": 0.5522181391716003, "learning_rate": 0.00019760692397003266, "loss": 0.9515, "step": 1507 }, { "epoch": 0.3500058024834629, "grad_norm": 0.5782403349876404, "learning_rate": 0.0001976037512947115, "loss": 0.9619, "step": 1508 }, { "epoch": 0.3502379018219798, "grad_norm": 0.5323981046676636, "learning_rate": 0.00019760057654316318, "loss": 0.9617, "step": 1509 }, { "epoch": 0.3504700011604967, "grad_norm": 0.4749138057231903, "learning_rate": 0.0001975973997154553, "loss": 0.9379, "step": 1510 }, { "epoch": 0.35070210049901357, "grad_norm": 0.5465108752250671, "learning_rate": 0.00019759422081165544, "loss": 0.9001, "step": 1511 }, { "epoch": 0.35093419983753044, "grad_norm": 0.5994126200675964, "learning_rate": 0.0001975910398318312, "loss": 0.9533, "step": 1512 }, { "epoch": 0.35116629917604736, "grad_norm": 0.5880894660949707, "learning_rate": 0.00019758785677605024, "loss": 0.9578, "step": 1513 }, { "epoch": 0.3513983985145642, "grad_norm": 0.5805595517158508, "learning_rate": 0.00019758467164438033, "loss": 0.9257, "step": 1514 }, { "epoch": 0.3516304978530811, "grad_norm": 0.587770402431488, "learning_rate": 0.00019758148443688913, "loss": 0.9885, "step": 1515 }, { "epoch": 0.351862597191598, "grad_norm": 0.5739663243293762, "learning_rate": 0.0001975782951536445, "loss": 0.8887, "step": 1516 }, { "epoch": 0.3520946965301149, "grad_norm": 0.5725883841514587, 
"learning_rate": 0.00019757510379471427, "loss": 0.9329, "step": 1517 }, { "epoch": 0.35232679586863175, "grad_norm": 0.5443446636199951, "learning_rate": 0.00019757191036016633, "loss": 0.939, "step": 1518 }, { "epoch": 0.35255889520714867, "grad_norm": 0.5400551557540894, "learning_rate": 0.00019756871485006861, "loss": 0.9245, "step": 1519 }, { "epoch": 0.35279099454566554, "grad_norm": 0.541759192943573, "learning_rate": 0.00019756551726448905, "loss": 0.8966, "step": 1520 }, { "epoch": 0.3530230938841824, "grad_norm": 0.5178937315940857, "learning_rate": 0.00019756231760349575, "loss": 0.9696, "step": 1521 }, { "epoch": 0.3532551932226993, "grad_norm": 0.5206173062324524, "learning_rate": 0.00019755911586715667, "loss": 0.9396, "step": 1522 }, { "epoch": 0.3534872925612162, "grad_norm": 0.4958394169807434, "learning_rate": 0.00019755591205554, "loss": 0.9489, "step": 1523 }, { "epoch": 0.35371939189973306, "grad_norm": 0.5530148148536682, "learning_rate": 0.00019755270616871387, "loss": 0.8995, "step": 1524 }, { "epoch": 0.35395149123825, "grad_norm": 0.477897971868515, "learning_rate": 0.00019754949820674646, "loss": 0.9233, "step": 1525 }, { "epoch": 0.35418359057676685, "grad_norm": 0.5829471945762634, "learning_rate": 0.00019754628816970603, "loss": 0.9274, "step": 1526 }, { "epoch": 0.3544156899152837, "grad_norm": 0.5249282717704773, "learning_rate": 0.00019754307605766082, "loss": 0.9279, "step": 1527 }, { "epoch": 0.35464778925380064, "grad_norm": 0.5659089684486389, "learning_rate": 0.00019753986187067918, "loss": 0.9242, "step": 1528 }, { "epoch": 0.3548798885923175, "grad_norm": 0.5052345991134644, "learning_rate": 0.00019753664560882953, "loss": 0.9204, "step": 1529 }, { "epoch": 0.35511198793083437, "grad_norm": 0.6979862451553345, "learning_rate": 0.00019753342727218024, "loss": 0.8979, "step": 1530 }, { "epoch": 0.3553440872693513, "grad_norm": 0.5367324948310852, "learning_rate": 0.00019753020686079977, "loss": 0.9735, "step": 1531 }, { 
"epoch": 0.35557618660786816, "grad_norm": 0.6325769424438477, "learning_rate": 0.00019752698437475663, "loss": 0.9262, "step": 1532 }, { "epoch": 0.35580828594638503, "grad_norm": 0.5063219666481018, "learning_rate": 0.00019752375981411938, "loss": 0.9413, "step": 1533 }, { "epoch": 0.35604038528490195, "grad_norm": 0.6506708264350891, "learning_rate": 0.0001975205331789566, "loss": 0.901, "step": 1534 }, { "epoch": 0.3562724846234188, "grad_norm": 0.5340774059295654, "learning_rate": 0.00019751730446933692, "loss": 0.8921, "step": 1535 }, { "epoch": 0.3565045839619357, "grad_norm": 0.5103849768638611, "learning_rate": 0.00019751407368532906, "loss": 0.9263, "step": 1536 }, { "epoch": 0.3567366833004526, "grad_norm": 0.6161956787109375, "learning_rate": 0.0001975108408270017, "loss": 0.9556, "step": 1537 }, { "epoch": 0.3569687826389695, "grad_norm": 0.6125021576881409, "learning_rate": 0.00019750760589442361, "loss": 0.8981, "step": 1538 }, { "epoch": 0.35720088197748634, "grad_norm": 0.632417619228363, "learning_rate": 0.00019750436888766365, "loss": 0.9071, "step": 1539 }, { "epoch": 0.35743298131600326, "grad_norm": 0.5637969970703125, "learning_rate": 0.00019750112980679063, "loss": 0.8918, "step": 1540 }, { "epoch": 0.35766508065452013, "grad_norm": 0.5019290447235107, "learning_rate": 0.00019749788865187345, "loss": 0.9267, "step": 1541 }, { "epoch": 0.357897179993037, "grad_norm": 0.5822156071662903, "learning_rate": 0.00019749464542298114, "loss": 0.9271, "step": 1542 }, { "epoch": 0.3581292793315539, "grad_norm": 0.6837159395217896, "learning_rate": 0.0001974914001201826, "loss": 0.8747, "step": 1543 }, { "epoch": 0.3583613786700708, "grad_norm": 0.5663506984710693, "learning_rate": 0.00019748815274354687, "loss": 1.0049, "step": 1544 }, { "epoch": 0.35859347800858765, "grad_norm": 0.6958603858947754, "learning_rate": 0.00019748490329314305, "loss": 0.886, "step": 1545 }, { "epoch": 0.3588255773471046, "grad_norm": 0.6604666709899902, "learning_rate": 
0.00019748165176904024, "loss": 1.004, "step": 1546 }, { "epoch": 0.35905767668562144, "grad_norm": 0.67193204164505, "learning_rate": 0.00019747839817130767, "loss": 0.912, "step": 1547 }, { "epoch": 0.3592897760241383, "grad_norm": 0.5102511644363403, "learning_rate": 0.0001974751425000145, "loss": 0.9677, "step": 1548 }, { "epoch": 0.35952187536265523, "grad_norm": 0.6189117431640625, "learning_rate": 0.00019747188475522997, "loss": 0.9446, "step": 1549 }, { "epoch": 0.3597539747011721, "grad_norm": 0.5596984624862671, "learning_rate": 0.0001974686249370234, "loss": 0.9254, "step": 1550 }, { "epoch": 0.35998607403968896, "grad_norm": 0.6736109256744385, "learning_rate": 0.00019746536304546416, "loss": 0.9126, "step": 1551 }, { "epoch": 0.3602181733782059, "grad_norm": 0.5836181640625, "learning_rate": 0.0001974620990806216, "loss": 0.9299, "step": 1552 }, { "epoch": 0.36045027271672275, "grad_norm": 0.6409969925880432, "learning_rate": 0.00019745883304256515, "loss": 0.9868, "step": 1553 }, { "epoch": 0.3606823720552396, "grad_norm": 0.6091331243515015, "learning_rate": 0.0001974555649313643, "loss": 0.8968, "step": 1554 }, { "epoch": 0.36091447139375654, "grad_norm": 0.5808984637260437, "learning_rate": 0.00019745229474708859, "loss": 0.9484, "step": 1555 }, { "epoch": 0.3611465707322734, "grad_norm": 0.6666507720947266, "learning_rate": 0.00019744902248980753, "loss": 0.9685, "step": 1556 }, { "epoch": 0.3613786700707903, "grad_norm": 0.545849084854126, "learning_rate": 0.0001974457481595908, "loss": 0.9595, "step": 1557 }, { "epoch": 0.3616107694093072, "grad_norm": 0.6335651278495789, "learning_rate": 0.00019744247175650795, "loss": 1.0009, "step": 1558 }, { "epoch": 0.36184286874782406, "grad_norm": 0.7581154704093933, "learning_rate": 0.00019743919328062877, "loss": 0.91, "step": 1559 }, { "epoch": 0.36207496808634093, "grad_norm": 0.6457467675209045, "learning_rate": 0.00019743591273202292, "loss": 0.9606, "step": 1560 }, { "epoch": 0.36230706742485785, 
"grad_norm": 0.7776808142662048, "learning_rate": 0.0001974326301107603, "loss": 0.9504, "step": 1561 }, { "epoch": 0.3625391667633747, "grad_norm": 0.6935829520225525, "learning_rate": 0.0001974293454169106, "loss": 0.8886, "step": 1562 }, { "epoch": 0.3627712661018916, "grad_norm": 0.5965757966041565, "learning_rate": 0.0001974260586505438, "loss": 0.9683, "step": 1563 }, { "epoch": 0.3630033654404085, "grad_norm": 0.6753206253051758, "learning_rate": 0.00019742276981172976, "loss": 0.8935, "step": 1564 }, { "epoch": 0.3632354647789254, "grad_norm": 0.5645881295204163, "learning_rate": 0.00019741947890053845, "loss": 0.912, "step": 1565 }, { "epoch": 0.36346756411744224, "grad_norm": 0.6319294571876526, "learning_rate": 0.00019741618591703988, "loss": 0.9554, "step": 1566 }, { "epoch": 0.36369966345595917, "grad_norm": 0.5667675733566284, "learning_rate": 0.00019741289086130408, "loss": 0.9214, "step": 1567 }, { "epoch": 0.36393176279447603, "grad_norm": 0.5404272079467773, "learning_rate": 0.0001974095937334012, "loss": 0.882, "step": 1568 }, { "epoch": 0.3641638621329929, "grad_norm": 0.6230792999267578, "learning_rate": 0.0001974062945334013, "loss": 0.9616, "step": 1569 }, { "epoch": 0.3643959614715098, "grad_norm": 0.5880136489868164, "learning_rate": 0.0001974029932613746, "loss": 0.9364, "step": 1570 }, { "epoch": 0.3646280608100267, "grad_norm": 0.5819690823554993, "learning_rate": 0.00019739968991739132, "loss": 0.9191, "step": 1571 }, { "epoch": 0.36486016014854356, "grad_norm": 0.5429544448852539, "learning_rate": 0.00019739638450152173, "loss": 0.9548, "step": 1572 }, { "epoch": 0.3650922594870605, "grad_norm": 0.5827645063400269, "learning_rate": 0.00019739307701383617, "loss": 0.9229, "step": 1573 }, { "epoch": 0.36532435882557734, "grad_norm": 0.5028699040412903, "learning_rate": 0.00019738976745440495, "loss": 0.9103, "step": 1574 }, { "epoch": 0.3655564581640942, "grad_norm": 0.5750008821487427, "learning_rate": 0.00019738645582329846, "loss": 
0.9031, "step": 1575 }, { "epoch": 0.36578855750261113, "grad_norm": 0.5459082722663879, "learning_rate": 0.00019738314212058722, "loss": 0.8997, "step": 1576 }, { "epoch": 0.366020656841128, "grad_norm": 0.5344048142433167, "learning_rate": 0.00019737982634634164, "loss": 0.9428, "step": 1577 }, { "epoch": 0.36625275617964487, "grad_norm": 0.6792230010032654, "learning_rate": 0.0001973765085006323, "loss": 0.9394, "step": 1578 }, { "epoch": 0.3664848555181618, "grad_norm": 0.5515922904014587, "learning_rate": 0.0001973731885835298, "loss": 0.8904, "step": 1579 }, { "epoch": 0.36671695485667866, "grad_norm": 0.6228951811790466, "learning_rate": 0.0001973698665951047, "loss": 0.9096, "step": 1580 }, { "epoch": 0.3669490541951955, "grad_norm": 0.7408158183097839, "learning_rate": 0.00019736654253542767, "loss": 0.873, "step": 1581 }, { "epoch": 0.36718115353371245, "grad_norm": 0.5501497983932495, "learning_rate": 0.00019736321640456946, "loss": 0.9582, "step": 1582 }, { "epoch": 0.3674132528722293, "grad_norm": 0.6826937198638916, "learning_rate": 0.00019735988820260078, "loss": 0.8782, "step": 1583 }, { "epoch": 0.3676453522107462, "grad_norm": 0.5639553666114807, "learning_rate": 0.00019735655792959249, "loss": 0.9114, "step": 1584 }, { "epoch": 0.3678774515492631, "grad_norm": 0.5987411141395569, "learning_rate": 0.00019735322558561537, "loss": 0.8632, "step": 1585 }, { "epoch": 0.36810955088777997, "grad_norm": 0.6253989934921265, "learning_rate": 0.00019734989117074032, "loss": 0.9281, "step": 1586 }, { "epoch": 0.36834165022629684, "grad_norm": 0.5664665699005127, "learning_rate": 0.00019734655468503832, "loss": 0.9511, "step": 1587 }, { "epoch": 0.36857374956481376, "grad_norm": 0.6325791478157043, "learning_rate": 0.00019734321612858025, "loss": 0.9291, "step": 1588 }, { "epoch": 0.3688058489033306, "grad_norm": 0.6093154549598694, "learning_rate": 0.0001973398755014372, "loss": 0.9389, "step": 1589 }, { "epoch": 0.3690379482418475, "grad_norm": 
0.6303350329399109, "learning_rate": 0.00019733653280368024, "loss": 0.9363, "step": 1590 }, { "epoch": 0.3692700475803644, "grad_norm": 0.7450820207595825, "learning_rate": 0.0001973331880353804, "loss": 0.8871, "step": 1591 }, { "epoch": 0.3695021469188813, "grad_norm": 0.5334305763244629, "learning_rate": 0.00019732984119660892, "loss": 0.9786, "step": 1592 }, { "epoch": 0.36973424625739815, "grad_norm": 0.7185940742492676, "learning_rate": 0.00019732649228743694, "loss": 0.9432, "step": 1593 }, { "epoch": 0.36996634559591507, "grad_norm": 0.6632983684539795, "learning_rate": 0.00019732314130793568, "loss": 0.8723, "step": 1594 }, { "epoch": 0.37019844493443194, "grad_norm": 0.5856658220291138, "learning_rate": 0.00019731978825817648, "loss": 0.9806, "step": 1595 }, { "epoch": 0.3704305442729488, "grad_norm": 0.7276145815849304, "learning_rate": 0.00019731643313823064, "loss": 0.9446, "step": 1596 }, { "epoch": 0.3706626436114657, "grad_norm": 0.5602502822875977, "learning_rate": 0.0001973130759481695, "loss": 0.8925, "step": 1597 }, { "epoch": 0.3708947429499826, "grad_norm": 0.7300639152526855, "learning_rate": 0.0001973097166880645, "loss": 0.9528, "step": 1598 }, { "epoch": 0.37112684228849946, "grad_norm": 0.6592845916748047, "learning_rate": 0.00019730635535798713, "loss": 0.9309, "step": 1599 }, { "epoch": 0.3713589416270164, "grad_norm": 0.5904006958007812, "learning_rate": 0.00019730299195800885, "loss": 0.9063, "step": 1600 }, { "epoch": 0.37159104096553325, "grad_norm": 0.6748924255371094, "learning_rate": 0.00019729962648820122, "loss": 0.8978, "step": 1601 }, { "epoch": 0.3718231403040501, "grad_norm": 0.5594115257263184, "learning_rate": 0.00019729625894863583, "loss": 0.9745, "step": 1602 }, { "epoch": 0.37205523964256704, "grad_norm": 0.5782825946807861, "learning_rate": 0.00019729288933938432, "loss": 0.9959, "step": 1603 }, { "epoch": 0.3722873389810839, "grad_norm": 0.5338828563690186, "learning_rate": 0.00019728951766051836, "loss": 0.8985, 
"step": 1604 }, { "epoch": 0.37251943831960077, "grad_norm": 0.5760864019393921, "learning_rate": 0.00019728614391210965, "loss": 0.9201, "step": 1605 }, { "epoch": 0.3727515376581177, "grad_norm": 0.5703029632568359, "learning_rate": 0.00019728276809423, "loss": 0.9528, "step": 1606 }, { "epoch": 0.37298363699663456, "grad_norm": 0.5012573599815369, "learning_rate": 0.00019727939020695122, "loss": 0.9776, "step": 1607 }, { "epoch": 0.3732157363351514, "grad_norm": 0.6214978694915771, "learning_rate": 0.00019727601025034511, "loss": 0.8937, "step": 1608 }, { "epoch": 0.37344783567366835, "grad_norm": 0.5026699304580688, "learning_rate": 0.00019727262822448363, "loss": 0.9036, "step": 1609 }, { "epoch": 0.3736799350121852, "grad_norm": 0.6376842856407166, "learning_rate": 0.00019726924412943867, "loss": 0.947, "step": 1610 }, { "epoch": 0.3739120343507021, "grad_norm": 0.5963441133499146, "learning_rate": 0.00019726585796528225, "loss": 0.9552, "step": 1611 }, { "epoch": 0.374144133689219, "grad_norm": 0.5301645994186401, "learning_rate": 0.0001972624697320864, "loss": 0.9529, "step": 1612 }, { "epoch": 0.37437623302773587, "grad_norm": 0.4944808781147003, "learning_rate": 0.00019725907942992317, "loss": 0.9079, "step": 1613 }, { "epoch": 0.37460833236625274, "grad_norm": 0.563400149345398, "learning_rate": 0.0001972556870588647, "loss": 0.8874, "step": 1614 }, { "epoch": 0.37484043170476966, "grad_norm": 0.6124091148376465, "learning_rate": 0.00019725229261898316, "loss": 0.9141, "step": 1615 }, { "epoch": 0.37507253104328653, "grad_norm": 0.5982673764228821, "learning_rate": 0.00019724889611035076, "loss": 0.9128, "step": 1616 }, { "epoch": 0.3753046303818034, "grad_norm": 0.5352581143379211, "learning_rate": 0.00019724549753303972, "loss": 0.9417, "step": 1617 }, { "epoch": 0.3755367297203203, "grad_norm": 0.6180575489997864, "learning_rate": 0.00019724209688712233, "loss": 0.9843, "step": 1618 }, { "epoch": 0.3757688290588372, "grad_norm": 0.575268566608429, 
"learning_rate": 0.00019723869417267098, "loss": 0.9445, "step": 1619 }, { "epoch": 0.37600092839735405, "grad_norm": 0.6407877802848816, "learning_rate": 0.00019723528938975798, "loss": 0.9044, "step": 1620 }, { "epoch": 0.376233027735871, "grad_norm": 0.5825797915458679, "learning_rate": 0.00019723188253845586, "loss": 0.8734, "step": 1621 }, { "epoch": 0.37646512707438784, "grad_norm": 0.5917460918426514, "learning_rate": 0.000197228473618837, "loss": 0.9556, "step": 1622 }, { "epoch": 0.3766972264129047, "grad_norm": 0.564852774143219, "learning_rate": 0.0001972250626309739, "loss": 0.9908, "step": 1623 }, { "epoch": 0.37692932575142163, "grad_norm": 0.628251850605011, "learning_rate": 0.00019722164957493922, "loss": 0.9212, "step": 1624 }, { "epoch": 0.3771614250899385, "grad_norm": 0.4945141673088074, "learning_rate": 0.00019721823445080548, "loss": 0.9269, "step": 1625 }, { "epoch": 0.37739352442845536, "grad_norm": 0.6066126227378845, "learning_rate": 0.00019721481725864533, "loss": 0.8847, "step": 1626 }, { "epoch": 0.3776256237669723, "grad_norm": 0.5646041631698608, "learning_rate": 0.00019721139799853152, "loss": 0.9541, "step": 1627 }, { "epoch": 0.37785772310548915, "grad_norm": 0.5645127296447754, "learning_rate": 0.00019720797667053673, "loss": 0.9604, "step": 1628 }, { "epoch": 0.378089822444006, "grad_norm": 0.5751519799232483, "learning_rate": 0.00019720455327473376, "loss": 0.9355, "step": 1629 }, { "epoch": 0.37832192178252294, "grad_norm": 0.5986213684082031, "learning_rate": 0.00019720112781119542, "loss": 0.8732, "step": 1630 }, { "epoch": 0.3785540211210398, "grad_norm": 0.5750144124031067, "learning_rate": 0.00019719770027999456, "loss": 0.9367, "step": 1631 }, { "epoch": 0.3787861204595567, "grad_norm": 0.5141638517379761, "learning_rate": 0.00019719427068120413, "loss": 0.9309, "step": 1632 }, { "epoch": 0.3790182197980736, "grad_norm": 0.5338748693466187, "learning_rate": 0.00019719083901489708, "loss": 0.9242, "step": 1633 }, { 
"epoch": 0.37925031913659046, "grad_norm": 0.5212205648422241, "learning_rate": 0.00019718740528114638, "loss": 0.9179, "step": 1634 }, { "epoch": 0.37948241847510733, "grad_norm": 0.580852746963501, "learning_rate": 0.0001971839694800251, "loss": 0.9268, "step": 1635 }, { "epoch": 0.37971451781362425, "grad_norm": 0.6030675172805786, "learning_rate": 0.0001971805316116063, "loss": 0.9295, "step": 1636 }, { "epoch": 0.3799466171521411, "grad_norm": 0.533976137638092, "learning_rate": 0.00019717709167596314, "loss": 0.9213, "step": 1637 }, { "epoch": 0.380178716490658, "grad_norm": 0.5307078957557678, "learning_rate": 0.00019717364967316878, "loss": 0.8894, "step": 1638 }, { "epoch": 0.3804108158291749, "grad_norm": 0.5469356179237366, "learning_rate": 0.00019717020560329642, "loss": 0.9096, "step": 1639 }, { "epoch": 0.3806429151676918, "grad_norm": 0.4882349371910095, "learning_rate": 0.00019716675946641936, "loss": 0.9003, "step": 1640 }, { "epoch": 0.38087501450620864, "grad_norm": 0.7171533107757568, "learning_rate": 0.00019716331126261088, "loss": 0.9311, "step": 1641 }, { "epoch": 0.38110711384472556, "grad_norm": 0.5348209142684937, "learning_rate": 0.00019715986099194433, "loss": 0.9467, "step": 1642 }, { "epoch": 0.38133921318324243, "grad_norm": 0.6302210688591003, "learning_rate": 0.00019715640865449312, "loss": 0.9747, "step": 1643 }, { "epoch": 0.3815713125217593, "grad_norm": 0.5794427990913391, "learning_rate": 0.00019715295425033066, "loss": 0.8839, "step": 1644 }, { "epoch": 0.3818034118602762, "grad_norm": 0.558002769947052, "learning_rate": 0.00019714949777953046, "loss": 0.9287, "step": 1645 }, { "epoch": 0.3820355111987931, "grad_norm": 0.5529537200927734, "learning_rate": 0.00019714603924216606, "loss": 0.9151, "step": 1646 }, { "epoch": 0.38226761053730995, "grad_norm": 0.5004493594169617, "learning_rate": 0.000197142578638311, "loss": 0.9708, "step": 1647 }, { "epoch": 0.3824997098758269, "grad_norm": 0.48359474539756775, "learning_rate": 
0.00019713911596803887, "loss": 0.9498, "step": 1648 }, { "epoch": 0.38273180921434374, "grad_norm": 0.4988899230957031, "learning_rate": 0.0001971356512314234, "loss": 0.8796, "step": 1649 }, { "epoch": 0.3829639085528606, "grad_norm": 0.4979213774204254, "learning_rate": 0.0001971321844285382, "loss": 0.9552, "step": 1650 }, { "epoch": 0.38319600789137753, "grad_norm": 0.5435231924057007, "learning_rate": 0.0001971287155594571, "loss": 0.9588, "step": 1651 }, { "epoch": 0.3834281072298944, "grad_norm": 0.5512322187423706, "learning_rate": 0.00019712524462425383, "loss": 0.8707, "step": 1652 }, { "epoch": 0.38366020656841127, "grad_norm": 0.5418404340744019, "learning_rate": 0.00019712177162300228, "loss": 0.9988, "step": 1653 }, { "epoch": 0.3838923059069282, "grad_norm": 0.5771339535713196, "learning_rate": 0.0001971182965557763, "loss": 0.9474, "step": 1654 }, { "epoch": 0.38412440524544506, "grad_norm": 0.5901991724967957, "learning_rate": 0.0001971148194226498, "loss": 0.9048, "step": 1655 }, { "epoch": 0.3843565045839619, "grad_norm": 0.5912519693374634, "learning_rate": 0.00019711134022369674, "loss": 0.9447, "step": 1656 }, { "epoch": 0.38458860392247884, "grad_norm": 0.561922550201416, "learning_rate": 0.00019710785895899114, "loss": 0.9306, "step": 1657 }, { "epoch": 0.3848207032609957, "grad_norm": 0.4707268476486206, "learning_rate": 0.0001971043756286071, "loss": 0.9265, "step": 1658 }, { "epoch": 0.3850528025995126, "grad_norm": 0.5743205547332764, "learning_rate": 0.00019710089023261863, "loss": 0.9639, "step": 1659 }, { "epoch": 0.3852849019380295, "grad_norm": 0.5739997029304504, "learning_rate": 0.00019709740277109995, "loss": 0.9238, "step": 1660 }, { "epoch": 0.38551700127654637, "grad_norm": 0.5229659080505371, "learning_rate": 0.00019709391324412517, "loss": 0.9338, "step": 1661 }, { "epoch": 0.38574910061506323, "grad_norm": 0.5479699373245239, "learning_rate": 0.00019709042165176862, "loss": 0.9203, "step": 1662 }, { "epoch": 
0.38598119995358016, "grad_norm": 0.5656000375747681, "learning_rate": 0.00019708692799410446, "loss": 0.8987, "step": 1663 }, { "epoch": 0.386213299292097, "grad_norm": 0.5645245909690857, "learning_rate": 0.00019708343227120708, "loss": 0.9203, "step": 1664 }, { "epoch": 0.3864453986306139, "grad_norm": 0.5992217063903809, "learning_rate": 0.00019707993448315084, "loss": 0.9215, "step": 1665 }, { "epoch": 0.3866774979691308, "grad_norm": 0.6246165037155151, "learning_rate": 0.00019707643463001012, "loss": 0.9134, "step": 1666 }, { "epoch": 0.3869095973076477, "grad_norm": 0.5814501643180847, "learning_rate": 0.00019707293271185937, "loss": 0.9781, "step": 1667 }, { "epoch": 0.38714169664616455, "grad_norm": 0.60662841796875, "learning_rate": 0.0001970694287287731, "loss": 0.9623, "step": 1668 }, { "epoch": 0.38737379598468147, "grad_norm": 0.4914987087249756, "learning_rate": 0.00019706592268082582, "loss": 0.9251, "step": 1669 }, { "epoch": 0.38760589532319834, "grad_norm": 0.555020272731781, "learning_rate": 0.0001970624145680921, "loss": 0.923, "step": 1670 }, { "epoch": 0.3878379946617152, "grad_norm": 0.5705359578132629, "learning_rate": 0.00019705890439064664, "loss": 0.9033, "step": 1671 }, { "epoch": 0.3880700940002321, "grad_norm": 0.5839096307754517, "learning_rate": 0.00019705539214856404, "loss": 0.9644, "step": 1672 }, { "epoch": 0.388302193338749, "grad_norm": 0.7123615741729736, "learning_rate": 0.00019705187784191904, "loss": 0.8681, "step": 1673 }, { "epoch": 0.38853429267726586, "grad_norm": 0.5580779910087585, "learning_rate": 0.00019704836147078637, "loss": 0.9515, "step": 1674 }, { "epoch": 0.3887663920157828, "grad_norm": 0.6267217993736267, "learning_rate": 0.00019704484303524088, "loss": 1.007, "step": 1675 }, { "epoch": 0.38899849135429965, "grad_norm": 0.5746256113052368, "learning_rate": 0.00019704132253535735, "loss": 0.9877, "step": 1676 }, { "epoch": 0.3892305906928165, "grad_norm": 0.5516994595527649, "learning_rate": 
0.00019703779997121072, "loss": 0.9039, "step": 1677 }, { "epoch": 0.38946269003133344, "grad_norm": 0.5637810230255127, "learning_rate": 0.0001970342753428759, "loss": 0.9232, "step": 1678 }, { "epoch": 0.3896947893698503, "grad_norm": 0.500447154045105, "learning_rate": 0.00019703074865042787, "loss": 0.9452, "step": 1679 }, { "epoch": 0.38992688870836717, "grad_norm": 0.6201179027557373, "learning_rate": 0.00019702721989394163, "loss": 0.9057, "step": 1680 }, { "epoch": 0.3901589880468841, "grad_norm": 0.5591408014297485, "learning_rate": 0.00019702368907349228, "loss": 0.9341, "step": 1681 }, { "epoch": 0.39039108738540096, "grad_norm": 0.5264056324958801, "learning_rate": 0.0001970201561891549, "loss": 0.9291, "step": 1682 }, { "epoch": 0.3906231867239178, "grad_norm": 0.6217264533042908, "learning_rate": 0.00019701662124100466, "loss": 0.9178, "step": 1683 }, { "epoch": 0.39085528606243475, "grad_norm": 0.6031366586685181, "learning_rate": 0.00019701308422911672, "loss": 0.9309, "step": 1684 }, { "epoch": 0.3910873854009516, "grad_norm": 0.6389620304107666, "learning_rate": 0.00019700954515356638, "loss": 0.9195, "step": 1685 }, { "epoch": 0.3913194847394685, "grad_norm": 0.593556821346283, "learning_rate": 0.0001970060040144289, "loss": 0.8891, "step": 1686 }, { "epoch": 0.3915515840779854, "grad_norm": 0.6324663758277893, "learning_rate": 0.00019700246081177957, "loss": 0.8818, "step": 1687 }, { "epoch": 0.39178368341650227, "grad_norm": 0.5469187498092651, "learning_rate": 0.0001969989155456938, "loss": 0.943, "step": 1688 }, { "epoch": 0.39201578275501914, "grad_norm": 0.6830863356590271, "learning_rate": 0.00019699536821624696, "loss": 0.8699, "step": 1689 }, { "epoch": 0.39224788209353606, "grad_norm": 0.5306219458580017, "learning_rate": 0.00019699181882351457, "loss": 0.9067, "step": 1690 }, { "epoch": 0.3924799814320529, "grad_norm": 0.5304410457611084, "learning_rate": 0.0001969882673675721, "loss": 0.9112, "step": 1691 }, { "epoch": 
0.3927120807705698, "grad_norm": 0.5035080909729004, "learning_rate": 0.0001969847138484951, "loss": 0.9095, "step": 1692 }, { "epoch": 0.3929441801090867, "grad_norm": 0.5795302987098694, "learning_rate": 0.00019698115826635916, "loss": 0.9402, "step": 1693 }, { "epoch": 0.3931762794476036, "grad_norm": 0.6034168004989624, "learning_rate": 0.0001969776006212399, "loss": 0.9934, "step": 1694 }, { "epoch": 0.39340837878612045, "grad_norm": 0.5329034924507141, "learning_rate": 0.00019697404091321302, "loss": 0.899, "step": 1695 }, { "epoch": 0.39364047812463737, "grad_norm": 0.564725935459137, "learning_rate": 0.00019697047914235427, "loss": 0.9435, "step": 1696 }, { "epoch": 0.39387257746315424, "grad_norm": 0.5388345718383789, "learning_rate": 0.00019696691530873936, "loss": 0.9094, "step": 1697 }, { "epoch": 0.3941046768016711, "grad_norm": 0.6013877391815186, "learning_rate": 0.0001969633494124441, "loss": 0.9225, "step": 1698 }, { "epoch": 0.39433677614018803, "grad_norm": 0.5003378987312317, "learning_rate": 0.0001969597814535444, "loss": 0.9175, "step": 1699 }, { "epoch": 0.3945688754787049, "grad_norm": 0.4714001417160034, "learning_rate": 0.0001969562114321161, "loss": 0.9208, "step": 1700 }, { "epoch": 0.39480097481722176, "grad_norm": 0.7132161855697632, "learning_rate": 0.00019695263934823516, "loss": 0.9277, "step": 1701 }, { "epoch": 0.3950330741557387, "grad_norm": 0.5339311361312866, "learning_rate": 0.00019694906520197758, "loss": 0.9398, "step": 1702 }, { "epoch": 0.39526517349425555, "grad_norm": 0.6531013250350952, "learning_rate": 0.00019694548899341937, "loss": 0.9028, "step": 1703 }, { "epoch": 0.3954972728327724, "grad_norm": 0.6410702466964722, "learning_rate": 0.00019694191072263664, "loss": 0.8977, "step": 1704 }, { "epoch": 0.3957293721712893, "grad_norm": 0.5067394375801086, "learning_rate": 0.00019693833038970544, "loss": 0.8951, "step": 1705 }, { "epoch": 0.3959614715098062, "grad_norm": 0.6658715605735779, "learning_rate": 
0.000196934747994702, "loss": 0.9467, "step": 1706 }, { "epoch": 0.3961935708483231, "grad_norm": 0.6768953204154968, "learning_rate": 0.00019693116353770247, "loss": 0.9463, "step": 1707 }, { "epoch": 0.39642567018683994, "grad_norm": 0.5174306631088257, "learning_rate": 0.00019692757701878313, "loss": 0.8979, "step": 1708 }, { "epoch": 0.39665776952535686, "grad_norm": 0.7803263068199158, "learning_rate": 0.00019692398843802026, "loss": 0.8818, "step": 1709 }, { "epoch": 0.39688986886387373, "grad_norm": 0.6031627655029297, "learning_rate": 0.00019692039779549024, "loss": 0.9249, "step": 1710 }, { "epoch": 0.3971219682023906, "grad_norm": 0.7092903852462769, "learning_rate": 0.00019691680509126936, "loss": 0.9214, "step": 1711 }, { "epoch": 0.3973540675409075, "grad_norm": 0.6472411155700684, "learning_rate": 0.00019691321032543413, "loss": 0.9442, "step": 1712 }, { "epoch": 0.3975861668794244, "grad_norm": 0.6126742959022522, "learning_rate": 0.00019690961349806097, "loss": 0.9291, "step": 1713 }, { "epoch": 0.39781826621794125, "grad_norm": 0.6912745833396912, "learning_rate": 0.0001969060146092264, "loss": 0.8853, "step": 1714 }, { "epoch": 0.3980503655564582, "grad_norm": 0.6399800181388855, "learning_rate": 0.000196902413659007, "loss": 0.9397, "step": 1715 }, { "epoch": 0.39828246489497504, "grad_norm": 0.761953592300415, "learning_rate": 0.00019689881064747936, "loss": 0.8949, "step": 1716 }, { "epoch": 0.3985145642334919, "grad_norm": 0.6291630268096924, "learning_rate": 0.00019689520557472005, "loss": 0.9436, "step": 1717 }, { "epoch": 0.39874666357200883, "grad_norm": 0.5590611100196838, "learning_rate": 0.00019689159844080587, "loss": 0.9428, "step": 1718 }, { "epoch": 0.3989787629105257, "grad_norm": 0.6626025438308716, "learning_rate": 0.00019688798924581352, "loss": 0.8818, "step": 1719 }, { "epoch": 0.39921086224904256, "grad_norm": 0.6841716170310974, "learning_rate": 0.00019688437798981974, "loss": 0.9612, "step": 1720 }, { "epoch": 
0.3994429615875595, "grad_norm": 0.5206955075263977, "learning_rate": 0.00019688076467290136, "loss": 0.8924, "step": 1721 }, { "epoch": 0.39967506092607635, "grad_norm": 0.5658777356147766, "learning_rate": 0.0001968771492951352, "loss": 0.9681, "step": 1722 }, { "epoch": 0.3999071602645932, "grad_norm": 0.5846256613731384, "learning_rate": 0.00019687353185659828, "loss": 0.9865, "step": 1723 }, { "epoch": 0.40013925960311014, "grad_norm": 0.5471869707107544, "learning_rate": 0.00019686991235736746, "loss": 0.9304, "step": 1724 }, { "epoch": 0.400371358941627, "grad_norm": 0.5699465870857239, "learning_rate": 0.00019686629079751977, "loss": 0.9242, "step": 1725 }, { "epoch": 0.4006034582801439, "grad_norm": 0.5692659616470337, "learning_rate": 0.0001968626671771322, "loss": 0.922, "step": 1726 }, { "epoch": 0.4008355576186608, "grad_norm": 0.5654596090316772, "learning_rate": 0.0001968590414962819, "loss": 0.9967, "step": 1727 }, { "epoch": 0.40106765695717767, "grad_norm": 0.6113571524620056, "learning_rate": 0.000196855413755046, "loss": 0.9018, "step": 1728 }, { "epoch": 0.40129975629569453, "grad_norm": 0.6133480072021484, "learning_rate": 0.00019685178395350156, "loss": 0.9501, "step": 1729 }, { "epoch": 0.40153185563421145, "grad_norm": 0.5793326497077942, "learning_rate": 0.00019684815209172592, "loss": 0.9717, "step": 1730 }, { "epoch": 0.4017639549727283, "grad_norm": 0.5764376521110535, "learning_rate": 0.00019684451816979624, "loss": 0.9021, "step": 1731 }, { "epoch": 0.4019960543112452, "grad_norm": 0.535716712474823, "learning_rate": 0.00019684088218778988, "loss": 0.9845, "step": 1732 }, { "epoch": 0.4022281536497621, "grad_norm": 0.6482261419296265, "learning_rate": 0.00019683724414578418, "loss": 0.9084, "step": 1733 }, { "epoch": 0.402460252988279, "grad_norm": 0.5768622159957886, "learning_rate": 0.00019683360404385654, "loss": 0.9228, "step": 1734 }, { "epoch": 0.40269235232679584, "grad_norm": 0.5473170876502991, "learning_rate": 
0.0001968299618820843, "loss": 0.9223, "step": 1735 }, { "epoch": 0.40292445166531277, "grad_norm": 0.586716890335083, "learning_rate": 0.00019682631766054507, "loss": 0.8924, "step": 1736 }, { "epoch": 0.40315655100382963, "grad_norm": 0.6202045679092407, "learning_rate": 0.00019682267137931624, "loss": 0.9354, "step": 1737 }, { "epoch": 0.4033886503423465, "grad_norm": 0.4940319061279297, "learning_rate": 0.0001968190230384755, "loss": 0.8903, "step": 1738 }, { "epoch": 0.4036207496808634, "grad_norm": 0.6034631133079529, "learning_rate": 0.00019681537263810037, "loss": 0.9081, "step": 1739 }, { "epoch": 0.4038528490193803, "grad_norm": 0.5716384053230286, "learning_rate": 0.00019681172017826853, "loss": 0.9138, "step": 1740 }, { "epoch": 0.40408494835789716, "grad_norm": 0.48418059945106506, "learning_rate": 0.00019680806565905768, "loss": 0.9187, "step": 1741 }, { "epoch": 0.4043170476964141, "grad_norm": 0.5621926188468933, "learning_rate": 0.00019680440908054553, "loss": 0.9346, "step": 1742 }, { "epoch": 0.40454914703493094, "grad_norm": 0.4910869896411896, "learning_rate": 0.0001968007504428099, "loss": 0.946, "step": 1743 }, { "epoch": 0.4047812463734478, "grad_norm": 0.5012584924697876, "learning_rate": 0.0001967970897459286, "loss": 0.9489, "step": 1744 }, { "epoch": 0.40501334571196473, "grad_norm": 0.6289322376251221, "learning_rate": 0.00019679342698997946, "loss": 0.8338, "step": 1745 }, { "epoch": 0.4052454450504816, "grad_norm": 0.5225081443786621, "learning_rate": 0.00019678976217504048, "loss": 0.9403, "step": 1746 }, { "epoch": 0.40547754438899847, "grad_norm": 0.5957691073417664, "learning_rate": 0.00019678609530118956, "loss": 0.9153, "step": 1747 }, { "epoch": 0.4057096437275154, "grad_norm": 0.5762056112289429, "learning_rate": 0.0001967824263685047, "loss": 0.956, "step": 1748 }, { "epoch": 0.40594174306603226, "grad_norm": 0.6158578991889954, "learning_rate": 0.000196778755377064, "loss": 0.9039, "step": 1749 }, { "epoch": 
0.4061738424045491, "grad_norm": 0.6595797538757324, "learning_rate": 0.0001967750823269455, "loss": 0.8951, "step": 1750 }, { "epoch": 0.40640594174306605, "grad_norm": 0.6410776972770691, "learning_rate": 0.00019677140721822734, "loss": 0.8931, "step": 1751 }, { "epoch": 0.4066380410815829, "grad_norm": 0.7138279676437378, "learning_rate": 0.00019676773005098766, "loss": 0.9195, "step": 1752 }, { "epoch": 0.4068701404200998, "grad_norm": 0.5348248481750488, "learning_rate": 0.00019676405082530476, "loss": 0.9232, "step": 1753 }, { "epoch": 0.4071022397586167, "grad_norm": 0.6424762010574341, "learning_rate": 0.00019676036954125684, "loss": 0.9068, "step": 1754 }, { "epoch": 0.40733433909713357, "grad_norm": 0.6323909759521484, "learning_rate": 0.00019675668619892228, "loss": 0.929, "step": 1755 }, { "epoch": 0.40756643843565044, "grad_norm": 0.541854977607727, "learning_rate": 0.00019675300079837935, "loss": 0.9729, "step": 1756 }, { "epoch": 0.40779853777416736, "grad_norm": 0.7092952132225037, "learning_rate": 0.00019674931333970647, "loss": 0.9005, "step": 1757 }, { "epoch": 0.4080306371126842, "grad_norm": 0.5437161922454834, "learning_rate": 0.0001967456238229821, "loss": 0.9944, "step": 1758 }, { "epoch": 0.4082627364512011, "grad_norm": 0.5253750681877136, "learning_rate": 0.00019674193224828473, "loss": 0.9535, "step": 1759 }, { "epoch": 0.408494835789718, "grad_norm": 0.6329406499862671, "learning_rate": 0.00019673823861569286, "loss": 0.8904, "step": 1760 }, { "epoch": 0.4087269351282349, "grad_norm": 0.5530345439910889, "learning_rate": 0.00019673454292528508, "loss": 0.934, "step": 1761 }, { "epoch": 0.40895903446675175, "grad_norm": 0.6421835422515869, "learning_rate": 0.00019673084517714, "loss": 0.9014, "step": 1762 }, { "epoch": 0.40919113380526867, "grad_norm": 0.5271580219268799, "learning_rate": 0.00019672714537133628, "loss": 0.923, "step": 1763 }, { "epoch": 0.40942323314378554, "grad_norm": 0.5356336236000061, "learning_rate": 
0.00019672344350795258, "loss": 0.9246, "step": 1764 }, { "epoch": 0.4096553324823024, "grad_norm": 0.6168617606163025, "learning_rate": 0.0001967197395870677, "loss": 0.923, "step": 1765 }, { "epoch": 0.4098874318208193, "grad_norm": 0.49557581543922424, "learning_rate": 0.00019671603360876043, "loss": 0.9448, "step": 1766 }, { "epoch": 0.4101195311593362, "grad_norm": 0.5493084192276001, "learning_rate": 0.00019671232557310958, "loss": 0.9362, "step": 1767 }, { "epoch": 0.41035163049785306, "grad_norm": 0.6057862639427185, "learning_rate": 0.00019670861548019405, "loss": 0.9443, "step": 1768 }, { "epoch": 0.41058372983637, "grad_norm": 0.5347152948379517, "learning_rate": 0.0001967049033300927, "loss": 0.9054, "step": 1769 }, { "epoch": 0.41081582917488685, "grad_norm": 0.5570089817047119, "learning_rate": 0.0001967011891228846, "loss": 0.9094, "step": 1770 }, { "epoch": 0.4110479285134037, "grad_norm": 0.5425180792808533, "learning_rate": 0.00019669747285864863, "loss": 0.9072, "step": 1771 }, { "epoch": 0.41128002785192064, "grad_norm": 0.5784744024276733, "learning_rate": 0.00019669375453746396, "loss": 1.0027, "step": 1772 }, { "epoch": 0.4115121271904375, "grad_norm": 0.6552026867866516, "learning_rate": 0.0001966900341594096, "loss": 0.9353, "step": 1773 }, { "epoch": 0.41174422652895437, "grad_norm": 0.4845140874385834, "learning_rate": 0.0001966863117245648, "loss": 0.9227, "step": 1774 }, { "epoch": 0.4119763258674713, "grad_norm": 0.5522558689117432, "learning_rate": 0.0001966825872330086, "loss": 0.9159, "step": 1775 }, { "epoch": 0.41220842520598816, "grad_norm": 0.6886111497879028, "learning_rate": 0.0001966788606848203, "loss": 0.8733, "step": 1776 }, { "epoch": 0.412440524544505, "grad_norm": 0.5358473062515259, "learning_rate": 0.0001966751320800792, "loss": 0.8916, "step": 1777 }, { "epoch": 0.41267262388302195, "grad_norm": 0.574971079826355, "learning_rate": 0.0001966714014188646, "loss": 0.8828, "step": 1778 }, { "epoch": 0.4129047232215388, 
"grad_norm": 0.5384320616722107, "learning_rate": 0.0001966676687012558, "loss": 0.8881, "step": 1779 }, { "epoch": 0.4131368225600557, "grad_norm": 0.6178532838821411, "learning_rate": 0.00019666393392733228, "loss": 0.9724, "step": 1780 }, { "epoch": 0.4133689218985726, "grad_norm": 0.5532113313674927, "learning_rate": 0.00019666019709717344, "loss": 0.9535, "step": 1781 }, { "epoch": 0.4136010212370895, "grad_norm": 0.5668889880180359, "learning_rate": 0.00019665645821085876, "loss": 0.9127, "step": 1782 }, { "epoch": 0.41383312057560634, "grad_norm": 0.5764045715332031, "learning_rate": 0.00019665271726846783, "loss": 0.9412, "step": 1783 }, { "epoch": 0.41406521991412326, "grad_norm": 0.5341030955314636, "learning_rate": 0.00019664897427008014, "loss": 0.9349, "step": 1784 }, { "epoch": 0.41429731925264013, "grad_norm": 0.6231575012207031, "learning_rate": 0.00019664522921577544, "loss": 0.8928, "step": 1785 }, { "epoch": 0.414529418591157, "grad_norm": 0.5901029706001282, "learning_rate": 0.00019664148210563328, "loss": 0.9054, "step": 1786 }, { "epoch": 0.4147615179296739, "grad_norm": 0.5409894585609436, "learning_rate": 0.0001966377329397334, "loss": 0.8859, "step": 1787 }, { "epoch": 0.4149936172681908, "grad_norm": 0.6134136915206909, "learning_rate": 0.00019663398171815554, "loss": 0.8984, "step": 1788 }, { "epoch": 0.41522571660670765, "grad_norm": 0.5341612696647644, "learning_rate": 0.00019663022844097956, "loss": 0.8723, "step": 1789 }, { "epoch": 0.4154578159452246, "grad_norm": 0.5658878684043884, "learning_rate": 0.00019662647310828523, "loss": 0.8559, "step": 1790 }, { "epoch": 0.41568991528374144, "grad_norm": 0.49125760793685913, "learning_rate": 0.00019662271572015247, "loss": 0.8786, "step": 1791 }, { "epoch": 0.4159220146222583, "grad_norm": 0.6301273703575134, "learning_rate": 0.00019661895627666115, "loss": 0.8943, "step": 1792 }, { "epoch": 0.41615411396077523, "grad_norm": 0.5750293731689453, "learning_rate": 0.00019661519477789135, 
"loss": 0.8957, "step": 1793 }, { "epoch": 0.4163862132992921, "grad_norm": 0.5299922823905945, "learning_rate": 0.000196611431223923, "loss": 0.963, "step": 1794 }, { "epoch": 0.41661831263780896, "grad_norm": 0.7080173492431641, "learning_rate": 0.00019660766561483618, "loss": 0.9599, "step": 1795 }, { "epoch": 0.4168504119763259, "grad_norm": 0.5741339921951294, "learning_rate": 0.00019660389795071097, "loss": 0.941, "step": 1796 }, { "epoch": 0.41708251131484275, "grad_norm": 0.5957292318344116, "learning_rate": 0.00019660012823162755, "loss": 0.91, "step": 1797 }, { "epoch": 0.4173146106533596, "grad_norm": 0.5832741856575012, "learning_rate": 0.0001965963564576661, "loss": 0.9038, "step": 1798 }, { "epoch": 0.41754670999187654, "grad_norm": 0.613530158996582, "learning_rate": 0.00019659258262890683, "loss": 0.9392, "step": 1799 }, { "epoch": 0.4177788093303934, "grad_norm": 0.5645830631256104, "learning_rate": 0.00019658880674543004, "loss": 0.9393, "step": 1800 }, { "epoch": 0.4180109086689103, "grad_norm": 0.5023618936538696, "learning_rate": 0.00019658502880731609, "loss": 0.8936, "step": 1801 }, { "epoch": 0.4182430080074272, "grad_norm": 0.7162852883338928, "learning_rate": 0.00019658124881464525, "loss": 0.9267, "step": 1802 }, { "epoch": 0.41847510734594406, "grad_norm": 0.6218928694725037, "learning_rate": 0.000196577466767498, "loss": 0.9263, "step": 1803 }, { "epoch": 0.41870720668446093, "grad_norm": 0.5862722396850586, "learning_rate": 0.00019657368266595476, "loss": 0.8879, "step": 1804 }, { "epoch": 0.41893930602297785, "grad_norm": 0.5510787963867188, "learning_rate": 0.00019656989651009607, "loss": 0.9052, "step": 1805 }, { "epoch": 0.4191714053614947, "grad_norm": 0.6477943062782288, "learning_rate": 0.00019656610830000242, "loss": 0.8852, "step": 1806 }, { "epoch": 0.4194035047000116, "grad_norm": 0.5812867283821106, "learning_rate": 0.0001965623180357544, "loss": 0.946, "step": 1807 }, { "epoch": 0.4196356040385285, "grad_norm": 
0.5934841632843018, "learning_rate": 0.00019655852571743264, "loss": 0.9071, "step": 1808 }, { "epoch": 0.4198677033770454, "grad_norm": 0.5561374425888062, "learning_rate": 0.00019655473134511784, "loss": 0.927, "step": 1809 }, { "epoch": 0.42009980271556224, "grad_norm": 1.1915708780288696, "learning_rate": 0.00019655093491889068, "loss": 0.8689, "step": 1810 }, { "epoch": 0.42033190205407917, "grad_norm": 0.6147586107254028, "learning_rate": 0.00019654713643883193, "loss": 0.884, "step": 1811 }, { "epoch": 0.42056400139259603, "grad_norm": 0.5104571580886841, "learning_rate": 0.00019654333590502234, "loss": 0.9142, "step": 1812 }, { "epoch": 0.4207961007311129, "grad_norm": 0.5579031705856323, "learning_rate": 0.00019653953331754285, "loss": 0.9324, "step": 1813 }, { "epoch": 0.4210282000696298, "grad_norm": 0.5296502709388733, "learning_rate": 0.00019653572867647426, "loss": 0.8842, "step": 1814 }, { "epoch": 0.4212602994081467, "grad_norm": 0.5059155821800232, "learning_rate": 0.0001965319219818976, "loss": 0.9587, "step": 1815 }, { "epoch": 0.42149239874666355, "grad_norm": 0.565586507320404, "learning_rate": 0.00019652811323389376, "loss": 0.9126, "step": 1816 }, { "epoch": 0.4217244980851805, "grad_norm": 0.5331727266311646, "learning_rate": 0.00019652430243254377, "loss": 0.9126, "step": 1817 }, { "epoch": 0.42195659742369734, "grad_norm": 0.677875816822052, "learning_rate": 0.00019652048957792876, "loss": 0.9316, "step": 1818 }, { "epoch": 0.4221886967622142, "grad_norm": 0.5276561975479126, "learning_rate": 0.00019651667467012978, "loss": 0.8651, "step": 1819 }, { "epoch": 0.42242079610073113, "grad_norm": 0.5190433859825134, "learning_rate": 0.00019651285770922797, "loss": 0.917, "step": 1820 }, { "epoch": 0.422652895439248, "grad_norm": 0.5006272792816162, "learning_rate": 0.00019650903869530454, "loss": 0.9645, "step": 1821 }, { "epoch": 0.42288499477776487, "grad_norm": 0.5366963148117065, "learning_rate": 0.00019650521762844077, "loss": 0.912, 
"step": 1822 }, { "epoch": 0.4231170941162818, "grad_norm": 0.4790819585323334, "learning_rate": 0.00019650139450871787, "loss": 0.9122, "step": 1823 }, { "epoch": 0.42334919345479866, "grad_norm": 0.5402861833572388, "learning_rate": 0.0001964975693362172, "loss": 0.955, "step": 1824 }, { "epoch": 0.4235812927933155, "grad_norm": 0.5376525521278381, "learning_rate": 0.00019649374211102013, "loss": 0.8837, "step": 1825 }, { "epoch": 0.42381339213183244, "grad_norm": 0.5535842180252075, "learning_rate": 0.0001964899128332081, "loss": 0.9001, "step": 1826 }, { "epoch": 0.4240454914703493, "grad_norm": 0.5926379561424255, "learning_rate": 0.0001964860815028625, "loss": 0.9202, "step": 1827 }, { "epoch": 0.4242775908088662, "grad_norm": 0.547949492931366, "learning_rate": 0.0001964822481200649, "loss": 0.9076, "step": 1828 }, { "epoch": 0.4245096901473831, "grad_norm": 0.4945877194404602, "learning_rate": 0.00019647841268489682, "loss": 0.9013, "step": 1829 }, { "epoch": 0.42474178948589997, "grad_norm": 0.4917088449001312, "learning_rate": 0.0001964745751974398, "loss": 0.9332, "step": 1830 }, { "epoch": 0.42497388882441683, "grad_norm": 0.5218760967254639, "learning_rate": 0.00019647073565777555, "loss": 0.9954, "step": 1831 }, { "epoch": 0.42520598816293376, "grad_norm": 0.5042544007301331, "learning_rate": 0.00019646689406598567, "loss": 0.9127, "step": 1832 }, { "epoch": 0.4254380875014506, "grad_norm": 0.4927321672439575, "learning_rate": 0.00019646305042215193, "loss": 0.9304, "step": 1833 }, { "epoch": 0.4256701868399675, "grad_norm": 0.5180162191390991, "learning_rate": 0.00019645920472635608, "loss": 0.9806, "step": 1834 }, { "epoch": 0.4259022861784844, "grad_norm": 0.5972525477409363, "learning_rate": 0.0001964553569786799, "loss": 0.8653, "step": 1835 }, { "epoch": 0.4261343855170013, "grad_norm": 0.5696606636047363, "learning_rate": 0.00019645150717920528, "loss": 0.9044, "step": 1836 }, { "epoch": 0.42636648485551815, "grad_norm": 0.6031782031059265, 
"learning_rate": 0.00019644765532801412, "loss": 0.9033, "step": 1837 }, { "epoch": 0.42659858419403507, "grad_norm": 0.5550394654273987, "learning_rate": 0.0001964438014251883, "loss": 0.9123, "step": 1838 }, { "epoch": 0.42683068353255194, "grad_norm": 0.6705706119537354, "learning_rate": 0.00019643994547080982, "loss": 0.9248, "step": 1839 }, { "epoch": 0.4270627828710688, "grad_norm": 0.6391993761062622, "learning_rate": 0.0001964360874649607, "loss": 0.9417, "step": 1840 }, { "epoch": 0.4272948822095857, "grad_norm": 0.4748203754425049, "learning_rate": 0.0001964322274077231, "loss": 0.9371, "step": 1841 }, { "epoch": 0.4275269815481026, "grad_norm": 0.6255447864532471, "learning_rate": 0.000196428365299179, "loss": 0.8876, "step": 1842 }, { "epoch": 0.42775908088661946, "grad_norm": 0.5568214058876038, "learning_rate": 0.00019642450113941057, "loss": 0.9473, "step": 1843 }, { "epoch": 0.4279911802251364, "grad_norm": 0.609748363494873, "learning_rate": 0.0001964206349285001, "loss": 0.897, "step": 1844 }, { "epoch": 0.42822327956365325, "grad_norm": 0.6173092722892761, "learning_rate": 0.00019641676666652978, "loss": 0.9228, "step": 1845 }, { "epoch": 0.4284553789021701, "grad_norm": 0.5989368557929993, "learning_rate": 0.00019641289635358186, "loss": 0.9863, "step": 1846 }, { "epoch": 0.42868747824068704, "grad_norm": 0.651771605014801, "learning_rate": 0.00019640902398973873, "loss": 0.9606, "step": 1847 }, { "epoch": 0.4289195775792039, "grad_norm": 0.6313779354095459, "learning_rate": 0.0001964051495750827, "loss": 0.9273, "step": 1848 }, { "epoch": 0.42915167691772077, "grad_norm": 0.5805069208145142, "learning_rate": 0.00019640127310969628, "loss": 0.8874, "step": 1849 }, { "epoch": 0.4293837762562377, "grad_norm": 0.6379398107528687, "learning_rate": 0.00019639739459366182, "loss": 0.9533, "step": 1850 }, { "epoch": 0.42961587559475456, "grad_norm": 0.47029098868370056, "learning_rate": 0.0001963935140270619, "loss": 0.8904, "step": 1851 }, { "epoch": 
0.4298479749332714, "grad_norm": 0.5885578393936157, "learning_rate": 0.00019638963140997906, "loss": 0.8957, "step": 1852 }, { "epoch": 0.43008007427178835, "grad_norm": 0.5930177569389343, "learning_rate": 0.00019638574674249587, "loss": 1.0084, "step": 1853 }, { "epoch": 0.4303121736103052, "grad_norm": 0.5558833479881287, "learning_rate": 0.00019638186002469494, "loss": 0.9102, "step": 1854 }, { "epoch": 0.4305442729488221, "grad_norm": 0.5855537056922913, "learning_rate": 0.000196377971256659, "loss": 0.9468, "step": 1855 }, { "epoch": 0.430776372287339, "grad_norm": 0.5754596590995789, "learning_rate": 0.00019637408043847074, "loss": 0.9044, "step": 1856 }, { "epoch": 0.43100847162585587, "grad_norm": 0.584676206111908, "learning_rate": 0.00019637018757021296, "loss": 0.8508, "step": 1857 }, { "epoch": 0.43124057096437274, "grad_norm": 0.49439355731010437, "learning_rate": 0.0001963662926519684, "loss": 0.8681, "step": 1858 }, { "epoch": 0.43147267030288966, "grad_norm": 0.5786611437797546, "learning_rate": 0.00019636239568382, "loss": 0.9083, "step": 1859 }, { "epoch": 0.4317047696414065, "grad_norm": 0.5431936383247375, "learning_rate": 0.00019635849666585058, "loss": 0.9406, "step": 1860 }, { "epoch": 0.4319368689799234, "grad_norm": 0.6521342992782593, "learning_rate": 0.00019635459559814314, "loss": 0.8659, "step": 1861 }, { "epoch": 0.4321689683184403, "grad_norm": 0.5077570676803589, "learning_rate": 0.00019635069248078062, "loss": 0.9172, "step": 1862 }, { "epoch": 0.4324010676569572, "grad_norm": 0.5636994242668152, "learning_rate": 0.00019634678731384608, "loss": 0.9587, "step": 1863 }, { "epoch": 0.43263316699547405, "grad_norm": 0.48513078689575195, "learning_rate": 0.00019634288009742255, "loss": 0.9519, "step": 1864 }, { "epoch": 0.432865266333991, "grad_norm": 0.519437849521637, "learning_rate": 0.00019633897083159318, "loss": 0.9289, "step": 1865 }, { "epoch": 0.43309736567250784, "grad_norm": 0.5995944738388062, "learning_rate": 
0.00019633505951644113, "loss": 0.9566, "step": 1866 }, { "epoch": 0.4333294650110247, "grad_norm": 0.5057395100593567, "learning_rate": 0.00019633114615204958, "loss": 0.9654, "step": 1867 }, { "epoch": 0.43356156434954163, "grad_norm": 0.5791558623313904, "learning_rate": 0.00019632723073850176, "loss": 0.9469, "step": 1868 }, { "epoch": 0.4337936636880585, "grad_norm": 0.5840992331504822, "learning_rate": 0.000196323313275881, "loss": 0.918, "step": 1869 }, { "epoch": 0.43402576302657536, "grad_norm": 0.550893247127533, "learning_rate": 0.00019631939376427062, "loss": 0.8612, "step": 1870 }, { "epoch": 0.4342578623650923, "grad_norm": 0.537064790725708, "learning_rate": 0.00019631547220375398, "loss": 0.9316, "step": 1871 }, { "epoch": 0.43448996170360915, "grad_norm": 0.5622636675834656, "learning_rate": 0.00019631154859441454, "loss": 0.8822, "step": 1872 }, { "epoch": 0.434722061042126, "grad_norm": 0.599727213382721, "learning_rate": 0.0001963076229363357, "loss": 0.956, "step": 1873 }, { "epoch": 0.43495416038064294, "grad_norm": 0.5084268450737, "learning_rate": 0.00019630369522960104, "loss": 0.8993, "step": 1874 }, { "epoch": 0.4351862597191598, "grad_norm": 0.547834038734436, "learning_rate": 0.00019629976547429402, "loss": 0.9046, "step": 1875 }, { "epoch": 0.4354183590576767, "grad_norm": 0.5189753770828247, "learning_rate": 0.0001962958336704983, "loss": 0.8458, "step": 1876 }, { "epoch": 0.4356504583961936, "grad_norm": 0.501224160194397, "learning_rate": 0.00019629189981829753, "loss": 0.905, "step": 1877 }, { "epoch": 0.43588255773471046, "grad_norm": 0.5444706082344055, "learning_rate": 0.0001962879639177753, "loss": 0.8975, "step": 1878 }, { "epoch": 0.43611465707322733, "grad_norm": 0.5328624248504639, "learning_rate": 0.00019628402596901545, "loss": 0.9257, "step": 1879 }, { "epoch": 0.43634675641174425, "grad_norm": 0.5254698991775513, "learning_rate": 0.00019628008597210168, "loss": 0.8739, "step": 1880 }, { "epoch": 0.4365788557502611, 
"grad_norm": 0.5245271921157837, "learning_rate": 0.0001962761439271178, "loss": 0.8952, "step": 1881 }, { "epoch": 0.436810955088778, "grad_norm": 0.5154178142547607, "learning_rate": 0.00019627219983414768, "loss": 0.9408, "step": 1882 }, { "epoch": 0.4370430544272949, "grad_norm": 0.5660544037818909, "learning_rate": 0.00019626825369327525, "loss": 0.8846, "step": 1883 }, { "epoch": 0.4372751537658118, "grad_norm": 0.5544506907463074, "learning_rate": 0.0001962643055045844, "loss": 0.9322, "step": 1884 }, { "epoch": 0.43750725310432864, "grad_norm": 0.49590614438056946, "learning_rate": 0.00019626035526815912, "loss": 0.9737, "step": 1885 }, { "epoch": 0.43773935244284556, "grad_norm": 0.5184259414672852, "learning_rate": 0.0001962564029840835, "loss": 0.9169, "step": 1886 }, { "epoch": 0.43797145178136243, "grad_norm": 0.5171828866004944, "learning_rate": 0.00019625244865244156, "loss": 0.8724, "step": 1887 }, { "epoch": 0.4382035511198793, "grad_norm": 0.606625497341156, "learning_rate": 0.0001962484922733174, "loss": 0.8666, "step": 1888 }, { "epoch": 0.4384356504583962, "grad_norm": 0.5377411842346191, "learning_rate": 0.0001962445338467952, "loss": 0.9142, "step": 1889 }, { "epoch": 0.4386677497969131, "grad_norm": 0.5942894220352173, "learning_rate": 0.00019624057337295922, "loss": 0.957, "step": 1890 }, { "epoch": 0.43889984913542995, "grad_norm": 0.5858636498451233, "learning_rate": 0.00019623661085189364, "loss": 0.9022, "step": 1891 }, { "epoch": 0.4391319484739469, "grad_norm": 0.5353084206581116, "learning_rate": 0.00019623264628368275, "loss": 0.8723, "step": 1892 }, { "epoch": 0.43936404781246374, "grad_norm": 0.5895339846611023, "learning_rate": 0.0001962286796684109, "loss": 0.9509, "step": 1893 }, { "epoch": 0.4395961471509806, "grad_norm": 0.5124474763870239, "learning_rate": 0.0001962247110061625, "loss": 0.9523, "step": 1894 }, { "epoch": 0.43982824648949753, "grad_norm": 0.53212571144104, "learning_rate": 0.00019622074029702194, "loss": 
0.8931, "step": 1895 }, { "epoch": 0.4400603458280144, "grad_norm": 0.4760664999485016, "learning_rate": 0.00019621676754107367, "loss": 0.9609, "step": 1896 }, { "epoch": 0.44029244516653127, "grad_norm": 0.4855426549911499, "learning_rate": 0.0001962127927384022, "loss": 0.9561, "step": 1897 }, { "epoch": 0.4405245445050482, "grad_norm": 0.6112794876098633, "learning_rate": 0.00019620881588909212, "loss": 0.9166, "step": 1898 }, { "epoch": 0.44075664384356505, "grad_norm": 0.5399686098098755, "learning_rate": 0.00019620483699322802, "loss": 0.8998, "step": 1899 }, { "epoch": 0.4409887431820819, "grad_norm": 0.5019717216491699, "learning_rate": 0.00019620085605089448, "loss": 0.8652, "step": 1900 }, { "epoch": 0.44122084252059884, "grad_norm": 0.5761865377426147, "learning_rate": 0.00019619687306217625, "loss": 0.937, "step": 1901 }, { "epoch": 0.4414529418591157, "grad_norm": 0.4702695310115814, "learning_rate": 0.000196192888027158, "loss": 0.9062, "step": 1902 }, { "epoch": 0.4416850411976326, "grad_norm": 0.5716812610626221, "learning_rate": 0.00019618890094592458, "loss": 0.927, "step": 1903 }, { "epoch": 0.44191714053614944, "grad_norm": 0.5039640665054321, "learning_rate": 0.00019618491181856072, "loss": 0.8862, "step": 1904 }, { "epoch": 0.44214923987466637, "grad_norm": 0.47040554881095886, "learning_rate": 0.00019618092064515135, "loss": 0.9657, "step": 1905 }, { "epoch": 0.44238133921318323, "grad_norm": 0.5035209059715271, "learning_rate": 0.0001961769274257813, "loss": 0.9354, "step": 1906 }, { "epoch": 0.4426134385517001, "grad_norm": 0.5171375274658203, "learning_rate": 0.00019617293216053555, "loss": 0.9348, "step": 1907 }, { "epoch": 0.442845537890217, "grad_norm": 0.5248234272003174, "learning_rate": 0.00019616893484949906, "loss": 0.938, "step": 1908 }, { "epoch": 0.4430776372287339, "grad_norm": 0.5029984712600708, "learning_rate": 0.0001961649354927569, "loss": 0.9318, "step": 1909 }, { "epoch": 0.44330973656725076, "grad_norm": 
0.5188071727752686, "learning_rate": 0.00019616093409039414, "loss": 0.8954, "step": 1910 }, { "epoch": 0.4435418359057677, "grad_norm": 0.5307495594024658, "learning_rate": 0.00019615693064249586, "loss": 0.904, "step": 1911 }, { "epoch": 0.44377393524428455, "grad_norm": 0.44408828020095825, "learning_rate": 0.00019615292514914726, "loss": 0.9082, "step": 1912 }, { "epoch": 0.4440060345828014, "grad_norm": 0.5384889245033264, "learning_rate": 0.0001961489176104335, "loss": 0.883, "step": 1913 }, { "epoch": 0.44423813392131833, "grad_norm": 0.5015782713890076, "learning_rate": 0.0001961449080264399, "loss": 0.8773, "step": 1914 }, { "epoch": 0.4444702332598352, "grad_norm": 0.5356475710868835, "learning_rate": 0.0001961408963972517, "loss": 0.9082, "step": 1915 }, { "epoch": 0.44470233259835207, "grad_norm": 0.5075067281723022, "learning_rate": 0.00019613688272295424, "loss": 0.9731, "step": 1916 }, { "epoch": 0.444934431936869, "grad_norm": 0.5486013293266296, "learning_rate": 0.0001961328670036329, "loss": 0.9195, "step": 1917 }, { "epoch": 0.44516653127538586, "grad_norm": 0.4604564607143402, "learning_rate": 0.00019612884923937314, "loss": 0.9279, "step": 1918 }, { "epoch": 0.4453986306139027, "grad_norm": 0.5348174571990967, "learning_rate": 0.00019612482943026036, "loss": 0.9773, "step": 1919 }, { "epoch": 0.44563072995241965, "grad_norm": 0.5519504547119141, "learning_rate": 0.0001961208075763801, "loss": 0.901, "step": 1920 }, { "epoch": 0.4458628292909365, "grad_norm": 0.47014275193214417, "learning_rate": 0.00019611678367781793, "loss": 0.9176, "step": 1921 }, { "epoch": 0.4460949286294534, "grad_norm": 0.6167324185371399, "learning_rate": 0.00019611275773465944, "loss": 0.9019, "step": 1922 }, { "epoch": 0.4463270279679703, "grad_norm": 0.5909091234207153, "learning_rate": 0.00019610872974699025, "loss": 0.9578, "step": 1923 }, { "epoch": 0.44655912730648717, "grad_norm": 0.5725836157798767, "learning_rate": 0.00019610469971489608, "loss": 0.9193, 
"step": 1924 }, { "epoch": 0.44679122664500404, "grad_norm": 0.5966814756393433, "learning_rate": 0.0001961006676384626, "loss": 0.9259, "step": 1925 }, { "epoch": 0.44702332598352096, "grad_norm": 0.5604855418205261, "learning_rate": 0.00019609663351777563, "loss": 0.9018, "step": 1926 }, { "epoch": 0.4472554253220378, "grad_norm": 0.48488208651542664, "learning_rate": 0.00019609259735292099, "loss": 0.9149, "step": 1927 }, { "epoch": 0.4474875246605547, "grad_norm": 0.6734607815742493, "learning_rate": 0.00019608855914398445, "loss": 0.9056, "step": 1928 }, { "epoch": 0.4477196239990716, "grad_norm": 0.5557631850242615, "learning_rate": 0.00019608451889105202, "loss": 0.9031, "step": 1929 }, { "epoch": 0.4479517233375885, "grad_norm": 0.5543137788772583, "learning_rate": 0.0001960804765942096, "loss": 0.8914, "step": 1930 }, { "epoch": 0.44818382267610535, "grad_norm": 0.516521155834198, "learning_rate": 0.00019607643225354315, "loss": 0.9206, "step": 1931 }, { "epoch": 0.44841592201462227, "grad_norm": 0.5570945143699646, "learning_rate": 0.00019607238586913877, "loss": 0.9182, "step": 1932 }, { "epoch": 0.44864802135313914, "grad_norm": 0.4790147542953491, "learning_rate": 0.00019606833744108246, "loss": 0.9263, "step": 1933 }, { "epoch": 0.448880120691656, "grad_norm": 0.5812014937400818, "learning_rate": 0.00019606428696946035, "loss": 0.9442, "step": 1934 }, { "epoch": 0.4491122200301729, "grad_norm": 0.5393258929252625, "learning_rate": 0.00019606023445435867, "loss": 0.9537, "step": 1935 }, { "epoch": 0.4493443193686898, "grad_norm": 0.5810916423797607, "learning_rate": 0.00019605617989586357, "loss": 0.9026, "step": 1936 }, { "epoch": 0.44957641870720666, "grad_norm": 0.5529928803443909, "learning_rate": 0.00019605212329406127, "loss": 0.9489, "step": 1937 }, { "epoch": 0.4498085180457236, "grad_norm": 0.5258718132972717, "learning_rate": 0.00019604806464903812, "loss": 0.913, "step": 1938 }, { "epoch": 0.45004061738424045, "grad_norm": 
0.6002803444862366, "learning_rate": 0.00019604400396088046, "loss": 0.9525, "step": 1939 }, { "epoch": 0.4502727167227573, "grad_norm": 0.5405875444412231, "learning_rate": 0.0001960399412296746, "loss": 0.9324, "step": 1940 }, { "epoch": 0.45050481606127424, "grad_norm": 0.5560583472251892, "learning_rate": 0.00019603587645550702, "loss": 0.9332, "step": 1941 }, { "epoch": 0.4507369153997911, "grad_norm": 0.5281173586845398, "learning_rate": 0.00019603180963846418, "loss": 0.8362, "step": 1942 }, { "epoch": 0.45096901473830797, "grad_norm": 0.5471031069755554, "learning_rate": 0.00019602774077863257, "loss": 0.9324, "step": 1943 }, { "epoch": 0.4512011140768249, "grad_norm": 0.5409397482872009, "learning_rate": 0.00019602366987609874, "loss": 0.8657, "step": 1944 }, { "epoch": 0.45143321341534176, "grad_norm": 0.48840057849884033, "learning_rate": 0.00019601959693094933, "loss": 0.9205, "step": 1945 }, { "epoch": 0.4516653127538586, "grad_norm": 0.5752264857292175, "learning_rate": 0.00019601552194327092, "loss": 0.925, "step": 1946 }, { "epoch": 0.45189741209237555, "grad_norm": 0.4900873005390167, "learning_rate": 0.00019601144491315024, "loss": 0.935, "step": 1947 }, { "epoch": 0.4521295114308924, "grad_norm": 0.4785551130771637, "learning_rate": 0.000196007365840674, "loss": 0.8791, "step": 1948 }, { "epoch": 0.4523616107694093, "grad_norm": 0.4918622672557831, "learning_rate": 0.00019600328472592894, "loss": 0.9111, "step": 1949 }, { "epoch": 0.4525937101079262, "grad_norm": 0.5546112656593323, "learning_rate": 0.00019599920156900195, "loss": 0.9423, "step": 1950 }, { "epoch": 0.4528258094464431, "grad_norm": 0.4841361343860626, "learning_rate": 0.0001959951163699798, "loss": 0.9006, "step": 1951 }, { "epoch": 0.45305790878495994, "grad_norm": 0.5398581027984619, "learning_rate": 0.00019599102912894946, "loss": 0.8931, "step": 1952 }, { "epoch": 0.45329000812347686, "grad_norm": 0.43122559785842896, "learning_rate": 0.00019598693984599784, "loss": 0.8824, 
"step": 1953 }, { "epoch": 0.45352210746199373, "grad_norm": 0.535581111907959, "learning_rate": 0.00019598284852121188, "loss": 0.9454, "step": 1954 }, { "epoch": 0.4537542068005106, "grad_norm": 0.4269912540912628, "learning_rate": 0.00019597875515467872, "loss": 0.839, "step": 1955 }, { "epoch": 0.4539863061390275, "grad_norm": 0.49303337931632996, "learning_rate": 0.00019597465974648535, "loss": 0.9439, "step": 1956 }, { "epoch": 0.4542184054775444, "grad_norm": 0.5840035080909729, "learning_rate": 0.0001959705622967189, "loss": 0.9376, "step": 1957 }, { "epoch": 0.45445050481606125, "grad_norm": 0.492584228515625, "learning_rate": 0.00019596646280546659, "loss": 0.9261, "step": 1958 }, { "epoch": 0.4546826041545782, "grad_norm": 0.5107449293136597, "learning_rate": 0.00019596236127281553, "loss": 0.9026, "step": 1959 }, { "epoch": 0.45491470349309504, "grad_norm": 0.5504915714263916, "learning_rate": 0.00019595825769885304, "loss": 0.9019, "step": 1960 }, { "epoch": 0.4551468028316119, "grad_norm": 0.5677697062492371, "learning_rate": 0.00019595415208366638, "loss": 0.9779, "step": 1961 }, { "epoch": 0.45537890217012883, "grad_norm": 0.5682851672172546, "learning_rate": 0.00019595004442734287, "loss": 0.8844, "step": 1962 }, { "epoch": 0.4556110015086457, "grad_norm": 0.5634030699729919, "learning_rate": 0.00019594593472996996, "loss": 0.8773, "step": 1963 }, { "epoch": 0.45584310084716256, "grad_norm": 0.5925066471099854, "learning_rate": 0.000195941822991635, "loss": 0.925, "step": 1964 }, { "epoch": 0.4560752001856795, "grad_norm": 0.5782003402709961, "learning_rate": 0.00019593770921242545, "loss": 0.8766, "step": 1965 }, { "epoch": 0.45630729952419635, "grad_norm": 0.4970909059047699, "learning_rate": 0.00019593359339242886, "loss": 0.8769, "step": 1966 }, { "epoch": 0.4565393988627132, "grad_norm": 0.6017650365829468, "learning_rate": 0.00019592947553173277, "loss": 0.9009, "step": 1967 }, { "epoch": 0.45677149820123014, "grad_norm": 0.510680079460144, 
"learning_rate": 0.00019592535563042477, "loss": 0.9364, "step": 1968 }, { "epoch": 0.457003597539747, "grad_norm": 0.5048564672470093, "learning_rate": 0.0001959212336885925, "loss": 0.9166, "step": 1969 }, { "epoch": 0.4572356968782639, "grad_norm": 0.613900363445282, "learning_rate": 0.00019591710970632366, "loss": 0.8573, "step": 1970 }, { "epoch": 0.4574677962167808, "grad_norm": 0.48438921570777893, "learning_rate": 0.00019591298368370593, "loss": 0.865, "step": 1971 }, { "epoch": 0.45769989555529766, "grad_norm": 0.505158007144928, "learning_rate": 0.0001959088556208271, "loss": 0.9022, "step": 1972 }, { "epoch": 0.45793199489381453, "grad_norm": 0.5448834300041199, "learning_rate": 0.000195904725517775, "loss": 0.9131, "step": 1973 }, { "epoch": 0.45816409423233145, "grad_norm": 0.5185540318489075, "learning_rate": 0.00019590059337463748, "loss": 0.9031, "step": 1974 }, { "epoch": 0.4583961935708483, "grad_norm": 0.5443059802055359, "learning_rate": 0.00019589645919150246, "loss": 0.9227, "step": 1975 }, { "epoch": 0.4586282929093652, "grad_norm": 0.49211442470550537, "learning_rate": 0.0001958923229684578, "loss": 0.8691, "step": 1976 }, { "epoch": 0.4588603922478821, "grad_norm": 0.4826250374317169, "learning_rate": 0.00019588818470559158, "loss": 0.8882, "step": 1977 }, { "epoch": 0.459092491586399, "grad_norm": 0.579042911529541, "learning_rate": 0.00019588404440299176, "loss": 0.8995, "step": 1978 }, { "epoch": 0.45932459092491584, "grad_norm": 0.4910588264465332, "learning_rate": 0.00019587990206074648, "loss": 0.9171, "step": 1979 }, { "epoch": 0.45955669026343277, "grad_norm": 0.5784628391265869, "learning_rate": 0.0001958757576789438, "loss": 0.9168, "step": 1980 }, { "epoch": 0.45978878960194963, "grad_norm": 0.551332950592041, "learning_rate": 0.00019587161125767192, "loss": 0.875, "step": 1981 }, { "epoch": 0.4600208889404665, "grad_norm": 0.5933083891868591, "learning_rate": 0.00019586746279701898, "loss": 0.8441, "step": 1982 }, { "epoch": 
0.4602529882789834, "grad_norm": 0.4975191354751587, "learning_rate": 0.00019586331229707327, "loss": 0.8787, "step": 1983 }, { "epoch": 0.4604850876175003, "grad_norm": 0.5638577342033386, "learning_rate": 0.0001958591597579231, "loss": 0.9068, "step": 1984 }, { "epoch": 0.46071718695601716, "grad_norm": 0.5375779867172241, "learning_rate": 0.00019585500517965677, "loss": 0.8826, "step": 1985 }, { "epoch": 0.4609492862945341, "grad_norm": 0.5152628421783447, "learning_rate": 0.00019585084856236265, "loss": 0.9229, "step": 1986 }, { "epoch": 0.46118138563305094, "grad_norm": 0.5216783285140991, "learning_rate": 0.00019584668990612917, "loss": 0.9127, "step": 1987 }, { "epoch": 0.4614134849715678, "grad_norm": 0.5304017663002014, "learning_rate": 0.00019584252921104483, "loss": 0.9191, "step": 1988 }, { "epoch": 0.46164558431008473, "grad_norm": 0.48700809478759766, "learning_rate": 0.00019583836647719805, "loss": 0.8495, "step": 1989 }, { "epoch": 0.4618776836486016, "grad_norm": 0.5635843276977539, "learning_rate": 0.0001958342017046775, "loss": 0.9403, "step": 1990 }, { "epoch": 0.46210978298711847, "grad_norm": 0.5841273665428162, "learning_rate": 0.00019583003489357168, "loss": 0.937, "step": 1991 }, { "epoch": 0.4623418823256354, "grad_norm": 0.5407207012176514, "learning_rate": 0.00019582586604396923, "loss": 0.8495, "step": 1992 }, { "epoch": 0.46257398166415226, "grad_norm": 0.49960610270500183, "learning_rate": 0.0001958216951559589, "loss": 0.844, "step": 1993 }, { "epoch": 0.4628060810026691, "grad_norm": 0.5769187211990356, "learning_rate": 0.00019581752222962932, "loss": 0.9174, "step": 1994 }, { "epoch": 0.46303818034118605, "grad_norm": 0.4979911744594574, "learning_rate": 0.00019581334726506932, "loss": 0.9478, "step": 1995 }, { "epoch": 0.4632702796797029, "grad_norm": 0.5241754055023193, "learning_rate": 0.0001958091702623677, "loss": 0.932, "step": 1996 }, { "epoch": 0.4635023790182198, "grad_norm": 0.5005834102630615, "learning_rate": 
0.00019580499122161328, "loss": 0.895, "step": 1997 }, { "epoch": 0.4637344783567367, "grad_norm": 0.4948982298374176, "learning_rate": 0.00019580081014289503, "loss": 0.9075, "step": 1998 }, { "epoch": 0.46396657769525357, "grad_norm": 0.5925588011741638, "learning_rate": 0.00019579662702630176, "loss": 0.9537, "step": 1999 }, { "epoch": 0.46419867703377043, "grad_norm": 0.5166564583778381, "learning_rate": 0.00019579244187192262, "loss": 0.8708, "step": 2000 }, { "epoch": 0.46443077637228736, "grad_norm": 0.6230931282043457, "learning_rate": 0.00019578825467984648, "loss": 0.8818, "step": 2001 }, { "epoch": 0.4646628757108042, "grad_norm": 0.5697864294052124, "learning_rate": 0.00019578406545016255, "loss": 0.9138, "step": 2002 }, { "epoch": 0.4648949750493211, "grad_norm": 0.535642683506012, "learning_rate": 0.0001957798741829598, "loss": 0.9296, "step": 2003 }, { "epoch": 0.465127074387838, "grad_norm": 0.5585079193115234, "learning_rate": 0.00019577568087832754, "loss": 0.8587, "step": 2004 }, { "epoch": 0.4653591737263549, "grad_norm": 0.5018905997276306, "learning_rate": 0.00019577148553635488, "loss": 0.9479, "step": 2005 }, { "epoch": 0.46559127306487175, "grad_norm": 0.5511053204536438, "learning_rate": 0.00019576728815713103, "loss": 0.9844, "step": 2006 }, { "epoch": 0.46582337240338867, "grad_norm": 0.5053110122680664, "learning_rate": 0.00019576308874074533, "loss": 0.9168, "step": 2007 }, { "epoch": 0.46605547174190554, "grad_norm": 0.5145901441574097, "learning_rate": 0.00019575888728728713, "loss": 0.9669, "step": 2008 }, { "epoch": 0.4662875710804224, "grad_norm": 0.5164647102355957, "learning_rate": 0.00019575468379684575, "loss": 0.8991, "step": 2009 }, { "epoch": 0.4665196704189393, "grad_norm": 0.4872562885284424, "learning_rate": 0.00019575047826951063, "loss": 0.9352, "step": 2010 }, { "epoch": 0.4667517697574562, "grad_norm": 0.5511845946311951, "learning_rate": 0.00019574627070537127, "loss": 0.91, "step": 2011 }, { "epoch": 
0.46698386909597306, "grad_norm": 0.503393292427063, "learning_rate": 0.0001957420611045171, "loss": 0.8772, "step": 2012 }, { "epoch": 0.46721596843449, "grad_norm": 0.49584048986434937, "learning_rate": 0.00019573784946703767, "loss": 0.9219, "step": 2013 }, { "epoch": 0.46744806777300685, "grad_norm": 0.5501582622528076, "learning_rate": 0.00019573363579302266, "loss": 0.8951, "step": 2014 }, { "epoch": 0.4676801671115237, "grad_norm": 0.5959308743476868, "learning_rate": 0.0001957294200825616, "loss": 0.8897, "step": 2015 }, { "epoch": 0.46791226645004064, "grad_norm": 0.5239407420158386, "learning_rate": 0.00019572520233574424, "loss": 0.8676, "step": 2016 }, { "epoch": 0.4681443657885575, "grad_norm": 0.5157362222671509, "learning_rate": 0.00019572098255266028, "loss": 0.9431, "step": 2017 }, { "epoch": 0.46837646512707437, "grad_norm": 0.48770245909690857, "learning_rate": 0.00019571676073339945, "loss": 0.9211, "step": 2018 }, { "epoch": 0.4686085644655913, "grad_norm": 0.5136491656303406, "learning_rate": 0.0001957125368780516, "loss": 0.855, "step": 2019 }, { "epoch": 0.46884066380410816, "grad_norm": 0.5674514174461365, "learning_rate": 0.00019570831098670652, "loss": 0.9041, "step": 2020 }, { "epoch": 0.469072763142625, "grad_norm": 0.585849940776825, "learning_rate": 0.00019570408305945417, "loss": 0.8812, "step": 2021 }, { "epoch": 0.46930486248114195, "grad_norm": 0.5049577355384827, "learning_rate": 0.00019569985309638447, "loss": 0.8909, "step": 2022 }, { "epoch": 0.4695369618196588, "grad_norm": 0.4999822974205017, "learning_rate": 0.0001956956210975874, "loss": 0.9247, "step": 2023 }, { "epoch": 0.4697690611581757, "grad_norm": 0.5696849226951599, "learning_rate": 0.00019569138706315297, "loss": 0.8681, "step": 2024 }, { "epoch": 0.4700011604966926, "grad_norm": 0.5075076222419739, "learning_rate": 0.0001956871509931712, "loss": 0.936, "step": 2025 }, { "epoch": 0.47023325983520947, "grad_norm": 0.6332967877388, "learning_rate": 
0.00019568291288773231, "loss": 0.8966, "step": 2026 }, { "epoch": 0.47046535917372634, "grad_norm": 0.4838268458843231, "learning_rate": 0.00019567867274692638, "loss": 0.9278, "step": 2027 }, { "epoch": 0.47069745851224326, "grad_norm": 0.5323596000671387, "learning_rate": 0.0001956744305708436, "loss": 0.9429, "step": 2028 }, { "epoch": 0.4709295578507601, "grad_norm": 0.5936027765274048, "learning_rate": 0.00019567018635957423, "loss": 0.9357, "step": 2029 }, { "epoch": 0.471161657189277, "grad_norm": 0.5838035941123962, "learning_rate": 0.00019566594011320856, "loss": 0.8695, "step": 2030 }, { "epoch": 0.4713937565277939, "grad_norm": 0.5815604329109192, "learning_rate": 0.00019566169183183688, "loss": 0.8629, "step": 2031 }, { "epoch": 0.4716258558663108, "grad_norm": 0.6865569353103638, "learning_rate": 0.0001956574415155496, "loss": 0.8656, "step": 2032 }, { "epoch": 0.47185795520482765, "grad_norm": 0.5690908432006836, "learning_rate": 0.00019565318916443713, "loss": 0.9459, "step": 2033 }, { "epoch": 0.4720900545433446, "grad_norm": 0.6081282496452332, "learning_rate": 0.0001956489347785899, "loss": 0.9046, "step": 2034 }, { "epoch": 0.47232215388186144, "grad_norm": 0.5417218804359436, "learning_rate": 0.00019564467835809848, "loss": 0.9164, "step": 2035 }, { "epoch": 0.4725542532203783, "grad_norm": 0.5728604197502136, "learning_rate": 0.00019564041990305332, "loss": 0.8683, "step": 2036 }, { "epoch": 0.47278635255889523, "grad_norm": 0.5314555764198303, "learning_rate": 0.000195636159413545, "loss": 0.9219, "step": 2037 }, { "epoch": 0.4730184518974121, "grad_norm": 0.5500565767288208, "learning_rate": 0.00019563189688966425, "loss": 0.8878, "step": 2038 }, { "epoch": 0.47325055123592896, "grad_norm": 0.5905523300170898, "learning_rate": 0.00019562763233150164, "loss": 0.9061, "step": 2039 }, { "epoch": 0.4734826505744459, "grad_norm": 0.5262758731842041, "learning_rate": 0.00019562336573914797, "loss": 0.9134, "step": 2040 }, { "epoch": 
0.47371474991296275, "grad_norm": 0.5873813629150391, "learning_rate": 0.00019561909711269398, "loss": 0.8482, "step": 2041 }, { "epoch": 0.4739468492514796, "grad_norm": 0.5743016004562378, "learning_rate": 0.0001956148264522304, "loss": 0.9067, "step": 2042 }, { "epoch": 0.47417894858999654, "grad_norm": 0.5349574685096741, "learning_rate": 0.00019561055375784815, "loss": 0.9112, "step": 2043 }, { "epoch": 0.4744110479285134, "grad_norm": 0.5605901479721069, "learning_rate": 0.00019560627902963807, "loss": 0.8609, "step": 2044 }, { "epoch": 0.4746431472670303, "grad_norm": 0.5553717017173767, "learning_rate": 0.00019560200226769115, "loss": 0.9217, "step": 2045 }, { "epoch": 0.4748752466055472, "grad_norm": 0.5265030264854431, "learning_rate": 0.0001955977234720983, "loss": 0.8478, "step": 2046 }, { "epoch": 0.47510734594406406, "grad_norm": 0.5454626083374023, "learning_rate": 0.0001955934426429506, "loss": 0.9498, "step": 2047 }, { "epoch": 0.47533944528258093, "grad_norm": 0.5870716571807861, "learning_rate": 0.00019558915978033908, "loss": 0.9321, "step": 2048 }, { "epoch": 0.47557154462109785, "grad_norm": 0.5218504071235657, "learning_rate": 0.00019558487488435485, "loss": 0.9108, "step": 2049 }, { "epoch": 0.4758036439596147, "grad_norm": 0.5763911008834839, "learning_rate": 0.00019558058795508902, "loss": 0.8718, "step": 2050 }, { "epoch": 0.4760357432981316, "grad_norm": 0.575238049030304, "learning_rate": 0.00019557629899263286, "loss": 0.9376, "step": 2051 }, { "epoch": 0.4762678426366485, "grad_norm": 0.4731752276420593, "learning_rate": 0.00019557200799707756, "loss": 0.8675, "step": 2052 }, { "epoch": 0.4764999419751654, "grad_norm": 0.6033223271369934, "learning_rate": 0.00019556771496851438, "loss": 0.9221, "step": 2053 }, { "epoch": 0.47673204131368224, "grad_norm": 0.5698314309120178, "learning_rate": 0.00019556341990703468, "loss": 0.9202, "step": 2054 }, { "epoch": 0.47696414065219916, "grad_norm": 0.5314188003540039, "learning_rate": 
0.0001955591228127298, "loss": 0.9204, "step": 2055 }, { "epoch": 0.47719623999071603, "grad_norm": 0.6173273921012878, "learning_rate": 0.00019555482368569115, "loss": 0.8823, "step": 2056 }, { "epoch": 0.4774283393292329, "grad_norm": 0.6539553999900818, "learning_rate": 0.00019555052252601018, "loss": 0.8836, "step": 2057 }, { "epoch": 0.4776604386677498, "grad_norm": 0.55010586977005, "learning_rate": 0.00019554621933377844, "loss": 0.9186, "step": 2058 }, { "epoch": 0.4778925380062667, "grad_norm": 0.5486656427383423, "learning_rate": 0.00019554191410908736, "loss": 0.903, "step": 2059 }, { "epoch": 0.47812463734478355, "grad_norm": 0.4603251516819, "learning_rate": 0.0001955376068520286, "loss": 0.9329, "step": 2060 }, { "epoch": 0.4783567366833005, "grad_norm": 0.5479786396026611, "learning_rate": 0.00019553329756269376, "loss": 0.8733, "step": 2061 }, { "epoch": 0.47858883602181734, "grad_norm": 0.46165579557418823, "learning_rate": 0.00019552898624117452, "loss": 0.8813, "step": 2062 }, { "epoch": 0.4788209353603342, "grad_norm": 0.5669509172439575, "learning_rate": 0.00019552467288756254, "loss": 0.9151, "step": 2063 }, { "epoch": 0.47905303469885113, "grad_norm": 0.5602128505706787, "learning_rate": 0.00019552035750194965, "loss": 0.8773, "step": 2064 }, { "epoch": 0.479285134037368, "grad_norm": 0.514526903629303, "learning_rate": 0.00019551604008442763, "loss": 0.8655, "step": 2065 }, { "epoch": 0.47951723337588487, "grad_norm": 0.5441296100616455, "learning_rate": 0.00019551172063508827, "loss": 0.9041, "step": 2066 }, { "epoch": 0.4797493327144018, "grad_norm": 0.4919191598892212, "learning_rate": 0.00019550739915402348, "loss": 0.8699, "step": 2067 }, { "epoch": 0.47998143205291866, "grad_norm": 0.5988068580627441, "learning_rate": 0.00019550307564132518, "loss": 0.8372, "step": 2068 }, { "epoch": 0.4802135313914355, "grad_norm": 0.6158168315887451, "learning_rate": 0.0001954987500970854, "loss": 0.8716, "step": 2069 }, { "epoch": 
0.48044563072995244, "grad_norm": 0.5587723255157471, "learning_rate": 0.00019549442252139607, "loss": 0.8979, "step": 2070 }, { "epoch": 0.4806777300684693, "grad_norm": 0.7656692266464233, "learning_rate": 0.00019549009291434927, "loss": 0.892, "step": 2071 }, { "epoch": 0.4809098294069862, "grad_norm": 0.6068994402885437, "learning_rate": 0.00019548576127603713, "loss": 0.9336, "step": 2072 }, { "epoch": 0.4811419287455031, "grad_norm": 0.5412265062332153, "learning_rate": 0.00019548142760655174, "loss": 0.9226, "step": 2073 }, { "epoch": 0.48137402808401997, "grad_norm": 0.6627940535545349, "learning_rate": 0.00019547709190598534, "loss": 0.8766, "step": 2074 }, { "epoch": 0.48160612742253683, "grad_norm": 0.5101146101951599, "learning_rate": 0.00019547275417443016, "loss": 0.9044, "step": 2075 }, { "epoch": 0.48183822676105376, "grad_norm": 0.5092777609825134, "learning_rate": 0.0001954684144119784, "loss": 0.8573, "step": 2076 }, { "epoch": 0.4820703260995706, "grad_norm": 0.4972693920135498, "learning_rate": 0.00019546407261872246, "loss": 0.9626, "step": 2077 }, { "epoch": 0.4823024254380875, "grad_norm": 0.6001821160316467, "learning_rate": 0.00019545972879475465, "loss": 0.861, "step": 2078 }, { "epoch": 0.4825345247766044, "grad_norm": 0.6060700416564941, "learning_rate": 0.00019545538294016737, "loss": 0.9631, "step": 2079 }, { "epoch": 0.4827666241151213, "grad_norm": 0.5229616165161133, "learning_rate": 0.00019545103505505312, "loss": 0.8578, "step": 2080 }, { "epoch": 0.48299872345363815, "grad_norm": 0.5887095332145691, "learning_rate": 0.00019544668513950432, "loss": 0.9088, "step": 2081 }, { "epoch": 0.48323082279215507, "grad_norm": 0.5677828788757324, "learning_rate": 0.00019544233319361353, "loss": 0.8833, "step": 2082 }, { "epoch": 0.48346292213067193, "grad_norm": 0.5883793234825134, "learning_rate": 0.00019543797921747332, "loss": 0.9601, "step": 2083 }, { "epoch": 0.4836950214691888, "grad_norm": 0.5135243535041809, "learning_rate": 
0.00019543362321117633, "loss": 0.8631, "step": 2084 }, { "epoch": 0.4839271208077057, "grad_norm": 0.4875528812408447, "learning_rate": 0.00019542926517481519, "loss": 0.9674, "step": 2085 }, { "epoch": 0.4841592201462226, "grad_norm": 0.519137978553772, "learning_rate": 0.0001954249051084826, "loss": 0.8999, "step": 2086 }, { "epoch": 0.48439131948473946, "grad_norm": 0.4562690556049347, "learning_rate": 0.00019542054301227135, "loss": 0.8704, "step": 2087 }, { "epoch": 0.4846234188232564, "grad_norm": 0.5049262642860413, "learning_rate": 0.00019541617888627422, "loss": 0.8952, "step": 2088 }, { "epoch": 0.48485551816177325, "grad_norm": 0.4953257441520691, "learning_rate": 0.000195411812730584, "loss": 0.9291, "step": 2089 }, { "epoch": 0.4850876175002901, "grad_norm": 0.5123974084854126, "learning_rate": 0.0001954074445452936, "loss": 0.8563, "step": 2090 }, { "epoch": 0.48531971683880704, "grad_norm": 0.5635369420051575, "learning_rate": 0.00019540307433049594, "loss": 0.8914, "step": 2091 }, { "epoch": 0.4855518161773239, "grad_norm": 0.5234144926071167, "learning_rate": 0.00019539870208628397, "loss": 0.9102, "step": 2092 }, { "epoch": 0.48578391551584077, "grad_norm": 0.6798670887947083, "learning_rate": 0.0001953943278127507, "loss": 0.9158, "step": 2093 }, { "epoch": 0.4860160148543577, "grad_norm": 0.49345844984054565, "learning_rate": 0.0001953899515099892, "loss": 0.9241, "step": 2094 }, { "epoch": 0.48624811419287456, "grad_norm": 0.4790210723876953, "learning_rate": 0.00019538557317809251, "loss": 0.9068, "step": 2095 }, { "epoch": 0.4864802135313914, "grad_norm": 0.5722448229789734, "learning_rate": 0.00019538119281715382, "loss": 0.9091, "step": 2096 }, { "epoch": 0.48671231286990835, "grad_norm": 0.4839096963405609, "learning_rate": 0.00019537681042726632, "loss": 0.9268, "step": 2097 }, { "epoch": 0.4869444122084252, "grad_norm": 0.5238417983055115, "learning_rate": 0.0001953724260085232, "loss": 0.8573, "step": 2098 }, { "epoch": 
0.4871765115469421, "grad_norm": 0.5275476574897766, "learning_rate": 0.0001953680395610177, "loss": 0.8945, "step": 2099 }, { "epoch": 0.487408610885459, "grad_norm": 0.6298844218254089, "learning_rate": 0.0001953636510848432, "loss": 0.8947, "step": 2100 }, { "epoch": 0.48764071022397587, "grad_norm": 0.6051921844482422, "learning_rate": 0.00019535926058009297, "loss": 0.9175, "step": 2101 }, { "epoch": 0.48787280956249274, "grad_norm": 0.48621538281440735, "learning_rate": 0.00019535486804686048, "loss": 0.8961, "step": 2102 }, { "epoch": 0.48810490890100966, "grad_norm": 0.5916410684585571, "learning_rate": 0.00019535047348523906, "loss": 0.8883, "step": 2103 }, { "epoch": 0.4883370082395265, "grad_norm": 0.5387211441993713, "learning_rate": 0.00019534607689532233, "loss": 0.8983, "step": 2104 }, { "epoch": 0.4885691075780434, "grad_norm": 0.5253148078918457, "learning_rate": 0.00019534167827720374, "loss": 0.888, "step": 2105 }, { "epoch": 0.48880120691656026, "grad_norm": 0.6399052143096924, "learning_rate": 0.00019533727763097685, "loss": 0.8145, "step": 2106 }, { "epoch": 0.4890333062550772, "grad_norm": 0.45159241557121277, "learning_rate": 0.0001953328749567353, "loss": 0.8572, "step": 2107 }, { "epoch": 0.48926540559359405, "grad_norm": 0.6449863910675049, "learning_rate": 0.00019532847025457273, "loss": 0.8636, "step": 2108 }, { "epoch": 0.4894975049321109, "grad_norm": 0.6047663688659668, "learning_rate": 0.0001953240635245828, "loss": 0.8818, "step": 2109 }, { "epoch": 0.48972960427062784, "grad_norm": 0.5458334684371948, "learning_rate": 0.00019531965476685933, "loss": 0.9119, "step": 2110 }, { "epoch": 0.4899617036091447, "grad_norm": 0.5901333093643188, "learning_rate": 0.00019531524398149605, "loss": 0.98, "step": 2111 }, { "epoch": 0.49019380294766157, "grad_norm": 0.5657872557640076, "learning_rate": 0.00019531083116858678, "loss": 0.9264, "step": 2112 }, { "epoch": 0.4904259022861785, "grad_norm": 0.5771969556808472, "learning_rate": 
0.00019530641632822544, "loss": 0.8926, "step": 2113 }, { "epoch": 0.49065800162469536, "grad_norm": 0.6205262541770935, "learning_rate": 0.00019530199946050586, "loss": 0.8912, "step": 2114 }, { "epoch": 0.49089010096321223, "grad_norm": 0.6388532519340515, "learning_rate": 0.00019529758056552207, "loss": 0.9195, "step": 2115 }, { "epoch": 0.49112220030172915, "grad_norm": 0.5143721699714661, "learning_rate": 0.00019529315964336805, "loss": 0.9182, "step": 2116 }, { "epoch": 0.491354299640246, "grad_norm": 0.6212396025657654, "learning_rate": 0.00019528873669413783, "loss": 0.9054, "step": 2117 }, { "epoch": 0.4915863989787629, "grad_norm": 0.514752209186554, "learning_rate": 0.00019528431171792547, "loss": 0.9016, "step": 2118 }, { "epoch": 0.4918184983172798, "grad_norm": 0.6209048628807068, "learning_rate": 0.00019527988471482513, "loss": 0.8728, "step": 2119 }, { "epoch": 0.4920505976557967, "grad_norm": 0.6546598672866821, "learning_rate": 0.00019527545568493102, "loss": 0.9736, "step": 2120 }, { "epoch": 0.49228269699431354, "grad_norm": 0.5192540884017944, "learning_rate": 0.00019527102462833727, "loss": 0.8831, "step": 2121 }, { "epoch": 0.49251479633283046, "grad_norm": 0.6247277855873108, "learning_rate": 0.0001952665915451382, "loss": 0.8559, "step": 2122 }, { "epoch": 0.49274689567134733, "grad_norm": 0.7023539543151855, "learning_rate": 0.0001952621564354281, "loss": 0.8799, "step": 2123 }, { "epoch": 0.4929789950098642, "grad_norm": 0.5213330388069153, "learning_rate": 0.0001952577192993013, "loss": 0.8717, "step": 2124 }, { "epoch": 0.4932110943483811, "grad_norm": 0.5959592461585999, "learning_rate": 0.00019525328013685216, "loss": 0.9408, "step": 2125 }, { "epoch": 0.493443193686898, "grad_norm": 0.7939774990081787, "learning_rate": 0.00019524883894817517, "loss": 0.8695, "step": 2126 }, { "epoch": 0.49367529302541485, "grad_norm": 0.5154587030410767, "learning_rate": 0.00019524439573336477, "loss": 0.8763, "step": 2127 }, { "epoch": 
0.4939073923639318, "grad_norm": 0.5708668231964111, "learning_rate": 0.00019523995049251547, "loss": 0.8994, "step": 2128 }, { "epoch": 0.49413949170244864, "grad_norm": 0.5843937397003174, "learning_rate": 0.00019523550322572185, "loss": 0.9653, "step": 2129 }, { "epoch": 0.4943715910409655, "grad_norm": 0.5205866098403931, "learning_rate": 0.0001952310539330785, "loss": 0.8975, "step": 2130 }, { "epoch": 0.49460369037948243, "grad_norm": 0.5288206338882446, "learning_rate": 0.00019522660261468003, "loss": 0.8734, "step": 2131 }, { "epoch": 0.4948357897179993, "grad_norm": 0.4943452775478363, "learning_rate": 0.00019522214927062122, "loss": 0.8476, "step": 2132 }, { "epoch": 0.49506788905651616, "grad_norm": 0.48217111825942993, "learning_rate": 0.00019521769390099672, "loss": 0.8976, "step": 2133 }, { "epoch": 0.4952999883950331, "grad_norm": 0.6509501934051514, "learning_rate": 0.00019521323650590133, "loss": 0.8565, "step": 2134 }, { "epoch": 0.49553208773354995, "grad_norm": 0.5099740624427795, "learning_rate": 0.00019520877708542988, "loss": 0.8849, "step": 2135 }, { "epoch": 0.4957641870720668, "grad_norm": 0.5067795515060425, "learning_rate": 0.0001952043156396772, "loss": 0.8789, "step": 2136 }, { "epoch": 0.49599628641058374, "grad_norm": 0.5766698718070984, "learning_rate": 0.00019519985216873823, "loss": 0.9142, "step": 2137 }, { "epoch": 0.4962283857491006, "grad_norm": 0.5290481448173523, "learning_rate": 0.00019519538667270788, "loss": 0.8402, "step": 2138 }, { "epoch": 0.4964604850876175, "grad_norm": 0.520176351070404, "learning_rate": 0.0001951909191516812, "loss": 0.8826, "step": 2139 }, { "epoch": 0.4966925844261344, "grad_norm": 0.5012844204902649, "learning_rate": 0.00019518644960575315, "loss": 0.844, "step": 2140 }, { "epoch": 0.49692468376465126, "grad_norm": 0.49937236309051514, "learning_rate": 0.00019518197803501886, "loss": 0.8803, "step": 2141 }, { "epoch": 0.49715678310316813, "grad_norm": 0.5741779208183289, "learning_rate": 
0.00019517750443957338, "loss": 0.9233, "step": 2142 }, { "epoch": 0.49738888244168505, "grad_norm": 0.45928579568862915, "learning_rate": 0.00019517302881951197, "loss": 0.8864, "step": 2143 }, { "epoch": 0.4976209817802019, "grad_norm": 0.43618515133857727, "learning_rate": 0.0001951685511749298, "loss": 0.8449, "step": 2144 }, { "epoch": 0.4978530811187188, "grad_norm": 0.42380961775779724, "learning_rate": 0.00019516407150592206, "loss": 0.9337, "step": 2145 }, { "epoch": 0.4980851804572357, "grad_norm": 0.49494054913520813, "learning_rate": 0.00019515958981258411, "loss": 0.8526, "step": 2146 }, { "epoch": 0.4983172797957526, "grad_norm": 0.4449799358844757, "learning_rate": 0.00019515510609501127, "loss": 0.8552, "step": 2147 }, { "epoch": 0.49854937913426944, "grad_norm": 0.4397004544734955, "learning_rate": 0.0001951506203532989, "loss": 0.8978, "step": 2148 }, { "epoch": 0.49878147847278637, "grad_norm": 0.5032230019569397, "learning_rate": 0.00019514613258754242, "loss": 0.8966, "step": 2149 }, { "epoch": 0.49901357781130323, "grad_norm": 0.5053930878639221, "learning_rate": 0.0001951416427978373, "loss": 0.8914, "step": 2150 }, { "epoch": 0.4992456771498201, "grad_norm": 0.4558164179325104, "learning_rate": 0.00019513715098427907, "loss": 0.8911, "step": 2151 }, { "epoch": 0.499477776488337, "grad_norm": 0.45704421401023865, "learning_rate": 0.00019513265714696324, "loss": 0.8367, "step": 2152 }, { "epoch": 0.4997098758268539, "grad_norm": 0.45319822430610657, "learning_rate": 0.00019512816128598544, "loss": 0.9252, "step": 2153 }, { "epoch": 0.49994197516537076, "grad_norm": 0.5545783042907715, "learning_rate": 0.0001951236634014413, "loss": 0.8972, "step": 2154 }, { "epoch": 0.5001740745038876, "grad_norm": 0.5528686046600342, "learning_rate": 0.00019511916349342645, "loss": 0.863, "step": 2155 }, { "epoch": 0.5004061738424046, "grad_norm": 0.47822800278663635, "learning_rate": 0.00019511466156203668, "loss": 0.9041, "step": 2156 }, { "epoch": 
0.5006382731809215, "grad_norm": 0.5282189249992371, "learning_rate": 0.00019511015760736772, "loss": 0.8692, "step": 2157 }, { "epoch": 0.5008703725194383, "grad_norm": 0.5183687210083008, "learning_rate": 0.00019510565162951537, "loss": 0.905, "step": 2158 }, { "epoch": 0.5011024718579552, "grad_norm": 0.5129354000091553, "learning_rate": 0.0001951011436285755, "loss": 0.9097, "step": 2159 }, { "epoch": 0.5013345711964721, "grad_norm": 0.5561512112617493, "learning_rate": 0.00019509663360464397, "loss": 0.8745, "step": 2160 }, { "epoch": 0.5015666705349889, "grad_norm": 0.47147613763809204, "learning_rate": 0.0001950921215578168, "loss": 0.8659, "step": 2161 }, { "epoch": 0.5017987698735059, "grad_norm": 0.549425482749939, "learning_rate": 0.00019508760748818992, "loss": 0.9256, "step": 2162 }, { "epoch": 0.5020308692120228, "grad_norm": 0.5543694496154785, "learning_rate": 0.0001950830913958593, "loss": 0.9208, "step": 2163 }, { "epoch": 0.5022629685505396, "grad_norm": 0.49664321541786194, "learning_rate": 0.00019507857328092108, "loss": 0.8623, "step": 2164 }, { "epoch": 0.5024950678890565, "grad_norm": 0.45974230766296387, "learning_rate": 0.00019507405314347134, "loss": 0.9484, "step": 2165 }, { "epoch": 0.5027271672275734, "grad_norm": 0.5120901465415955, "learning_rate": 0.00019506953098360623, "loss": 0.9121, "step": 2166 }, { "epoch": 0.5029592665660902, "grad_norm": 0.5062810778617859, "learning_rate": 0.00019506500680142196, "loss": 0.8861, "step": 2167 }, { "epoch": 0.5031913659046072, "grad_norm": 0.5190533995628357, "learning_rate": 0.0001950604805970148, "loss": 0.9379, "step": 2168 }, { "epoch": 0.5034234652431241, "grad_norm": 0.4918048679828644, "learning_rate": 0.00019505595237048092, "loss": 0.9177, "step": 2169 }, { "epoch": 0.503655564581641, "grad_norm": 0.592923641204834, "learning_rate": 0.00019505142212191675, "loss": 0.8778, "step": 2170 }, { "epoch": 0.5038876639201578, "grad_norm": 0.474789559841156, "learning_rate": 
0.00019504688985141867, "loss": 0.9106, "step": 2171 }, { "epoch": 0.5041197632586747, "grad_norm": 0.48594924807548523, "learning_rate": 0.00019504235555908297, "loss": 0.8982, "step": 2172 }, { "epoch": 0.5043518625971916, "grad_norm": 0.48497945070266724, "learning_rate": 0.00019503781924500623, "loss": 0.9153, "step": 2173 }, { "epoch": 0.5045839619357085, "grad_norm": 0.42631635069847107, "learning_rate": 0.00019503328090928488, "loss": 0.8852, "step": 2174 }, { "epoch": 0.5048160612742254, "grad_norm": 0.5551347732543945, "learning_rate": 0.00019502874055201547, "loss": 0.9265, "step": 2175 }, { "epoch": 0.5050481606127423, "grad_norm": 0.4849414527416229, "learning_rate": 0.00019502419817329458, "loss": 0.9074, "step": 2176 }, { "epoch": 0.5052802599512591, "grad_norm": 0.4529147744178772, "learning_rate": 0.00019501965377321885, "loss": 0.8208, "step": 2177 }, { "epoch": 0.505512359289776, "grad_norm": 0.5041713118553162, "learning_rate": 0.00019501510735188494, "loss": 0.8852, "step": 2178 }, { "epoch": 0.5057444586282929, "grad_norm": 0.4994879961013794, "learning_rate": 0.00019501055890938957, "loss": 0.9742, "step": 2179 }, { "epoch": 0.5059765579668098, "grad_norm": 0.490825891494751, "learning_rate": 0.00019500600844582947, "loss": 0.8696, "step": 2180 }, { "epoch": 0.5062086573053267, "grad_norm": 0.46610814332962036, "learning_rate": 0.00019500145596130147, "loss": 0.8949, "step": 2181 }, { "epoch": 0.5064407566438436, "grad_norm": 0.4748044013977051, "learning_rate": 0.0001949969014559024, "loss": 0.8864, "step": 2182 }, { "epoch": 0.5066728559823604, "grad_norm": 0.4976150095462799, "learning_rate": 0.0001949923449297291, "loss": 0.8975, "step": 2183 }, { "epoch": 0.5069049553208773, "grad_norm": 0.566085696220398, "learning_rate": 0.00019498778638287858, "loss": 0.947, "step": 2184 }, { "epoch": 0.5071370546593942, "grad_norm": 0.470172643661499, "learning_rate": 0.00019498322581544775, "loss": 0.9584, "step": 2185 }, { "epoch": 
0.5073691539979112, "grad_norm": 0.43850842118263245, "learning_rate": 0.00019497866322753363, "loss": 0.8782, "step": 2186 }, { "epoch": 0.507601253336428, "grad_norm": 0.49665388464927673, "learning_rate": 0.00019497409861923327, "loss": 0.8861, "step": 2187 }, { "epoch": 0.5078333526749449, "grad_norm": 0.48345258831977844, "learning_rate": 0.0001949695319906438, "loss": 0.9002, "step": 2188 }, { "epoch": 0.5080654520134618, "grad_norm": 0.5360321998596191, "learning_rate": 0.00019496496334186233, "loss": 0.9334, "step": 2189 }, { "epoch": 0.5082975513519786, "grad_norm": 0.46455636620521545, "learning_rate": 0.00019496039267298608, "loss": 0.9011, "step": 2190 }, { "epoch": 0.5085296506904955, "grad_norm": 0.6066244840621948, "learning_rate": 0.0001949558199841122, "loss": 0.8888, "step": 2191 }, { "epoch": 0.5087617500290125, "grad_norm": 0.4936549961566925, "learning_rate": 0.00019495124527533806, "loss": 0.916, "step": 2192 }, { "epoch": 0.5089938493675293, "grad_norm": 0.48682743310928345, "learning_rate": 0.0001949466685467609, "loss": 0.9205, "step": 2193 }, { "epoch": 0.5092259487060462, "grad_norm": 0.5464056730270386, "learning_rate": 0.00019494208979847812, "loss": 0.8842, "step": 2194 }, { "epoch": 0.5094580480445631, "grad_norm": 0.5372827053070068, "learning_rate": 0.0001949375090305871, "loss": 0.9324, "step": 2195 }, { "epoch": 0.5096901473830799, "grad_norm": 0.6235803365707397, "learning_rate": 0.0001949329262431853, "loss": 0.8957, "step": 2196 }, { "epoch": 0.5099222467215968, "grad_norm": 0.5118064880371094, "learning_rate": 0.00019492834143637015, "loss": 0.9167, "step": 2197 }, { "epoch": 0.5101543460601138, "grad_norm": 0.5715994238853455, "learning_rate": 0.00019492375461023926, "loss": 1.0021, "step": 2198 }, { "epoch": 0.5103864453986307, "grad_norm": 0.5673936605453491, "learning_rate": 0.00019491916576489014, "loss": 0.8273, "step": 2199 }, { "epoch": 0.5106185447371475, "grad_norm": 0.5407105684280396, "learning_rate": 
0.00019491457490042043, "loss": 0.9726, "step": 2200 }, { "epoch": 0.5108506440756644, "grad_norm": 0.5387925505638123, "learning_rate": 0.00019490998201692774, "loss": 0.9063, "step": 2201 }, { "epoch": 0.5110827434141813, "grad_norm": 0.488993763923645, "learning_rate": 0.00019490538711450985, "loss": 0.952, "step": 2202 }, { "epoch": 0.5113148427526981, "grad_norm": 0.5393021106719971, "learning_rate": 0.00019490079019326444, "loss": 0.873, "step": 2203 }, { "epoch": 0.5115469420912151, "grad_norm": 0.5611486434936523, "learning_rate": 0.00019489619125328931, "loss": 0.905, "step": 2204 }, { "epoch": 0.511779041429732, "grad_norm": 0.4920741319656372, "learning_rate": 0.00019489159029468232, "loss": 0.8612, "step": 2205 }, { "epoch": 0.5120111407682488, "grad_norm": 0.47725075483322144, "learning_rate": 0.00019488698731754133, "loss": 0.8632, "step": 2206 }, { "epoch": 0.5122432401067657, "grad_norm": 0.4922488033771515, "learning_rate": 0.0001948823823219642, "loss": 0.8647, "step": 2207 }, { "epoch": 0.5124753394452826, "grad_norm": 0.50462406873703, "learning_rate": 0.00019487777530804895, "loss": 0.8685, "step": 2208 }, { "epoch": 0.5127074387837994, "grad_norm": 0.4940124452114105, "learning_rate": 0.00019487316627589353, "loss": 0.9409, "step": 2209 }, { "epoch": 0.5129395381223163, "grad_norm": 0.46814465522766113, "learning_rate": 0.00019486855522559605, "loss": 0.9125, "step": 2210 }, { "epoch": 0.5131716374608333, "grad_norm": 0.5584454536437988, "learning_rate": 0.00019486394215725454, "loss": 0.8934, "step": 2211 }, { "epoch": 0.5134037367993501, "grad_norm": 0.5169687867164612, "learning_rate": 0.00019485932707096718, "loss": 0.875, "step": 2212 }, { "epoch": 0.513635836137867, "grad_norm": 0.5924013257026672, "learning_rate": 0.00019485470996683207, "loss": 0.8949, "step": 2213 }, { "epoch": 0.5138679354763839, "grad_norm": 0.5526252388954163, "learning_rate": 0.0001948500908449475, "loss": 0.8586, "step": 2214 }, { "epoch": 0.5141000348149007, 
"grad_norm": 0.5124498605728149, "learning_rate": 0.00019484546970541166, "loss": 0.9601, "step": 2215 }, { "epoch": 0.5143321341534176, "grad_norm": 0.5372180938720703, "learning_rate": 0.00019484084654832292, "loss": 0.87, "step": 2216 }, { "epoch": 0.5145642334919346, "grad_norm": 0.48329445719718933, "learning_rate": 0.00019483622137377957, "loss": 0.8866, "step": 2217 }, { "epoch": 0.5147963328304515, "grad_norm": 0.5898883938789368, "learning_rate": 0.00019483159418188001, "loss": 0.9075, "step": 2218 }, { "epoch": 0.5150284321689683, "grad_norm": 0.5989969372749329, "learning_rate": 0.00019482696497272268, "loss": 0.9263, "step": 2219 }, { "epoch": 0.5152605315074852, "grad_norm": 0.4823264181613922, "learning_rate": 0.00019482233374640604, "loss": 0.9154, "step": 2220 }, { "epoch": 0.515492630846002, "grad_norm": 0.5369601249694824, "learning_rate": 0.00019481770050302865, "loss": 0.9052, "step": 2221 }, { "epoch": 0.5157247301845189, "grad_norm": 0.5158388018608093, "learning_rate": 0.000194813065242689, "loss": 0.9137, "step": 2222 }, { "epoch": 0.5159568295230359, "grad_norm": 0.4591442942619324, "learning_rate": 0.0001948084279654857, "loss": 0.8845, "step": 2223 }, { "epoch": 0.5161889288615528, "grad_norm": 0.5225938558578491, "learning_rate": 0.00019480378867151746, "loss": 0.8625, "step": 2224 }, { "epoch": 0.5164210282000696, "grad_norm": 0.4707620143890381, "learning_rate": 0.00019479914736088288, "loss": 0.8947, "step": 2225 }, { "epoch": 0.5166531275385865, "grad_norm": 0.5079500079154968, "learning_rate": 0.00019479450403368077, "loss": 0.9246, "step": 2226 }, { "epoch": 0.5168852268771034, "grad_norm": 0.48015832901000977, "learning_rate": 0.00019478985869000984, "loss": 0.9009, "step": 2227 }, { "epoch": 0.5171173262156202, "grad_norm": 0.538700520992279, "learning_rate": 0.00019478521132996893, "loss": 0.9086, "step": 2228 }, { "epoch": 0.5173494255541372, "grad_norm": 0.5823108553886414, "learning_rate": 0.00019478056195365695, "loss": 
0.9561, "step": 2229 }, { "epoch": 0.5175815248926541, "grad_norm": 0.5222913026809692, "learning_rate": 0.0001947759105611727, "loss": 0.9337, "step": 2230 }, { "epoch": 0.5178136242311709, "grad_norm": 0.4694076180458069, "learning_rate": 0.0001947712571526152, "loss": 0.9071, "step": 2231 }, { "epoch": 0.5180457235696878, "grad_norm": 0.5498440265655518, "learning_rate": 0.0001947666017280834, "loss": 0.8902, "step": 2232 }, { "epoch": 0.5182778229082047, "grad_norm": 0.6000506281852722, "learning_rate": 0.00019476194428767635, "loss": 0.8967, "step": 2233 }, { "epoch": 0.5185099222467215, "grad_norm": 0.5160364508628845, "learning_rate": 0.00019475728483149313, "loss": 0.9427, "step": 2234 }, { "epoch": 0.5187420215852385, "grad_norm": 0.5338958501815796, "learning_rate": 0.00019475262335963284, "loss": 0.8869, "step": 2235 }, { "epoch": 0.5189741209237554, "grad_norm": 0.5340920090675354, "learning_rate": 0.00019474795987219464, "loss": 0.8999, "step": 2236 }, { "epoch": 0.5192062202622723, "grad_norm": 0.5917741060256958, "learning_rate": 0.00019474329436927772, "loss": 0.9132, "step": 2237 }, { "epoch": 0.5194383196007891, "grad_norm": 0.6958568096160889, "learning_rate": 0.00019473862685098135, "loss": 0.8933, "step": 2238 }, { "epoch": 0.519670418939306, "grad_norm": 0.481386661529541, "learning_rate": 0.0001947339573174048, "loss": 0.9147, "step": 2239 }, { "epoch": 0.5199025182778229, "grad_norm": 0.6101193428039551, "learning_rate": 0.0001947292857686474, "loss": 0.9236, "step": 2240 }, { "epoch": 0.5201346176163398, "grad_norm": 0.6471207737922668, "learning_rate": 0.00019472461220480854, "loss": 0.8749, "step": 2241 }, { "epoch": 0.5203667169548567, "grad_norm": 0.5118407607078552, "learning_rate": 0.00019471993662598761, "loss": 0.8735, "step": 2242 }, { "epoch": 0.5205988162933736, "grad_norm": 0.7117790579795837, "learning_rate": 0.0001947152590322841, "loss": 0.8663, "step": 2243 }, { "epoch": 0.5208309156318904, "grad_norm": 0.5396313071250916, 
"learning_rate": 0.0001947105794237975, "loss": 0.9041, "step": 2244 }, { "epoch": 0.5210630149704073, "grad_norm": 0.5098955035209656, "learning_rate": 0.00019470589780062737, "loss": 0.9369, "step": 2245 }, { "epoch": 0.5212951143089242, "grad_norm": 0.5958825945854187, "learning_rate": 0.00019470121416287323, "loss": 0.8476, "step": 2246 }, { "epoch": 0.5215272136474411, "grad_norm": 0.4828053116798401, "learning_rate": 0.0001946965285106348, "loss": 0.8434, "step": 2247 }, { "epoch": 0.521759312985958, "grad_norm": 0.6353124380111694, "learning_rate": 0.0001946918408440117, "loss": 0.884, "step": 2248 }, { "epoch": 0.5219914123244749, "grad_norm": 0.5921216607093811, "learning_rate": 0.00019468715116310366, "loss": 0.9121, "step": 2249 }, { "epoch": 0.5222235116629917, "grad_norm": 0.4666282832622528, "learning_rate": 0.00019468245946801044, "loss": 0.9213, "step": 2250 }, { "epoch": 0.5224556110015086, "grad_norm": 0.6230570673942566, "learning_rate": 0.0001946777657588318, "loss": 0.8569, "step": 2251 }, { "epoch": 0.5226877103400255, "grad_norm": 0.6215329170227051, "learning_rate": 0.00019467307003566764, "loss": 0.8506, "step": 2252 }, { "epoch": 0.5229198096785425, "grad_norm": 0.6473332643508911, "learning_rate": 0.00019466837229861785, "loss": 0.8659, "step": 2253 }, { "epoch": 0.5231519090170593, "grad_norm": 0.5597507953643799, "learning_rate": 0.00019466367254778233, "loss": 0.9444, "step": 2254 }, { "epoch": 0.5233840083555762, "grad_norm": 0.4687129855155945, "learning_rate": 0.00019465897078326108, "loss": 0.882, "step": 2255 }, { "epoch": 0.5236161076940931, "grad_norm": 0.5430654883384705, "learning_rate": 0.00019465426700515407, "loss": 0.8351, "step": 2256 }, { "epoch": 0.5238482070326099, "grad_norm": 0.49307429790496826, "learning_rate": 0.00019464956121356138, "loss": 0.8427, "step": 2257 }, { "epoch": 0.5240803063711268, "grad_norm": 0.47743698954582214, "learning_rate": 0.00019464485340858317, "loss": 0.9213, "step": 2258 }, { "epoch": 
0.5243124057096438, "grad_norm": 0.5775288343429565, "learning_rate": 0.0001946401435903195, "loss": 0.8943, "step": 2259 }, { "epoch": 0.5245445050481606, "grad_norm": 0.4688223898410797, "learning_rate": 0.0001946354317588706, "loss": 0.8986, "step": 2260 }, { "epoch": 0.5247766043866775, "grad_norm": 0.5374196171760559, "learning_rate": 0.0001946307179143367, "loss": 0.8022, "step": 2261 }, { "epoch": 0.5250087037251944, "grad_norm": 0.4845978021621704, "learning_rate": 0.00019462600205681806, "loss": 0.8863, "step": 2262 }, { "epoch": 0.5252408030637112, "grad_norm": 0.5203724503517151, "learning_rate": 0.00019462128418641498, "loss": 0.9025, "step": 2263 }, { "epoch": 0.5254729024022281, "grad_norm": 0.514147162437439, "learning_rate": 0.00019461656430322787, "loss": 0.9152, "step": 2264 }, { "epoch": 0.5257050017407451, "grad_norm": 0.4792666733264923, "learning_rate": 0.00019461184240735708, "loss": 0.8479, "step": 2265 }, { "epoch": 0.525937101079262, "grad_norm": 0.5625209808349609, "learning_rate": 0.00019460711849890306, "loss": 0.9086, "step": 2266 }, { "epoch": 0.5261692004177788, "grad_norm": 0.5080220699310303, "learning_rate": 0.00019460239257796636, "loss": 0.9034, "step": 2267 }, { "epoch": 0.5264012997562957, "grad_norm": 0.5365491509437561, "learning_rate": 0.00019459766464464742, "loss": 0.9597, "step": 2268 }, { "epoch": 0.5266333990948125, "grad_norm": 0.47180289030075073, "learning_rate": 0.0001945929346990469, "loss": 0.8716, "step": 2269 }, { "epoch": 0.5268654984333294, "grad_norm": 0.49987611174583435, "learning_rate": 0.00019458820274126533, "loss": 0.8831, "step": 2270 }, { "epoch": 0.5270975977718464, "grad_norm": 0.5344650149345398, "learning_rate": 0.00019458346877140339, "loss": 0.9773, "step": 2271 }, { "epoch": 0.5273296971103633, "grad_norm": 0.49787455797195435, "learning_rate": 0.00019457873278956185, "loss": 0.8665, "step": 2272 }, { "epoch": 0.5275617964488801, "grad_norm": 0.5409340858459473, "learning_rate": 
0.00019457399479584137, "loss": 0.9459, "step": 2273 }, { "epoch": 0.527793895787397, "grad_norm": 0.4853708744049072, "learning_rate": 0.00019456925479034278, "loss": 0.918, "step": 2274 }, { "epoch": 0.5280259951259139, "grad_norm": 0.5302354693412781, "learning_rate": 0.00019456451277316688, "loss": 0.9196, "step": 2275 }, { "epoch": 0.5282580944644307, "grad_norm": 0.5782338380813599, "learning_rate": 0.00019455976874441458, "loss": 0.8744, "step": 2276 }, { "epoch": 0.5284901938029477, "grad_norm": 0.5380991697311401, "learning_rate": 0.00019455502270418674, "loss": 0.8453, "step": 2277 }, { "epoch": 0.5287222931414646, "grad_norm": 0.5260257720947266, "learning_rate": 0.0001945502746525844, "loss": 0.9142, "step": 2278 }, { "epoch": 0.5289543924799814, "grad_norm": 0.49521541595458984, "learning_rate": 0.0001945455245897085, "loss": 0.9111, "step": 2279 }, { "epoch": 0.5291864918184983, "grad_norm": 0.4972239136695862, "learning_rate": 0.0001945407725156601, "loss": 0.8481, "step": 2280 }, { "epoch": 0.5294185911570152, "grad_norm": 0.5357541441917419, "learning_rate": 0.0001945360184305403, "loss": 0.8636, "step": 2281 }, { "epoch": 0.529650690495532, "grad_norm": 0.5304385423660278, "learning_rate": 0.00019453126233445018, "loss": 0.9271, "step": 2282 }, { "epoch": 0.529882789834049, "grad_norm": 0.5507416129112244, "learning_rate": 0.00019452650422749094, "loss": 0.9036, "step": 2283 }, { "epoch": 0.5301148891725659, "grad_norm": 0.5371777415275574, "learning_rate": 0.0001945217441097638, "loss": 0.8803, "step": 2284 }, { "epoch": 0.5303469885110828, "grad_norm": 0.5308743715286255, "learning_rate": 0.00019451698198137004, "loss": 0.9334, "step": 2285 }, { "epoch": 0.5305790878495996, "grad_norm": 0.621589720249176, "learning_rate": 0.00019451221784241095, "loss": 0.9469, "step": 2286 }, { "epoch": 0.5308111871881165, "grad_norm": 0.5172211527824402, "learning_rate": 0.0001945074516929878, "loss": 0.841, "step": 2287 }, { "epoch": 0.5310432865266334, 
"grad_norm": 0.5350139737129211, "learning_rate": 0.0001945026835332021, "loss": 0.8796, "step": 2288 }, { "epoch": 0.5312753858651503, "grad_norm": 0.6176063418388367, "learning_rate": 0.00019449791336315517, "loss": 0.9246, "step": 2289 }, { "epoch": 0.5315074852036672, "grad_norm": 0.49428290128707886, "learning_rate": 0.00019449314118294853, "loss": 0.9137, "step": 2290 }, { "epoch": 0.5317395845421841, "grad_norm": 0.48354092240333557, "learning_rate": 0.00019448836699268368, "loss": 0.8893, "step": 2291 }, { "epoch": 0.5319716838807009, "grad_norm": 0.5256873965263367, "learning_rate": 0.00019448359079246222, "loss": 0.838, "step": 2292 }, { "epoch": 0.5322037832192178, "grad_norm": 0.5151171684265137, "learning_rate": 0.00019447881258238567, "loss": 0.8846, "step": 2293 }, { "epoch": 0.5324358825577347, "grad_norm": 0.5168576240539551, "learning_rate": 0.00019447403236255576, "loss": 0.8618, "step": 2294 }, { "epoch": 0.5326679818962516, "grad_norm": 0.4554763436317444, "learning_rate": 0.0001944692501330741, "loss": 0.858, "step": 2295 }, { "epoch": 0.5329000812347685, "grad_norm": 0.4892447888851166, "learning_rate": 0.00019446446589404243, "loss": 0.8809, "step": 2296 }, { "epoch": 0.5331321805732854, "grad_norm": 0.4440969228744507, "learning_rate": 0.00019445967964556258, "loss": 0.8737, "step": 2297 }, { "epoch": 0.5333642799118022, "grad_norm": 0.533918559551239, "learning_rate": 0.0001944548913877363, "loss": 0.8743, "step": 2298 }, { "epoch": 0.5335963792503191, "grad_norm": 0.5094028115272522, "learning_rate": 0.00019445010112066543, "loss": 0.8723, "step": 2299 }, { "epoch": 0.533828478588836, "grad_norm": 0.4703962802886963, "learning_rate": 0.00019444530884445195, "loss": 0.8634, "step": 2300 }, { "epoch": 0.534060577927353, "grad_norm": 0.49895256757736206, "learning_rate": 0.00019444051455919773, "loss": 0.9347, "step": 2301 }, { "epoch": 0.5342926772658698, "grad_norm": 0.47373950481414795, "learning_rate": 0.0001944357182650048, "loss": 
0.8811, "step": 2302 }, { "epoch": 0.5345247766043867, "grad_norm": 0.4757462739944458, "learning_rate": 0.0001944309199619752, "loss": 0.8941, "step": 2303 }, { "epoch": 0.5347568759429036, "grad_norm": 0.41804438829421997, "learning_rate": 0.00019442611965021095, "loss": 0.8911, "step": 2304 }, { "epoch": 0.5349889752814204, "grad_norm": 0.4527941048145294, "learning_rate": 0.00019442131732981413, "loss": 0.9331, "step": 2305 }, { "epoch": 0.5352210746199373, "grad_norm": 0.4860967993736267, "learning_rate": 0.00019441651300088699, "loss": 0.8806, "step": 2306 }, { "epoch": 0.5354531739584543, "grad_norm": 0.4618609547615051, "learning_rate": 0.00019441170666353167, "loss": 0.93, "step": 2307 }, { "epoch": 0.5356852732969711, "grad_norm": 0.46941566467285156, "learning_rate": 0.0001944068983178504, "loss": 0.8705, "step": 2308 }, { "epoch": 0.535917372635488, "grad_norm": 0.5012117028236389, "learning_rate": 0.00019440208796394552, "loss": 0.8752, "step": 2309 }, { "epoch": 0.5361494719740049, "grad_norm": 0.4841819405555725, "learning_rate": 0.0001943972756019193, "loss": 0.8799, "step": 2310 }, { "epoch": 0.5363815713125217, "grad_norm": 0.5163159370422363, "learning_rate": 0.00019439246123187416, "loss": 0.9075, "step": 2311 }, { "epoch": 0.5366136706510386, "grad_norm": 0.47789424657821655, "learning_rate": 0.00019438764485391245, "loss": 0.9219, "step": 2312 }, { "epoch": 0.5368457699895556, "grad_norm": 0.5047345757484436, "learning_rate": 0.00019438282646813665, "loss": 0.8627, "step": 2313 }, { "epoch": 0.5370778693280724, "grad_norm": 0.4576607644557953, "learning_rate": 0.00019437800607464932, "loss": 0.909, "step": 2314 }, { "epoch": 0.5373099686665893, "grad_norm": 0.48208168148994446, "learning_rate": 0.00019437318367355287, "loss": 0.9093, "step": 2315 }, { "epoch": 0.5375420680051062, "grad_norm": 0.5140655636787415, "learning_rate": 0.00019436835926494998, "loss": 0.8834, "step": 2316 }, { "epoch": 0.537774167343623, "grad_norm": 
0.46531084179878235, "learning_rate": 0.00019436353284894327, "loss": 0.8773, "step": 2317 }, { "epoch": 0.5380062666821399, "grad_norm": 0.49124783277511597, "learning_rate": 0.00019435870442563536, "loss": 0.9303, "step": 2318 }, { "epoch": 0.5382383660206569, "grad_norm": 0.524798572063446, "learning_rate": 0.00019435387399512898, "loss": 0.8931, "step": 2319 }, { "epoch": 0.5384704653591738, "grad_norm": 0.5019339919090271, "learning_rate": 0.00019434904155752692, "loss": 0.9109, "step": 2320 }, { "epoch": 0.5387025646976906, "grad_norm": 0.5564262270927429, "learning_rate": 0.00019434420711293192, "loss": 0.88, "step": 2321 }, { "epoch": 0.5389346640362075, "grad_norm": 0.49711668491363525, "learning_rate": 0.00019433937066144682, "loss": 0.8626, "step": 2322 }, { "epoch": 0.5391667633747244, "grad_norm": 0.5930631160736084, "learning_rate": 0.00019433453220317457, "loss": 0.9029, "step": 2323 }, { "epoch": 0.5393988627132412, "grad_norm": 0.44928017258644104, "learning_rate": 0.00019432969173821803, "loss": 0.9131, "step": 2324 }, { "epoch": 0.5396309620517582, "grad_norm": 0.5018507242202759, "learning_rate": 0.00019432484926668018, "loss": 0.8379, "step": 2325 }, { "epoch": 0.5398630613902751, "grad_norm": 0.5034919381141663, "learning_rate": 0.00019432000478866406, "loss": 0.9155, "step": 2326 }, { "epoch": 0.5400951607287919, "grad_norm": 0.5508437156677246, "learning_rate": 0.00019431515830427266, "loss": 0.8959, "step": 2327 }, { "epoch": 0.5403272600673088, "grad_norm": 0.6041455864906311, "learning_rate": 0.00019431030981360912, "loss": 0.8586, "step": 2328 }, { "epoch": 0.5405593594058257, "grad_norm": 0.491547554731369, "learning_rate": 0.00019430545931677657, "loss": 0.889, "step": 2329 }, { "epoch": 0.5407914587443425, "grad_norm": 0.581338107585907, "learning_rate": 0.00019430060681387818, "loss": 0.8991, "step": 2330 }, { "epoch": 0.5410235580828595, "grad_norm": 0.4920233488082886, "learning_rate": 0.0001942957523050172, "loss": 0.889, "step": 
2331 }, { "epoch": 0.5412556574213764, "grad_norm": 0.5669891238212585, "learning_rate": 0.00019429089579029685, "loss": 0.8962, "step": 2332 }, { "epoch": 0.5414877567598932, "grad_norm": 0.6379129886627197, "learning_rate": 0.00019428603726982045, "loss": 0.8452, "step": 2333 }, { "epoch": 0.5417198560984101, "grad_norm": 0.4495159685611725, "learning_rate": 0.00019428117674369138, "loss": 0.8953, "step": 2334 }, { "epoch": 0.541951955436927, "grad_norm": 0.6457833051681519, "learning_rate": 0.00019427631421201302, "loss": 0.8956, "step": 2335 }, { "epoch": 0.5421840547754438, "grad_norm": 0.5418552160263062, "learning_rate": 0.0001942714496748888, "loss": 0.8989, "step": 2336 }, { "epoch": 0.5424161541139608, "grad_norm": 0.4600215554237366, "learning_rate": 0.0001942665831324222, "loss": 0.8399, "step": 2337 }, { "epoch": 0.5426482534524777, "grad_norm": 0.5136691927909851, "learning_rate": 0.00019426171458471672, "loss": 0.9127, "step": 2338 }, { "epoch": 0.5428803527909946, "grad_norm": 0.4295002818107605, "learning_rate": 0.00019425684403187598, "loss": 0.8698, "step": 2339 }, { "epoch": 0.5431124521295114, "grad_norm": 0.5031488537788391, "learning_rate": 0.0001942519714740035, "loss": 0.9168, "step": 2340 }, { "epoch": 0.5433445514680283, "grad_norm": 0.5040335059165955, "learning_rate": 0.00019424709691120303, "loss": 0.8913, "step": 2341 }, { "epoch": 0.5435766508065452, "grad_norm": 0.46856674551963806, "learning_rate": 0.00019424222034357817, "loss": 0.91, "step": 2342 }, { "epoch": 0.5438087501450621, "grad_norm": 0.6152729988098145, "learning_rate": 0.00019423734177123275, "loss": 0.8695, "step": 2343 }, { "epoch": 0.544040849483579, "grad_norm": 0.5399003028869629, "learning_rate": 0.00019423246119427043, "loss": 0.9025, "step": 2344 }, { "epoch": 0.5442729488220959, "grad_norm": 0.6308207511901855, "learning_rate": 0.00019422757861279512, "loss": 0.8727, "step": 2345 }, { "epoch": 0.5445050481606127, "grad_norm": 0.616094708442688, "learning_rate": 
0.00019422269402691067, "loss": 0.8621, "step": 2346 }, { "epoch": 0.5447371474991296, "grad_norm": 0.4999483823776245, "learning_rate": 0.00019421780743672095, "loss": 0.8842, "step": 2347 }, { "epoch": 0.5449692468376465, "grad_norm": 0.46148985624313354, "learning_rate": 0.00019421291884232992, "loss": 0.9445, "step": 2348 }, { "epoch": 0.5452013461761634, "grad_norm": 0.5143673419952393, "learning_rate": 0.0001942080282438416, "loss": 0.9198, "step": 2349 }, { "epoch": 0.5454334455146803, "grad_norm": 0.44410768151283264, "learning_rate": 0.00019420313564135998, "loss": 0.9257, "step": 2350 }, { "epoch": 0.5456655448531972, "grad_norm": 0.45710766315460205, "learning_rate": 0.00019419824103498917, "loss": 0.8798, "step": 2351 }, { "epoch": 0.545897644191714, "grad_norm": 0.45260199904441833, "learning_rate": 0.00019419334442483325, "loss": 0.8503, "step": 2352 }, { "epoch": 0.5461297435302309, "grad_norm": 0.5660114288330078, "learning_rate": 0.00019418844581099644, "loss": 0.8884, "step": 2353 }, { "epoch": 0.5463618428687478, "grad_norm": 0.5468779802322388, "learning_rate": 0.00019418354519358291, "loss": 0.8792, "step": 2354 }, { "epoch": 0.5465939422072648, "grad_norm": 0.47821682691574097, "learning_rate": 0.00019417864257269686, "loss": 0.8592, "step": 2355 }, { "epoch": 0.5468260415457816, "grad_norm": 0.5458794832229614, "learning_rate": 0.00019417373794844265, "loss": 0.8624, "step": 2356 }, { "epoch": 0.5470581408842985, "grad_norm": 0.5079901814460754, "learning_rate": 0.00019416883132092458, "loss": 0.8629, "step": 2357 }, { "epoch": 0.5472902402228154, "grad_norm": 0.5289338231086731, "learning_rate": 0.000194163922690247, "loss": 0.9088, "step": 2358 }, { "epoch": 0.5475223395613322, "grad_norm": 0.5826409459114075, "learning_rate": 0.00019415901205651438, "loss": 0.9067, "step": 2359 }, { "epoch": 0.5477544388998491, "grad_norm": 0.4645446240901947, "learning_rate": 0.00019415409941983116, "loss": 0.8872, "step": 2360 }, { "epoch": 
0.5479865382383661, "grad_norm": 0.5332089066505432, "learning_rate": 0.0001941491847803018, "loss": 0.9002, "step": 2361 }, { "epoch": 0.5482186375768829, "grad_norm": 0.4994647800922394, "learning_rate": 0.00019414426813803095, "loss": 0.8906, "step": 2362 }, { "epoch": 0.5484507369153998, "grad_norm": 0.46931859850883484, "learning_rate": 0.00019413934949312307, "loss": 0.9102, "step": 2363 }, { "epoch": 0.5486828362539167, "grad_norm": 0.625973105430603, "learning_rate": 0.00019413442884568284, "loss": 0.9173, "step": 2364 }, { "epoch": 0.5489149355924335, "grad_norm": 0.4469800889492035, "learning_rate": 0.00019412950619581497, "loss": 0.8744, "step": 2365 }, { "epoch": 0.5491470349309504, "grad_norm": 0.48621559143066406, "learning_rate": 0.00019412458154362413, "loss": 0.8834, "step": 2366 }, { "epoch": 0.5493791342694674, "grad_norm": 0.48736312985420227, "learning_rate": 0.00019411965488921507, "loss": 0.8414, "step": 2367 }, { "epoch": 0.5496112336079843, "grad_norm": 0.414681077003479, "learning_rate": 0.0001941147262326926, "loss": 0.8509, "step": 2368 }, { "epoch": 0.5498433329465011, "grad_norm": 0.49165618419647217, "learning_rate": 0.00019410979557416157, "loss": 0.8914, "step": 2369 }, { "epoch": 0.550075432285018, "grad_norm": 0.46515318751335144, "learning_rate": 0.0001941048629137269, "loss": 0.8685, "step": 2370 }, { "epoch": 0.5503075316235349, "grad_norm": 0.453905314207077, "learning_rate": 0.00019409992825149345, "loss": 0.9143, "step": 2371 }, { "epoch": 0.5505396309620517, "grad_norm": 0.5306609272956848, "learning_rate": 0.00019409499158756622, "loss": 0.933, "step": 2372 }, { "epoch": 0.5507717303005687, "grad_norm": 0.4926414489746094, "learning_rate": 0.00019409005292205027, "loss": 0.8493, "step": 2373 }, { "epoch": 0.5510038296390856, "grad_norm": 0.47566598653793335, "learning_rate": 0.00019408511225505056, "loss": 0.9371, "step": 2374 }, { "epoch": 0.5512359289776024, "grad_norm": 0.5937473177909851, "learning_rate": 
0.0001940801695866723, "loss": 0.8793, "step": 2375 }, { "epoch": 0.5514680283161193, "grad_norm": 0.4446541368961334, "learning_rate": 0.00019407522491702052, "loss": 0.866, "step": 2376 }, { "epoch": 0.5517001276546362, "grad_norm": 0.49526840448379517, "learning_rate": 0.00019407027824620046, "loss": 0.8689, "step": 2377 }, { "epoch": 0.551932226993153, "grad_norm": 0.4916817247867584, "learning_rate": 0.00019406532957431735, "loss": 0.909, "step": 2378 }, { "epoch": 0.55216432633167, "grad_norm": 0.5651233196258545, "learning_rate": 0.00019406037890147646, "loss": 0.8631, "step": 2379 }, { "epoch": 0.5523964256701869, "grad_norm": 0.5214006900787354, "learning_rate": 0.00019405542622778302, "loss": 0.8317, "step": 2380 }, { "epoch": 0.5526285250087037, "grad_norm": 0.49199211597442627, "learning_rate": 0.00019405047155334252, "loss": 0.9002, "step": 2381 }, { "epoch": 0.5528606243472206, "grad_norm": 0.5576756000518799, "learning_rate": 0.00019404551487826028, "loss": 0.8969, "step": 2382 }, { "epoch": 0.5530927236857375, "grad_norm": 0.5788450241088867, "learning_rate": 0.00019404055620264173, "loss": 0.8884, "step": 2383 }, { "epoch": 0.5533248230242543, "grad_norm": 0.4981861114501953, "learning_rate": 0.00019403559552659235, "loss": 0.8909, "step": 2384 }, { "epoch": 0.5535569223627713, "grad_norm": 0.5601760745048523, "learning_rate": 0.00019403063285021766, "loss": 0.8413, "step": 2385 }, { "epoch": 0.5537890217012882, "grad_norm": 0.6168556809425354, "learning_rate": 0.0001940256681736233, "loss": 0.9055, "step": 2386 }, { "epoch": 0.554021121039805, "grad_norm": 0.4815440773963928, "learning_rate": 0.00019402070149691483, "loss": 0.8674, "step": 2387 }, { "epoch": 0.5542532203783219, "grad_norm": 0.5286462903022766, "learning_rate": 0.00019401573282019785, "loss": 0.9093, "step": 2388 }, { "epoch": 0.5544853197168388, "grad_norm": 0.526772677898407, "learning_rate": 0.0001940107621435781, "loss": 0.8271, "step": 2389 }, { "epoch": 0.5547174190553557, 
"grad_norm": 0.5306504368782043, "learning_rate": 0.00019400578946716132, "loss": 0.8688, "step": 2390 }, { "epoch": 0.5549495183938726, "grad_norm": 0.5305059552192688, "learning_rate": 0.0001940008147910533, "loss": 0.9026, "step": 2391 }, { "epoch": 0.5551816177323895, "grad_norm": 0.4816405475139618, "learning_rate": 0.00019399583811535983, "loss": 0.8688, "step": 2392 }, { "epoch": 0.5554137170709064, "grad_norm": 0.4857674241065979, "learning_rate": 0.00019399085944018683, "loss": 0.8777, "step": 2393 }, { "epoch": 0.5556458164094232, "grad_norm": 0.5299513936042786, "learning_rate": 0.00019398587876564012, "loss": 0.9131, "step": 2394 }, { "epoch": 0.5558779157479401, "grad_norm": 0.4759480357170105, "learning_rate": 0.00019398089609182567, "loss": 0.8976, "step": 2395 }, { "epoch": 0.556110015086457, "grad_norm": 0.4851369559764862, "learning_rate": 0.00019397591141884956, "loss": 0.9038, "step": 2396 }, { "epoch": 0.5563421144249739, "grad_norm": 0.4655773937702179, "learning_rate": 0.00019397092474681774, "loss": 0.9019, "step": 2397 }, { "epoch": 0.5565742137634908, "grad_norm": 0.47859612107276917, "learning_rate": 0.00019396593607583627, "loss": 0.8842, "step": 2398 }, { "epoch": 0.5568063131020077, "grad_norm": 0.5428576469421387, "learning_rate": 0.00019396094540601136, "loss": 0.9116, "step": 2399 }, { "epoch": 0.5570384124405245, "grad_norm": 0.4793083965778351, "learning_rate": 0.00019395595273744907, "loss": 0.8926, "step": 2400 }, { "epoch": 0.5572705117790414, "grad_norm": 0.519672155380249, "learning_rate": 0.00019395095807025564, "loss": 0.8642, "step": 2401 }, { "epoch": 0.5575026111175583, "grad_norm": 0.49902594089508057, "learning_rate": 0.00019394596140453736, "loss": 0.8712, "step": 2402 }, { "epoch": 0.5577347104560753, "grad_norm": 0.46825188398361206, "learning_rate": 0.00019394096274040048, "loss": 0.9133, "step": 2403 }, { "epoch": 0.5579668097945921, "grad_norm": 0.48629945516586304, "learning_rate": 0.00019393596207795136, 
"loss": 0.8778, "step": 2404 }, { "epoch": 0.558198909133109, "grad_norm": 0.4493025541305542, "learning_rate": 0.00019393095941729632, "loss": 0.8719, "step": 2405 }, { "epoch": 0.5584310084716259, "grad_norm": 0.5146043300628662, "learning_rate": 0.00019392595475854182, "loss": 0.8483, "step": 2406 }, { "epoch": 0.5586631078101427, "grad_norm": 0.47668129205703735, "learning_rate": 0.0001939209481017943, "loss": 0.8742, "step": 2407 }, { "epoch": 0.5588952071486596, "grad_norm": 0.5439273715019226, "learning_rate": 0.00019391593944716027, "loss": 0.8477, "step": 2408 }, { "epoch": 0.5591273064871765, "grad_norm": 0.5277884006500244, "learning_rate": 0.0001939109287947463, "loss": 0.9214, "step": 2409 }, { "epoch": 0.5593594058256934, "grad_norm": 0.5097240805625916, "learning_rate": 0.00019390591614465888, "loss": 0.9058, "step": 2410 }, { "epoch": 0.5595915051642103, "grad_norm": 0.4825122356414795, "learning_rate": 0.00019390090149700478, "loss": 0.9161, "step": 2411 }, { "epoch": 0.5598236045027272, "grad_norm": 0.4902425706386566, "learning_rate": 0.0001938958848518906, "loss": 0.8375, "step": 2412 }, { "epoch": 0.560055703841244, "grad_norm": 0.5292900800704956, "learning_rate": 0.000193890866209423, "loss": 0.838, "step": 2413 }, { "epoch": 0.5602878031797609, "grad_norm": 0.5518543720245361, "learning_rate": 0.00019388584556970885, "loss": 0.8427, "step": 2414 }, { "epoch": 0.5605199025182778, "grad_norm": 0.5023407340049744, "learning_rate": 0.00019388082293285484, "loss": 0.8917, "step": 2415 }, { "epoch": 0.5607520018567947, "grad_norm": 0.5093254446983337, "learning_rate": 0.0001938757982989679, "loss": 0.8683, "step": 2416 }, { "epoch": 0.5609841011953116, "grad_norm": 0.48008766770362854, "learning_rate": 0.00019387077166815484, "loss": 0.8584, "step": 2417 }, { "epoch": 0.5612162005338285, "grad_norm": 0.5634531378746033, "learning_rate": 0.00019386574304052263, "loss": 0.9141, "step": 2418 }, { "epoch": 0.5614482998723453, "grad_norm": 
0.46604859828948975, "learning_rate": 0.00019386071241617826, "loss": 0.9352, "step": 2419 }, { "epoch": 0.5616803992108622, "grad_norm": 0.47470033168792725, "learning_rate": 0.0001938556797952287, "loss": 0.8644, "step": 2420 }, { "epoch": 0.5619124985493791, "grad_norm": 0.4769364297389984, "learning_rate": 0.00019385064517778101, "loss": 0.9219, "step": 2421 }, { "epoch": 0.5621445978878961, "grad_norm": 0.4717792272567749, "learning_rate": 0.0001938456085639423, "loss": 0.8722, "step": 2422 }, { "epoch": 0.5623766972264129, "grad_norm": 0.5754774808883667, "learning_rate": 0.00019384056995381968, "loss": 0.9044, "step": 2423 }, { "epoch": 0.5626087965649298, "grad_norm": 0.47137171030044556, "learning_rate": 0.00019383552934752038, "loss": 0.8649, "step": 2424 }, { "epoch": 0.5628408959034467, "grad_norm": 0.4994586408138275, "learning_rate": 0.0001938304867451516, "loss": 0.8603, "step": 2425 }, { "epoch": 0.5630729952419635, "grad_norm": 0.5090224146842957, "learning_rate": 0.00019382544214682058, "loss": 0.866, "step": 2426 }, { "epoch": 0.5633050945804804, "grad_norm": 0.519207775592804, "learning_rate": 0.00019382039555263466, "loss": 0.8344, "step": 2427 }, { "epoch": 0.5635371939189974, "grad_norm": 0.5059794783592224, "learning_rate": 0.00019381534696270118, "loss": 0.8989, "step": 2428 }, { "epoch": 0.5637692932575142, "grad_norm": 0.5585174560546875, "learning_rate": 0.00019381029637712755, "loss": 0.9894, "step": 2429 }, { "epoch": 0.5640013925960311, "grad_norm": 0.555206835269928, "learning_rate": 0.00019380524379602115, "loss": 0.8827, "step": 2430 }, { "epoch": 0.564233491934548, "grad_norm": 0.4937175512313843, "learning_rate": 0.00019380018921948957, "loss": 0.8655, "step": 2431 }, { "epoch": 0.5644655912730648, "grad_norm": 0.6201871633529663, "learning_rate": 0.0001937951326476402, "loss": 0.9318, "step": 2432 }, { "epoch": 0.5646976906115817, "grad_norm": 0.5935556292533875, "learning_rate": 0.0001937900740805807, "loss": 0.8485, "step": 
2433 }, { "epoch": 0.5649297899500987, "grad_norm": 0.52373206615448, "learning_rate": 0.00019378501351841865, "loss": 0.8948, "step": 2434 }, { "epoch": 0.5651618892886155, "grad_norm": 0.5149019360542297, "learning_rate": 0.00019377995096126166, "loss": 0.8565, "step": 2435 }, { "epoch": 0.5653939886271324, "grad_norm": 0.5378832817077637, "learning_rate": 0.00019377488640921748, "loss": 0.9489, "step": 2436 }, { "epoch": 0.5656260879656493, "grad_norm": 0.4693540930747986, "learning_rate": 0.0001937698198623938, "loss": 0.8932, "step": 2437 }, { "epoch": 0.5658581873041661, "grad_norm": 0.5232880711555481, "learning_rate": 0.0001937647513208984, "loss": 0.922, "step": 2438 }, { "epoch": 0.566090286642683, "grad_norm": 0.526426374912262, "learning_rate": 0.0001937596807848391, "loss": 0.9113, "step": 2439 }, { "epoch": 0.5663223859812, "grad_norm": 0.5341154336929321, "learning_rate": 0.00019375460825432378, "loss": 0.8895, "step": 2440 }, { "epoch": 0.5665544853197169, "grad_norm": 0.4787288010120392, "learning_rate": 0.00019374953372946035, "loss": 0.8542, "step": 2441 }, { "epoch": 0.5667865846582337, "grad_norm": 0.506059467792511, "learning_rate": 0.0001937444572103567, "loss": 0.9097, "step": 2442 }, { "epoch": 0.5670186839967506, "grad_norm": 0.46549925208091736, "learning_rate": 0.0001937393786971209, "loss": 0.8958, "step": 2443 }, { "epoch": 0.5672507833352675, "grad_norm": 0.512458086013794, "learning_rate": 0.0001937342981898609, "loss": 0.8872, "step": 2444 }, { "epoch": 0.5674828826737843, "grad_norm": 0.4481663107872009, "learning_rate": 0.00019372921568868478, "loss": 0.8531, "step": 2445 }, { "epoch": 0.5677149820123013, "grad_norm": 0.44241198897361755, "learning_rate": 0.00019372413119370072, "loss": 0.8965, "step": 2446 }, { "epoch": 0.5679470813508182, "grad_norm": 0.4838462471961975, "learning_rate": 0.00019371904470501684, "loss": 0.8994, "step": 2447 }, { "epoch": 0.568179180689335, "grad_norm": 0.46779921650886536, "learning_rate": 
0.00019371395622274132, "loss": 0.8727, "step": 2448 }, { "epoch": 0.5684112800278519, "grad_norm": 0.47992274165153503, "learning_rate": 0.00019370886574698244, "loss": 0.8845, "step": 2449 }, { "epoch": 0.5686433793663688, "grad_norm": 0.5130655169487, "learning_rate": 0.00019370377327784845, "loss": 0.8848, "step": 2450 }, { "epoch": 0.5688754787048856, "grad_norm": 0.4538505971431732, "learning_rate": 0.0001936986788154477, "loss": 0.8899, "step": 2451 }, { "epoch": 0.5691075780434026, "grad_norm": 0.4817444980144501, "learning_rate": 0.00019369358235988855, "loss": 0.8851, "step": 2452 }, { "epoch": 0.5693396773819195, "grad_norm": 0.49702510237693787, "learning_rate": 0.0001936884839112794, "loss": 0.8694, "step": 2453 }, { "epoch": 0.5695717767204364, "grad_norm": 0.42820343375205994, "learning_rate": 0.00019368338346972873, "loss": 0.8316, "step": 2454 }, { "epoch": 0.5698038760589532, "grad_norm": 0.4280126392841339, "learning_rate": 0.00019367828103534503, "loss": 0.8281, "step": 2455 }, { "epoch": 0.5700359753974701, "grad_norm": 0.5092231035232544, "learning_rate": 0.00019367317660823682, "loss": 0.8914, "step": 2456 }, { "epoch": 0.570268074735987, "grad_norm": 0.43657800555229187, "learning_rate": 0.0001936680701885127, "loss": 0.9087, "step": 2457 }, { "epoch": 0.5705001740745039, "grad_norm": 0.423939973115921, "learning_rate": 0.0001936629617762813, "loss": 0.8732, "step": 2458 }, { "epoch": 0.5707322734130208, "grad_norm": 0.4575686454772949, "learning_rate": 0.00019365785137165126, "loss": 0.8726, "step": 2459 }, { "epoch": 0.5709643727515377, "grad_norm": 0.45697084069252014, "learning_rate": 0.0001936527389747313, "loss": 0.8799, "step": 2460 }, { "epoch": 0.5711964720900545, "grad_norm": 0.47360023856163025, "learning_rate": 0.0001936476245856302, "loss": 0.8585, "step": 2461 }, { "epoch": 0.5714285714285714, "grad_norm": 0.5222719311714172, "learning_rate": 0.0001936425082044567, "loss": 0.9112, "step": 2462 }, { "epoch": 0.5716606707670883, 
"grad_norm": 0.48283854126930237, "learning_rate": 0.00019363738983131968, "loss": 0.9038, "step": 2463 }, { "epoch": 0.5718927701056052, "grad_norm": 0.5490610003471375, "learning_rate": 0.000193632269466328, "loss": 0.8517, "step": 2464 }, { "epoch": 0.5721248694441221, "grad_norm": 0.4478590488433838, "learning_rate": 0.00019362714710959055, "loss": 0.9089, "step": 2465 }, { "epoch": 0.572356968782639, "grad_norm": 0.4874999225139618, "learning_rate": 0.00019362202276121634, "loss": 0.9297, "step": 2466 }, { "epoch": 0.5725890681211558, "grad_norm": 0.54438316822052, "learning_rate": 0.0001936168964213144, "loss": 0.8624, "step": 2467 }, { "epoch": 0.5728211674596727, "grad_norm": 0.4551005959510803, "learning_rate": 0.00019361176808999368, "loss": 0.891, "step": 2468 }, { "epoch": 0.5730532667981896, "grad_norm": 0.5449005961418152, "learning_rate": 0.00019360663776736336, "loss": 0.9046, "step": 2469 }, { "epoch": 0.5732853661367066, "grad_norm": 0.47211650013923645, "learning_rate": 0.00019360150545353252, "loss": 0.8234, "step": 2470 }, { "epoch": 0.5735174654752234, "grad_norm": 0.4869956374168396, "learning_rate": 0.00019359637114861036, "loss": 0.9322, "step": 2471 }, { "epoch": 0.5737495648137403, "grad_norm": 0.537509560585022, "learning_rate": 0.0001935912348527061, "loss": 0.8586, "step": 2472 }, { "epoch": 0.5739816641522572, "grad_norm": 0.45348936319351196, "learning_rate": 0.00019358609656592899, "loss": 0.8725, "step": 2473 }, { "epoch": 0.574213763490774, "grad_norm": 0.44994351267814636, "learning_rate": 0.0001935809562883883, "loss": 0.8722, "step": 2474 }, { "epoch": 0.5744458628292909, "grad_norm": 0.5008593201637268, "learning_rate": 0.00019357581402019346, "loss": 0.9061, "step": 2475 }, { "epoch": 0.5746779621678079, "grad_norm": 0.4767882227897644, "learning_rate": 0.00019357066976145374, "loss": 0.9056, "step": 2476 }, { "epoch": 0.5749100615063247, "grad_norm": 0.47955289483070374, "learning_rate": 0.00019356552351227867, "loss": 
0.9061, "step": 2477 }, { "epoch": 0.5751421608448416, "grad_norm": 0.5667338371276855, "learning_rate": 0.00019356037527277768, "loss": 0.8432, "step": 2478 }, { "epoch": 0.5753742601833585, "grad_norm": 0.4561117887496948, "learning_rate": 0.00019355522504306026, "loss": 0.8913, "step": 2479 }, { "epoch": 0.5756063595218753, "grad_norm": 0.49610593914985657, "learning_rate": 0.00019355007282323604, "loss": 0.8723, "step": 2480 }, { "epoch": 0.5758384588603922, "grad_norm": 0.4767979383468628, "learning_rate": 0.00019354491861341453, "loss": 0.8533, "step": 2481 }, { "epoch": 0.5760705581989092, "grad_norm": 0.4673297703266144, "learning_rate": 0.00019353976241370542, "loss": 0.8566, "step": 2482 }, { "epoch": 0.576302657537426, "grad_norm": 0.4706180691719055, "learning_rate": 0.00019353460422421836, "loss": 0.8768, "step": 2483 }, { "epoch": 0.5765347568759429, "grad_norm": 0.4735381305217743, "learning_rate": 0.0001935294440450631, "loss": 0.9009, "step": 2484 }, { "epoch": 0.5767668562144598, "grad_norm": 0.44768598675727844, "learning_rate": 0.00019352428187634942, "loss": 0.8489, "step": 2485 }, { "epoch": 0.5769989555529766, "grad_norm": 0.48990532755851746, "learning_rate": 0.00019351911771818713, "loss": 0.8921, "step": 2486 }, { "epoch": 0.5772310548914935, "grad_norm": 0.4560520052909851, "learning_rate": 0.00019351395157068605, "loss": 0.8948, "step": 2487 }, { "epoch": 0.5774631542300105, "grad_norm": 0.4439263641834259, "learning_rate": 0.00019350878343395606, "loss": 0.8454, "step": 2488 }, { "epoch": 0.5776952535685274, "grad_norm": 0.4915536344051361, "learning_rate": 0.00019350361330810716, "loss": 0.8289, "step": 2489 }, { "epoch": 0.5779273529070442, "grad_norm": 0.48438647389411926, "learning_rate": 0.0001934984411932493, "loss": 0.9037, "step": 2490 }, { "epoch": 0.5781594522455611, "grad_norm": 0.5009618401527405, "learning_rate": 0.00019349326708949246, "loss": 0.8616, "step": 2491 }, { "epoch": 0.578391551584078, "grad_norm": 
0.5363379716873169, "learning_rate": 0.00019348809099694675, "loss": 0.9228, "step": 2492 }, { "epoch": 0.5786236509225948, "grad_norm": 0.5247824788093567, "learning_rate": 0.0001934829129157223, "loss": 0.9081, "step": 2493 }, { "epoch": 0.5788557502611118, "grad_norm": 0.45890405774116516, "learning_rate": 0.0001934777328459292, "loss": 0.8611, "step": 2494 }, { "epoch": 0.5790878495996287, "grad_norm": 0.5478378534317017, "learning_rate": 0.00019347255078767768, "loss": 0.9, "step": 2495 }, { "epoch": 0.5793199489381455, "grad_norm": 0.5734007954597473, "learning_rate": 0.00019346736674107792, "loss": 0.8376, "step": 2496 }, { "epoch": 0.5795520482766624, "grad_norm": 0.5085743069648743, "learning_rate": 0.00019346218070624027, "loss": 0.9233, "step": 2497 }, { "epoch": 0.5797841476151793, "grad_norm": 0.514342725276947, "learning_rate": 0.000193456992683275, "loss": 0.8649, "step": 2498 }, { "epoch": 0.5800162469536961, "grad_norm": 0.42478933930397034, "learning_rate": 0.00019345180267229246, "loss": 0.921, "step": 2499 }, { "epoch": 0.5802483462922131, "grad_norm": 0.5099529027938843, "learning_rate": 0.0001934466106734031, "loss": 0.8813, "step": 2500 }, { "epoch": 0.58048044563073, "grad_norm": 0.44463464617729187, "learning_rate": 0.00019344141668671734, "loss": 0.9051, "step": 2501 }, { "epoch": 0.5807125449692468, "grad_norm": 0.471042662858963, "learning_rate": 0.00019343622071234563, "loss": 0.9615, "step": 2502 }, { "epoch": 0.5809446443077637, "grad_norm": 0.5954896211624146, "learning_rate": 0.00019343102275039856, "loss": 0.8794, "step": 2503 }, { "epoch": 0.5811767436462806, "grad_norm": 0.45148149132728577, "learning_rate": 0.00019342582280098666, "loss": 0.9244, "step": 2504 }, { "epoch": 0.5814088429847974, "grad_norm": 0.5119134187698364, "learning_rate": 0.00019342062086422052, "loss": 0.859, "step": 2505 }, { "epoch": 0.5816409423233144, "grad_norm": 0.4735534191131592, "learning_rate": 0.00019341541694021088, "loss": 0.9034, "step": 2506 
}, { "epoch": 0.5818730416618313, "grad_norm": 0.42697054147720337, "learning_rate": 0.00019341021102906836, "loss": 0.8978, "step": 2507 }, { "epoch": 0.5821051410003482, "grad_norm": 0.49720442295074463, "learning_rate": 0.00019340500313090372, "loss": 0.8556, "step": 2508 }, { "epoch": 0.582337240338865, "grad_norm": 0.7829086780548096, "learning_rate": 0.0001933997932458278, "loss": 0.8951, "step": 2509 }, { "epoch": 0.5825693396773819, "grad_norm": 0.5283287763595581, "learning_rate": 0.0001933945813739513, "loss": 0.8734, "step": 2510 }, { "epoch": 0.5828014390158988, "grad_norm": 0.449337899684906, "learning_rate": 0.00019338936751538523, "loss": 0.9273, "step": 2511 }, { "epoch": 0.5830335383544157, "grad_norm": 0.4591444432735443, "learning_rate": 0.00019338415167024042, "loss": 0.9279, "step": 2512 }, { "epoch": 0.5832656376929326, "grad_norm": 0.5248879790306091, "learning_rate": 0.0001933789338386278, "loss": 0.9145, "step": 2513 }, { "epoch": 0.5834977370314495, "grad_norm": 0.4726925492286682, "learning_rate": 0.00019337371402065846, "loss": 0.9002, "step": 2514 }, { "epoch": 0.5837298363699663, "grad_norm": 0.4451372027397156, "learning_rate": 0.00019336849221644333, "loss": 0.8517, "step": 2515 }, { "epoch": 0.5839619357084832, "grad_norm": 0.5344880819320679, "learning_rate": 0.00019336326842609353, "loss": 0.9375, "step": 2516 }, { "epoch": 0.5841940350470001, "grad_norm": 0.49543967843055725, "learning_rate": 0.00019335804264972018, "loss": 0.859, "step": 2517 }, { "epoch": 0.584426134385517, "grad_norm": 0.7066001892089844, "learning_rate": 0.00019335281488743443, "loss": 0.9099, "step": 2518 }, { "epoch": 0.5846582337240339, "grad_norm": 0.5365727543830872, "learning_rate": 0.00019334758513934753, "loss": 0.8581, "step": 2519 }, { "epoch": 0.5848903330625508, "grad_norm": 0.4678250253200531, "learning_rate": 0.0001933423534055707, "loss": 0.8942, "step": 2520 }, { "epoch": 0.5851224324010676, "grad_norm": 0.4834999144077301, "learning_rate": 
0.00019333711968621523, "loss": 0.871, "step": 2521 }, { "epoch": 0.5853545317395845, "grad_norm": 0.48226138949394226, "learning_rate": 0.0001933318839813924, "loss": 0.917, "step": 2522 }, { "epoch": 0.5855866310781014, "grad_norm": 0.44544094800949097, "learning_rate": 0.00019332664629121367, "loss": 0.8911, "step": 2523 }, { "epoch": 0.5858187304166184, "grad_norm": 0.46759527921676636, "learning_rate": 0.00019332140661579042, "loss": 0.9079, "step": 2524 }, { "epoch": 0.5860508297551352, "grad_norm": 0.5230559706687927, "learning_rate": 0.00019331616495523411, "loss": 0.8291, "step": 2525 }, { "epoch": 0.5862829290936521, "grad_norm": 0.4834955334663391, "learning_rate": 0.00019331092130965623, "loss": 0.8504, "step": 2526 }, { "epoch": 0.586515028432169, "grad_norm": 0.4747895300388336, "learning_rate": 0.0001933056756791683, "loss": 0.8836, "step": 2527 }, { "epoch": 0.5867471277706858, "grad_norm": 0.4800885319709778, "learning_rate": 0.00019330042806388198, "loss": 0.905, "step": 2528 }, { "epoch": 0.5869792271092027, "grad_norm": 0.5709804892539978, "learning_rate": 0.0001932951784639088, "loss": 0.8949, "step": 2529 }, { "epoch": 0.5872113264477197, "grad_norm": 0.43790504336357117, "learning_rate": 0.0001932899268793605, "loss": 0.8712, "step": 2530 }, { "epoch": 0.5874434257862365, "grad_norm": 0.5484793186187744, "learning_rate": 0.0001932846733103488, "loss": 0.9598, "step": 2531 }, { "epoch": 0.5876755251247534, "grad_norm": 0.5762323141098022, "learning_rate": 0.00019327941775698545, "loss": 0.9379, "step": 2532 }, { "epoch": 0.5879076244632703, "grad_norm": 0.4360809922218323, "learning_rate": 0.00019327416021938217, "loss": 0.8491, "step": 2533 }, { "epoch": 0.5881397238017871, "grad_norm": 0.521664559841156, "learning_rate": 0.00019326890069765087, "loss": 0.9076, "step": 2534 }, { "epoch": 0.588371823140304, "grad_norm": 0.5274367928504944, "learning_rate": 0.00019326363919190337, "loss": 0.8788, "step": 2535 }, { "epoch": 0.588603922478821, 
"grad_norm": 0.4719744324684143, "learning_rate": 0.0001932583757022517, "loss": 0.8645, "step": 2536 }, { "epoch": 0.5888360218173379, "grad_norm": 0.46467727422714233, "learning_rate": 0.00019325311022880772, "loss": 0.8472, "step": 2537 }, { "epoch": 0.5890681211558547, "grad_norm": 0.5409187078475952, "learning_rate": 0.0001932478427716835, "loss": 0.8893, "step": 2538 }, { "epoch": 0.5893002204943716, "grad_norm": 0.4498489201068878, "learning_rate": 0.00019324257333099104, "loss": 0.8795, "step": 2539 }, { "epoch": 0.5895323198328885, "grad_norm": 0.5297455787658691, "learning_rate": 0.00019323730190684248, "loss": 0.9055, "step": 2540 }, { "epoch": 0.5897644191714053, "grad_norm": 0.5930957794189453, "learning_rate": 0.00019323202849934993, "loss": 0.8802, "step": 2541 }, { "epoch": 0.5899965185099223, "grad_norm": 0.45306596159935, "learning_rate": 0.00019322675310862557, "loss": 0.8764, "step": 2542 }, { "epoch": 0.5902286178484392, "grad_norm": 0.4708379805088043, "learning_rate": 0.00019322147573478163, "loss": 0.9261, "step": 2543 }, { "epoch": 0.590460717186956, "grad_norm": 0.4536955654621124, "learning_rate": 0.00019321619637793032, "loss": 0.8935, "step": 2544 }, { "epoch": 0.5906928165254729, "grad_norm": 0.39047491550445557, "learning_rate": 0.000193210915038184, "loss": 0.8551, "step": 2545 }, { "epoch": 0.5909249158639898, "grad_norm": 0.5154076218605042, "learning_rate": 0.00019320563171565497, "loss": 0.8709, "step": 2546 }, { "epoch": 0.5911570152025066, "grad_norm": 0.4345548748970032, "learning_rate": 0.00019320034641045567, "loss": 0.8806, "step": 2547 }, { "epoch": 0.5913891145410236, "grad_norm": 0.4990389943122864, "learning_rate": 0.00019319505912269847, "loss": 0.8472, "step": 2548 }, { "epoch": 0.5916212138795405, "grad_norm": 0.458088755607605, "learning_rate": 0.0001931897698524959, "loss": 0.9066, "step": 2549 }, { "epoch": 0.5918533132180573, "grad_norm": 0.4653499126434326, "learning_rate": 0.00019318447859996044, "loss": 0.912, 
"step": 2550 }, { "epoch": 0.5920854125565742, "grad_norm": 0.44644612073898315, "learning_rate": 0.00019317918536520463, "loss": 0.8838, "step": 2551 }, { "epoch": 0.5923175118950911, "grad_norm": 0.4809524118900299, "learning_rate": 0.0001931738901483411, "loss": 0.8916, "step": 2552 }, { "epoch": 0.5925496112336079, "grad_norm": 0.4678967297077179, "learning_rate": 0.00019316859294948247, "loss": 0.8939, "step": 2553 }, { "epoch": 0.5927817105721249, "grad_norm": 0.4782308042049408, "learning_rate": 0.00019316329376874145, "loss": 0.8737, "step": 2554 }, { "epoch": 0.5930138099106418, "grad_norm": 0.4958636164665222, "learning_rate": 0.0001931579926062307, "loss": 0.875, "step": 2555 }, { "epoch": 0.5932459092491587, "grad_norm": 0.48781678080558777, "learning_rate": 0.00019315268946206305, "loss": 0.8793, "step": 2556 }, { "epoch": 0.5934780085876755, "grad_norm": 0.4265095591545105, "learning_rate": 0.00019314738433635128, "loss": 0.8711, "step": 2557 }, { "epoch": 0.5937101079261924, "grad_norm": 0.4887530505657196, "learning_rate": 0.0001931420772292083, "loss": 0.8974, "step": 2558 }, { "epoch": 0.5939422072647093, "grad_norm": 0.41965755820274353, "learning_rate": 0.0001931367681407469, "loss": 0.8454, "step": 2559 }, { "epoch": 0.5941743066032262, "grad_norm": 0.4781024754047394, "learning_rate": 0.0001931314570710801, "loss": 0.885, "step": 2560 }, { "epoch": 0.5944064059417431, "grad_norm": 0.46324673295021057, "learning_rate": 0.0001931261440203208, "loss": 0.9391, "step": 2561 }, { "epoch": 0.59463850528026, "grad_norm": 0.4876393675804138, "learning_rate": 0.0001931208289885821, "loss": 0.8848, "step": 2562 }, { "epoch": 0.5948706046187768, "grad_norm": 0.4386286437511444, "learning_rate": 0.00019311551197597696, "loss": 0.8837, "step": 2563 }, { "epoch": 0.5951027039572937, "grad_norm": 1.733683705329895, "learning_rate": 0.00019311019298261863, "loss": 0.8686, "step": 2564 }, { "epoch": 0.5953348032958106, "grad_norm": 0.5382581353187561, 
"learning_rate": 0.0001931048720086201, "loss": 0.9055, "step": 2565 }, { "epoch": 0.5955669026343275, "grad_norm": 0.4845849573612213, "learning_rate": 0.00019309954905409469, "loss": 0.9286, "step": 2566 }, { "epoch": 0.5957990019728444, "grad_norm": 0.45746690034866333, "learning_rate": 0.00019309422411915554, "loss": 0.9294, "step": 2567 }, { "epoch": 0.5960311013113613, "grad_norm": 0.5198777318000793, "learning_rate": 0.00019308889720391596, "loss": 0.8287, "step": 2568 }, { "epoch": 0.5962632006498781, "grad_norm": 0.4783334732055664, "learning_rate": 0.00019308356830848925, "loss": 0.9109, "step": 2569 }, { "epoch": 0.596495299988395, "grad_norm": 0.4724397361278534, "learning_rate": 0.00019307823743298878, "loss": 0.8337, "step": 2570 }, { "epoch": 0.5967273993269119, "grad_norm": 0.5216954946517944, "learning_rate": 0.00019307290457752795, "loss": 0.8712, "step": 2571 }, { "epoch": 0.5969594986654289, "grad_norm": 0.4313892126083374, "learning_rate": 0.00019306756974222017, "loss": 0.8412, "step": 2572 }, { "epoch": 0.5971915980039457, "grad_norm": 0.46765783429145813, "learning_rate": 0.00019306223292717898, "loss": 0.8553, "step": 2573 }, { "epoch": 0.5974236973424626, "grad_norm": 0.499893456697464, "learning_rate": 0.00019305689413251785, "loss": 0.8778, "step": 2574 }, { "epoch": 0.5976557966809795, "grad_norm": 0.5029781460762024, "learning_rate": 0.00019305155335835034, "loss": 0.8913, "step": 2575 }, { "epoch": 0.5978878960194963, "grad_norm": 0.4026431441307068, "learning_rate": 0.0001930462106047901, "loss": 0.8126, "step": 2576 }, { "epoch": 0.5981199953580132, "grad_norm": 0.6298390626907349, "learning_rate": 0.00019304086587195078, "loss": 0.8575, "step": 2577 }, { "epoch": 0.5983520946965302, "grad_norm": 0.503284215927124, "learning_rate": 0.00019303551915994606, "loss": 0.8958, "step": 2578 }, { "epoch": 0.598584194035047, "grad_norm": 0.43492600321769714, "learning_rate": 0.00019303017046888967, "loss": 0.8874, "step": 2579 }, { "epoch": 
0.5988162933735639, "grad_norm": 0.43377459049224854, "learning_rate": 0.0001930248197988954, "loss": 0.8515, "step": 2580 }, { "epoch": 0.5990483927120808, "grad_norm": 0.4960271418094635, "learning_rate": 0.00019301946715007703, "loss": 0.8768, "step": 2581 }, { "epoch": 0.5992804920505976, "grad_norm": 0.51056307554245, "learning_rate": 0.0001930141125225485, "loss": 0.8975, "step": 2582 }, { "epoch": 0.5995125913891145, "grad_norm": 0.48658326268196106, "learning_rate": 0.00019300875591642363, "loss": 0.8724, "step": 2583 }, { "epoch": 0.5997446907276315, "grad_norm": 0.4765193462371826, "learning_rate": 0.00019300339733181642, "loss": 0.8939, "step": 2584 }, { "epoch": 0.5999767900661483, "grad_norm": 0.47788435220718384, "learning_rate": 0.00019299803676884082, "loss": 0.9078, "step": 2585 }, { "epoch": 0.6002088894046652, "grad_norm": 0.4566074311733246, "learning_rate": 0.0001929926742276109, "loss": 0.8967, "step": 2586 }, { "epoch": 0.6004409887431821, "grad_norm": 0.4427584111690521, "learning_rate": 0.0001929873097082407, "loss": 0.8985, "step": 2587 }, { "epoch": 0.600673088081699, "grad_norm": 0.47713688015937805, "learning_rate": 0.00019298194321084436, "loss": 0.8655, "step": 2588 }, { "epoch": 0.6009051874202158, "grad_norm": 0.4358782172203064, "learning_rate": 0.00019297657473553602, "loss": 0.8881, "step": 2589 }, { "epoch": 0.6011372867587328, "grad_norm": 0.5032920241355896, "learning_rate": 0.00019297120428242985, "loss": 0.8769, "step": 2590 }, { "epoch": 0.6013693860972497, "grad_norm": 0.4733133912086487, "learning_rate": 0.00019296583185164014, "loss": 0.8939, "step": 2591 }, { "epoch": 0.6016014854357665, "grad_norm": 0.46265941858291626, "learning_rate": 0.00019296045744328114, "loss": 0.9069, "step": 2592 }, { "epoch": 0.6018335847742834, "grad_norm": 0.4713018834590912, "learning_rate": 0.00019295508105746721, "loss": 0.8938, "step": 2593 }, { "epoch": 0.6020656841128003, "grad_norm": 0.4861757159233093, "learning_rate": 
0.0001929497026943127, "loss": 0.861, "step": 2594 }, { "epoch": 0.6022977834513171, "grad_norm": 0.44694045186042786, "learning_rate": 0.000192944322353932, "loss": 0.8922, "step": 2595 }, { "epoch": 0.6025298827898341, "grad_norm": 0.4278295636177063, "learning_rate": 0.0001929389400364396, "loss": 0.8702, "step": 2596 }, { "epoch": 0.602761982128351, "grad_norm": 0.43469884991645813, "learning_rate": 0.0001929335557419499, "loss": 0.8651, "step": 2597 }, { "epoch": 0.6029940814668678, "grad_norm": 0.4828168451786041, "learning_rate": 0.00019292816947057758, "loss": 0.9003, "step": 2598 }, { "epoch": 0.6032261808053847, "grad_norm": 0.4532548785209656, "learning_rate": 0.00019292278122243705, "loss": 0.8842, "step": 2599 }, { "epoch": 0.6034582801439016, "grad_norm": 0.49631425738334656, "learning_rate": 0.00019291739099764309, "loss": 0.9348, "step": 2600 }, { "epoch": 0.6036903794824184, "grad_norm": 0.5052564144134521, "learning_rate": 0.00019291199879631026, "loss": 0.8634, "step": 2601 }, { "epoch": 0.6039224788209354, "grad_norm": 0.4765855371952057, "learning_rate": 0.0001929066046185533, "loss": 0.8947, "step": 2602 }, { "epoch": 0.6041545781594523, "grad_norm": 0.4979798197746277, "learning_rate": 0.00019290120846448693, "loss": 0.8877, "step": 2603 }, { "epoch": 0.6043866774979691, "grad_norm": 0.5246139168739319, "learning_rate": 0.00019289581033422594, "loss": 0.8508, "step": 2604 }, { "epoch": 0.604618776836486, "grad_norm": 0.4968685805797577, "learning_rate": 0.0001928904102278852, "loss": 0.9123, "step": 2605 }, { "epoch": 0.6048508761750029, "grad_norm": 0.503105640411377, "learning_rate": 0.00019288500814557951, "loss": 0.841, "step": 2606 }, { "epoch": 0.6050829755135197, "grad_norm": 0.4359753727912903, "learning_rate": 0.00019287960408742388, "loss": 0.8668, "step": 2607 }, { "epoch": 0.6053150748520366, "grad_norm": 0.5256127715110779, "learning_rate": 0.00019287419805353318, "loss": 0.8899, "step": 2608 }, { "epoch": 0.6055471741905536, 
"grad_norm": 0.4877070188522339, "learning_rate": 0.00019286879004402243, "loss": 0.8836, "step": 2609 }, { "epoch": 0.6057792735290705, "grad_norm": 0.47836223244667053, "learning_rate": 0.00019286338005900669, "loss": 0.9053, "step": 2610 }, { "epoch": 0.6060113728675873, "grad_norm": 0.5239952206611633, "learning_rate": 0.000192857968098601, "loss": 0.8339, "step": 2611 }, { "epoch": 0.6062434722061042, "grad_norm": 0.49767911434173584, "learning_rate": 0.00019285255416292055, "loss": 0.9298, "step": 2612 }, { "epoch": 0.6064755715446211, "grad_norm": 0.44114670157432556, "learning_rate": 0.00019284713825208044, "loss": 0.9397, "step": 2613 }, { "epoch": 0.6067076708831379, "grad_norm": 0.4767727851867676, "learning_rate": 0.00019284172036619594, "loss": 0.9458, "step": 2614 }, { "epoch": 0.6069397702216549, "grad_norm": 0.45797502994537354, "learning_rate": 0.00019283630050538224, "loss": 0.9004, "step": 2615 }, { "epoch": 0.6071718695601718, "grad_norm": 0.47780996561050415, "learning_rate": 0.00019283087866975464, "loss": 0.893, "step": 2616 }, { "epoch": 0.6074039688986886, "grad_norm": 0.44671744108200073, "learning_rate": 0.0001928254548594285, "loss": 0.9069, "step": 2617 }, { "epoch": 0.6076360682372055, "grad_norm": 0.46249130368232727, "learning_rate": 0.0001928200290745192, "loss": 0.8115, "step": 2618 }, { "epoch": 0.6078681675757224, "grad_norm": 0.5575698614120483, "learning_rate": 0.0001928146013151421, "loss": 0.8629, "step": 2619 }, { "epoch": 0.6081002669142392, "grad_norm": 0.43960994482040405, "learning_rate": 0.00019280917158141274, "loss": 0.8914, "step": 2620 }, { "epoch": 0.6083323662527562, "grad_norm": 0.5342211127281189, "learning_rate": 0.00019280373987344654, "loss": 0.8246, "step": 2621 }, { "epoch": 0.6085644655912731, "grad_norm": 0.4552851617336273, "learning_rate": 0.0001927983061913591, "loss": 0.8493, "step": 2622 }, { "epoch": 0.60879656492979, "grad_norm": 0.48618146777153015, "learning_rate": 0.000192792870535266, "loss": 
0.8903, "step": 2623 }, { "epoch": 0.6090286642683068, "grad_norm": 0.5206015706062317, "learning_rate": 0.00019278743290528284, "loss": 0.8315, "step": 2624 }, { "epoch": 0.6092607636068237, "grad_norm": 0.48439937829971313, "learning_rate": 0.0001927819933015253, "loss": 0.8742, "step": 2625 }, { "epoch": 0.6094928629453406, "grad_norm": 0.45934921503067017, "learning_rate": 0.00019277655172410913, "loss": 0.8834, "step": 2626 }, { "epoch": 0.6097249622838575, "grad_norm": 0.5401334762573242, "learning_rate": 0.00019277110817315002, "loss": 0.8233, "step": 2627 }, { "epoch": 0.6099570616223744, "grad_norm": 0.4678502082824707, "learning_rate": 0.0001927656626487638, "loss": 0.8563, "step": 2628 }, { "epoch": 0.6101891609608913, "grad_norm": 0.5016833543777466, "learning_rate": 0.00019276021515106636, "loss": 0.8723, "step": 2629 }, { "epoch": 0.6104212602994081, "grad_norm": 0.6041736006736755, "learning_rate": 0.00019275476568017346, "loss": 0.9005, "step": 2630 }, { "epoch": 0.610653359637925, "grad_norm": 0.5082993507385254, "learning_rate": 0.00019274931423620109, "loss": 0.9194, "step": 2631 }, { "epoch": 0.6108854589764419, "grad_norm": 0.4329264760017395, "learning_rate": 0.00019274386081926517, "loss": 0.8819, "step": 2632 }, { "epoch": 0.6111175583149588, "grad_norm": 0.46763306856155396, "learning_rate": 0.0001927384054294818, "loss": 0.8978, "step": 2633 }, { "epoch": 0.6113496576534757, "grad_norm": 0.5032113194465637, "learning_rate": 0.00019273294806696696, "loss": 0.8998, "step": 2634 }, { "epoch": 0.6115817569919926, "grad_norm": 0.4557526707649231, "learning_rate": 0.00019272748873183675, "loss": 0.8872, "step": 2635 }, { "epoch": 0.6118138563305094, "grad_norm": 0.46926379203796387, "learning_rate": 0.0001927220274242073, "loss": 0.8921, "step": 2636 }, { "epoch": 0.6120459556690263, "grad_norm": 0.4583445191383362, "learning_rate": 0.00019271656414419471, "loss": 0.9078, "step": 2637 }, { "epoch": 0.6122780550075432, "grad_norm": 
0.42917710542678833, "learning_rate": 0.00019271109889191533, "loss": 0.9239, "step": 2638 }, { "epoch": 0.6125101543460602, "grad_norm": 0.47529730200767517, "learning_rate": 0.00019270563166748537, "loss": 0.8618, "step": 2639 }, { "epoch": 0.612742253684577, "grad_norm": 0.470740407705307, "learning_rate": 0.0001927001624710211, "loss": 0.8401, "step": 2640 }, { "epoch": 0.6129743530230939, "grad_norm": 0.48315200209617615, "learning_rate": 0.00019269469130263884, "loss": 0.9047, "step": 2641 }, { "epoch": 0.6132064523616108, "grad_norm": 0.5029613971710205, "learning_rate": 0.00019268921816245502, "loss": 0.8137, "step": 2642 }, { "epoch": 0.6134385517001276, "grad_norm": 0.4541841149330139, "learning_rate": 0.00019268374305058607, "loss": 0.8904, "step": 2643 }, { "epoch": 0.6136706510386445, "grad_norm": 0.5444797873497009, "learning_rate": 0.0001926782659671484, "loss": 0.8605, "step": 2644 }, { "epoch": 0.6139027503771615, "grad_norm": 0.47851836681365967, "learning_rate": 0.00019267278691225857, "loss": 0.8958, "step": 2645 }, { "epoch": 0.6141348497156783, "grad_norm": 0.4818877577781677, "learning_rate": 0.00019266730588603313, "loss": 0.93, "step": 2646 }, { "epoch": 0.6143669490541952, "grad_norm": 0.4705510139465332, "learning_rate": 0.00019266182288858863, "loss": 0.8784, "step": 2647 }, { "epoch": 0.6145990483927121, "grad_norm": 0.4414081275463104, "learning_rate": 0.00019265633792004175, "loss": 0.8515, "step": 2648 }, { "epoch": 0.6148311477312289, "grad_norm": 0.5356183648109436, "learning_rate": 0.00019265085098050917, "loss": 0.8339, "step": 2649 }, { "epoch": 0.6150632470697458, "grad_norm": 0.4181044101715088, "learning_rate": 0.00019264536207010756, "loss": 0.8296, "step": 2650 }, { "epoch": 0.6152953464082628, "grad_norm": 0.47458040714263916, "learning_rate": 0.0001926398711889537, "loss": 0.8625, "step": 2651 }, { "epoch": 0.6155274457467796, "grad_norm": 0.5018423199653625, "learning_rate": 0.00019263437833716443, "loss": 0.8719, 
"step": 2652 }, { "epoch": 0.6157595450852965, "grad_norm": 0.47671252489089966, "learning_rate": 0.00019262888351485652, "loss": 0.8585, "step": 2653 }, { "epoch": 0.6159916444238134, "grad_norm": 0.47836175560951233, "learning_rate": 0.00019262338672214693, "loss": 0.8632, "step": 2654 }, { "epoch": 0.6162237437623302, "grad_norm": 0.4300893545150757, "learning_rate": 0.0001926178879591525, "loss": 0.825, "step": 2655 }, { "epoch": 0.6164558431008471, "grad_norm": 0.44675567746162415, "learning_rate": 0.00019261238722599033, "loss": 0.8549, "step": 2656 }, { "epoch": 0.6166879424393641, "grad_norm": 0.5337309837341309, "learning_rate": 0.00019260688452277732, "loss": 0.8879, "step": 2657 }, { "epoch": 0.616920041777881, "grad_norm": 0.5016878843307495, "learning_rate": 0.00019260137984963057, "loss": 0.8766, "step": 2658 }, { "epoch": 0.6171521411163978, "grad_norm": 0.4398707449436188, "learning_rate": 0.00019259587320666718, "loss": 0.8654, "step": 2659 }, { "epoch": 0.6173842404549147, "grad_norm": 0.4730830788612366, "learning_rate": 0.00019259036459400426, "loss": 0.8768, "step": 2660 }, { "epoch": 0.6176163397934316, "grad_norm": 0.4627930819988251, "learning_rate": 0.000192584854011759, "loss": 0.8725, "step": 2661 }, { "epoch": 0.6178484391319484, "grad_norm": 0.45593133568763733, "learning_rate": 0.00019257934146004865, "loss": 0.8959, "step": 2662 }, { "epoch": 0.6180805384704654, "grad_norm": 0.5043888688087463, "learning_rate": 0.0001925738269389904, "loss": 0.8734, "step": 2663 }, { "epoch": 0.6183126378089823, "grad_norm": 0.4750177562236786, "learning_rate": 0.00019256831044870163, "loss": 0.8722, "step": 2664 }, { "epoch": 0.6185447371474991, "grad_norm": 0.4864463806152344, "learning_rate": 0.00019256279198929965, "loss": 0.8528, "step": 2665 }, { "epoch": 0.618776836486016, "grad_norm": 0.45510080456733704, "learning_rate": 0.00019255727156090185, "loss": 0.8853, "step": 2666 }, { "epoch": 0.6190089358245329, "grad_norm": 0.496878445148468, 
"learning_rate": 0.00019255174916362567, "loss": 0.8334, "step": 2667 }, { "epoch": 0.6192410351630497, "grad_norm": 0.4975915253162384, "learning_rate": 0.00019254622479758862, "loss": 0.8899, "step": 2668 }, { "epoch": 0.6194731345015667, "grad_norm": 0.4611932635307312, "learning_rate": 0.00019254069846290814, "loss": 0.8397, "step": 2669 }, { "epoch": 0.6197052338400836, "grad_norm": 0.4994605779647827, "learning_rate": 0.00019253517015970184, "loss": 0.9216, "step": 2670 }, { "epoch": 0.6199373331786004, "grad_norm": 0.5236203074455261, "learning_rate": 0.00019252963988808727, "loss": 0.867, "step": 2671 }, { "epoch": 0.6201694325171173, "grad_norm": 0.46661072969436646, "learning_rate": 0.0001925241076481821, "loss": 0.8598, "step": 2672 }, { "epoch": 0.6204015318556342, "grad_norm": 0.5368458032608032, "learning_rate": 0.00019251857344010405, "loss": 0.9138, "step": 2673 }, { "epoch": 0.620633631194151, "grad_norm": 0.5046195387840271, "learning_rate": 0.00019251303726397078, "loss": 0.852, "step": 2674 }, { "epoch": 0.620865730532668, "grad_norm": 0.4611165225505829, "learning_rate": 0.00019250749911990008, "loss": 0.893, "step": 2675 }, { "epoch": 0.6210978298711849, "grad_norm": 0.4981367886066437, "learning_rate": 0.00019250195900800973, "loss": 0.8625, "step": 2676 }, { "epoch": 0.6213299292097018, "grad_norm": 0.5488566756248474, "learning_rate": 0.00019249641692841763, "loss": 0.9306, "step": 2677 }, { "epoch": 0.6215620285482186, "grad_norm": 0.4605712592601776, "learning_rate": 0.00019249087288124165, "loss": 0.9136, "step": 2678 }, { "epoch": 0.6217941278867355, "grad_norm": 0.5570294857025146, "learning_rate": 0.00019248532686659973, "loss": 0.857, "step": 2679 }, { "epoch": 0.6220262272252524, "grad_norm": 0.4256240725517273, "learning_rate": 0.00019247977888460982, "loss": 0.8776, "step": 2680 }, { "epoch": 0.6222583265637693, "grad_norm": 0.4876495897769928, "learning_rate": 0.00019247422893538997, "loss": 0.8898, "step": 2681 }, { "epoch": 
0.6224904259022862, "grad_norm": 0.48194774985313416, "learning_rate": 0.0001924686770190582, "loss": 0.8153, "step": 2682 }, { "epoch": 0.6227225252408031, "grad_norm": 0.46095573902130127, "learning_rate": 0.00019246312313573265, "loss": 0.8922, "step": 2683 }, { "epoch": 0.6229546245793199, "grad_norm": 0.5336556434631348, "learning_rate": 0.00019245756728553145, "loss": 0.9138, "step": 2684 }, { "epoch": 0.6231867239178368, "grad_norm": 0.4394529163837433, "learning_rate": 0.00019245200946857276, "loss": 0.8961, "step": 2685 }, { "epoch": 0.6234188232563537, "grad_norm": 0.44539985060691833, "learning_rate": 0.0001924464496849748, "loss": 0.8989, "step": 2686 }, { "epoch": 0.6236509225948706, "grad_norm": 0.5228295922279358, "learning_rate": 0.0001924408879348559, "loss": 0.9178, "step": 2687 }, { "epoch": 0.6238830219333875, "grad_norm": 0.43397727608680725, "learning_rate": 0.00019243532421833433, "loss": 0.8734, "step": 2688 }, { "epoch": 0.6241151212719044, "grad_norm": 0.4830508828163147, "learning_rate": 0.0001924297585355284, "loss": 0.9086, "step": 2689 }, { "epoch": 0.6243472206104212, "grad_norm": 0.4932643175125122, "learning_rate": 0.00019242419088655657, "loss": 0.8714, "step": 2690 }, { "epoch": 0.6245793199489381, "grad_norm": 0.48464274406433105, "learning_rate": 0.00019241862127153728, "loss": 0.9197, "step": 2691 }, { "epoch": 0.624811419287455, "grad_norm": 0.47309547662734985, "learning_rate": 0.00019241304969058894, "loss": 0.8684, "step": 2692 }, { "epoch": 0.625043518625972, "grad_norm": 0.4778303802013397, "learning_rate": 0.0001924074761438301, "loss": 0.8549, "step": 2693 }, { "epoch": 0.6252756179644888, "grad_norm": 0.42031317949295044, "learning_rate": 0.00019240190063137935, "loss": 0.8302, "step": 2694 }, { "epoch": 0.6255077173030057, "grad_norm": 0.4497253894805908, "learning_rate": 0.00019239632315335526, "loss": 0.8674, "step": 2695 }, { "epoch": 0.6257398166415226, "grad_norm": 0.4734026789665222, "learning_rate": 
0.0001923907437098765, "loss": 0.8829, "step": 2696 }, { "epoch": 0.6259719159800394, "grad_norm": 0.4518885612487793, "learning_rate": 0.00019238516230106172, "loss": 0.8421, "step": 2697 }, { "epoch": 0.6262040153185563, "grad_norm": 0.4409095048904419, "learning_rate": 0.00019237957892702967, "loss": 0.9108, "step": 2698 }, { "epoch": 0.6264361146570733, "grad_norm": 0.4821644425392151, "learning_rate": 0.00019237399358789913, "loss": 0.8882, "step": 2699 }, { "epoch": 0.6266682139955901, "grad_norm": 0.5023102760314941, "learning_rate": 0.00019236840628378884, "loss": 0.8724, "step": 2700 }, { "epoch": 0.626900313334107, "grad_norm": 0.42386749386787415, "learning_rate": 0.00019236281701481776, "loss": 0.849, "step": 2701 }, { "epoch": 0.6271324126726239, "grad_norm": 0.5999467372894287, "learning_rate": 0.00019235722578110474, "loss": 0.8619, "step": 2702 }, { "epoch": 0.6273645120111407, "grad_norm": 0.5176237225532532, "learning_rate": 0.0001923516325827687, "loss": 0.9178, "step": 2703 }, { "epoch": 0.6275966113496576, "grad_norm": 0.4874061048030853, "learning_rate": 0.00019234603741992862, "loss": 0.87, "step": 2704 }, { "epoch": 0.6278287106881746, "grad_norm": 0.5077924728393555, "learning_rate": 0.00019234044029270355, "loss": 0.9247, "step": 2705 }, { "epoch": 0.6280608100266915, "grad_norm": 0.43726372718811035, "learning_rate": 0.00019233484120121253, "loss": 0.8059, "step": 2706 }, { "epoch": 0.6282929093652083, "grad_norm": 0.4451414942741394, "learning_rate": 0.00019232924014557466, "loss": 0.8689, "step": 2707 }, { "epoch": 0.6285250087037252, "grad_norm": 0.42669329047203064, "learning_rate": 0.00019232363712590907, "loss": 0.8586, "step": 2708 }, { "epoch": 0.628757108042242, "grad_norm": 0.4634314477443695, "learning_rate": 0.00019231803214233501, "loss": 0.8755, "step": 2709 }, { "epoch": 0.6289892073807589, "grad_norm": 0.47543099522590637, "learning_rate": 0.00019231242519497164, "loss": 0.8594, "step": 2710 }, { "epoch": 
0.6292213067192759, "grad_norm": 0.48976650834083557, "learning_rate": 0.00019230681628393828, "loss": 0.8589, "step": 2711 }, { "epoch": 0.6294534060577928, "grad_norm": 0.4141114056110382, "learning_rate": 0.0001923012054093542, "loss": 0.8818, "step": 2712 }, { "epoch": 0.6296855053963096, "grad_norm": 0.5422409176826477, "learning_rate": 0.00019229559257133883, "loss": 0.8619, "step": 2713 }, { "epoch": 0.6299176047348265, "grad_norm": 0.44097277522087097, "learning_rate": 0.00019228997777001144, "loss": 0.8856, "step": 2714 }, { "epoch": 0.6301497040733434, "grad_norm": 0.4885256290435791, "learning_rate": 0.0001922843610054916, "loss": 0.9123, "step": 2715 }, { "epoch": 0.6303818034118602, "grad_norm": 0.4975029528141022, "learning_rate": 0.0001922787422778987, "loss": 0.8472, "step": 2716 }, { "epoch": 0.6306139027503772, "grad_norm": 0.4896419644355774, "learning_rate": 0.0001922731215873523, "loss": 0.8805, "step": 2717 }, { "epoch": 0.6308460020888941, "grad_norm": 0.5261626243591309, "learning_rate": 0.00019226749893397194, "loss": 0.9309, "step": 2718 }, { "epoch": 0.6310781014274109, "grad_norm": 0.4647147059440613, "learning_rate": 0.0001922618743178773, "loss": 0.8717, "step": 2719 }, { "epoch": 0.6313102007659278, "grad_norm": 0.48772215843200684, "learning_rate": 0.0001922562477391879, "loss": 0.869, "step": 2720 }, { "epoch": 0.6315423001044447, "grad_norm": 0.46886134147644043, "learning_rate": 0.00019225061919802354, "loss": 0.9183, "step": 2721 }, { "epoch": 0.6317743994429615, "grad_norm": 0.50131756067276, "learning_rate": 0.0001922449886945039, "loss": 0.8559, "step": 2722 }, { "epoch": 0.6320064987814785, "grad_norm": 0.48354411125183105, "learning_rate": 0.00019223935622874873, "loss": 0.8534, "step": 2723 }, { "epoch": 0.6322385981199954, "grad_norm": 0.5166534781455994, "learning_rate": 0.0001922337218008779, "loss": 0.9492, "step": 2724 }, { "epoch": 0.6324706974585123, "grad_norm": 0.5398663878440857, "learning_rate": 
0.00019222808541101123, "loss": 0.8668, "step": 2725 }, { "epoch": 0.6327027967970291, "grad_norm": 0.45771121978759766, "learning_rate": 0.00019222244705926865, "loss": 0.9061, "step": 2726 }, { "epoch": 0.632934896135546, "grad_norm": 0.46169978380203247, "learning_rate": 0.00019221680674577006, "loss": 0.9414, "step": 2727 }, { "epoch": 0.6331669954740629, "grad_norm": 0.5081409811973572, "learning_rate": 0.00019221116447063543, "loss": 0.827, "step": 2728 }, { "epoch": 0.6333990948125798, "grad_norm": 0.46072274446487427, "learning_rate": 0.00019220552023398483, "loss": 0.8579, "step": 2729 }, { "epoch": 0.6336311941510967, "grad_norm": 0.5351796746253967, "learning_rate": 0.00019219987403593832, "loss": 0.9124, "step": 2730 }, { "epoch": 0.6338632934896136, "grad_norm": 0.4696336090564728, "learning_rate": 0.00019219422587661596, "loss": 0.8945, "step": 2731 }, { "epoch": 0.6340953928281304, "grad_norm": 0.49669718742370605, "learning_rate": 0.00019218857575613793, "loss": 0.8729, "step": 2732 }, { "epoch": 0.6343274921666473, "grad_norm": 0.49306586384773254, "learning_rate": 0.00019218292367462441, "loss": 0.8514, "step": 2733 }, { "epoch": 0.6345595915051642, "grad_norm": 0.4284917116165161, "learning_rate": 0.00019217726963219567, "loss": 0.9057, "step": 2734 }, { "epoch": 0.6347916908436811, "grad_norm": 0.5439285635948181, "learning_rate": 0.00019217161362897192, "loss": 0.8711, "step": 2735 }, { "epoch": 0.635023790182198, "grad_norm": 0.4630812704563141, "learning_rate": 0.00019216595566507353, "loss": 0.8634, "step": 2736 }, { "epoch": 0.6352558895207149, "grad_norm": 0.5656422972679138, "learning_rate": 0.0001921602957406208, "loss": 0.8436, "step": 2737 }, { "epoch": 0.6354879888592317, "grad_norm": 0.5454251766204834, "learning_rate": 0.00019215463385573417, "loss": 0.8362, "step": 2738 }, { "epoch": 0.6357200881977486, "grad_norm": 0.46193239092826843, "learning_rate": 0.00019214897001053406, "loss": 0.8779, "step": 2739 }, { "epoch": 
0.6359521875362655, "grad_norm": 0.5004550218582153, "learning_rate": 0.00019214330420514098, "loss": 0.8598, "step": 2740 }, { "epoch": 0.6361842868747825, "grad_norm": 0.4910329282283783, "learning_rate": 0.00019213763643967542, "loss": 0.9131, "step": 2741 }, { "epoch": 0.6364163862132993, "grad_norm": 0.4547365605831146, "learning_rate": 0.00019213196671425794, "loss": 0.8727, "step": 2742 }, { "epoch": 0.6366484855518162, "grad_norm": 0.45139214396476746, "learning_rate": 0.0001921262950290092, "loss": 0.8914, "step": 2743 }, { "epoch": 0.6368805848903331, "grad_norm": 0.442780077457428, "learning_rate": 0.0001921206213840498, "loss": 0.8891, "step": 2744 }, { "epoch": 0.6371126842288499, "grad_norm": 0.5090554356575012, "learning_rate": 0.0001921149457795004, "loss": 0.9081, "step": 2745 }, { "epoch": 0.6373447835673668, "grad_norm": 0.43977823853492737, "learning_rate": 0.00019210926821548182, "loss": 0.8782, "step": 2746 }, { "epoch": 0.6375768829058838, "grad_norm": 0.44503676891326904, "learning_rate": 0.00019210358869211475, "loss": 0.9273, "step": 2747 }, { "epoch": 0.6378089822444006, "grad_norm": 0.47687196731567383, "learning_rate": 0.00019209790720952009, "loss": 0.8473, "step": 2748 }, { "epoch": 0.6380410815829175, "grad_norm": 0.4625609815120697, "learning_rate": 0.0001920922237678186, "loss": 0.8701, "step": 2749 }, { "epoch": 0.6382731809214344, "grad_norm": 0.43450161814689636, "learning_rate": 0.00019208653836713126, "loss": 0.9102, "step": 2750 }, { "epoch": 0.6385052802599512, "grad_norm": 0.40365535020828247, "learning_rate": 0.00019208085100757893, "loss": 0.8971, "step": 2751 }, { "epoch": 0.6387373795984681, "grad_norm": 0.4559798240661621, "learning_rate": 0.00019207516168928267, "loss": 0.8676, "step": 2752 }, { "epoch": 0.6389694789369851, "grad_norm": 0.41150403022766113, "learning_rate": 0.00019206947041236347, "loss": 0.8732, "step": 2753 }, { "epoch": 0.639201578275502, "grad_norm": 0.5155861377716064, "learning_rate": 
0.00019206377717694237, "loss": 0.8503, "step": 2754 }, { "epoch": 0.6394336776140188, "grad_norm": 0.45946455001831055, "learning_rate": 0.00019205808198314051, "loss": 0.8976, "step": 2755 }, { "epoch": 0.6396657769525357, "grad_norm": 0.5377851128578186, "learning_rate": 0.00019205238483107907, "loss": 0.8559, "step": 2756 }, { "epoch": 0.6398978762910525, "grad_norm": 0.4723004698753357, "learning_rate": 0.00019204668572087916, "loss": 0.8368, "step": 2757 }, { "epoch": 0.6401299756295694, "grad_norm": 0.4599927067756653, "learning_rate": 0.00019204098465266204, "loss": 0.8133, "step": 2758 }, { "epoch": 0.6403620749680864, "grad_norm": 0.5282870531082153, "learning_rate": 0.00019203528162654903, "loss": 0.8467, "step": 2759 }, { "epoch": 0.6405941743066033, "grad_norm": 0.47011449933052063, "learning_rate": 0.00019202957664266136, "loss": 0.8831, "step": 2760 }, { "epoch": 0.6408262736451201, "grad_norm": 0.4749070107936859, "learning_rate": 0.00019202386970112046, "loss": 0.8742, "step": 2761 }, { "epoch": 0.641058372983637, "grad_norm": 0.4641600549221039, "learning_rate": 0.00019201816080204772, "loss": 0.9505, "step": 2762 }, { "epoch": 0.6412904723221539, "grad_norm": 0.48399174213409424, "learning_rate": 0.0001920124499455646, "loss": 0.8495, "step": 2763 }, { "epoch": 0.6415225716606707, "grad_norm": 0.43020349740982056, "learning_rate": 0.00019200673713179245, "loss": 0.8617, "step": 2764 }, { "epoch": 0.6417546709991877, "grad_norm": 0.426265150308609, "learning_rate": 0.00019200102236085294, "loss": 0.8659, "step": 2765 }, { "epoch": 0.6419867703377046, "grad_norm": 0.4647378623485565, "learning_rate": 0.00019199530563286757, "loss": 0.858, "step": 2766 }, { "epoch": 0.6422188696762214, "grad_norm": 0.4733147919178009, "learning_rate": 0.00019198958694795795, "loss": 0.8733, "step": 2767 }, { "epoch": 0.6424509690147383, "grad_norm": 0.44017985463142395, "learning_rate": 0.00019198386630624574, "loss": 0.9048, "step": 2768 }, { "epoch": 
0.6426830683532552, "grad_norm": 0.46815598011016846, "learning_rate": 0.00019197814370785262, "loss": 0.8961, "step": 2769 }, { "epoch": 0.642915167691772, "grad_norm": 0.4678772985935211, "learning_rate": 0.00019197241915290037, "loss": 0.8234, "step": 2770 }, { "epoch": 0.643147267030289, "grad_norm": 0.4570580720901489, "learning_rate": 0.00019196669264151067, "loss": 0.8558, "step": 2771 }, { "epoch": 0.6433793663688059, "grad_norm": 0.5768052339553833, "learning_rate": 0.0001919609641738054, "loss": 0.8675, "step": 2772 }, { "epoch": 0.6436114657073227, "grad_norm": 0.42644062638282776, "learning_rate": 0.0001919552337499064, "loss": 0.8887, "step": 2773 }, { "epoch": 0.6438435650458396, "grad_norm": 0.44646957516670227, "learning_rate": 0.00019194950136993555, "loss": 0.8733, "step": 2774 }, { "epoch": 0.6440756643843565, "grad_norm": 0.4836980402469635, "learning_rate": 0.0001919437670340148, "loss": 0.9244, "step": 2775 }, { "epoch": 0.6443077637228734, "grad_norm": 0.4405345320701599, "learning_rate": 0.00019193803074226619, "loss": 0.9148, "step": 2776 }, { "epoch": 0.6445398630613903, "grad_norm": 0.46738266944885254, "learning_rate": 0.00019193229249481167, "loss": 0.8511, "step": 2777 }, { "epoch": 0.6447719623999072, "grad_norm": 0.469083309173584, "learning_rate": 0.0001919265522917733, "loss": 0.9086, "step": 2778 }, { "epoch": 0.6450040617384241, "grad_norm": 0.45336636900901794, "learning_rate": 0.00019192081013327323, "loss": 0.9188, "step": 2779 }, { "epoch": 0.6452361610769409, "grad_norm": 0.4742618501186371, "learning_rate": 0.00019191506601943357, "loss": 0.8729, "step": 2780 }, { "epoch": 0.6454682604154578, "grad_norm": 0.4318464696407318, "learning_rate": 0.00019190931995037653, "loss": 0.8678, "step": 2781 }, { "epoch": 0.6457003597539747, "grad_norm": 0.480639785528183, "learning_rate": 0.0001919035719262243, "loss": 0.8621, "step": 2782 }, { "epoch": 0.6459324590924916, "grad_norm": 0.46810439229011536, "learning_rate": 
0.00019189782194709926, "loss": 0.8648, "step": 2783 }, { "epoch": 0.6461645584310085, "grad_norm": 0.4420754313468933, "learning_rate": 0.00019189207001312362, "loss": 0.9197, "step": 2784 }, { "epoch": 0.6463966577695254, "grad_norm": 0.5105909705162048, "learning_rate": 0.00019188631612441977, "loss": 0.8631, "step": 2785 }, { "epoch": 0.6466287571080422, "grad_norm": 0.45106253027915955, "learning_rate": 0.00019188056028111007, "loss": 0.8581, "step": 2786 }, { "epoch": 0.6468608564465591, "grad_norm": 0.4595653712749481, "learning_rate": 0.00019187480248331702, "loss": 0.8576, "step": 2787 }, { "epoch": 0.647092955785076, "grad_norm": 0.47918906807899475, "learning_rate": 0.00019186904273116307, "loss": 0.8643, "step": 2788 }, { "epoch": 0.647325055123593, "grad_norm": 0.43501511216163635, "learning_rate": 0.00019186328102477074, "loss": 0.832, "step": 2789 }, { "epoch": 0.6475571544621098, "grad_norm": 0.4904051423072815, "learning_rate": 0.00019185751736426257, "loss": 0.8554, "step": 2790 }, { "epoch": 0.6477892538006267, "grad_norm": 0.4456624984741211, "learning_rate": 0.00019185175174976123, "loss": 0.9171, "step": 2791 }, { "epoch": 0.6480213531391436, "grad_norm": 0.468327134847641, "learning_rate": 0.00019184598418138928, "loss": 0.8595, "step": 2792 }, { "epoch": 0.6482534524776604, "grad_norm": 0.4097321033477783, "learning_rate": 0.00019184021465926948, "loss": 0.8999, "step": 2793 }, { "epoch": 0.6484855518161773, "grad_norm": 0.41541096568107605, "learning_rate": 0.00019183444318352457, "loss": 0.8764, "step": 2794 }, { "epoch": 0.6487176511546943, "grad_norm": 0.4475294351577759, "learning_rate": 0.00019182866975427722, "loss": 0.8862, "step": 2795 }, { "epoch": 0.6489497504932111, "grad_norm": 0.4381924867630005, "learning_rate": 0.00019182289437165036, "loss": 0.8635, "step": 2796 }, { "epoch": 0.649181849831728, "grad_norm": 0.4176989793777466, "learning_rate": 0.00019181711703576676, "loss": 0.8832, "step": 2797 }, { "epoch": 
0.6494139491702449, "grad_norm": 0.50672447681427, "learning_rate": 0.00019181133774674936, "loss": 0.8474, "step": 2798 }, { "epoch": 0.6496460485087617, "grad_norm": 0.4565121829509735, "learning_rate": 0.00019180555650472107, "loss": 0.8622, "step": 2799 }, { "epoch": 0.6498781478472786, "grad_norm": 0.5207083225250244, "learning_rate": 0.00019179977330980487, "loss": 0.8965, "step": 2800 }, { "epoch": 0.6501102471857956, "grad_norm": 0.4750087857246399, "learning_rate": 0.00019179398816212382, "loss": 0.9313, "step": 2801 }, { "epoch": 0.6503423465243124, "grad_norm": 0.4137982130050659, "learning_rate": 0.00019178820106180094, "loss": 0.8269, "step": 2802 }, { "epoch": 0.6505744458628293, "grad_norm": 0.48815712332725525, "learning_rate": 0.00019178241200895935, "loss": 0.8957, "step": 2803 }, { "epoch": 0.6508065452013462, "grad_norm": 0.5212056636810303, "learning_rate": 0.0001917766210037222, "loss": 0.826, "step": 2804 }, { "epoch": 0.651038644539863, "grad_norm": 0.46167051792144775, "learning_rate": 0.0001917708280462126, "loss": 0.9014, "step": 2805 }, { "epoch": 0.6512707438783799, "grad_norm": 0.47623032331466675, "learning_rate": 0.00019176503313655393, "loss": 0.8882, "step": 2806 }, { "epoch": 0.6515028432168968, "grad_norm": 0.46180862188339233, "learning_rate": 0.00019175923627486936, "loss": 0.9117, "step": 2807 }, { "epoch": 0.6517349425554138, "grad_norm": 0.4569379687309265, "learning_rate": 0.0001917534374612822, "loss": 0.8399, "step": 2808 }, { "epoch": 0.6519670418939306, "grad_norm": 0.42162245512008667, "learning_rate": 0.00019174763669591583, "loss": 0.8652, "step": 2809 }, { "epoch": 0.6521991412324475, "grad_norm": 0.4374902546405792, "learning_rate": 0.0001917418339788936, "loss": 0.895, "step": 2810 }, { "epoch": 0.6524312405709644, "grad_norm": 0.4497464895248413, "learning_rate": 0.000191736029310339, "loss": 0.8953, "step": 2811 }, { "epoch": 0.6526633399094812, "grad_norm": 0.4323320686817169, "learning_rate": 
0.00019173022269037548, "loss": 0.8703, "step": 2812 }, { "epoch": 0.6528954392479981, "grad_norm": 0.45908528566360474, "learning_rate": 0.00019172441411912657, "loss": 0.8765, "step": 2813 }, { "epoch": 0.6531275385865151, "grad_norm": 0.41703182458877563, "learning_rate": 0.00019171860359671583, "loss": 0.8681, "step": 2814 }, { "epoch": 0.6533596379250319, "grad_norm": 0.45060259103775024, "learning_rate": 0.00019171279112326683, "loss": 0.8919, "step": 2815 }, { "epoch": 0.6535917372635488, "grad_norm": 0.4701296389102936, "learning_rate": 0.00019170697669890324, "loss": 0.8749, "step": 2816 }, { "epoch": 0.6538238366020657, "grad_norm": 0.4668188691139221, "learning_rate": 0.00019170116032374876, "loss": 0.8601, "step": 2817 }, { "epoch": 0.6540559359405825, "grad_norm": 0.42963141202926636, "learning_rate": 0.0001916953419979271, "loss": 0.884, "step": 2818 }, { "epoch": 0.6542880352790994, "grad_norm": 0.5206764340400696, "learning_rate": 0.00019168952172156202, "loss": 0.8831, "step": 2819 }, { "epoch": 0.6545201346176164, "grad_norm": 0.4822680652141571, "learning_rate": 0.0001916836994947773, "loss": 0.8141, "step": 2820 }, { "epoch": 0.6547522339561332, "grad_norm": 0.44132062792778015, "learning_rate": 0.00019167787531769684, "loss": 0.8837, "step": 2821 }, { "epoch": 0.6549843332946501, "grad_norm": 0.47267404198646545, "learning_rate": 0.00019167204919044451, "loss": 0.9059, "step": 2822 }, { "epoch": 0.655216432633167, "grad_norm": 0.4189220070838928, "learning_rate": 0.00019166622111314426, "loss": 0.8696, "step": 2823 }, { "epoch": 0.6554485319716838, "grad_norm": 0.41616180539131165, "learning_rate": 0.0001916603910859201, "loss": 0.8296, "step": 2824 }, { "epoch": 0.6556806313102007, "grad_norm": 0.4162457287311554, "learning_rate": 0.00019165455910889593, "loss": 0.8204, "step": 2825 }, { "epoch": 0.6559127306487177, "grad_norm": 0.4778987467288971, "learning_rate": 0.0001916487251821959, "loss": 0.8528, "step": 2826 }, { "epoch": 
0.6561448299872346, "grad_norm": 0.4973873198032379, "learning_rate": 0.0001916428893059441, "loss": 0.8403, "step": 2827 }, { "epoch": 0.6563769293257514, "grad_norm": 0.4930678904056549, "learning_rate": 0.00019163705148026464, "loss": 0.8223, "step": 2828 }, { "epoch": 0.6566090286642683, "grad_norm": 0.44355422258377075, "learning_rate": 0.00019163121170528175, "loss": 0.8361, "step": 2829 }, { "epoch": 0.6568411280027852, "grad_norm": 0.45476454496383667, "learning_rate": 0.0001916253699811196, "loss": 0.8712, "step": 2830 }, { "epoch": 0.657073227341302, "grad_norm": 0.4533182382583618, "learning_rate": 0.00019161952630790248, "loss": 0.8984, "step": 2831 }, { "epoch": 0.657305326679819, "grad_norm": 0.4435712695121765, "learning_rate": 0.0001916136806857547, "loss": 0.8294, "step": 2832 }, { "epoch": 0.6575374260183359, "grad_norm": 0.5167298316955566, "learning_rate": 0.00019160783311480061, "loss": 0.9074, "step": 2833 }, { "epoch": 0.6577695253568527, "grad_norm": 0.48255985975265503, "learning_rate": 0.00019160198359516456, "loss": 0.8771, "step": 2834 }, { "epoch": 0.6580016246953696, "grad_norm": 0.49954113364219666, "learning_rate": 0.00019159613212697108, "loss": 0.837, "step": 2835 }, { "epoch": 0.6582337240338865, "grad_norm": 0.45875173807144165, "learning_rate": 0.00019159027871034452, "loss": 0.9007, "step": 2836 }, { "epoch": 0.6584658233724033, "grad_norm": 0.4180905818939209, "learning_rate": 0.00019158442334540947, "loss": 0.9139, "step": 2837 }, { "epoch": 0.6586979227109203, "grad_norm": 0.492866188287735, "learning_rate": 0.00019157856603229048, "loss": 0.8481, "step": 2838 }, { "epoch": 0.6589300220494372, "grad_norm": 0.45765408873558044, "learning_rate": 0.0001915727067711121, "loss": 0.8913, "step": 2839 }, { "epoch": 0.659162121387954, "grad_norm": 0.4523009657859802, "learning_rate": 0.00019156684556199903, "loss": 0.8815, "step": 2840 }, { "epoch": 0.6593942207264709, "grad_norm": 0.463329941034317, "learning_rate": 
0.00019156098240507592, "loss": 0.8844, "step": 2841 }, { "epoch": 0.6596263200649878, "grad_norm": 0.4301539957523346, "learning_rate": 0.00019155511730046748, "loss": 0.8209, "step": 2842 }, { "epoch": 0.6598584194035046, "grad_norm": 0.4687608480453491, "learning_rate": 0.0001915492502482985, "loss": 0.8791, "step": 2843 }, { "epoch": 0.6600905187420216, "grad_norm": 0.46065258979797363, "learning_rate": 0.00019154338124869377, "loss": 0.8791, "step": 2844 }, { "epoch": 0.6603226180805385, "grad_norm": 0.4436477720737457, "learning_rate": 0.0001915375103017781, "loss": 0.879, "step": 2845 }, { "epoch": 0.6605547174190554, "grad_norm": 0.4415607750415802, "learning_rate": 0.0001915316374076764, "loss": 0.8601, "step": 2846 }, { "epoch": 0.6607868167575722, "grad_norm": 0.46711909770965576, "learning_rate": 0.00019152576256651366, "loss": 0.8796, "step": 2847 }, { "epoch": 0.6610189160960891, "grad_norm": 0.4268472194671631, "learning_rate": 0.0001915198857784148, "loss": 0.8689, "step": 2848 }, { "epoch": 0.661251015434606, "grad_norm": 0.3973580002784729, "learning_rate": 0.0001915140070435048, "loss": 0.8466, "step": 2849 }, { "epoch": 0.6614831147731229, "grad_norm": 0.4282270669937134, "learning_rate": 0.00019150812636190874, "loss": 0.8451, "step": 2850 }, { "epoch": 0.6617152141116398, "grad_norm": 0.45293116569519043, "learning_rate": 0.00019150224373375174, "loss": 0.9152, "step": 2851 }, { "epoch": 0.6619473134501567, "grad_norm": 0.49578094482421875, "learning_rate": 0.00019149635915915889, "loss": 0.8429, "step": 2852 }, { "epoch": 0.6621794127886735, "grad_norm": 0.45070314407348633, "learning_rate": 0.00019149047263825538, "loss": 0.829, "step": 2853 }, { "epoch": 0.6624115121271904, "grad_norm": 0.44752323627471924, "learning_rate": 0.00019148458417116645, "loss": 0.874, "step": 2854 }, { "epoch": 0.6626436114657073, "grad_norm": 0.4903758466243744, "learning_rate": 0.00019147869375801734, "loss": 0.8787, "step": 2855 }, { "epoch": 
0.6628757108042242, "grad_norm": 0.43119940161705017, "learning_rate": 0.00019147280139893337, "loss": 0.8978, "step": 2856 }, { "epoch": 0.6631078101427411, "grad_norm": 0.5306719541549683, "learning_rate": 0.00019146690709403988, "loss": 0.9067, "step": 2857 }, { "epoch": 0.663339909481258, "grad_norm": 0.45615947246551514, "learning_rate": 0.0001914610108434622, "loss": 0.839, "step": 2858 }, { "epoch": 0.6635720088197749, "grad_norm": 0.4449672996997833, "learning_rate": 0.00019145511264732584, "loss": 0.8675, "step": 2859 }, { "epoch": 0.6638041081582917, "grad_norm": 0.4791627526283264, "learning_rate": 0.00019144921250575619, "loss": 0.8853, "step": 2860 }, { "epoch": 0.6640362074968086, "grad_norm": 0.4788571298122406, "learning_rate": 0.00019144331041887882, "loss": 0.9273, "step": 2861 }, { "epoch": 0.6642683068353256, "grad_norm": 0.42612382769584656, "learning_rate": 0.00019143740638681922, "loss": 0.893, "step": 2862 }, { "epoch": 0.6645004061738424, "grad_norm": 0.47250261902809143, "learning_rate": 0.000191431500409703, "loss": 0.8795, "step": 2863 }, { "epoch": 0.6647325055123593, "grad_norm": 0.4971529543399811, "learning_rate": 0.00019142559248765587, "loss": 0.9454, "step": 2864 }, { "epoch": 0.6649646048508762, "grad_norm": 0.5304151177406311, "learning_rate": 0.00019141968262080335, "loss": 0.9761, "step": 2865 }, { "epoch": 0.665196704189393, "grad_norm": 0.4551432728767395, "learning_rate": 0.00019141377080927132, "loss": 0.8263, "step": 2866 }, { "epoch": 0.6654288035279099, "grad_norm": 0.4839153289794922, "learning_rate": 0.0001914078570531854, "loss": 0.8516, "step": 2867 }, { "epoch": 0.6656609028664269, "grad_norm": 0.501598060131073, "learning_rate": 0.0001914019413526715, "loss": 0.8753, "step": 2868 }, { "epoch": 0.6658930022049437, "grad_norm": 0.49526655673980713, "learning_rate": 0.00019139602370785538, "loss": 0.8342, "step": 2869 }, { "epoch": 0.6661251015434606, "grad_norm": 0.49142616987228394, "learning_rate": 
0.00019139010411886291, "loss": 0.8389, "step": 2870 }, { "epoch": 0.6663572008819775, "grad_norm": 0.5267114639282227, "learning_rate": 0.00019138418258582006, "loss": 0.8339, "step": 2871 }, { "epoch": 0.6665893002204943, "grad_norm": 0.42393583059310913, "learning_rate": 0.0001913782591088528, "loss": 0.8576, "step": 2872 }, { "epoch": 0.6668213995590112, "grad_norm": 0.4962637722492218, "learning_rate": 0.0001913723336880871, "loss": 0.837, "step": 2873 }, { "epoch": 0.6670534988975282, "grad_norm": 0.4471946656703949, "learning_rate": 0.000191366406323649, "loss": 0.8259, "step": 2874 }, { "epoch": 0.667285598236045, "grad_norm": 0.48034703731536865, "learning_rate": 0.00019136047701566464, "loss": 0.8537, "step": 2875 }, { "epoch": 0.6675176975745619, "grad_norm": 0.47116121649742126, "learning_rate": 0.0001913545457642601, "loss": 0.8252, "step": 2876 }, { "epoch": 0.6677497969130788, "grad_norm": 0.5071761012077332, "learning_rate": 0.00019134861256956155, "loss": 0.898, "step": 2877 }, { "epoch": 0.6679818962515957, "grad_norm": 0.4993492662906647, "learning_rate": 0.00019134267743169524, "loss": 0.8555, "step": 2878 }, { "epoch": 0.6682139955901125, "grad_norm": 0.5150817036628723, "learning_rate": 0.00019133674035078736, "loss": 0.8624, "step": 2879 }, { "epoch": 0.6684460949286295, "grad_norm": 0.5153425931930542, "learning_rate": 0.00019133080132696426, "loss": 0.8093, "step": 2880 }, { "epoch": 0.6686781942671464, "grad_norm": 0.4248557686805725, "learning_rate": 0.00019132486036035226, "loss": 0.8488, "step": 2881 }, { "epoch": 0.6689102936056632, "grad_norm": 0.4647797644138336, "learning_rate": 0.0001913189174510777, "loss": 0.9239, "step": 2882 }, { "epoch": 0.6691423929441801, "grad_norm": 0.5158550143241882, "learning_rate": 0.00019131297259926706, "loss": 0.8746, "step": 2883 }, { "epoch": 0.669374492282697, "grad_norm": 0.4511086344718933, "learning_rate": 0.00019130702580504676, "loss": 0.897, "step": 2884 }, { "epoch": 0.6696065916212138, 
"grad_norm": 0.5059782862663269, "learning_rate": 0.0001913010770685433, "loss": 0.8666, "step": 2885 }, { "epoch": 0.6698386909597308, "grad_norm": 0.4928185045719147, "learning_rate": 0.00019129512638988322, "loss": 0.842, "step": 2886 }, { "epoch": 0.6700707902982477, "grad_norm": 0.5002438426017761, "learning_rate": 0.00019128917376919313, "loss": 0.9076, "step": 2887 }, { "epoch": 0.6703028896367645, "grad_norm": 0.427513986825943, "learning_rate": 0.0001912832192065996, "loss": 0.8238, "step": 2888 }, { "epoch": 0.6705349889752814, "grad_norm": 0.45401087403297424, "learning_rate": 0.0001912772627022294, "loss": 0.8605, "step": 2889 }, { "epoch": 0.6707670883137983, "grad_norm": 0.43657442927360535, "learning_rate": 0.0001912713042562091, "loss": 0.8506, "step": 2890 }, { "epoch": 0.6709991876523151, "grad_norm": 0.41969212889671326, "learning_rate": 0.00019126534386866556, "loss": 0.8791, "step": 2891 }, { "epoch": 0.6712312869908321, "grad_norm": 0.46783447265625, "learning_rate": 0.00019125938153972548, "loss": 0.8774, "step": 2892 }, { "epoch": 0.671463386329349, "grad_norm": 0.44763606786727905, "learning_rate": 0.00019125341726951577, "loss": 0.9214, "step": 2893 }, { "epoch": 0.6716954856678659, "grad_norm": 0.46709761023521423, "learning_rate": 0.00019124745105816325, "loss": 0.8276, "step": 2894 }, { "epoch": 0.6719275850063827, "grad_norm": 0.471754252910614, "learning_rate": 0.0001912414829057949, "loss": 0.8645, "step": 2895 }, { "epoch": 0.6721596843448996, "grad_norm": 0.4268680810928345, "learning_rate": 0.00019123551281253757, "loss": 0.8376, "step": 2896 }, { "epoch": 0.6723917836834165, "grad_norm": 0.4184396266937256, "learning_rate": 0.00019122954077851833, "loss": 0.87, "step": 2897 }, { "epoch": 0.6726238830219334, "grad_norm": 0.48813703656196594, "learning_rate": 0.0001912235668038642, "loss": 0.866, "step": 2898 }, { "epoch": 0.6728559823604503, "grad_norm": 0.4599473774433136, "learning_rate": 0.00019121759088870226, "loss": 0.834, 
"step": 2899 }, { "epoch": 0.6730880816989672, "grad_norm": 0.4024162292480469, "learning_rate": 0.00019121161303315963, "loss": 0.8731, "step": 2900 }, { "epoch": 0.673320181037484, "grad_norm": 0.4451583921909332, "learning_rate": 0.00019120563323736343, "loss": 0.8934, "step": 2901 }, { "epoch": 0.6735522803760009, "grad_norm": 0.41901981830596924, "learning_rate": 0.00019119965150144095, "loss": 0.8637, "step": 2902 }, { "epoch": 0.6737843797145178, "grad_norm": 0.42898762226104736, "learning_rate": 0.00019119366782551937, "loss": 0.8929, "step": 2903 }, { "epoch": 0.6740164790530347, "grad_norm": 0.4139856994152069, "learning_rate": 0.00019118768220972596, "loss": 0.8958, "step": 2904 }, { "epoch": 0.6742485783915516, "grad_norm": 0.4518340528011322, "learning_rate": 0.0001911816946541881, "loss": 0.884, "step": 2905 }, { "epoch": 0.6744806777300685, "grad_norm": 0.4949742555618286, "learning_rate": 0.00019117570515903313, "loss": 0.9065, "step": 2906 }, { "epoch": 0.6747127770685853, "grad_norm": 0.42285311222076416, "learning_rate": 0.00019116971372438847, "loss": 0.9126, "step": 2907 }, { "epoch": 0.6749448764071022, "grad_norm": 0.46767348051071167, "learning_rate": 0.00019116372035038153, "loss": 0.8784, "step": 2908 }, { "epoch": 0.6751769757456191, "grad_norm": 0.48399636149406433, "learning_rate": 0.00019115772503713985, "loss": 0.8913, "step": 2909 }, { "epoch": 0.6754090750841361, "grad_norm": 0.44633030891418457, "learning_rate": 0.00019115172778479093, "loss": 0.8711, "step": 2910 }, { "epoch": 0.6756411744226529, "grad_norm": 0.43487444519996643, "learning_rate": 0.00019114572859346235, "loss": 0.8847, "step": 2911 }, { "epoch": 0.6758732737611698, "grad_norm": 0.3979194760322571, "learning_rate": 0.00019113972746328178, "loss": 0.849, "step": 2912 }, { "epoch": 0.6761053730996867, "grad_norm": 0.4204396605491638, "learning_rate": 0.0001911337243943768, "loss": 0.8596, "step": 2913 }, { "epoch": 0.6763374724382035, "grad_norm": 
0.41835030913352966, "learning_rate": 0.0001911277193868751, "loss": 0.8431, "step": 2914 }, { "epoch": 0.6765695717767204, "grad_norm": 0.4458625912666321, "learning_rate": 0.00019112171244090452, "loss": 0.8341, "step": 2915 }, { "epoch": 0.6768016711152374, "grad_norm": 0.4265308976173401, "learning_rate": 0.0001911157035565927, "loss": 0.8193, "step": 2916 }, { "epoch": 0.6770337704537542, "grad_norm": 0.4003806412220001, "learning_rate": 0.0001911096927340676, "loss": 0.8821, "step": 2917 }, { "epoch": 0.6772658697922711, "grad_norm": 0.44573527574539185, "learning_rate": 0.00019110367997345697, "loss": 0.864, "step": 2918 }, { "epoch": 0.677497969130788, "grad_norm": 0.4213849902153015, "learning_rate": 0.00019109766527488877, "loss": 0.8711, "step": 2919 }, { "epoch": 0.6777300684693048, "grad_norm": 0.41736915707588196, "learning_rate": 0.00019109164863849096, "loss": 0.8666, "step": 2920 }, { "epoch": 0.6779621678078217, "grad_norm": 0.4173840284347534, "learning_rate": 0.00019108563006439147, "loss": 0.8964, "step": 2921 }, { "epoch": 0.6781942671463387, "grad_norm": 0.4290173649787903, "learning_rate": 0.00019107960955271836, "loss": 0.8684, "step": 2922 }, { "epoch": 0.6784263664848555, "grad_norm": 0.4732690751552582, "learning_rate": 0.0001910735871035997, "loss": 0.844, "step": 2923 }, { "epoch": 0.6786584658233724, "grad_norm": 0.44380733370780945, "learning_rate": 0.00019106756271716362, "loss": 0.8779, "step": 2924 }, { "epoch": 0.6788905651618893, "grad_norm": 0.4828498959541321, "learning_rate": 0.00019106153639353822, "loss": 0.8606, "step": 2925 }, { "epoch": 0.6791226645004061, "grad_norm": 0.4402746260166168, "learning_rate": 0.00019105550813285175, "loss": 0.8463, "step": 2926 }, { "epoch": 0.679354763838923, "grad_norm": 0.44497203826904297, "learning_rate": 0.00019104947793523234, "loss": 0.8601, "step": 2927 }, { "epoch": 0.67958686317744, "grad_norm": 0.44765856862068176, "learning_rate": 0.00019104344580080838, "loss": 0.8867, "step": 
2928 }, { "epoch": 0.6798189625159569, "grad_norm": 0.43054118752479553, "learning_rate": 0.00019103741172970818, "loss": 0.8119, "step": 2929 }, { "epoch": 0.6800510618544737, "grad_norm": 0.555328369140625, "learning_rate": 0.00019103137572206, "loss": 0.8219, "step": 2930 }, { "epoch": 0.6802831611929906, "grad_norm": 0.45921704173088074, "learning_rate": 0.0001910253377779923, "loss": 0.8887, "step": 2931 }, { "epoch": 0.6805152605315075, "grad_norm": 0.4183528423309326, "learning_rate": 0.00019101929789763354, "loss": 0.885, "step": 2932 }, { "epoch": 0.6807473598700243, "grad_norm": 0.4342934787273407, "learning_rate": 0.00019101325608111218, "loss": 0.9084, "step": 2933 }, { "epoch": 0.6809794592085413, "grad_norm": 0.41013672947883606, "learning_rate": 0.0001910072123285567, "loss": 0.8773, "step": 2934 }, { "epoch": 0.6812115585470582, "grad_norm": 0.4397852122783661, "learning_rate": 0.00019100116664009576, "loss": 0.8478, "step": 2935 }, { "epoch": 0.681443657885575, "grad_norm": 0.46658027172088623, "learning_rate": 0.00019099511901585786, "loss": 0.8682, "step": 2936 }, { "epoch": 0.6816757572240919, "grad_norm": 0.4161824584007263, "learning_rate": 0.00019098906945597168, "loss": 0.8447, "step": 2937 }, { "epoch": 0.6819078565626088, "grad_norm": 0.45820096135139465, "learning_rate": 0.00019098301796056593, "loss": 0.8632, "step": 2938 }, { "epoch": 0.6821399559011256, "grad_norm": 0.49335211515426636, "learning_rate": 0.00019097696452976935, "loss": 0.8543, "step": 2939 }, { "epoch": 0.6823720552396426, "grad_norm": 0.5060347318649292, "learning_rate": 0.00019097090916371062, "loss": 0.9283, "step": 2940 }, { "epoch": 0.6826041545781595, "grad_norm": 0.5007983446121216, "learning_rate": 0.00019096485186251866, "loss": 0.8542, "step": 2941 }, { "epoch": 0.6828362539166764, "grad_norm": 0.5087704062461853, "learning_rate": 0.00019095879262632227, "loss": 0.8908, "step": 2942 }, { "epoch": 0.6830683532551932, "grad_norm": 0.5069675445556641, 
"learning_rate": 0.0001909527314552503, "loss": 0.9079, "step": 2943 }, { "epoch": 0.6833004525937101, "grad_norm": 0.47137320041656494, "learning_rate": 0.00019094666834943179, "loss": 0.8626, "step": 2944 }, { "epoch": 0.683532551932227, "grad_norm": 0.4283658564090729, "learning_rate": 0.0001909406033089956, "loss": 0.8541, "step": 2945 }, { "epoch": 0.6837646512707439, "grad_norm": 0.46082451939582825, "learning_rate": 0.00019093453633407082, "loss": 0.8143, "step": 2946 }, { "epoch": 0.6839967506092608, "grad_norm": 0.4551635682582855, "learning_rate": 0.00019092846742478647, "loss": 0.8945, "step": 2947 }, { "epoch": 0.6842288499477777, "grad_norm": 0.5660843253135681, "learning_rate": 0.00019092239658127167, "loss": 0.8522, "step": 2948 }, { "epoch": 0.6844609492862945, "grad_norm": 0.481251060962677, "learning_rate": 0.00019091632380365553, "loss": 0.8549, "step": 2949 }, { "epoch": 0.6846930486248114, "grad_norm": 0.45565807819366455, "learning_rate": 0.00019091024909206729, "loss": 0.8892, "step": 2950 }, { "epoch": 0.6849251479633283, "grad_norm": 0.4629526734352112, "learning_rate": 0.00019090417244663609, "loss": 0.8584, "step": 2951 }, { "epoch": 0.6851572473018452, "grad_norm": 0.49955448508262634, "learning_rate": 0.00019089809386749125, "loss": 0.8832, "step": 2952 }, { "epoch": 0.6853893466403621, "grad_norm": 0.5168017745018005, "learning_rate": 0.00019089201335476208, "loss": 0.8551, "step": 2953 }, { "epoch": 0.685621445978879, "grad_norm": 0.4181295335292816, "learning_rate": 0.00019088593090857788, "loss": 0.8732, "step": 2954 }, { "epoch": 0.6858535453173958, "grad_norm": 0.46301600337028503, "learning_rate": 0.00019087984652906806, "loss": 0.8293, "step": 2955 }, { "epoch": 0.6860856446559127, "grad_norm": 0.47264742851257324, "learning_rate": 0.00019087376021636207, "loss": 0.9152, "step": 2956 }, { "epoch": 0.6863177439944296, "grad_norm": 0.5421038866043091, "learning_rate": 0.0001908676719705893, "loss": 0.8275, "step": 2957 }, { 
"epoch": 0.6865498433329466, "grad_norm": 0.40728798508644104, "learning_rate": 0.00019086158179187936, "loss": 0.9143, "step": 2958 }, { "epoch": 0.6867819426714634, "grad_norm": 0.48777931928634644, "learning_rate": 0.00019085548968036174, "loss": 0.8316, "step": 2959 }, { "epoch": 0.6870140420099803, "grad_norm": 0.5215877294540405, "learning_rate": 0.00019084939563616604, "loss": 0.8873, "step": 2960 }, { "epoch": 0.6872461413484972, "grad_norm": 0.42543748021125793, "learning_rate": 0.0001908432996594219, "loss": 0.8657, "step": 2961 }, { "epoch": 0.687478240687014, "grad_norm": 0.47026681900024414, "learning_rate": 0.00019083720175025896, "loss": 0.8799, "step": 2962 }, { "epoch": 0.6877103400255309, "grad_norm": 0.522604763507843, "learning_rate": 0.00019083110190880703, "loss": 0.9158, "step": 2963 }, { "epoch": 0.6879424393640479, "grad_norm": 0.5238401889801025, "learning_rate": 0.00019082500013519575, "loss": 0.8556, "step": 2964 }, { "epoch": 0.6881745387025647, "grad_norm": 0.4724384844303131, "learning_rate": 0.00019081889642955498, "loss": 0.8601, "step": 2965 }, { "epoch": 0.6884066380410816, "grad_norm": 0.48549726605415344, "learning_rate": 0.00019081279079201453, "loss": 0.9035, "step": 2966 }, { "epoch": 0.6886387373795985, "grad_norm": 0.4577504098415375, "learning_rate": 0.0001908066832227043, "loss": 0.8507, "step": 2967 }, { "epoch": 0.6888708367181153, "grad_norm": 0.48382043838500977, "learning_rate": 0.00019080057372175424, "loss": 0.8271, "step": 2968 }, { "epoch": 0.6891029360566322, "grad_norm": 0.4796565771102905, "learning_rate": 0.00019079446228929426, "loss": 0.88, "step": 2969 }, { "epoch": 0.6893350353951492, "grad_norm": 0.45773276686668396, "learning_rate": 0.00019078834892545437, "loss": 0.8851, "step": 2970 }, { "epoch": 0.689567134733666, "grad_norm": 0.5537842512130737, "learning_rate": 0.00019078223363036464, "loss": 0.8531, "step": 2971 }, { "epoch": 0.6897992340721829, "grad_norm": 0.49126115441322327, "learning_rate": 
0.0001907761164041551, "loss": 0.8933, "step": 2972 }, { "epoch": 0.6900313334106998, "grad_norm": 0.5125454664230347, "learning_rate": 0.00019076999724695591, "loss": 0.9219, "step": 2973 }, { "epoch": 0.6902634327492166, "grad_norm": 0.524172842502594, "learning_rate": 0.00019076387615889727, "loss": 0.886, "step": 2974 }, { "epoch": 0.6904955320877335, "grad_norm": 0.4038882553577423, "learning_rate": 0.00019075775314010934, "loss": 0.8375, "step": 2975 }, { "epoch": 0.6907276314262505, "grad_norm": 0.60374915599823, "learning_rate": 0.0001907516281907224, "loss": 0.8267, "step": 2976 }, { "epoch": 0.6909597307647674, "grad_norm": 0.49629759788513184, "learning_rate": 0.00019074550131086674, "loss": 0.869, "step": 2977 }, { "epoch": 0.6911918301032842, "grad_norm": 0.4762909412384033, "learning_rate": 0.00019073937250067263, "loss": 0.8727, "step": 2978 }, { "epoch": 0.6914239294418011, "grad_norm": 0.547644853591919, "learning_rate": 0.00019073324176027053, "loss": 0.8769, "step": 2979 }, { "epoch": 0.691656028780318, "grad_norm": 0.5605371594429016, "learning_rate": 0.00019072710908979077, "loss": 0.9064, "step": 2980 }, { "epoch": 0.6918881281188348, "grad_norm": 0.4492296278476715, "learning_rate": 0.00019072097448936387, "loss": 0.8816, "step": 2981 }, { "epoch": 0.6921202274573518, "grad_norm": 0.5059700608253479, "learning_rate": 0.00019071483795912034, "loss": 0.8107, "step": 2982 }, { "epoch": 0.6923523267958687, "grad_norm": 0.5650781989097595, "learning_rate": 0.00019070869949919063, "loss": 0.8611, "step": 2983 }, { "epoch": 0.6925844261343855, "grad_norm": 0.4539996385574341, "learning_rate": 0.00019070255910970537, "loss": 0.8633, "step": 2984 }, { "epoch": 0.6928165254729024, "grad_norm": 0.5409964919090271, "learning_rate": 0.00019069641679079517, "loss": 0.8454, "step": 2985 }, { "epoch": 0.6930486248114193, "grad_norm": 0.45325782895088196, "learning_rate": 0.00019069027254259071, "loss": 0.8302, "step": 2986 }, { "epoch": 0.6932807241499361, 
"grad_norm": 0.5243237614631653, "learning_rate": 0.00019068412636522267, "loss": 0.8394, "step": 2987 }, { "epoch": 0.6935128234884531, "grad_norm": 0.5446032285690308, "learning_rate": 0.00019067797825882179, "loss": 0.8526, "step": 2988 }, { "epoch": 0.69374492282697, "grad_norm": 0.4610207676887512, "learning_rate": 0.00019067182822351882, "loss": 0.8722, "step": 2989 }, { "epoch": 0.6939770221654868, "grad_norm": 0.48760634660720825, "learning_rate": 0.00019066567625944467, "loss": 0.8907, "step": 2990 }, { "epoch": 0.6942091215040037, "grad_norm": 0.525909960269928, "learning_rate": 0.00019065952236673014, "loss": 0.8755, "step": 2991 }, { "epoch": 0.6944412208425206, "grad_norm": 0.4830683767795563, "learning_rate": 0.00019065336654550618, "loss": 0.9193, "step": 2992 }, { "epoch": 0.6946733201810374, "grad_norm": 0.44178134202957153, "learning_rate": 0.00019064720879590367, "loss": 0.8844, "step": 2993 }, { "epoch": 0.6949054195195544, "grad_norm": 0.4111638069152832, "learning_rate": 0.00019064104911805364, "loss": 0.8428, "step": 2994 }, { "epoch": 0.6951375188580713, "grad_norm": 0.5036029815673828, "learning_rate": 0.00019063488751208714, "loss": 0.8502, "step": 2995 }, { "epoch": 0.6953696181965882, "grad_norm": 0.4300152063369751, "learning_rate": 0.00019062872397813518, "loss": 0.8109, "step": 2996 }, { "epoch": 0.695601717535105, "grad_norm": 0.41722041368484497, "learning_rate": 0.0001906225585163289, "loss": 0.8877, "step": 2997 }, { "epoch": 0.6958338168736219, "grad_norm": 0.4701729714870453, "learning_rate": 0.00019061639112679947, "loss": 0.8916, "step": 2998 }, { "epoch": 0.6960659162121388, "grad_norm": 0.4922943413257599, "learning_rate": 0.0001906102218096781, "loss": 0.8967, "step": 2999 }, { "epoch": 0.6962980155506557, "grad_norm": 0.42822059988975525, "learning_rate": 0.00019060405056509595, "loss": 0.8603, "step": 3000 }, { "epoch": 0.6965301148891726, "grad_norm": 0.4583202302455902, "learning_rate": 0.00019059787739318434, "loss": 
0.903, "step": 3001 }, { "epoch": 0.6967622142276895, "grad_norm": 0.458661288022995, "learning_rate": 0.0001905917022940746, "loss": 0.8717, "step": 3002 }, { "epoch": 0.6969943135662063, "grad_norm": 0.45995208621025085, "learning_rate": 0.00019058552526789808, "loss": 0.9008, "step": 3003 }, { "epoch": 0.6972264129047232, "grad_norm": 0.4817812144756317, "learning_rate": 0.00019057934631478617, "loss": 0.858, "step": 3004 }, { "epoch": 0.6974585122432401, "grad_norm": 0.39633262157440186, "learning_rate": 0.00019057316543487026, "loss": 0.8462, "step": 3005 }, { "epoch": 0.6976906115817569, "grad_norm": 0.45640578866004944, "learning_rate": 0.00019056698262828192, "loss": 0.7977, "step": 3006 }, { "epoch": 0.6979227109202739, "grad_norm": 0.5016095042228699, "learning_rate": 0.0001905607978951526, "loss": 0.9016, "step": 3007 }, { "epoch": 0.6981548102587908, "grad_norm": 0.4365701377391815, "learning_rate": 0.0001905546112356139, "loss": 0.876, "step": 3008 }, { "epoch": 0.6983869095973076, "grad_norm": 0.44235169887542725, "learning_rate": 0.0001905484226497974, "loss": 0.8377, "step": 3009 }, { "epoch": 0.6986190089358245, "grad_norm": 0.42876043915748596, "learning_rate": 0.00019054223213783479, "loss": 0.8863, "step": 3010 }, { "epoch": 0.6988511082743414, "grad_norm": 0.5030390620231628, "learning_rate": 0.00019053603969985768, "loss": 0.8411, "step": 3011 }, { "epoch": 0.6990832076128582, "grad_norm": 0.6135694980621338, "learning_rate": 0.00019052984533599783, "loss": 0.8239, "step": 3012 }, { "epoch": 0.6993153069513752, "grad_norm": 0.46195584535598755, "learning_rate": 0.00019052364904638703, "loss": 0.8632, "step": 3013 }, { "epoch": 0.6995474062898921, "grad_norm": 0.5559212565422058, "learning_rate": 0.00019051745083115707, "loss": 0.8986, "step": 3014 }, { "epoch": 0.699779505628409, "grad_norm": 0.5121561288833618, "learning_rate": 0.00019051125069043977, "loss": 0.788, "step": 3015 }, { "epoch": 0.7000116049669258, "grad_norm": 
0.47827059030532837, "learning_rate": 0.00019050504862436709, "loss": 0.8596, "step": 3016 }, { "epoch": 0.7002437043054427, "grad_norm": 0.6204367876052856, "learning_rate": 0.0001904988446330709, "loss": 0.8438, "step": 3017 }, { "epoch": 0.7004758036439596, "grad_norm": 0.46493759751319885, "learning_rate": 0.00019049263871668317, "loss": 0.8739, "step": 3018 }, { "epoch": 0.7007079029824765, "grad_norm": 0.5650869607925415, "learning_rate": 0.00019048643087533592, "loss": 0.8915, "step": 3019 }, { "epoch": 0.7009400023209934, "grad_norm": null, "learning_rate": 0.00019048643087533592, "loss": 0.8471, "step": 3020 }, { "epoch": 0.7011721016595103, "grad_norm": 0.5289834141731262, "learning_rate": 0.00019048022110916123, "loss": 0.8814, "step": 3021 }, { "epoch": 0.7014042009980271, "grad_norm": 0.46831443905830383, "learning_rate": 0.0001904740094182912, "loss": 0.8164, "step": 3022 }, { "epoch": 0.701636300336544, "grad_norm": 0.5179585218429565, "learning_rate": 0.00019046779580285793, "loss": 0.8368, "step": 3023 }, { "epoch": 0.7018683996750609, "grad_norm": 0.496789813041687, "learning_rate": 0.0001904615802629936, "loss": 0.825, "step": 3024 }, { "epoch": 0.7021004990135779, "grad_norm": 0.5338672399520874, "learning_rate": 0.00019045536279883044, "loss": 0.8568, "step": 3025 }, { "epoch": 0.7023325983520947, "grad_norm": 0.4842953085899353, "learning_rate": 0.00019044914341050068, "loss": 0.8353, "step": 3026 }, { "epoch": 0.7025646976906116, "grad_norm": 0.4783724546432495, "learning_rate": 0.00019044292209813667, "loss": 0.8458, "step": 3027 }, { "epoch": 0.7027967970291285, "grad_norm": 0.5048144459724426, "learning_rate": 0.0001904366988618707, "loss": 0.852, "step": 3028 }, { "epoch": 0.7030288963676453, "grad_norm": 0.46747303009033203, "learning_rate": 0.0001904304737018352, "loss": 0.8185, "step": 3029 }, { "epoch": 0.7032609957061622, "grad_norm": 0.5539613366127014, "learning_rate": 0.0001904242466181626, "loss": 0.8551, "step": 3030 }, { 
"epoch": 0.7034930950446792, "grad_norm": 0.7017244100570679, "learning_rate": 0.00019041801761098524, "loss": 0.8437, "step": 3031 }, { "epoch": 0.703725194383196, "grad_norm": 0.46192508935928345, "learning_rate": 0.00019041178668043575, "loss": 0.8575, "step": 3032 }, { "epoch": 0.7039572937217129, "grad_norm": 0.6109695434570312, "learning_rate": 0.00019040555382664664, "loss": 0.8557, "step": 3033 }, { "epoch": 0.7041893930602298, "grad_norm": 0.5278798937797546, "learning_rate": 0.0001903993190497505, "loss": 0.8483, "step": 3034 }, { "epoch": 0.7044214923987466, "grad_norm": 0.5359739065170288, "learning_rate": 0.00019039308234987992, "loss": 0.8377, "step": 3035 }, { "epoch": 0.7046535917372635, "grad_norm": 0.5182442665100098, "learning_rate": 0.0001903868437271676, "loss": 0.8852, "step": 3036 }, { "epoch": 0.7048856910757805, "grad_norm": 0.5192808508872986, "learning_rate": 0.00019038060318174625, "loss": 0.8597, "step": 3037 }, { "epoch": 0.7051177904142973, "grad_norm": 0.4852927327156067, "learning_rate": 0.0001903743607137486, "loss": 0.9021, "step": 3038 }, { "epoch": 0.7053498897528142, "grad_norm": 0.5354822278022766, "learning_rate": 0.00019036811632330746, "loss": 0.8516, "step": 3039 }, { "epoch": 0.7055819890913311, "grad_norm": 0.44874250888824463, "learning_rate": 0.00019036187001055565, "loss": 0.7794, "step": 3040 }, { "epoch": 0.7058140884298479, "grad_norm": 0.473663866519928, "learning_rate": 0.00019035562177562604, "loss": 0.8997, "step": 3041 }, { "epoch": 0.7060461877683648, "grad_norm": 0.5499244332313538, "learning_rate": 0.00019034937161865154, "loss": 0.8662, "step": 3042 }, { "epoch": 0.7062782871068818, "grad_norm": 0.48487070202827454, "learning_rate": 0.00019034311953976512, "loss": 0.8509, "step": 3043 }, { "epoch": 0.7065103864453987, "grad_norm": 0.5054479837417603, "learning_rate": 0.00019033686553909974, "loss": 0.8052, "step": 3044 }, { "epoch": 0.7067424857839155, "grad_norm": 0.4876578450202942, "learning_rate": 
0.00019033060961678845, "loss": 0.8739, "step": 3045 }, { "epoch": 0.7069745851224324, "grad_norm": 0.48914724588394165, "learning_rate": 0.00019032435177296437, "loss": 0.8524, "step": 3046 }, { "epoch": 0.7072066844609493, "grad_norm": 0.4960716962814331, "learning_rate": 0.00019031809200776057, "loss": 0.8756, "step": 3047 }, { "epoch": 0.7074387837994661, "grad_norm": 0.509127140045166, "learning_rate": 0.00019031183032131022, "loss": 0.862, "step": 3048 }, { "epoch": 0.7076708831379831, "grad_norm": 0.4922364354133606, "learning_rate": 0.0001903055667137465, "loss": 0.8584, "step": 3049 }, { "epoch": 0.7079029824765, "grad_norm": 0.5280460119247437, "learning_rate": 0.00019029930118520268, "loss": 0.8757, "step": 3050 }, { "epoch": 0.7081350818150168, "grad_norm": 1.082001805305481, "learning_rate": 0.000190293033735812, "loss": 0.8781, "step": 3051 }, { "epoch": 0.7083671811535337, "grad_norm": 0.6554441452026367, "learning_rate": 0.00019028676436570784, "loss": 0.8482, "step": 3052 }, { "epoch": 0.7085992804920506, "grad_norm": 0.592295229434967, "learning_rate": 0.00019028049307502352, "loss": 0.84, "step": 3053 }, { "epoch": 0.7088313798305674, "grad_norm": 0.48731228709220886, "learning_rate": 0.00019027421986389243, "loss": 0.8547, "step": 3054 }, { "epoch": 0.7090634791690844, "grad_norm": 0.4809691607952118, "learning_rate": 0.00019026794473244807, "loss": 0.8592, "step": 3055 }, { "epoch": 0.7092955785076013, "grad_norm": 0.4836047291755676, "learning_rate": 0.00019026166768082388, "loss": 0.8471, "step": 3056 }, { "epoch": 0.7095276778461181, "grad_norm": 0.45520707964897156, "learning_rate": 0.0001902553887091534, "loss": 0.8902, "step": 3057 }, { "epoch": 0.709759777184635, "grad_norm": 0.4413166344165802, "learning_rate": 0.00019024910781757017, "loss": 0.8494, "step": 3058 }, { "epoch": 0.7099918765231519, "grad_norm": 0.44015225768089294, "learning_rate": 0.00019024282500620783, "loss": 0.8921, "step": 3059 }, { "epoch": 0.7102239758616687, 
"grad_norm": 0.4459102153778076, "learning_rate": 0.00019023654027520003, "loss": 0.8987, "step": 3060 }, { "epoch": 0.7104560752001857, "grad_norm": 0.5179413557052612, "learning_rate": 0.00019023025362468045, "loss": 0.9569, "step": 3061 }, { "epoch": 0.7106881745387026, "grad_norm": 0.46936163306236267, "learning_rate": 0.0001902239650547828, "loss": 0.9004, "step": 3062 }, { "epoch": 0.7109202738772195, "grad_norm": 0.4739760458469391, "learning_rate": 0.00019021767456564085, "loss": 0.834, "step": 3063 }, { "epoch": 0.7111523732157363, "grad_norm": 0.5172191858291626, "learning_rate": 0.00019021138215738844, "loss": 0.8817, "step": 3064 }, { "epoch": 0.7113844725542532, "grad_norm": 0.537525475025177, "learning_rate": 0.00019020508783015942, "loss": 0.8741, "step": 3065 }, { "epoch": 0.7116165718927701, "grad_norm": 0.4432235062122345, "learning_rate": 0.00019019879158408768, "loss": 0.8582, "step": 3066 }, { "epoch": 0.711848671231287, "grad_norm": 0.5253779292106628, "learning_rate": 0.00019019249341930717, "loss": 0.8198, "step": 3067 }, { "epoch": 0.7120807705698039, "grad_norm": 0.48431235551834106, "learning_rate": 0.0001901861933359518, "loss": 0.8358, "step": 3068 }, { "epoch": 0.7123128699083208, "grad_norm": 0.4678995907306671, "learning_rate": 0.0001901798913341556, "loss": 0.8438, "step": 3069 }, { "epoch": 0.7125449692468376, "grad_norm": 0.5113543272018433, "learning_rate": 0.0001901735874140527, "loss": 0.8725, "step": 3070 }, { "epoch": 0.7127770685853545, "grad_norm": 0.4243150055408478, "learning_rate": 0.00019016728157577714, "loss": 0.8491, "step": 3071 }, { "epoch": 0.7130091679238714, "grad_norm": 0.40014317631721497, "learning_rate": 0.00019016097381946307, "loss": 0.8362, "step": 3072 }, { "epoch": 0.7132412672623883, "grad_norm": 0.49873313307762146, "learning_rate": 0.00019015466414524466, "loss": 0.8678, "step": 3073 }, { "epoch": 0.7134733666009052, "grad_norm": 0.41961678862571716, "learning_rate": 0.00019014835255325613, "loss": 
0.8787, "step": 3074 }, { "epoch": 0.7137054659394221, "grad_norm": 0.4166480004787445, "learning_rate": 0.00019014203904363177, "loss": 0.8621, "step": 3075 }, { "epoch": 0.713937565277939, "grad_norm": 0.4485580027103424, "learning_rate": 0.00019013572361650582, "loss": 0.8715, "step": 3076 }, { "epoch": 0.7141696646164558, "grad_norm": 0.42902570962905884, "learning_rate": 0.0001901294062720127, "loss": 0.8797, "step": 3077 }, { "epoch": 0.7144017639549727, "grad_norm": 0.4232020080089569, "learning_rate": 0.00019012308701028672, "loss": 0.8281, "step": 3078 }, { "epoch": 0.7146338632934897, "grad_norm": 0.4117775857448578, "learning_rate": 0.00019011676583146234, "loss": 0.907, "step": 3079 }, { "epoch": 0.7148659626320065, "grad_norm": 0.4568987190723419, "learning_rate": 0.00019011044273567403, "loss": 0.869, "step": 3080 }, { "epoch": 0.7150980619705234, "grad_norm": 0.4611486792564392, "learning_rate": 0.0001901041177230563, "loss": 0.8234, "step": 3081 }, { "epoch": 0.7153301613090403, "grad_norm": 0.3874223530292511, "learning_rate": 0.0001900977907937436, "loss": 0.8273, "step": 3082 }, { "epoch": 0.7155622606475571, "grad_norm": 0.5141695141792297, "learning_rate": 0.0001900914619478707, "loss": 0.8519, "step": 3083 }, { "epoch": 0.715794359986074, "grad_norm": 0.4338914752006531, "learning_rate": 0.00019008513118557206, "loss": 0.8166, "step": 3084 }, { "epoch": 0.716026459324591, "grad_norm": 0.4456069767475128, "learning_rate": 0.00019007879850698242, "loss": 0.8718, "step": 3085 }, { "epoch": 0.7162585586631078, "grad_norm": 0.4544895589351654, "learning_rate": 0.00019007246391223649, "loss": 0.8841, "step": 3086 }, { "epoch": 0.7164906580016247, "grad_norm": 0.4263644814491272, "learning_rate": 0.00019006612740146898, "loss": 0.859, "step": 3087 }, { "epoch": 0.7167227573401416, "grad_norm": 0.48884785175323486, "learning_rate": 0.00019005978897481472, "loss": 0.8158, "step": 3088 }, { "epoch": 0.7169548566786584, "grad_norm": 0.4546215534210205, 
"learning_rate": 0.00019005344863240854, "loss": 0.8788, "step": 3089 }, { "epoch": 0.7171869560171753, "grad_norm": 0.40565377473831177, "learning_rate": 0.00019004710637438534, "loss": 0.8348, "step": 3090 }, { "epoch": 0.7174190553556923, "grad_norm": 0.503899872303009, "learning_rate": 0.00019004076220087993, "loss": 0.8292, "step": 3091 }, { "epoch": 0.7176511546942091, "grad_norm": 0.45326709747314453, "learning_rate": 0.00019003441611202738, "loss": 0.8362, "step": 3092 }, { "epoch": 0.717883254032726, "grad_norm": 0.5009828209877014, "learning_rate": 0.00019002806810796262, "loss": 0.8682, "step": 3093 }, { "epoch": 0.7181153533712429, "grad_norm": 0.5006335377693176, "learning_rate": 0.00019002171818882066, "loss": 0.8631, "step": 3094 }, { "epoch": 0.7183474527097597, "grad_norm": 0.4244285523891449, "learning_rate": 0.00019001536635473664, "loss": 0.8961, "step": 3095 }, { "epoch": 0.7185795520482766, "grad_norm": 0.4618474841117859, "learning_rate": 0.00019000901260584564, "loss": 0.9026, "step": 3096 }, { "epoch": 0.7188116513867936, "grad_norm": 0.5342423319816589, "learning_rate": 0.00019000265694228284, "loss": 0.8685, "step": 3097 }, { "epoch": 0.7190437507253105, "grad_norm": 0.3960004448890686, "learning_rate": 0.00018999629936418343, "loss": 0.8816, "step": 3098 }, { "epoch": 0.7192758500638273, "grad_norm": 0.5196926593780518, "learning_rate": 0.0001899899398716826, "loss": 0.8788, "step": 3099 }, { "epoch": 0.7195079494023442, "grad_norm": 0.5009579658508301, "learning_rate": 0.00018998357846491568, "loss": 0.8847, "step": 3100 }, { "epoch": 0.7197400487408611, "grad_norm": 0.42285189032554626, "learning_rate": 0.000189977215144018, "loss": 0.8618, "step": 3101 }, { "epoch": 0.7199721480793779, "grad_norm": 0.5198935270309448, "learning_rate": 0.00018997084990912487, "loss": 0.8465, "step": 3102 }, { "epoch": 0.7202042474178949, "grad_norm": 0.42918962240219116, "learning_rate": 0.00018996448276037178, "loss": 0.8687, "step": 3103 }, { 
"epoch": 0.7204363467564118, "grad_norm": 0.4479561746120453, "learning_rate": 0.00018995811369789406, "loss": 0.8962, "step": 3104 }, { "epoch": 0.7206684460949286, "grad_norm": 0.51496422290802, "learning_rate": 0.00018995174272182726, "loss": 0.8694, "step": 3105 }, { "epoch": 0.7209005454334455, "grad_norm": 0.42323508858680725, "learning_rate": 0.0001899453698323069, "loss": 0.8652, "step": 3106 }, { "epoch": 0.7211326447719624, "grad_norm": 0.42874228954315186, "learning_rate": 0.00018993899502946855, "loss": 0.869, "step": 3107 }, { "epoch": 0.7213647441104792, "grad_norm": 0.4768570065498352, "learning_rate": 0.00018993261831344778, "loss": 0.8956, "step": 3108 }, { "epoch": 0.7215968434489962, "grad_norm": 0.43182435631752014, "learning_rate": 0.00018992623968438026, "loss": 0.8572, "step": 3109 }, { "epoch": 0.7218289427875131, "grad_norm": 0.459556519985199, "learning_rate": 0.00018991985914240168, "loss": 0.8715, "step": 3110 }, { "epoch": 0.72206104212603, "grad_norm": 0.43720918893814087, "learning_rate": 0.00018991347668764774, "loss": 0.8564, "step": 3111 }, { "epoch": 0.7222931414645468, "grad_norm": 0.4254119396209717, "learning_rate": 0.00018990709232025423, "loss": 0.9077, "step": 3112 }, { "epoch": 0.7225252408030637, "grad_norm": 0.46387243270874023, "learning_rate": 0.00018990070604035694, "loss": 0.9364, "step": 3113 }, { "epoch": 0.7227573401415806, "grad_norm": 0.4888327717781067, "learning_rate": 0.00018989431784809178, "loss": 0.8836, "step": 3114 }, { "epoch": 0.7229894394800975, "grad_norm": 0.42000171542167664, "learning_rate": 0.00018988792774359453, "loss": 0.8949, "step": 3115 }, { "epoch": 0.7232215388186144, "grad_norm": 0.4905990958213806, "learning_rate": 0.00018988153572700123, "loss": 0.8905, "step": 3116 }, { "epoch": 0.7234536381571313, "grad_norm": 0.5477259159088135, "learning_rate": 0.0001898751417984478, "loss": 0.8342, "step": 3117 }, { "epoch": 0.7236857374956481, "grad_norm": 0.4085805118083954, "learning_rate": 
0.00018986874595807024, "loss": 0.8242, "step": 3118 }, { "epoch": 0.723917836834165, "grad_norm": 0.4763002097606659, "learning_rate": 0.0001898623482060046, "loss": 0.8679, "step": 3119 }, { "epoch": 0.7241499361726819, "grad_norm": 0.463765025138855, "learning_rate": 0.00018985594854238703, "loss": 0.8571, "step": 3120 }, { "epoch": 0.7243820355111988, "grad_norm": 0.47047966718673706, "learning_rate": 0.00018984954696735356, "loss": 0.8575, "step": 3121 }, { "epoch": 0.7246141348497157, "grad_norm": 0.4776700735092163, "learning_rate": 0.0001898431434810405, "loss": 0.818, "step": 3122 }, { "epoch": 0.7248462341882326, "grad_norm": 0.5062687397003174, "learning_rate": 0.00018983673808358395, "loss": 0.8662, "step": 3123 }, { "epoch": 0.7250783335267494, "grad_norm": 0.4829709827899933, "learning_rate": 0.00018983033077512023, "loss": 0.8515, "step": 3124 }, { "epoch": 0.7253104328652663, "grad_norm": 0.4450545907020569, "learning_rate": 0.0001898239215557856, "loss": 0.8457, "step": 3125 }, { "epoch": 0.7255425322037832, "grad_norm": 0.4246063232421875, "learning_rate": 0.00018981751042571643, "loss": 0.8345, "step": 3126 }, { "epoch": 0.7257746315423002, "grad_norm": 0.514647364616394, "learning_rate": 0.00018981109738504907, "loss": 0.8331, "step": 3127 }, { "epoch": 0.726006730880817, "grad_norm": 0.5237157940864563, "learning_rate": 0.00018980468243391996, "loss": 0.8763, "step": 3128 }, { "epoch": 0.7262388302193339, "grad_norm": 0.4339951276779175, "learning_rate": 0.00018979826557246555, "loss": 0.8874, "step": 3129 }, { "epoch": 0.7264709295578508, "grad_norm": 0.49301356077194214, "learning_rate": 0.0001897918468008223, "loss": 0.9326, "step": 3130 }, { "epoch": 0.7267030288963676, "grad_norm": 0.5697692036628723, "learning_rate": 0.0001897854261191268, "loss": 0.8895, "step": 3131 }, { "epoch": 0.7269351282348845, "grad_norm": 0.4274684190750122, "learning_rate": 0.00018977900352751563, "loss": 0.8621, "step": 3132 }, { "epoch": 0.7271672275734015, 
"grad_norm": 0.5186319351196289, "learning_rate": 0.0001897725790261254, "loss": 0.8243, "step": 3133 }, { "epoch": 0.7273993269119183, "grad_norm": 0.4818325936794281, "learning_rate": 0.00018976615261509278, "loss": 0.8565, "step": 3134 }, { "epoch": 0.7276314262504352, "grad_norm": 0.44587647914886475, "learning_rate": 0.00018975972429455444, "loss": 0.8501, "step": 3135 }, { "epoch": 0.7278635255889521, "grad_norm": 0.4550928473472595, "learning_rate": 0.00018975329406464714, "loss": 0.8701, "step": 3136 }, { "epoch": 0.7280956249274689, "grad_norm": 0.4888739287853241, "learning_rate": 0.00018974686192550771, "loss": 0.8462, "step": 3137 }, { "epoch": 0.7283277242659858, "grad_norm": 0.43426457047462463, "learning_rate": 0.00018974042787727287, "loss": 0.8578, "step": 3138 }, { "epoch": 0.7285598236045028, "grad_norm": 0.45152491331100464, "learning_rate": 0.0001897339919200796, "loss": 0.8313, "step": 3139 }, { "epoch": 0.7287919229430196, "grad_norm": 0.49123987555503845, "learning_rate": 0.00018972755405406475, "loss": 0.8785, "step": 3140 }, { "epoch": 0.7290240222815365, "grad_norm": 0.45944082736968994, "learning_rate": 0.00018972111427936528, "loss": 0.8237, "step": 3141 }, { "epoch": 0.7292561216200534, "grad_norm": 0.47724369168281555, "learning_rate": 0.00018971467259611816, "loss": 0.8679, "step": 3142 }, { "epoch": 0.7294882209585702, "grad_norm": 0.49501869082450867, "learning_rate": 0.0001897082290044604, "loss": 0.8281, "step": 3143 }, { "epoch": 0.7297203202970871, "grad_norm": 0.5993200540542603, "learning_rate": 0.00018970178350452912, "loss": 0.8781, "step": 3144 }, { "epoch": 0.7299524196356041, "grad_norm": 0.4735378324985504, "learning_rate": 0.0001896953360964614, "loss": 0.8449, "step": 3145 }, { "epoch": 0.730184518974121, "grad_norm": 0.49868133664131165, "learning_rate": 0.0001896888867803944, "loss": 0.8505, "step": 3146 }, { "epoch": 0.7304166183126378, "grad_norm": 0.5391285419464111, "learning_rate": 0.0001896824355564653, 
"loss": 0.8964, "step": 3147 }, { "epoch": 0.7306487176511547, "grad_norm": 0.46896976232528687, "learning_rate": 0.00018967598242481133, "loss": 0.8513, "step": 3148 }, { "epoch": 0.7308808169896716, "grad_norm": 0.42831292748451233, "learning_rate": 0.00018966952738556976, "loss": 0.8693, "step": 3149 }, { "epoch": 0.7311129163281884, "grad_norm": 0.4517759084701538, "learning_rate": 0.0001896630704388779, "loss": 0.8426, "step": 3150 }, { "epoch": 0.7313450156667054, "grad_norm": 0.42865288257598877, "learning_rate": 0.00018965661158487312, "loss": 0.877, "step": 3151 }, { "epoch": 0.7315771150052223, "grad_norm": 0.46844547986984253, "learning_rate": 0.00018965015082369278, "loss": 0.8116, "step": 3152 }, { "epoch": 0.7318092143437391, "grad_norm": 0.47352421283721924, "learning_rate": 0.00018964368815547436, "loss": 0.8454, "step": 3153 }, { "epoch": 0.732041313682256, "grad_norm": 0.46213504672050476, "learning_rate": 0.00018963722358035528, "loss": 0.853, "step": 3154 }, { "epoch": 0.7322734130207729, "grad_norm": 0.44239532947540283, "learning_rate": 0.0001896307570984731, "loss": 0.8284, "step": 3155 }, { "epoch": 0.7325055123592897, "grad_norm": 0.5035265684127808, "learning_rate": 0.00018962428870996533, "loss": 0.8643, "step": 3156 }, { "epoch": 0.7327376116978067, "grad_norm": 0.4533073306083679, "learning_rate": 0.00018961781841496964, "loss": 0.8634, "step": 3157 }, { "epoch": 0.7329697110363236, "grad_norm": 0.44151052832603455, "learning_rate": 0.0001896113462136236, "loss": 0.902, "step": 3158 }, { "epoch": 0.7332018103748404, "grad_norm": 0.4948562979698181, "learning_rate": 0.0001896048721060649, "loss": 0.8285, "step": 3159 }, { "epoch": 0.7334339097133573, "grad_norm": 0.45940902829170227, "learning_rate": 0.00018959839609243123, "loss": 0.8079, "step": 3160 }, { "epoch": 0.7336660090518742, "grad_norm": 0.45523616671562195, "learning_rate": 0.00018959191817286044, "loss": 0.8409, "step": 3161 }, { "epoch": 0.733898108390391, "grad_norm": 
0.45003390312194824, "learning_rate": 0.00018958543834749023, "loss": 0.8241, "step": 3162 }, { "epoch": 0.734130207728908, "grad_norm": 0.4484458863735199, "learning_rate": 0.00018957895661645847, "loss": 0.8479, "step": 3163 }, { "epoch": 0.7343623070674249, "grad_norm": 0.42023393511772156, "learning_rate": 0.0001895724729799031, "loss": 0.8685, "step": 3164 }, { "epoch": 0.7345944064059418, "grad_norm": 0.4726946949958801, "learning_rate": 0.00018956598743796193, "loss": 0.8782, "step": 3165 }, { "epoch": 0.7348265057444586, "grad_norm": 0.5408649444580078, "learning_rate": 0.000189559499990773, "loss": 0.8135, "step": 3166 }, { "epoch": 0.7350586050829755, "grad_norm": 1.014572262763977, "learning_rate": 0.00018955301063847428, "loss": 0.8384, "step": 3167 }, { "epoch": 0.7352907044214924, "grad_norm": 0.5515017509460449, "learning_rate": 0.00018954651938120383, "loss": 0.8878, "step": 3168 }, { "epoch": 0.7355228037600093, "grad_norm": 0.8106303811073303, "learning_rate": 0.0001895400262190997, "loss": 0.9051, "step": 3169 }, { "epoch": 0.7357549030985262, "grad_norm": 0.5268080234527588, "learning_rate": 0.00018953353115230007, "loss": 0.8564, "step": 3170 }, { "epoch": 0.7359870024370431, "grad_norm": 0.718584418296814, "learning_rate": 0.00018952703418094303, "loss": 0.8687, "step": 3171 }, { "epoch": 0.7362191017755599, "grad_norm": 1.5258069038391113, "learning_rate": 0.00018952053530516685, "loss": 0.9262, "step": 3172 }, { "epoch": 0.7364512011140768, "grad_norm": 0.8677170872688293, "learning_rate": 0.00018951403452510972, "loss": 0.8515, "step": 3173 }, { "epoch": 0.7366833004525937, "grad_norm": 0.4866112768650055, "learning_rate": 0.00018950753184090996, "loss": 0.8993, "step": 3174 }, { "epoch": 0.7369153997911106, "grad_norm": 0.4085189700126648, "learning_rate": 0.00018950102725270588, "loss": 0.8673, "step": 3175 }, { "epoch": 0.7371474991296275, "grad_norm": 0.4749407172203064, "learning_rate": 0.00018949452076063588, "loss": 0.8745, "step": 
3176 }, { "epoch": 0.7373795984681444, "grad_norm": 0.5084285140037537, "learning_rate": 0.00018948801236483832, "loss": 0.8702, "step": 3177 }, { "epoch": 0.7376116978066612, "grad_norm": 0.4563770592212677, "learning_rate": 0.00018948150206545166, "loss": 0.8899, "step": 3178 }, { "epoch": 0.7378437971451781, "grad_norm": 0.45034876465797424, "learning_rate": 0.00018947498986261437, "loss": 0.7949, "step": 3179 }, { "epoch": 0.738075896483695, "grad_norm": 0.4427724778652191, "learning_rate": 0.00018946847575646502, "loss": 0.8633, "step": 3180 }, { "epoch": 0.738307995822212, "grad_norm": 0.4840271770954132, "learning_rate": 0.00018946195974714215, "loss": 0.8457, "step": 3181 }, { "epoch": 0.7385400951607288, "grad_norm": 1.2185347080230713, "learning_rate": 0.00018945544183478438, "loss": 0.8604, "step": 3182 }, { "epoch": 0.7387721944992457, "grad_norm": 0.5074031949043274, "learning_rate": 0.00018944892201953035, "loss": 0.9013, "step": 3183 }, { "epoch": 0.7390042938377626, "grad_norm": 0.4280321002006531, "learning_rate": 0.00018944240030151875, "loss": 0.834, "step": 3184 }, { "epoch": 0.7392363931762794, "grad_norm": 0.44075271487236023, "learning_rate": 0.00018943587668088832, "loss": 0.9234, "step": 3185 }, { "epoch": 0.7394684925147963, "grad_norm": 0.47977179288864136, "learning_rate": 0.0001894293511577778, "loss": 0.813, "step": 3186 }, { "epoch": 0.7397005918533133, "grad_norm": 0.4650183320045471, "learning_rate": 0.00018942282373232605, "loss": 0.8627, "step": 3187 }, { "epoch": 0.7399326911918301, "grad_norm": 0.45061948895454407, "learning_rate": 0.00018941629440467187, "loss": 0.8253, "step": 3188 }, { "epoch": 0.740164790530347, "grad_norm": 0.411185622215271, "learning_rate": 0.0001894097631749542, "loss": 0.8309, "step": 3189 }, { "epoch": 0.7403968898688639, "grad_norm": 0.41506874561309814, "learning_rate": 0.00018940323004331194, "loss": 0.8719, "step": 3190 }, { "epoch": 0.7406289892073807, "grad_norm": 0.46496716141700745, 
"learning_rate": 0.00018939669500988408, "loss": 0.9062, "step": 3191 }, { "epoch": 0.7408610885458976, "grad_norm": 0.4695342779159546, "learning_rate": 0.00018939015807480962, "loss": 0.8441, "step": 3192 }, { "epoch": 0.7410931878844146, "grad_norm": 0.5576683878898621, "learning_rate": 0.00018938361923822762, "loss": 0.815, "step": 3193 }, { "epoch": 0.7413252872229315, "grad_norm": 0.4930550158023834, "learning_rate": 0.00018937707850027717, "loss": 0.8519, "step": 3194 }, { "epoch": 0.7415573865614483, "grad_norm": 0.4443536400794983, "learning_rate": 0.0001893705358610974, "loss": 0.8637, "step": 3195 }, { "epoch": 0.7417894858999652, "grad_norm": 0.4816209077835083, "learning_rate": 0.00018936399132082752, "loss": 0.8275, "step": 3196 }, { "epoch": 0.742021585238482, "grad_norm": 0.4986628592014313, "learning_rate": 0.00018935744487960673, "loss": 0.8745, "step": 3197 }, { "epoch": 0.7422536845769989, "grad_norm": 0.46281275153160095, "learning_rate": 0.00018935089653757424, "loss": 0.8969, "step": 3198 }, { "epoch": 0.7424857839155159, "grad_norm": 0.4312998950481415, "learning_rate": 0.0001893443462948694, "loss": 0.8677, "step": 3199 }, { "epoch": 0.7427178832540328, "grad_norm": 0.4425949156284332, "learning_rate": 0.00018933779415163148, "loss": 0.8689, "step": 3200 }, { "epoch": 0.7429499825925496, "grad_norm": 0.4613339602947235, "learning_rate": 0.00018933124010799996, "loss": 0.9143, "step": 3201 }, { "epoch": 0.7431820819310665, "grad_norm": 0.4730546772480011, "learning_rate": 0.00018932468416411416, "loss": 0.8815, "step": 3202 }, { "epoch": 0.7434141812695834, "grad_norm": 0.4563005566596985, "learning_rate": 0.00018931812632011364, "loss": 0.8243, "step": 3203 }, { "epoch": 0.7436462806081002, "grad_norm": 0.41305968165397644, "learning_rate": 0.0001893115665761378, "loss": 0.8877, "step": 3204 }, { "epoch": 0.7438783799466172, "grad_norm": 0.43871843814849854, "learning_rate": 0.0001893050049323262, "loss": 0.8426, "step": 3205 }, { "epoch": 
0.7441104792851341, "grad_norm": 0.41709595918655396, "learning_rate": 0.0001892984413888185, "loss": 0.8784, "step": 3206 }, { "epoch": 0.7443425786236509, "grad_norm": 0.4102787971496582, "learning_rate": 0.0001892918759457542, "loss": 0.8648, "step": 3207 }, { "epoch": 0.7445746779621678, "grad_norm": 0.43479642271995544, "learning_rate": 0.00018928530860327304, "loss": 0.8655, "step": 3208 }, { "epoch": 0.7448067773006847, "grad_norm": 0.4014144837856293, "learning_rate": 0.0001892787393615147, "loss": 0.8288, "step": 3209 }, { "epoch": 0.7450388766392015, "grad_norm": 0.4985094964504242, "learning_rate": 0.0001892721682206189, "loss": 0.8806, "step": 3210 }, { "epoch": 0.7452709759777184, "grad_norm": 0.4252754747867584, "learning_rate": 0.00018926559518072544, "loss": 0.8453, "step": 3211 }, { "epoch": 0.7455030753162354, "grad_norm": 0.44293612241744995, "learning_rate": 0.00018925902024197417, "loss": 0.8696, "step": 3212 }, { "epoch": 0.7457351746547523, "grad_norm": 0.4645526111125946, "learning_rate": 0.0001892524434045049, "loss": 0.8276, "step": 3213 }, { "epoch": 0.7459672739932691, "grad_norm": 0.44585075974464417, "learning_rate": 0.00018924586466845757, "loss": 0.8693, "step": 3214 }, { "epoch": 0.746199373331786, "grad_norm": 0.47503209114074707, "learning_rate": 0.00018923928403397208, "loss": 0.8425, "step": 3215 }, { "epoch": 0.7464314726703029, "grad_norm": 0.4473656713962555, "learning_rate": 0.00018923270150118847, "loss": 0.8936, "step": 3216 }, { "epoch": 0.7466635720088197, "grad_norm": 0.42564651370048523, "learning_rate": 0.00018922611707024675, "loss": 0.807, "step": 3217 }, { "epoch": 0.7468956713473367, "grad_norm": 0.47442176938056946, "learning_rate": 0.00018921953074128693, "loss": 0.8337, "step": 3218 }, { "epoch": 0.7471277706858536, "grad_norm": 0.5126108527183533, "learning_rate": 0.00018921294251444918, "loss": 0.8875, "step": 3219 }, { "epoch": 0.7473598700243704, "grad_norm": 0.49941980838775635, "learning_rate": 
0.0001892063523898736, "loss": 0.843, "step": 3220 }, { "epoch": 0.7475919693628873, "grad_norm": 0.44842731952667236, "learning_rate": 0.0001891997603677004, "loss": 0.8214, "step": 3221 }, { "epoch": 0.7478240687014042, "grad_norm": 0.5276552438735962, "learning_rate": 0.0001891931664480698, "loss": 0.8946, "step": 3222 }, { "epoch": 0.748056168039921, "grad_norm": 0.615677535533905, "learning_rate": 0.0001891865706311221, "loss": 0.8562, "step": 3223 }, { "epoch": 0.748288267378438, "grad_norm": 0.49550357460975647, "learning_rate": 0.00018917997291699755, "loss": 0.8244, "step": 3224 }, { "epoch": 0.7485203667169549, "grad_norm": 0.49385181069374084, "learning_rate": 0.00018917337330583652, "loss": 0.799, "step": 3225 }, { "epoch": 0.7487524660554717, "grad_norm": 0.45975160598754883, "learning_rate": 0.00018916677179777938, "loss": 0.9083, "step": 3226 }, { "epoch": 0.7489845653939886, "grad_norm": 0.4725382924079895, "learning_rate": 0.00018916016839296655, "loss": 0.8593, "step": 3227 }, { "epoch": 0.7492166647325055, "grad_norm": 0.4528813064098358, "learning_rate": 0.00018915356309153855, "loss": 0.8728, "step": 3228 }, { "epoch": 0.7494487640710223, "grad_norm": 0.4972705841064453, "learning_rate": 0.00018914695589363587, "loss": 0.8541, "step": 3229 }, { "epoch": 0.7496808634095393, "grad_norm": 0.47611868381500244, "learning_rate": 0.00018914034679939903, "loss": 0.8798, "step": 3230 }, { "epoch": 0.7499129627480562, "grad_norm": 0.4505031108856201, "learning_rate": 0.00018913373580896865, "loss": 0.8792, "step": 3231 }, { "epoch": 0.7501450620865731, "grad_norm": 0.4875110387802124, "learning_rate": 0.00018912712292248532, "loss": 0.8783, "step": 3232 }, { "epoch": 0.7503771614250899, "grad_norm": 0.47140252590179443, "learning_rate": 0.00018912050814008974, "loss": 0.8561, "step": 3233 }, { "epoch": 0.7506092607636068, "grad_norm": 0.4657476246356964, "learning_rate": 0.00018911389146192265, "loss": 0.8488, "step": 3234 }, { "epoch": 
0.7508413601021237, "grad_norm": 0.4387149214744568, "learning_rate": 0.00018910727288812474, "loss": 0.8742, "step": 3235 }, { "epoch": 0.7510734594406406, "grad_norm": 0.5052922368049622, "learning_rate": 0.0001891006524188368, "loss": 0.8739, "step": 3236 }, { "epoch": 0.7513055587791575, "grad_norm": 0.4530850052833557, "learning_rate": 0.0001890940300541997, "loss": 0.9061, "step": 3237 }, { "epoch": 0.7515376581176744, "grad_norm": 3.1598098278045654, "learning_rate": 0.0001890874057943543, "loss": 0.9016, "step": 3238 }, { "epoch": 0.7517697574561912, "grad_norm": 0.575463056564331, "learning_rate": 0.00018908077963944152, "loss": 0.8593, "step": 3239 }, { "epoch": 0.7520018567947081, "grad_norm": 0.5003046989440918, "learning_rate": 0.00018907415158960227, "loss": 0.8577, "step": 3240 }, { "epoch": 0.752233956133225, "grad_norm": 0.4676498472690582, "learning_rate": 0.00018906752164497758, "loss": 0.8465, "step": 3241 }, { "epoch": 0.752466055471742, "grad_norm": 0.5160213112831116, "learning_rate": 0.00018906088980570845, "loss": 0.8915, "step": 3242 }, { "epoch": 0.7526981548102588, "grad_norm": 0.47442084550857544, "learning_rate": 0.000189054256071936, "loss": 0.8759, "step": 3243 }, { "epoch": 0.7529302541487757, "grad_norm": 0.4204835295677185, "learning_rate": 0.00018904762044380134, "loss": 0.8539, "step": 3244 }, { "epoch": 0.7531623534872925, "grad_norm": 1.161871314048767, "learning_rate": 0.00018904098292144554, "loss": 0.8769, "step": 3245 }, { "epoch": 0.7533944528258094, "grad_norm": 0.5895676612854004, "learning_rate": 0.0001890343435050099, "loss": 0.8331, "step": 3246 }, { "epoch": 0.7536265521643263, "grad_norm": 0.46669527888298035, "learning_rate": 0.0001890277021946356, "loss": 0.8935, "step": 3247 }, { "epoch": 0.7538586515028433, "grad_norm": 0.45063653588294983, "learning_rate": 0.00018902105899046393, "loss": 0.8901, "step": 3248 }, { "epoch": 0.7540907508413601, "grad_norm": 0.529914915561676, "learning_rate": 
0.00018901441389263616, "loss": 0.8639, "step": 3249 }, { "epoch": 0.754322850179877, "grad_norm": 0.4377218186855316, "learning_rate": 0.0001890077669012937, "loss": 0.8383, "step": 3250 }, { "epoch": 0.7545549495183939, "grad_norm": 0.48695462942123413, "learning_rate": 0.00018900111801657793, "loss": 0.8819, "step": 3251 }, { "epoch": 0.7547870488569107, "grad_norm": 0.5476789474487305, "learning_rate": 0.0001889944672386303, "loss": 0.8498, "step": 3252 }, { "epoch": 0.7550191481954276, "grad_norm": 0.4699525237083435, "learning_rate": 0.00018898781456759225, "loss": 0.8713, "step": 3253 }, { "epoch": 0.7552512475339446, "grad_norm": 0.46312832832336426, "learning_rate": 0.0001889811600036053, "loss": 0.9125, "step": 3254 }, { "epoch": 0.7554833468724614, "grad_norm": 0.47221139073371887, "learning_rate": 0.000188974503546811, "loss": 0.8606, "step": 3255 }, { "epoch": 0.7557154462109783, "grad_norm": 0.40278011560440063, "learning_rate": 0.00018896784519735097, "loss": 0.8529, "step": 3256 }, { "epoch": 0.7559475455494952, "grad_norm": 0.48359543085098267, "learning_rate": 0.00018896118495536688, "loss": 0.8463, "step": 3257 }, { "epoch": 0.756179644888012, "grad_norm": 0.4459293782711029, "learning_rate": 0.00018895452282100035, "loss": 0.8415, "step": 3258 }, { "epoch": 0.7564117442265289, "grad_norm": 0.4269028306007385, "learning_rate": 0.0001889478587943931, "loss": 0.831, "step": 3259 }, { "epoch": 0.7566438435650459, "grad_norm": 0.9275829195976257, "learning_rate": 0.0001889411928756869, "loss": 0.8383, "step": 3260 }, { "epoch": 0.7568759429035627, "grad_norm": 0.4466782212257385, "learning_rate": 0.00018893452506502353, "loss": 0.8809, "step": 3261 }, { "epoch": 0.7571080422420796, "grad_norm": 0.4627876281738281, "learning_rate": 0.00018892785536254487, "loss": 0.8879, "step": 3262 }, { "epoch": 0.7573401415805965, "grad_norm": 0.43063217401504517, "learning_rate": 0.00018892118376839277, "loss": 0.857, "step": 3263 }, { "epoch": 0.7575722409191133, 
"grad_norm": 0.41059431433677673, "learning_rate": 0.00018891451028270914, "loss": 0.8268, "step": 3264 }, { "epoch": 0.7578043402576302, "grad_norm": 0.4340810477733612, "learning_rate": 0.00018890783490563599, "loss": 0.8546, "step": 3265 }, { "epoch": 0.7580364395961472, "grad_norm": 0.4665816128253937, "learning_rate": 0.00018890115763731523, "loss": 0.8638, "step": 3266 }, { "epoch": 0.7582685389346641, "grad_norm": 0.4820438623428345, "learning_rate": 0.00018889447847788898, "loss": 0.8583, "step": 3267 }, { "epoch": 0.7585006382731809, "grad_norm": 0.4906483292579651, "learning_rate": 0.00018888779742749927, "loss": 0.8361, "step": 3268 }, { "epoch": 0.7587327376116978, "grad_norm": 0.4087394177913666, "learning_rate": 0.00018888111448628822, "loss": 0.8926, "step": 3269 }, { "epoch": 0.7589648369502147, "grad_norm": 0.42434924840927124, "learning_rate": 0.00018887442965439806, "loss": 0.8773, "step": 3270 }, { "epoch": 0.7591969362887315, "grad_norm": 0.47027862071990967, "learning_rate": 0.00018886774293197086, "loss": 0.8899, "step": 3271 }, { "epoch": 0.7594290356272485, "grad_norm": 0.4409805238246918, "learning_rate": 0.000188861054319149, "loss": 0.8652, "step": 3272 }, { "epoch": 0.7596611349657654, "grad_norm": 0.4796992540359497, "learning_rate": 0.00018885436381607467, "loss": 0.8621, "step": 3273 }, { "epoch": 0.7598932343042822, "grad_norm": 0.45063644647598267, "learning_rate": 0.0001888476714228902, "loss": 0.8419, "step": 3274 }, { "epoch": 0.7601253336427991, "grad_norm": 0.43417638540267944, "learning_rate": 0.00018884097713973798, "loss": 0.8915, "step": 3275 }, { "epoch": 0.760357432981316, "grad_norm": 0.4351886808872223, "learning_rate": 0.00018883428096676038, "loss": 0.8915, "step": 3276 }, { "epoch": 0.7605895323198328, "grad_norm": 0.469130277633667, "learning_rate": 0.0001888275829040999, "loss": 0.8532, "step": 3277 }, { "epoch": 0.7608216316583498, "grad_norm": 0.42595192790031433, "learning_rate": 0.00018882088295189896, "loss": 
0.849, "step": 3278 }, { "epoch": 0.7610537309968667, "grad_norm": 0.5126999616622925, "learning_rate": 0.00018881418111030008, "loss": 0.8266, "step": 3279 }, { "epoch": 0.7612858303353836, "grad_norm": 0.5088613033294678, "learning_rate": 0.00018880747737944588, "loss": 0.8588, "step": 3280 }, { "epoch": 0.7615179296739004, "grad_norm": 0.46761438250541687, "learning_rate": 0.0001888007717594789, "loss": 0.8895, "step": 3281 }, { "epoch": 0.7617500290124173, "grad_norm": 0.5314352512359619, "learning_rate": 0.0001887940642505418, "loss": 0.8697, "step": 3282 }, { "epoch": 0.7619821283509342, "grad_norm": 0.5307244658470154, "learning_rate": 0.00018878735485277728, "loss": 0.874, "step": 3283 }, { "epoch": 0.7622142276894511, "grad_norm": 0.5744552612304688, "learning_rate": 0.00018878064356632804, "loss": 0.8609, "step": 3284 }, { "epoch": 0.762446327027968, "grad_norm": 0.5284180641174316, "learning_rate": 0.00018877393039133685, "loss": 0.8228, "step": 3285 }, { "epoch": 0.7626784263664849, "grad_norm": 0.44894444942474365, "learning_rate": 0.00018876721532794656, "loss": 0.8212, "step": 3286 }, { "epoch": 0.7629105257050017, "grad_norm": 0.4346754848957062, "learning_rate": 0.0001887604983762999, "loss": 0.8445, "step": 3287 }, { "epoch": 0.7631426250435186, "grad_norm": 0.48122262954711914, "learning_rate": 0.00018875377953653987, "loss": 0.8486, "step": 3288 }, { "epoch": 0.7633747243820355, "grad_norm": 0.4222863018512726, "learning_rate": 0.0001887470588088093, "loss": 0.8853, "step": 3289 }, { "epoch": 0.7636068237205524, "grad_norm": 0.5004928112030029, "learning_rate": 0.00018874033619325126, "loss": 0.8641, "step": 3290 }, { "epoch": 0.7638389230590693, "grad_norm": 0.5010069608688354, "learning_rate": 0.00018873361169000866, "loss": 0.8947, "step": 3291 }, { "epoch": 0.7640710223975862, "grad_norm": 0.43055862188339233, "learning_rate": 0.00018872688529922456, "loss": 0.8799, "step": 3292 }, { "epoch": 0.764303121736103, "grad_norm": 
0.4613746106624603, "learning_rate": 0.00018872015702104205, "loss": 0.9364, "step": 3293 }, { "epoch": 0.7645352210746199, "grad_norm": 0.45628494024276733, "learning_rate": 0.0001887134268556043, "loss": 0.8645, "step": 3294 }, { "epoch": 0.7647673204131368, "grad_norm": 0.4347071945667267, "learning_rate": 0.0001887066948030544, "loss": 0.834, "step": 3295 }, { "epoch": 0.7649994197516538, "grad_norm": 0.4646000266075134, "learning_rate": 0.00018869996086353562, "loss": 0.857, "step": 3296 }, { "epoch": 0.7652315190901706, "grad_norm": 0.46321535110473633, "learning_rate": 0.00018869322503719117, "loss": 0.8886, "step": 3297 }, { "epoch": 0.7654636184286875, "grad_norm": 0.41107702255249023, "learning_rate": 0.00018868648732416431, "loss": 0.8432, "step": 3298 }, { "epoch": 0.7656957177672044, "grad_norm": 0.4080386161804199, "learning_rate": 0.00018867974772459844, "loss": 0.9037, "step": 3299 }, { "epoch": 0.7659278171057212, "grad_norm": 0.4257357716560364, "learning_rate": 0.00018867300623863687, "loss": 0.8652, "step": 3300 }, { "epoch": 0.7661599164442381, "grad_norm": 0.4747604727745056, "learning_rate": 0.000188666262866423, "loss": 0.8052, "step": 3301 }, { "epoch": 0.7663920157827551, "grad_norm": 0.44749340415000916, "learning_rate": 0.0001886595176081003, "loss": 0.8526, "step": 3302 }, { "epoch": 0.7666241151212719, "grad_norm": 0.45419833064079285, "learning_rate": 0.00018865277046381222, "loss": 0.8616, "step": 3303 }, { "epoch": 0.7668562144597888, "grad_norm": 0.42325931787490845, "learning_rate": 0.00018864602143370233, "loss": 0.8116, "step": 3304 }, { "epoch": 0.7670883137983057, "grad_norm": 0.43192431330680847, "learning_rate": 0.00018863927051791416, "loss": 0.8629, "step": 3305 }, { "epoch": 0.7673204131368225, "grad_norm": 0.461934894323349, "learning_rate": 0.00018863251771659135, "loss": 0.8744, "step": 3306 }, { "epoch": 0.7675525124753394, "grad_norm": 0.41841477155685425, "learning_rate": 0.0001886257630298775, "loss": 0.7957, 
"step": 3307 }, { "epoch": 0.7677846118138564, "grad_norm": 0.44775447249412537, "learning_rate": 0.00018861900645791635, "loss": 0.8945, "step": 3308 }, { "epoch": 0.7680167111523732, "grad_norm": 0.41792336106300354, "learning_rate": 0.0001886122480008516, "loss": 0.8734, "step": 3309 }, { "epoch": 0.7682488104908901, "grad_norm": 0.42192143201828003, "learning_rate": 0.000188605487658827, "loss": 0.8765, "step": 3310 }, { "epoch": 0.768480909829407, "grad_norm": 0.4670575261116028, "learning_rate": 0.00018859872543198634, "loss": 0.8584, "step": 3311 }, { "epoch": 0.7687130091679238, "grad_norm": 0.40679651498794556, "learning_rate": 0.00018859196132047355, "loss": 0.8312, "step": 3312 }, { "epoch": 0.7689451085064407, "grad_norm": 0.4621047079563141, "learning_rate": 0.00018858519532443245, "loss": 0.8926, "step": 3313 }, { "epoch": 0.7691772078449577, "grad_norm": 0.44526734948158264, "learning_rate": 0.00018857842744400696, "loss": 0.8253, "step": 3314 }, { "epoch": 0.7694093071834746, "grad_norm": 0.4544711112976074, "learning_rate": 0.00018857165767934106, "loss": 0.8501, "step": 3315 }, { "epoch": 0.7696414065219914, "grad_norm": 0.4194268584251404, "learning_rate": 0.00018856488603057877, "loss": 0.7946, "step": 3316 }, { "epoch": 0.7698735058605083, "grad_norm": 0.4135521352291107, "learning_rate": 0.00018855811249786415, "loss": 0.8401, "step": 3317 }, { "epoch": 0.7701056051990252, "grad_norm": 0.4504725933074951, "learning_rate": 0.00018855133708134124, "loss": 0.8777, "step": 3318 }, { "epoch": 0.770337704537542, "grad_norm": 0.4386938512325287, "learning_rate": 0.00018854455978115417, "loss": 0.8754, "step": 3319 }, { "epoch": 0.770569803876059, "grad_norm": 0.4175586998462677, "learning_rate": 0.00018853778059744716, "loss": 0.8778, "step": 3320 }, { "epoch": 0.7708019032145759, "grad_norm": 0.4157339334487915, "learning_rate": 0.00018853099953036436, "loss": 0.8781, "step": 3321 }, { "epoch": 0.7710340025530927, "grad_norm": 0.42435452342033386, 
"learning_rate": 0.00018852421658005006, "loss": 0.8835, "step": 3322 }, { "epoch": 0.7712661018916096, "grad_norm": 0.4033529460430145, "learning_rate": 0.00018851743174664852, "loss": 0.8924, "step": 3323 }, { "epoch": 0.7714982012301265, "grad_norm": 0.42125362157821655, "learning_rate": 0.00018851064503030407, "loss": 0.8972, "step": 3324 }, { "epoch": 0.7717303005686433, "grad_norm": 0.4321385324001312, "learning_rate": 0.00018850385643116112, "loss": 0.8531, "step": 3325 }, { "epoch": 0.7719623999071603, "grad_norm": 0.43436968326568604, "learning_rate": 0.00018849706594936398, "loss": 0.863, "step": 3326 }, { "epoch": 0.7721944992456772, "grad_norm": 0.42387592792510986, "learning_rate": 0.0001884902735850572, "loss": 0.8828, "step": 3327 }, { "epoch": 0.772426598584194, "grad_norm": 0.48432788252830505, "learning_rate": 0.0001884834793383852, "loss": 0.8868, "step": 3328 }, { "epoch": 0.7726586979227109, "grad_norm": 0.3944893479347229, "learning_rate": 0.00018847668320949254, "loss": 0.856, "step": 3329 }, { "epoch": 0.7728907972612278, "grad_norm": 0.46433237195014954, "learning_rate": 0.00018846988519852378, "loss": 0.8457, "step": 3330 }, { "epoch": 0.7731228965997446, "grad_norm": 0.4153928756713867, "learning_rate": 0.0001884630853056235, "loss": 0.8747, "step": 3331 }, { "epoch": 0.7733549959382616, "grad_norm": 0.3733210265636444, "learning_rate": 0.00018845628353093643, "loss": 0.7893, "step": 3332 }, { "epoch": 0.7735870952767785, "grad_norm": 0.4383198022842407, "learning_rate": 0.00018844947987460716, "loss": 0.8807, "step": 3333 }, { "epoch": 0.7738191946152954, "grad_norm": 0.4277530908584595, "learning_rate": 0.00018844267433678044, "loss": 0.8949, "step": 3334 }, { "epoch": 0.7740512939538122, "grad_norm": 0.4016442894935608, "learning_rate": 0.00018843586691760108, "loss": 0.8357, "step": 3335 }, { "epoch": 0.7742833932923291, "grad_norm": 0.4334081709384918, "learning_rate": 0.00018842905761721386, "loss": 0.8324, "step": 3336 }, { 
"epoch": 0.774515492630846, "grad_norm": 0.42637497186660767, "learning_rate": 0.00018842224643576363, "loss": 0.807, "step": 3337 }, { "epoch": 0.7747475919693629, "grad_norm": 0.40150633454322815, "learning_rate": 0.00018841543337339527, "loss": 0.8234, "step": 3338 }, { "epoch": 0.7749796913078798, "grad_norm": 0.42021191120147705, "learning_rate": 0.00018840861843025372, "loss": 0.8552, "step": 3339 }, { "epoch": 0.7752117906463967, "grad_norm": 0.4452309012413025, "learning_rate": 0.00018840180160648395, "loss": 0.8135, "step": 3340 }, { "epoch": 0.7754438899849135, "grad_norm": 0.43206655979156494, "learning_rate": 0.00018839498290223095, "loss": 0.8596, "step": 3341 }, { "epoch": 0.7756759893234304, "grad_norm": 0.4130360782146454, "learning_rate": 0.00018838816231763977, "loss": 0.8541, "step": 3342 }, { "epoch": 0.7759080886619473, "grad_norm": 0.45082494616508484, "learning_rate": 0.00018838133985285552, "loss": 0.8843, "step": 3343 }, { "epoch": 0.7761401880004642, "grad_norm": 0.5338907837867737, "learning_rate": 0.0001883745155080233, "loss": 0.8309, "step": 3344 }, { "epoch": 0.7763722873389811, "grad_norm": 0.43897995352745056, "learning_rate": 0.00018836768928328826, "loss": 0.8785, "step": 3345 }, { "epoch": 0.776604386677498, "grad_norm": 1.2864813804626465, "learning_rate": 0.0001883608611787957, "loss": 0.8752, "step": 3346 }, { "epoch": 0.7768364860160148, "grad_norm": 0.5377257466316223, "learning_rate": 0.00018835403119469074, "loss": 0.8971, "step": 3347 }, { "epoch": 0.7770685853545317, "grad_norm": 0.4803743064403534, "learning_rate": 0.00018834719933111878, "loss": 0.8353, "step": 3348 }, { "epoch": 0.7773006846930486, "grad_norm": 0.44808346033096313, "learning_rate": 0.0001883403655882251, "loss": 0.8331, "step": 3349 }, { "epoch": 0.7775327840315656, "grad_norm": 0.5543433427810669, "learning_rate": 0.00018833352996615505, "loss": 0.904, "step": 3350 }, { "epoch": 0.7777648833700824, "grad_norm": 0.4741339683532715, "learning_rate": 
0.00018832669246505405, "loss": 0.7935, "step": 3351 }, { "epoch": 0.7779969827085993, "grad_norm": 0.43485403060913086, "learning_rate": 0.00018831985308506754, "loss": 0.8698, "step": 3352 }, { "epoch": 0.7782290820471162, "grad_norm": 0.48178091645240784, "learning_rate": 0.00018831301182634105, "loss": 0.8731, "step": 3353 }, { "epoch": 0.778461181385633, "grad_norm": 0.42691439390182495, "learning_rate": 0.00018830616868902004, "loss": 0.912, "step": 3354 }, { "epoch": 0.7786932807241499, "grad_norm": 0.4419221878051758, "learning_rate": 0.00018829932367325013, "loss": 0.8685, "step": 3355 }, { "epoch": 0.7789253800626669, "grad_norm": 0.443511039018631, "learning_rate": 0.0001882924767791769, "loss": 0.8942, "step": 3356 }, { "epoch": 0.7791574794011837, "grad_norm": 0.4504185914993286, "learning_rate": 0.00018828562800694604, "loss": 0.885, "step": 3357 }, { "epoch": 0.7793895787397006, "grad_norm": 0.4655546247959137, "learning_rate": 0.00018827877735670318, "loss": 0.8535, "step": 3358 }, { "epoch": 0.7796216780782175, "grad_norm": 0.43752214312553406, "learning_rate": 0.00018827192482859408, "loss": 0.8566, "step": 3359 }, { "epoch": 0.7798537774167343, "grad_norm": 0.45721447467803955, "learning_rate": 0.00018826507042276448, "loss": 0.8964, "step": 3360 }, { "epoch": 0.7800858767552512, "grad_norm": 0.48499321937561035, "learning_rate": 0.00018825821413936023, "loss": 0.8371, "step": 3361 }, { "epoch": 0.7803179760937682, "grad_norm": 0.4402202367782593, "learning_rate": 0.00018825135597852716, "loss": 0.826, "step": 3362 }, { "epoch": 0.780550075432285, "grad_norm": 0.45571741461753845, "learning_rate": 0.0001882444959404111, "loss": 0.8696, "step": 3363 }, { "epoch": 0.7807821747708019, "grad_norm": 0.4512627124786377, "learning_rate": 0.00018823763402515807, "loss": 0.8568, "step": 3364 }, { "epoch": 0.7810142741093188, "grad_norm": 0.42780259251594543, "learning_rate": 0.00018823077023291397, "loss": 0.9127, "step": 3365 }, { "epoch": 
0.7812463734478357, "grad_norm": 0.45455968379974365, "learning_rate": 0.0001882239045638248, "loss": 0.8986, "step": 3366 }, { "epoch": 0.7814784727863525, "grad_norm": 0.3917073607444763, "learning_rate": 0.0001882170370180367, "loss": 0.853, "step": 3367 }, { "epoch": 0.7817105721248695, "grad_norm": 0.38930073380470276, "learning_rate": 0.00018821016759569567, "loss": 0.7964, "step": 3368 }, { "epoch": 0.7819426714633864, "grad_norm": 0.4383113384246826, "learning_rate": 0.00018820329629694787, "loss": 0.8721, "step": 3369 }, { "epoch": 0.7821747708019032, "grad_norm": 0.4132974147796631, "learning_rate": 0.0001881964231219394, "loss": 0.8493, "step": 3370 }, { "epoch": 0.7824068701404201, "grad_norm": 0.459805428981781, "learning_rate": 0.00018818954807081658, "loss": 0.8508, "step": 3371 }, { "epoch": 0.782638969478937, "grad_norm": 0.4648784399032593, "learning_rate": 0.00018818267114372556, "loss": 0.8678, "step": 3372 }, { "epoch": 0.7828710688174538, "grad_norm": 0.44887879490852356, "learning_rate": 0.00018817579234081268, "loss": 0.8307, "step": 3373 }, { "epoch": 0.7831031681559708, "grad_norm": 0.6187593936920166, "learning_rate": 0.00018816891166222423, "loss": 0.8766, "step": 3374 }, { "epoch": 0.7833352674944877, "grad_norm": 0.4752577543258667, "learning_rate": 0.00018816202910810662, "loss": 0.8127, "step": 3375 }, { "epoch": 0.7835673668330045, "grad_norm": 0.4152193069458008, "learning_rate": 0.00018815514467860624, "loss": 0.8367, "step": 3376 }, { "epoch": 0.7837994661715214, "grad_norm": 0.38477200269699097, "learning_rate": 0.0001881482583738695, "loss": 0.7932, "step": 3377 }, { "epoch": 0.7840315655100383, "grad_norm": 0.4910799264907837, "learning_rate": 0.00018814137019404291, "loss": 0.8528, "step": 3378 }, { "epoch": 0.7842636648485551, "grad_norm": 0.45218974351882935, "learning_rate": 0.000188134480139273, "loss": 0.8581, "step": 3379 }, { "epoch": 0.7844957641870721, "grad_norm": 0.423141747713089, "learning_rate": 
0.00018812758820970635, "loss": 0.8194, "step": 3380 }, { "epoch": 0.784727863525589, "grad_norm": 0.5799574255943298, "learning_rate": 0.00018812069440548953, "loss": 0.8778, "step": 3381 }, { "epoch": 0.7849599628641059, "grad_norm": 0.44098758697509766, "learning_rate": 0.0001881137987267692, "loss": 0.8379, "step": 3382 }, { "epoch": 0.7851920622026227, "grad_norm": 0.434326708316803, "learning_rate": 0.00018810690117369204, "loss": 0.8028, "step": 3383 }, { "epoch": 0.7854241615411396, "grad_norm": 0.49173396825790405, "learning_rate": 0.0001881000017464048, "loss": 0.8805, "step": 3384 }, { "epoch": 0.7856562608796565, "grad_norm": 0.394846647977829, "learning_rate": 0.00018809310044505423, "loss": 0.8237, "step": 3385 }, { "epoch": 0.7858883602181734, "grad_norm": 0.4229886531829834, "learning_rate": 0.0001880861972697871, "loss": 0.8303, "step": 3386 }, { "epoch": 0.7861204595566903, "grad_norm": 0.5183858871459961, "learning_rate": 0.00018807929222075032, "loss": 0.8395, "step": 3387 }, { "epoch": 0.7863525588952072, "grad_norm": 0.4625033736228943, "learning_rate": 0.0001880723852980907, "loss": 0.8433, "step": 3388 }, { "epoch": 0.786584658233724, "grad_norm": 0.48947569727897644, "learning_rate": 0.00018806547650195525, "loss": 0.8563, "step": 3389 }, { "epoch": 0.7868167575722409, "grad_norm": 0.46009567379951477, "learning_rate": 0.00018805856583249087, "loss": 0.8866, "step": 3390 }, { "epoch": 0.7870488569107578, "grad_norm": 0.4721662104129791, "learning_rate": 0.00018805165328984456, "loss": 0.8502, "step": 3391 }, { "epoch": 0.7872809562492747, "grad_norm": 0.5515340566635132, "learning_rate": 0.0001880447388741634, "loss": 0.8386, "step": 3392 }, { "epoch": 0.7875130555877916, "grad_norm": 0.4354664087295532, "learning_rate": 0.00018803782258559445, "loss": 0.8383, "step": 3393 }, { "epoch": 0.7877451549263085, "grad_norm": 0.4534926116466522, "learning_rate": 0.00018803090442428487, "loss": 0.8282, "step": 3394 }, { "epoch": 0.7879772542648253, 
"grad_norm": 0.4439781904220581, "learning_rate": 0.00018802398439038176, "loss": 0.8326, "step": 3395 }, { "epoch": 0.7882093536033422, "grad_norm": 0.4885489344596863, "learning_rate": 0.0001880170624840324, "loss": 0.8753, "step": 3396 }, { "epoch": 0.7884414529418591, "grad_norm": 0.43834125995635986, "learning_rate": 0.00018801013870538394, "loss": 0.83, "step": 3397 }, { "epoch": 0.7886735522803761, "grad_norm": 0.42443975806236267, "learning_rate": 0.00018800321305458375, "loss": 0.8488, "step": 3398 }, { "epoch": 0.7889056516188929, "grad_norm": 0.42599183320999146, "learning_rate": 0.0001879962855317791, "loss": 0.8496, "step": 3399 }, { "epoch": 0.7891377509574098, "grad_norm": 0.4471037685871124, "learning_rate": 0.00018798935613711736, "loss": 0.8278, "step": 3400 }, { "epoch": 0.7893698502959267, "grad_norm": 0.43585050106048584, "learning_rate": 0.00018798242487074598, "loss": 0.8416, "step": 3401 }, { "epoch": 0.7896019496344435, "grad_norm": 0.45447254180908203, "learning_rate": 0.0001879754917328123, "loss": 0.8898, "step": 3402 }, { "epoch": 0.7898340489729604, "grad_norm": 0.4820445477962494, "learning_rate": 0.0001879685567234639, "loss": 0.8794, "step": 3403 }, { "epoch": 0.7900661483114774, "grad_norm": 0.39793041348457336, "learning_rate": 0.00018796161984284827, "loss": 0.8464, "step": 3404 }, { "epoch": 0.7902982476499942, "grad_norm": 0.44046905636787415, "learning_rate": 0.00018795468109111295, "loss": 0.7844, "step": 3405 }, { "epoch": 0.7905303469885111, "grad_norm": 0.4133683145046234, "learning_rate": 0.00018794774046840558, "loss": 0.8363, "step": 3406 }, { "epoch": 0.790762446327028, "grad_norm": 0.4194641709327698, "learning_rate": 0.00018794079797487373, "loss": 0.8348, "step": 3407 }, { "epoch": 0.7909945456655448, "grad_norm": 0.41118744015693665, "learning_rate": 0.0001879338536106652, "loss": 0.8548, "step": 3408 }, { "epoch": 0.7912266450040617, "grad_norm": 0.39930063486099243, "learning_rate": 0.00018792690737592758, 
"loss": 0.8522, "step": 3409 }, { "epoch": 0.7914587443425786, "grad_norm": 0.5010667443275452, "learning_rate": 0.0001879199592708087, "loss": 0.885, "step": 3410 }, { "epoch": 0.7916908436810955, "grad_norm": 0.41491976380348206, "learning_rate": 0.00018791300929545636, "loss": 0.8322, "step": 3411 }, { "epoch": 0.7919229430196124, "grad_norm": 0.3828047811985016, "learning_rate": 0.0001879060574500184, "loss": 0.8105, "step": 3412 }, { "epoch": 0.7921550423581293, "grad_norm": 0.4636065363883972, "learning_rate": 0.00018789910373464267, "loss": 0.8345, "step": 3413 }, { "epoch": 0.7923871416966461, "grad_norm": 0.4853920638561249, "learning_rate": 0.00018789214814947714, "loss": 0.8505, "step": 3414 }, { "epoch": 0.792619241035163, "grad_norm": 0.42823195457458496, "learning_rate": 0.0001878851906946697, "loss": 0.8037, "step": 3415 }, { "epoch": 0.7928513403736799, "grad_norm": 0.4533211886882782, "learning_rate": 0.0001878782313703684, "loss": 0.8333, "step": 3416 }, { "epoch": 0.7930834397121969, "grad_norm": 0.4892052412033081, "learning_rate": 0.0001878712701767213, "loss": 0.8851, "step": 3417 }, { "epoch": 0.7933155390507137, "grad_norm": 0.4355521500110626, "learning_rate": 0.00018786430711387642, "loss": 0.8824, "step": 3418 }, { "epoch": 0.7935476383892306, "grad_norm": 0.44084426760673523, "learning_rate": 0.00018785734218198187, "loss": 0.8704, "step": 3419 }, { "epoch": 0.7937797377277475, "grad_norm": 0.38419628143310547, "learning_rate": 0.0001878503753811859, "loss": 0.7974, "step": 3420 }, { "epoch": 0.7940118370662643, "grad_norm": 0.45667892694473267, "learning_rate": 0.00018784340671163657, "loss": 0.8919, "step": 3421 }, { "epoch": 0.7942439364047812, "grad_norm": 0.4693489968776703, "learning_rate": 0.00018783643617348227, "loss": 0.8776, "step": 3422 }, { "epoch": 0.7944760357432982, "grad_norm": 0.40943169593811035, "learning_rate": 0.00018782946376687119, "loss": 0.8557, "step": 3423 }, { "epoch": 0.794708135081815, "grad_norm": 
0.5191670656204224, "learning_rate": 0.00018782248949195162, "loss": 0.8518, "step": 3424 }, { "epoch": 0.7949402344203319, "grad_norm": 0.46812570095062256, "learning_rate": 0.00018781551334887201, "loss": 0.8196, "step": 3425 }, { "epoch": 0.7951723337588488, "grad_norm": 0.4430274963378906, "learning_rate": 0.00018780853533778067, "loss": 0.864, "step": 3426 }, { "epoch": 0.7954044330973656, "grad_norm": 0.47701045870780945, "learning_rate": 0.00018780155545882607, "loss": 0.8216, "step": 3427 }, { "epoch": 0.7956365324358825, "grad_norm": 0.42306727170944214, "learning_rate": 0.0001877945737121567, "loss": 0.8793, "step": 3428 }, { "epoch": 0.7958686317743995, "grad_norm": 0.4823848605155945, "learning_rate": 0.00018778759009792107, "loss": 0.8802, "step": 3429 }, { "epoch": 0.7961007311129163, "grad_norm": 0.44711732864379883, "learning_rate": 0.0001877806046162677, "loss": 0.8868, "step": 3430 }, { "epoch": 0.7963328304514332, "grad_norm": 0.408948689699173, "learning_rate": 0.00018777361726734525, "loss": 0.8636, "step": 3431 }, { "epoch": 0.7965649297899501, "grad_norm": 0.39797282218933105, "learning_rate": 0.00018776662805130228, "loss": 0.8507, "step": 3432 }, { "epoch": 0.796797029128467, "grad_norm": 0.4725627303123474, "learning_rate": 0.0001877596369682875, "loss": 0.8569, "step": 3433 }, { "epoch": 0.7970291284669838, "grad_norm": 0.41251513361930847, "learning_rate": 0.00018775264401844964, "loss": 0.8461, "step": 3434 }, { "epoch": 0.7972612278055008, "grad_norm": 0.42301279306411743, "learning_rate": 0.00018774564920193747, "loss": 0.8739, "step": 3435 }, { "epoch": 0.7974933271440177, "grad_norm": 0.3987971246242523, "learning_rate": 0.00018773865251889974, "loss": 0.8421, "step": 3436 }, { "epoch": 0.7977254264825345, "grad_norm": 0.40121906995773315, "learning_rate": 0.00018773165396948524, "loss": 0.8379, "step": 3437 }, { "epoch": 0.7979575258210514, "grad_norm": 0.4679865837097168, "learning_rate": 0.00018772465355384297, "loss": 0.8746, 
"step": 3438 }, { "epoch": 0.7981896251595683, "grad_norm": 0.39240580797195435, "learning_rate": 0.00018771765127212174, "loss": 0.8413, "step": 3439 }, { "epoch": 0.7984217244980851, "grad_norm": 0.44022026658058167, "learning_rate": 0.00018771064712447055, "loss": 0.8676, "step": 3440 }, { "epoch": 0.7986538238366021, "grad_norm": 0.4776321053504944, "learning_rate": 0.00018770364111103836, "loss": 0.8362, "step": 3441 }, { "epoch": 0.798885923175119, "grad_norm": 0.4371494948863983, "learning_rate": 0.00018769663323197423, "loss": 0.8479, "step": 3442 }, { "epoch": 0.7991180225136358, "grad_norm": 0.4880908131599426, "learning_rate": 0.00018768962348742724, "loss": 0.8788, "step": 3443 }, { "epoch": 0.7993501218521527, "grad_norm": 0.47458866238594055, "learning_rate": 0.00018768261187754646, "loss": 0.7764, "step": 3444 }, { "epoch": 0.7995822211906696, "grad_norm": 0.42567554116249084, "learning_rate": 0.00018767559840248107, "loss": 0.8426, "step": 3445 }, { "epoch": 0.7998143205291864, "grad_norm": 0.48489779233932495, "learning_rate": 0.00018766858306238024, "loss": 0.8177, "step": 3446 }, { "epoch": 0.8000464198677034, "grad_norm": 0.45326873660087585, "learning_rate": 0.00018766156585739324, "loss": 0.8606, "step": 3447 }, { "epoch": 0.8002785192062203, "grad_norm": 0.4719413220882416, "learning_rate": 0.0001876545467876693, "loss": 0.7925, "step": 3448 }, { "epoch": 0.8005106185447372, "grad_norm": 0.500838577747345, "learning_rate": 0.00018764752585335778, "loss": 0.8949, "step": 3449 }, { "epoch": 0.800742717883254, "grad_norm": 0.44163164496421814, "learning_rate": 0.00018764050305460797, "loss": 0.8813, "step": 3450 }, { "epoch": 0.8009748172217709, "grad_norm": 0.47142520546913147, "learning_rate": 0.00018763347839156926, "loss": 0.8996, "step": 3451 }, { "epoch": 0.8012069165602878, "grad_norm": 0.5530799031257629, "learning_rate": 0.00018762645186439113, "loss": 0.8646, "step": 3452 }, { "epoch": 0.8014390158988047, "grad_norm": 
0.46285897493362427, "learning_rate": 0.00018761942347322302, "loss": 0.7762, "step": 3453 }, { "epoch": 0.8016711152373216, "grad_norm": 0.49290764331817627, "learning_rate": 0.00018761239321821442, "loss": 0.8737, "step": 3454 }, { "epoch": 0.8019032145758385, "grad_norm": 0.5251444578170776, "learning_rate": 0.0001876053610995149, "loss": 0.8508, "step": 3455 }, { "epoch": 0.8021353139143553, "grad_norm": 0.42739570140838623, "learning_rate": 0.00018759832711727403, "loss": 0.8732, "step": 3456 }, { "epoch": 0.8023674132528722, "grad_norm": 0.38648343086242676, "learning_rate": 0.00018759129127164146, "loss": 0.845, "step": 3457 }, { "epoch": 0.8025995125913891, "grad_norm": 0.4752078056335449, "learning_rate": 0.00018758425356276686, "loss": 0.832, "step": 3458 }, { "epoch": 0.802831611929906, "grad_norm": 0.4543495774269104, "learning_rate": 0.0001875772139907999, "loss": 0.8581, "step": 3459 }, { "epoch": 0.8030637112684229, "grad_norm": 0.3776591420173645, "learning_rate": 0.0001875701725558904, "loss": 0.8063, "step": 3460 }, { "epoch": 0.8032958106069398, "grad_norm": 0.4207751750946045, "learning_rate": 0.00018756312925818805, "loss": 0.835, "step": 3461 }, { "epoch": 0.8035279099454566, "grad_norm": 0.49569350481033325, "learning_rate": 0.00018755608409784272, "loss": 0.8415, "step": 3462 }, { "epoch": 0.8037600092839735, "grad_norm": 0.4109519422054291, "learning_rate": 0.00018754903707500427, "loss": 0.8449, "step": 3463 }, { "epoch": 0.8039921086224904, "grad_norm": 0.41846394538879395, "learning_rate": 0.0001875419881898226, "loss": 0.8537, "step": 3464 }, { "epoch": 0.8042242079610074, "grad_norm": 0.4197466969490051, "learning_rate": 0.00018753493744244767, "loss": 0.8315, "step": 3465 }, { "epoch": 0.8044563072995242, "grad_norm": 0.4171108901500702, "learning_rate": 0.0001875278848330295, "loss": 0.8567, "step": 3466 }, { "epoch": 0.8046884066380411, "grad_norm": 0.5104374289512634, "learning_rate": 0.000187520830361718, "loss": 0.8469, "step": 
3467 }, { "epoch": 0.804920505976558, "grad_norm": 0.4508782625198364, "learning_rate": 0.0001875137740286633, "loss": 0.819, "step": 3468 }, { "epoch": 0.8051526053150748, "grad_norm": 0.4170377254486084, "learning_rate": 0.00018750671583401556, "loss": 0.8399, "step": 3469 }, { "epoch": 0.8053847046535917, "grad_norm": 0.45690974593162537, "learning_rate": 0.00018749965577792484, "loss": 0.8513, "step": 3470 }, { "epoch": 0.8056168039921087, "grad_norm": 0.4563653767108917, "learning_rate": 0.00018749259386054132, "loss": 0.8154, "step": 3471 }, { "epoch": 0.8058489033306255, "grad_norm": 0.47957828640937805, "learning_rate": 0.0001874855300820153, "loss": 0.8667, "step": 3472 }, { "epoch": 0.8060810026691424, "grad_norm": 0.41034868359565735, "learning_rate": 0.00018747846444249695, "loss": 0.8364, "step": 3473 }, { "epoch": 0.8063131020076593, "grad_norm": 0.4391375780105591, "learning_rate": 0.0001874713969421366, "loss": 0.8677, "step": 3474 }, { "epoch": 0.8065452013461761, "grad_norm": 0.45579496026039124, "learning_rate": 0.00018746432758108463, "loss": 0.8075, "step": 3475 }, { "epoch": 0.806777300684693, "grad_norm": 0.440252423286438, "learning_rate": 0.00018745725635949137, "loss": 0.8407, "step": 3476 }, { "epoch": 0.80700940002321, "grad_norm": 0.4207989275455475, "learning_rate": 0.00018745018327750724, "loss": 0.8821, "step": 3477 }, { "epoch": 0.8072414993617268, "grad_norm": 0.45414450764656067, "learning_rate": 0.00018744310833528275, "loss": 0.8627, "step": 3478 }, { "epoch": 0.8074735987002437, "grad_norm": 0.40927475690841675, "learning_rate": 0.00018743603153296835, "loss": 0.8379, "step": 3479 }, { "epoch": 0.8077056980387606, "grad_norm": 0.4099472165107727, "learning_rate": 0.0001874289528707146, "loss": 0.8633, "step": 3480 }, { "epoch": 0.8079377973772774, "grad_norm": 0.4048844277858734, "learning_rate": 0.00018742187234867207, "loss": 0.8502, "step": 3481 }, { "epoch": 0.8081698967157943, "grad_norm": 0.46391192078590393, 
"learning_rate": 0.00018741478996699135, "loss": 0.85, "step": 3482 }, { "epoch": 0.8084019960543113, "grad_norm": 0.43994155526161194, "learning_rate": 0.00018740770572582313, "loss": 0.8897, "step": 3483 }, { "epoch": 0.8086340953928282, "grad_norm": 0.4420682489871979, "learning_rate": 0.00018740061962531812, "loss": 0.9048, "step": 3484 }, { "epoch": 0.808866194731345, "grad_norm": 0.4250348210334778, "learning_rate": 0.000187393531665627, "loss": 0.7932, "step": 3485 }, { "epoch": 0.8090982940698619, "grad_norm": 0.46660420298576355, "learning_rate": 0.0001873864418469006, "loss": 0.8581, "step": 3486 }, { "epoch": 0.8093303934083788, "grad_norm": 0.4400591552257538, "learning_rate": 0.00018737935016928972, "loss": 0.848, "step": 3487 }, { "epoch": 0.8095624927468956, "grad_norm": 0.48466750979423523, "learning_rate": 0.0001873722566329452, "loss": 0.8375, "step": 3488 }, { "epoch": 0.8097945920854126, "grad_norm": 0.4431251585483551, "learning_rate": 0.00018736516123801794, "loss": 0.8411, "step": 3489 }, { "epoch": 0.8100266914239295, "grad_norm": 0.44356822967529297, "learning_rate": 0.00018735806398465887, "loss": 0.8404, "step": 3490 }, { "epoch": 0.8102587907624463, "grad_norm": 0.46663907170295715, "learning_rate": 0.00018735096487301897, "loss": 0.8485, "step": 3491 }, { "epoch": 0.8104908901009632, "grad_norm": 0.5056001543998718, "learning_rate": 0.00018734386390324926, "loss": 0.8944, "step": 3492 }, { "epoch": 0.8107229894394801, "grad_norm": 0.4424160122871399, "learning_rate": 0.00018733676107550077, "loss": 0.8675, "step": 3493 }, { "epoch": 0.8109550887779969, "grad_norm": 0.45616671442985535, "learning_rate": 0.00018732965638992459, "loss": 0.8661, "step": 3494 }, { "epoch": 0.8111871881165139, "grad_norm": 0.4922860860824585, "learning_rate": 0.00018732254984667188, "loss": 0.8026, "step": 3495 }, { "epoch": 0.8114192874550308, "grad_norm": 0.4441889226436615, "learning_rate": 0.0001873154414458938, "loss": 0.8479, "step": 3496 }, { "epoch": 
0.8116513867935476, "grad_norm": 0.4523948132991791, "learning_rate": 0.00018730833118774153, "loss": 0.8567, "step": 3497 }, { "epoch": 0.8118834861320645, "grad_norm": 0.4471333622932434, "learning_rate": 0.00018730121907236638, "loss": 0.8648, "step": 3498 }, { "epoch": 0.8121155854705814, "grad_norm": 0.41624370217323303, "learning_rate": 0.00018729410509991958, "loss": 0.8269, "step": 3499 }, { "epoch": 0.8123476848090982, "grad_norm": 0.41084468364715576, "learning_rate": 0.0001872869892705525, "loss": 0.8443, "step": 3500 }, { "epoch": 0.8125797841476152, "grad_norm": 0.4553931653499603, "learning_rate": 0.00018727987158441645, "loss": 0.8326, "step": 3501 }, { "epoch": 0.8128118834861321, "grad_norm": 0.43934836983680725, "learning_rate": 0.0001872727520416629, "loss": 0.8798, "step": 3502 }, { "epoch": 0.813043982824649, "grad_norm": 0.4688604176044464, "learning_rate": 0.00018726563064244326, "loss": 0.8315, "step": 3503 }, { "epoch": 0.8132760821631658, "grad_norm": 0.3873254358768463, "learning_rate": 0.00018725850738690903, "loss": 0.8086, "step": 3504 }, { "epoch": 0.8135081815016827, "grad_norm": 0.4676206409931183, "learning_rate": 0.00018725138227521177, "loss": 0.8728, "step": 3505 }, { "epoch": 0.8137402808401996, "grad_norm": 0.5033950805664062, "learning_rate": 0.000187244255307503, "loss": 0.8376, "step": 3506 }, { "epoch": 0.8139723801787165, "grad_norm": 0.46513989567756653, "learning_rate": 0.0001872371264839343, "loss": 0.9058, "step": 3507 }, { "epoch": 0.8142044795172334, "grad_norm": 0.4766799509525299, "learning_rate": 0.00018722999580465737, "loss": 0.8674, "step": 3508 }, { "epoch": 0.8144365788557503, "grad_norm": 0.5466392636299133, "learning_rate": 0.00018722286326982386, "loss": 0.8832, "step": 3509 }, { "epoch": 0.8146686781942671, "grad_norm": 0.4438169598579407, "learning_rate": 0.00018721572887958553, "loss": 0.8947, "step": 3510 }, { "epoch": 0.814900777532784, "grad_norm": 0.5376529097557068, "learning_rate": 
0.0001872085926340941, "loss": 0.905, "step": 3511 }, { "epoch": 0.8151328768713009, "grad_norm": 0.48021188378334045, "learning_rate": 0.00018720145453350143, "loss": 0.8622, "step": 3512 }, { "epoch": 0.8153649762098178, "grad_norm": 0.43965375423431396, "learning_rate": 0.0001871943145779593, "loss": 0.8335, "step": 3513 }, { "epoch": 0.8155970755483347, "grad_norm": 0.44212257862091064, "learning_rate": 0.00018718717276761958, "loss": 0.8207, "step": 3514 }, { "epoch": 0.8158291748868516, "grad_norm": 0.4037233293056488, "learning_rate": 0.00018718002910263426, "loss": 0.828, "step": 3515 }, { "epoch": 0.8160612742253684, "grad_norm": 0.4247400462627411, "learning_rate": 0.00018717288358315525, "loss": 0.8721, "step": 3516 }, { "epoch": 0.8162933735638853, "grad_norm": 0.4705268442630768, "learning_rate": 0.00018716573620933459, "loss": 0.8515, "step": 3517 }, { "epoch": 0.8165254729024022, "grad_norm": 0.44723042845726013, "learning_rate": 0.0001871585869813243, "loss": 0.8227, "step": 3518 }, { "epoch": 0.8167575722409192, "grad_norm": 0.45817816257476807, "learning_rate": 0.00018715143589927643, "loss": 0.8234, "step": 3519 }, { "epoch": 0.816989671579436, "grad_norm": 0.4230850636959076, "learning_rate": 0.0001871442829633431, "loss": 0.7768, "step": 3520 }, { "epoch": 0.8172217709179529, "grad_norm": 0.38340017199516296, "learning_rate": 0.00018713712817367651, "loss": 0.7895, "step": 3521 }, { "epoch": 0.8174538702564698, "grad_norm": 1.3191027641296387, "learning_rate": 0.00018712997153042885, "loss": 0.7843, "step": 3522 }, { "epoch": 0.8176859695949866, "grad_norm": 0.5354325771331787, "learning_rate": 0.00018712281303375232, "loss": 0.8346, "step": 3523 }, { "epoch": 0.8179180689335035, "grad_norm": 0.4314560294151306, "learning_rate": 0.0001871156526837992, "loss": 0.852, "step": 3524 }, { "epoch": 0.8181501682720205, "grad_norm": 0.4329346716403961, "learning_rate": 0.00018710849048072189, "loss": 0.8, "step": 3525 }, { "epoch": 0.8183822676105373, 
"grad_norm": 0.4811081886291504, "learning_rate": 0.0001871013264246726, "loss": 0.8342, "step": 3526 }, { "epoch": 0.8186143669490542, "grad_norm": 0.4746951162815094, "learning_rate": 0.0001870941605158038, "loss": 0.8933, "step": 3527 }, { "epoch": 0.8188464662875711, "grad_norm": 0.424167275428772, "learning_rate": 0.00018708699275426794, "loss": 0.8275, "step": 3528 }, { "epoch": 0.8190785656260879, "grad_norm": 0.47413644194602966, "learning_rate": 0.00018707982314021752, "loss": 0.8525, "step": 3529 }, { "epoch": 0.8193106649646048, "grad_norm": 0.4636114239692688, "learning_rate": 0.00018707265167380497, "loss": 0.864, "step": 3530 }, { "epoch": 0.8195427643031218, "grad_norm": 0.4753924310207367, "learning_rate": 0.00018706547835518287, "loss": 0.9, "step": 3531 }, { "epoch": 0.8197748636416387, "grad_norm": 0.45833784341812134, "learning_rate": 0.00018705830318450383, "loss": 0.8756, "step": 3532 }, { "epoch": 0.8200069629801555, "grad_norm": 0.5040803551673889, "learning_rate": 0.00018705112616192046, "loss": 0.866, "step": 3533 }, { "epoch": 0.8202390623186724, "grad_norm": 0.4119100272655487, "learning_rate": 0.00018704394728758544, "loss": 0.8635, "step": 3534 }, { "epoch": 0.8204711616571893, "grad_norm": 0.47678402066230774, "learning_rate": 0.00018703676656165147, "loss": 0.8208, "step": 3535 }, { "epoch": 0.8207032609957061, "grad_norm": 0.44619014859199524, "learning_rate": 0.00018702958398427133, "loss": 0.879, "step": 3536 }, { "epoch": 0.8209353603342231, "grad_norm": 0.44317182898521423, "learning_rate": 0.00018702239955559774, "loss": 0.8605, "step": 3537 }, { "epoch": 0.82116745967274, "grad_norm": 0.4118732810020447, "learning_rate": 0.00018701521327578363, "loss": 0.8283, "step": 3538 }, { "epoch": 0.8213995590112568, "grad_norm": 0.39580899477005005, "learning_rate": 0.00018700802514498178, "loss": 0.8301, "step": 3539 }, { "epoch": 0.8216316583497737, "grad_norm": 0.39395207166671753, "learning_rate": 0.00018700083516334513, "loss": 
0.8229, "step": 3540 }, { "epoch": 0.8218637576882906, "grad_norm": 0.4375897943973541, "learning_rate": 0.0001869936433310266, "loss": 0.8543, "step": 3541 }, { "epoch": 0.8220958570268074, "grad_norm": 0.3816278576850891, "learning_rate": 0.0001869864496481792, "loss": 0.8233, "step": 3542 }, { "epoch": 0.8223279563653244, "grad_norm": 0.4020833373069763, "learning_rate": 0.00018697925411495598, "loss": 0.8464, "step": 3543 }, { "epoch": 0.8225600557038413, "grad_norm": 0.700670599937439, "learning_rate": 0.00018697205673150994, "loss": 0.8716, "step": 3544 }, { "epoch": 0.8227921550423581, "grad_norm": 0.4214612543582916, "learning_rate": 0.0001869648574979942, "loss": 0.8182, "step": 3545 }, { "epoch": 0.823024254380875, "grad_norm": 0.40272238850593567, "learning_rate": 0.00018695765641456197, "loss": 0.8637, "step": 3546 }, { "epoch": 0.8232563537193919, "grad_norm": 5.3370842933654785, "learning_rate": 0.00018695045348136633, "loss": 0.9138, "step": 3547 }, { "epoch": 0.8234884530579087, "grad_norm": 0.5729955434799194, "learning_rate": 0.00018694324869856054, "loss": 0.835, "step": 3548 }, { "epoch": 0.8237205523964257, "grad_norm": 0.4560796618461609, "learning_rate": 0.00018693604206629791, "loss": 0.8524, "step": 3549 }, { "epoch": 0.8239526517349426, "grad_norm": 0.379839152097702, "learning_rate": 0.00018692883358473167, "loss": 0.7941, "step": 3550 }, { "epoch": 0.8241847510734595, "grad_norm": 0.4597313404083252, "learning_rate": 0.00018692162325401518, "loss": 0.8218, "step": 3551 }, { "epoch": 0.8244168504119763, "grad_norm": 0.4852725565433502, "learning_rate": 0.00018691441107430187, "loss": 0.8713, "step": 3552 }, { "epoch": 0.8246489497504932, "grad_norm": 0.43331804871559143, "learning_rate": 0.00018690719704574508, "loss": 0.8717, "step": 3553 }, { "epoch": 0.82488104908901, "grad_norm": 0.5439433455467224, "learning_rate": 0.00018689998116849828, "loss": 0.8166, "step": 3554 }, { "epoch": 0.825113148427527, "grad_norm": 0.44353604316711426, 
"learning_rate": 0.00018689276344271502, "loss": 0.9165, "step": 3555 }, { "epoch": 0.8253452477660439, "grad_norm": 0.4763766825199127, "learning_rate": 0.0001868855438685488, "loss": 0.888, "step": 3556 }, { "epoch": 0.8255773471045608, "grad_norm": 0.46713805198669434, "learning_rate": 0.0001868783224461532, "loss": 0.9015, "step": 3557 }, { "epoch": 0.8258094464430776, "grad_norm": 0.45944944024086, "learning_rate": 0.00018687109917568177, "loss": 0.836, "step": 3558 }, { "epoch": 0.8260415457815945, "grad_norm": 0.48062145709991455, "learning_rate": 0.0001868638740572883, "loss": 0.8791, "step": 3559 }, { "epoch": 0.8262736451201114, "grad_norm": 0.4560006260871887, "learning_rate": 0.00018685664709112637, "loss": 0.8594, "step": 3560 }, { "epoch": 0.8265057444586283, "grad_norm": 0.3921787738800049, "learning_rate": 0.00018684941827734974, "loss": 0.8828, "step": 3561 }, { "epoch": 0.8267378437971452, "grad_norm": 0.4815680980682373, "learning_rate": 0.0001868421876161122, "loss": 0.8401, "step": 3562 }, { "epoch": 0.8269699431356621, "grad_norm": 0.4304710030555725, "learning_rate": 0.00018683495510756753, "loss": 0.8327, "step": 3563 }, { "epoch": 0.827202042474179, "grad_norm": 0.43320244550704956, "learning_rate": 0.00018682772075186962, "loss": 0.8685, "step": 3564 }, { "epoch": 0.8274341418126958, "grad_norm": 0.5130840539932251, "learning_rate": 0.00018682048454917233, "loss": 0.9112, "step": 3565 }, { "epoch": 0.8276662411512127, "grad_norm": 0.4930349290370941, "learning_rate": 0.00018681324649962963, "loss": 0.892, "step": 3566 }, { "epoch": 0.8278983404897297, "grad_norm": 0.4212011694908142, "learning_rate": 0.00018680600660339543, "loss": 0.8646, "step": 3567 }, { "epoch": 0.8281304398282465, "grad_norm": 0.43066468834877014, "learning_rate": 0.00018679876486062377, "loss": 0.8606, "step": 3568 }, { "epoch": 0.8283625391667634, "grad_norm": 0.417481005191803, "learning_rate": 0.00018679152127146868, "loss": 0.8825, "step": 3569 }, { "epoch": 
0.8285946385052803, "grad_norm": 0.4242853820323944, "learning_rate": 0.00018678427583608426, "loss": 0.8141, "step": 3570 }, { "epoch": 0.8288267378437971, "grad_norm": 0.4471725821495056, "learning_rate": 0.0001867770285546246, "loss": 0.8705, "step": 3571 }, { "epoch": 0.829058837182314, "grad_norm": 0.429943323135376, "learning_rate": 0.00018676977942724395, "loss": 0.8079, "step": 3572 }, { "epoch": 0.829290936520831, "grad_norm": 0.4596618115901947, "learning_rate": 0.0001867625284540964, "loss": 0.8635, "step": 3573 }, { "epoch": 0.8295230358593478, "grad_norm": 0.4477384090423584, "learning_rate": 0.0001867552756353363, "loss": 0.8523, "step": 3574 }, { "epoch": 0.8297551351978647, "grad_norm": 0.4040454924106598, "learning_rate": 0.00018674802097111784, "loss": 0.8193, "step": 3575 }, { "epoch": 0.8299872345363816, "grad_norm": 0.4650207757949829, "learning_rate": 0.00018674076446159542, "loss": 0.8458, "step": 3576 }, { "epoch": 0.8302193338748984, "grad_norm": 0.48652219772338867, "learning_rate": 0.00018673350610692334, "loss": 0.8667, "step": 3577 }, { "epoch": 0.8304514332134153, "grad_norm": 0.46445751190185547, "learning_rate": 0.00018672624590725603, "loss": 0.8558, "step": 3578 }, { "epoch": 0.8306835325519323, "grad_norm": 0.5741584897041321, "learning_rate": 0.00018671898386274791, "loss": 0.8581, "step": 3579 }, { "epoch": 0.8309156318904491, "grad_norm": 0.43956178426742554, "learning_rate": 0.00018671171997355348, "loss": 0.8195, "step": 3580 }, { "epoch": 0.831147731228966, "grad_norm": 0.4666881859302521, "learning_rate": 0.00018670445423982725, "loss": 0.8914, "step": 3581 }, { "epoch": 0.8313798305674829, "grad_norm": 0.5098886489868164, "learning_rate": 0.00018669718666172375, "loss": 0.9134, "step": 3582 }, { "epoch": 0.8316119299059997, "grad_norm": 0.40267500281333923, "learning_rate": 0.0001866899172393976, "loss": 0.8695, "step": 3583 }, { "epoch": 0.8318440292445166, "grad_norm": 0.48588645458221436, "learning_rate": 
0.0001866826459730035, "loss": 0.8711, "step": 3584 }, { "epoch": 0.8320761285830336, "grad_norm": 0.4441528916358948, "learning_rate": 0.000186675372862696, "loss": 0.8131, "step": 3585 }, { "epoch": 0.8323082279215505, "grad_norm": 0.39562228322029114, "learning_rate": 0.0001866680979086299, "loss": 0.8169, "step": 3586 }, { "epoch": 0.8325403272600673, "grad_norm": 0.4114947021007538, "learning_rate": 0.0001866608211109599, "loss": 0.8809, "step": 3587 }, { "epoch": 0.8327724265985842, "grad_norm": 0.39634478092193604, "learning_rate": 0.00018665354246984083, "loss": 0.8114, "step": 3588 }, { "epoch": 0.8330045259371011, "grad_norm": 0.41898730397224426, "learning_rate": 0.00018664626198542755, "loss": 0.8589, "step": 3589 }, { "epoch": 0.8332366252756179, "grad_norm": 0.4072995185852051, "learning_rate": 0.00018663897965787483, "loss": 0.8396, "step": 3590 }, { "epoch": 0.8334687246141349, "grad_norm": 0.38524600863456726, "learning_rate": 0.00018663169548733768, "loss": 0.8402, "step": 3591 }, { "epoch": 0.8337008239526518, "grad_norm": 0.4251926839351654, "learning_rate": 0.000186624409473971, "loss": 0.8449, "step": 3592 }, { "epoch": 0.8339329232911686, "grad_norm": 0.3901421129703522, "learning_rate": 0.00018661712161792976, "loss": 0.767, "step": 3593 }, { "epoch": 0.8341650226296855, "grad_norm": 0.3809567987918854, "learning_rate": 0.00018660983191936902, "loss": 0.8217, "step": 3594 }, { "epoch": 0.8343971219682024, "grad_norm": 0.4081445634365082, "learning_rate": 0.00018660254037844388, "loss": 0.8657, "step": 3595 }, { "epoch": 0.8346292213067192, "grad_norm": 0.45860999822616577, "learning_rate": 0.0001865952469953094, "loss": 0.776, "step": 3596 }, { "epoch": 0.8348613206452362, "grad_norm": 0.3801199495792389, "learning_rate": 0.0001865879517701207, "loss": 0.8379, "step": 3597 }, { "epoch": 0.8350934199837531, "grad_norm": 0.4840230643749237, "learning_rate": 0.00018658065470303302, "loss": 0.8699, "step": 3598 }, { "epoch": 0.83532551932227, 
"grad_norm": 0.5170982480049133, "learning_rate": 0.00018657335579420152, "loss": 0.8418, "step": 3599 }, { "epoch": 0.8355576186607868, "grad_norm": 1.8149453401565552, "learning_rate": 0.00018656605504378153, "loss": 0.8635, "step": 3600 }, { "epoch": 0.8357897179993037, "grad_norm": 0.5831320285797119, "learning_rate": 0.00018655875245192832, "loss": 0.9121, "step": 3601 }, { "epoch": 0.8360218173378205, "grad_norm": 0.49724313616752625, "learning_rate": 0.00018655144801879722, "loss": 0.8694, "step": 3602 }, { "epoch": 0.8362539166763375, "grad_norm": 0.4070315361022949, "learning_rate": 0.00018654414174454361, "loss": 0.8697, "step": 3603 }, { "epoch": 0.8364860160148544, "grad_norm": 0.45699378848075867, "learning_rate": 0.00018653683362932297, "loss": 0.8548, "step": 3604 }, { "epoch": 0.8367181153533713, "grad_norm": 0.41891008615493774, "learning_rate": 0.0001865295236732907, "loss": 0.8305, "step": 3605 }, { "epoch": 0.8369502146918881, "grad_norm": 0.422150194644928, "learning_rate": 0.00018652221187660228, "loss": 0.9228, "step": 3606 }, { "epoch": 0.837182314030405, "grad_norm": 0.4238840937614441, "learning_rate": 0.00018651489823941328, "loss": 0.8174, "step": 3607 }, { "epoch": 0.8374144133689219, "grad_norm": 0.4262438714504242, "learning_rate": 0.00018650758276187926, "loss": 0.8393, "step": 3608 }, { "epoch": 0.8376465127074387, "grad_norm": 0.4072905480861664, "learning_rate": 0.0001865002654441559, "loss": 0.8478, "step": 3609 }, { "epoch": 0.8378786120459557, "grad_norm": 0.5089958906173706, "learning_rate": 0.00018649294628639876, "loss": 0.8003, "step": 3610 }, { "epoch": 0.8381107113844726, "grad_norm": 0.3870745599269867, "learning_rate": 0.00018648562528876357, "loss": 0.78, "step": 3611 }, { "epoch": 0.8383428107229894, "grad_norm": 0.40581372380256653, "learning_rate": 0.00018647830245140608, "loss": 0.8298, "step": 3612 }, { "epoch": 0.8385749100615063, "grad_norm": 0.4862796664237976, "learning_rate": 0.000186470977774482, "loss": 
0.8551, "step": 3613 }, { "epoch": 0.8388070094000232, "grad_norm": 0.37643569707870483, "learning_rate": 0.00018646365125814723, "loss": 0.8765, "step": 3614 }, { "epoch": 0.83903910873854, "grad_norm": 0.39773279428482056, "learning_rate": 0.00018645632290255758, "loss": 0.8302, "step": 3615 }, { "epoch": 0.839271208077057, "grad_norm": 0.4521380662918091, "learning_rate": 0.0001864489927078689, "loss": 0.8391, "step": 3616 }, { "epoch": 0.8395033074155739, "grad_norm": 0.4298209846019745, "learning_rate": 0.0001864416606742372, "loss": 0.8706, "step": 3617 }, { "epoch": 0.8397354067540908, "grad_norm": 0.5119231939315796, "learning_rate": 0.00018643432680181835, "loss": 0.7978, "step": 3618 }, { "epoch": 0.8399675060926076, "grad_norm": 0.4206373989582062, "learning_rate": 0.00018642699109076843, "loss": 0.8268, "step": 3619 }, { "epoch": 0.8401996054311245, "grad_norm": 0.45704206824302673, "learning_rate": 0.00018641965354124345, "loss": 0.8333, "step": 3620 }, { "epoch": 0.8404317047696414, "grad_norm": 0.41793394088745117, "learning_rate": 0.0001864123141533995, "loss": 0.8604, "step": 3621 }, { "epoch": 0.8406638041081583, "grad_norm": 0.47956162691116333, "learning_rate": 0.00018640497292739274, "loss": 0.8551, "step": 3622 }, { "epoch": 0.8408959034466752, "grad_norm": 0.47816702723503113, "learning_rate": 0.00018639762986337927, "loss": 0.8287, "step": 3623 }, { "epoch": 0.8411280027851921, "grad_norm": 0.43328189849853516, "learning_rate": 0.00018639028496151533, "loss": 0.8367, "step": 3624 }, { "epoch": 0.8413601021237089, "grad_norm": 0.47569748759269714, "learning_rate": 0.00018638293822195715, "loss": 0.8252, "step": 3625 }, { "epoch": 0.8415922014622258, "grad_norm": 0.518892765045166, "learning_rate": 0.00018637558964486103, "loss": 0.8379, "step": 3626 }, { "epoch": 0.8418243008007427, "grad_norm": 0.4990456700325012, "learning_rate": 0.0001863682392303833, "loss": 0.7981, "step": 3627 }, { "epoch": 0.8420564001392596, "grad_norm": 
0.41057834029197693, "learning_rate": 0.00018636088697868023, "loss": 0.8372, "step": 3628 }, { "epoch": 0.8422884994777765, "grad_norm": 0.5051354169845581, "learning_rate": 0.00018635353288990828, "loss": 0.8939, "step": 3629 }, { "epoch": 0.8425205988162934, "grad_norm": 0.46350952982902527, "learning_rate": 0.00018634617696422395, "loss": 0.853, "step": 3630 }, { "epoch": 0.8427526981548102, "grad_norm": 0.36572298407554626, "learning_rate": 0.0001863388192017836, "loss": 0.8333, "step": 3631 }, { "epoch": 0.8429847974933271, "grad_norm": 0.5073897242546082, "learning_rate": 0.00018633145960274378, "loss": 0.8527, "step": 3632 }, { "epoch": 0.843216896831844, "grad_norm": 0.4910253584384918, "learning_rate": 0.00018632409816726105, "loss": 0.8612, "step": 3633 }, { "epoch": 0.843448996170361, "grad_norm": 0.44439107179641724, "learning_rate": 0.00018631673489549207, "loss": 0.8474, "step": 3634 }, { "epoch": 0.8436810955088778, "grad_norm": 0.4418814182281494, "learning_rate": 0.00018630936978759338, "loss": 0.8218, "step": 3635 }, { "epoch": 0.8439131948473947, "grad_norm": 0.4734269082546234, "learning_rate": 0.00018630200284372166, "loss": 0.8463, "step": 3636 }, { "epoch": 0.8441452941859116, "grad_norm": 0.44001299142837524, "learning_rate": 0.00018629463406403363, "loss": 0.8936, "step": 3637 }, { "epoch": 0.8443773935244284, "grad_norm": 0.42432811856269836, "learning_rate": 0.0001862872634486861, "loss": 0.8483, "step": 3638 }, { "epoch": 0.8446094928629453, "grad_norm": 0.4617139399051666, "learning_rate": 0.00018627989099783578, "loss": 0.8431, "step": 3639 }, { "epoch": 0.8448415922014623, "grad_norm": 0.41493532061576843, "learning_rate": 0.00018627251671163953, "loss": 0.8248, "step": 3640 }, { "epoch": 0.8450736915399791, "grad_norm": 0.4633040428161621, "learning_rate": 0.00018626514059025422, "loss": 0.8746, "step": 3641 }, { "epoch": 0.845305790878496, "grad_norm": 0.48771142959594727, "learning_rate": 0.00018625776263383675, "loss": 0.8288, 
"step": 3642 }, { "epoch": 0.8455378902170129, "grad_norm": 0.47017359733581543, "learning_rate": 0.00018625038284254402, "loss": 0.8607, "step": 3643 }, { "epoch": 0.8457699895555297, "grad_norm": 0.47195085883140564, "learning_rate": 0.00018624300121653309, "loss": 0.8055, "step": 3644 }, { "epoch": 0.8460020888940466, "grad_norm": 0.4053666293621063, "learning_rate": 0.0001862356177559609, "loss": 0.7866, "step": 3645 }, { "epoch": 0.8462341882325636, "grad_norm": 0.4887469708919525, "learning_rate": 0.0001862282324609846, "loss": 0.8869, "step": 3646 }, { "epoch": 0.8464662875710804, "grad_norm": 0.4616425931453705, "learning_rate": 0.00018622084533176123, "loss": 0.8621, "step": 3647 }, { "epoch": 0.8466983869095973, "grad_norm": 0.3864554166793823, "learning_rate": 0.00018621345636844793, "loss": 0.8349, "step": 3648 }, { "epoch": 0.8469304862481142, "grad_norm": 0.4063476026058197, "learning_rate": 0.0001862060655712019, "loss": 0.8843, "step": 3649 }, { "epoch": 0.847162585586631, "grad_norm": 0.4339485168457031, "learning_rate": 0.00018619867294018035, "loss": 0.8307, "step": 3650 }, { "epoch": 0.8473946849251479, "grad_norm": 0.4014281630516052, "learning_rate": 0.00018619127847554055, "loss": 0.8451, "step": 3651 }, { "epoch": 0.8476267842636649, "grad_norm": 0.3940604329109192, "learning_rate": 0.00018618388217743977, "loss": 0.7914, "step": 3652 }, { "epoch": 0.8478588836021818, "grad_norm": 0.45034539699554443, "learning_rate": 0.00018617648404603532, "loss": 0.8623, "step": 3653 }, { "epoch": 0.8480909829406986, "grad_norm": 0.4533842206001282, "learning_rate": 0.00018616908408148467, "loss": 0.8084, "step": 3654 }, { "epoch": 0.8483230822792155, "grad_norm": 0.4499308168888092, "learning_rate": 0.00018616168228394512, "loss": 0.8822, "step": 3655 }, { "epoch": 0.8485551816177324, "grad_norm": 0.44630166888237, "learning_rate": 0.00018615427865357418, "loss": 0.8592, "step": 3656 }, { "epoch": 0.8487872809562492, "grad_norm": 0.4563949406147003, 
"learning_rate": 0.0001861468731905293, "loss": 0.8279, "step": 3657 }, { "epoch": 0.8490193802947662, "grad_norm": 0.4844360947608948, "learning_rate": 0.0001861394658949681, "loss": 0.8848, "step": 3658 }, { "epoch": 0.8492514796332831, "grad_norm": 0.3996076285839081, "learning_rate": 0.00018613205676704805, "loss": 0.8833, "step": 3659 }, { "epoch": 0.8494835789717999, "grad_norm": 0.4185331165790558, "learning_rate": 0.00018612464580692684, "loss": 0.8837, "step": 3660 }, { "epoch": 0.8497156783103168, "grad_norm": 0.45031115412712097, "learning_rate": 0.00018611723301476202, "loss": 0.7964, "step": 3661 }, { "epoch": 0.8499477776488337, "grad_norm": 3.2544968128204346, "learning_rate": 0.00018610981839071132, "loss": 0.869, "step": 3662 }, { "epoch": 0.8501798769873505, "grad_norm": 0.5517664551734924, "learning_rate": 0.00018610240193493252, "loss": 0.8495, "step": 3663 }, { "epoch": 0.8504119763258675, "grad_norm": 0.47973403334617615, "learning_rate": 0.0001860949836475833, "loss": 0.834, "step": 3664 }, { "epoch": 0.8506440756643844, "grad_norm": 0.41269323229789734, "learning_rate": 0.00018608756352882152, "loss": 0.89, "step": 3665 }, { "epoch": 0.8508761750029012, "grad_norm": 0.4766026437282562, "learning_rate": 0.00018608014157880492, "loss": 0.8682, "step": 3666 }, { "epoch": 0.8511082743414181, "grad_norm": 0.4922116994857788, "learning_rate": 0.00018607271779769153, "loss": 0.794, "step": 3667 }, { "epoch": 0.851340373679935, "grad_norm": 0.49743103981018066, "learning_rate": 0.00018606529218563917, "loss": 0.8165, "step": 3668 }, { "epoch": 0.8515724730184518, "grad_norm": 0.4243033230304718, "learning_rate": 0.0001860578647428058, "loss": 0.8369, "step": 3669 }, { "epoch": 0.8518045723569688, "grad_norm": 0.4899313449859619, "learning_rate": 0.00018605043546934946, "loss": 0.8107, "step": 3670 }, { "epoch": 0.8520366716954857, "grad_norm": 0.49445003271102905, "learning_rate": 0.00018604300436542815, "loss": 0.8639, "step": 3671 }, { "epoch": 
0.8522687710340026, "grad_norm": 0.4989785850048065, "learning_rate": 0.00018603557143119997, "loss": 0.855, "step": 3672 }, { "epoch": 0.8525008703725194, "grad_norm": 0.4344911277294159, "learning_rate": 0.00018602813666682302, "loss": 0.835, "step": 3673 }, { "epoch": 0.8527329697110363, "grad_norm": 0.4720595180988312, "learning_rate": 0.00018602070007245542, "loss": 0.8671, "step": 3674 }, { "epoch": 0.8529650690495532, "grad_norm": 0.4747980237007141, "learning_rate": 0.00018601326164825543, "loss": 0.8125, "step": 3675 }, { "epoch": 0.8531971683880701, "grad_norm": 0.40255007147789, "learning_rate": 0.00018600582139438124, "loss": 0.8028, "step": 3676 }, { "epoch": 0.853429267726587, "grad_norm": 0.4688321352005005, "learning_rate": 0.00018599837931099108, "loss": 0.8309, "step": 3677 }, { "epoch": 0.8536613670651039, "grad_norm": 0.462454617023468, "learning_rate": 0.00018599093539824337, "loss": 0.8725, "step": 3678 }, { "epoch": 0.8538934664036207, "grad_norm": 0.4675252139568329, "learning_rate": 0.00018598348965629633, "loss": 0.8549, "step": 3679 }, { "epoch": 0.8541255657421376, "grad_norm": 0.4787384569644928, "learning_rate": 0.00018597604208530846, "loss": 0.8685, "step": 3680 }, { "epoch": 0.8543576650806545, "grad_norm": 0.41962456703186035, "learning_rate": 0.00018596859268543806, "loss": 0.8517, "step": 3681 }, { "epoch": 0.8545897644191714, "grad_norm": 0.47045642137527466, "learning_rate": 0.00018596114145684372, "loss": 0.8101, "step": 3682 }, { "epoch": 0.8548218637576883, "grad_norm": 0.478563129901886, "learning_rate": 0.00018595368839968387, "loss": 0.8584, "step": 3683 }, { "epoch": 0.8550539630962052, "grad_norm": 0.4727185368537903, "learning_rate": 0.00018594623351411707, "loss": 0.8202, "step": 3684 }, { "epoch": 0.855286062434722, "grad_norm": 0.42539578676223755, "learning_rate": 0.00018593877680030187, "loss": 0.8826, "step": 3685 }, { "epoch": 0.8555181617732389, "grad_norm": 0.42231258749961853, "learning_rate": 
0.00018593131825839692, "loss": 0.831, "step": 3686 }, { "epoch": 0.8557502611117558, "grad_norm": 0.4962049126625061, "learning_rate": 0.0001859238578885609, "loss": 0.8681, "step": 3687 }, { "epoch": 0.8559823604502728, "grad_norm": 0.44569656252861023, "learning_rate": 0.00018591639569095247, "loss": 0.8664, "step": 3688 }, { "epoch": 0.8562144597887896, "grad_norm": 0.43417832255363464, "learning_rate": 0.00018590893166573036, "loss": 0.8874, "step": 3689 }, { "epoch": 0.8564465591273065, "grad_norm": 0.4634045660495758, "learning_rate": 0.0001859014658130534, "loss": 0.8443, "step": 3690 }, { "epoch": 0.8566786584658234, "grad_norm": 0.44356751441955566, "learning_rate": 0.00018589399813308033, "loss": 0.8575, "step": 3691 }, { "epoch": 0.8569107578043402, "grad_norm": 0.3983425796031952, "learning_rate": 0.00018588652862597005, "loss": 0.8436, "step": 3692 }, { "epoch": 0.8571428571428571, "grad_norm": 0.48885035514831543, "learning_rate": 0.00018587905729188143, "loss": 0.8421, "step": 3693 }, { "epoch": 0.8573749564813741, "grad_norm": 0.5588403940200806, "learning_rate": 0.0001858715841309734, "loss": 0.8424, "step": 3694 }, { "epoch": 0.8576070558198909, "grad_norm": 0.4660573899745941, "learning_rate": 0.00018586410914340497, "loss": 0.849, "step": 3695 }, { "epoch": 0.8578391551584078, "grad_norm": 0.463493287563324, "learning_rate": 0.0001858566323293351, "loss": 0.9049, "step": 3696 }, { "epoch": 0.8580712544969247, "grad_norm": 0.39277905225753784, "learning_rate": 0.00018584915368892286, "loss": 0.8682, "step": 3697 }, { "epoch": 0.8583033538354415, "grad_norm": 0.4277889132499695, "learning_rate": 0.00018584167322232732, "loss": 0.8761, "step": 3698 }, { "epoch": 0.8585354531739584, "grad_norm": 0.42222461104393005, "learning_rate": 0.00018583419092970763, "loss": 0.8505, "step": 3699 }, { "epoch": 0.8587675525124754, "grad_norm": 0.39916637539863586, "learning_rate": 0.0001858267068112229, "loss": 0.8497, "step": 3700 }, { "epoch": 
0.8589996518509923, "grad_norm": 0.47431501746177673, "learning_rate": 0.00018581922086703242, "loss": 0.8277, "step": 3701 }, { "epoch": 0.8592317511895091, "grad_norm": 0.41762733459472656, "learning_rate": 0.00018581173309729533, "loss": 0.8316, "step": 3702 }, { "epoch": 0.859463850528026, "grad_norm": 0.4501849412918091, "learning_rate": 0.00018580424350217095, "loss": 0.8426, "step": 3703 }, { "epoch": 0.8596959498665429, "grad_norm": 0.43986836075782776, "learning_rate": 0.00018579675208181865, "loss": 0.8682, "step": 3704 }, { "epoch": 0.8599280492050597, "grad_norm": 0.40839555859565735, "learning_rate": 0.00018578925883639772, "loss": 0.8732, "step": 3705 }, { "epoch": 0.8601601485435767, "grad_norm": 0.4279423952102661, "learning_rate": 0.0001857817637660676, "loss": 0.8463, "step": 3706 }, { "epoch": 0.8603922478820936, "grad_norm": 0.42398250102996826, "learning_rate": 0.00018577426687098772, "loss": 0.8737, "step": 3707 }, { "epoch": 0.8606243472206104, "grad_norm": 3.3749959468841553, "learning_rate": 0.00018576676815131753, "loss": 0.8313, "step": 3708 }, { "epoch": 0.8608564465591273, "grad_norm": 0.5624010562896729, "learning_rate": 0.00018575926760721655, "loss": 0.8269, "step": 3709 }, { "epoch": 0.8610885458976442, "grad_norm": 0.46690601110458374, "learning_rate": 0.00018575176523884433, "loss": 0.8155, "step": 3710 }, { "epoch": 0.861320645236161, "grad_norm": 1.0455248355865479, "learning_rate": 0.00018574426104636047, "loss": 0.8545, "step": 3711 }, { "epoch": 0.861552744574678, "grad_norm": 0.5710535645484924, "learning_rate": 0.00018573675502992458, "loss": 0.866, "step": 3712 }, { "epoch": 0.8617848439131949, "grad_norm": 0.4759901762008667, "learning_rate": 0.00018572924718969635, "loss": 0.8905, "step": 3713 }, { "epoch": 0.8620169432517117, "grad_norm": 0.3955191969871521, "learning_rate": 0.0001857217375258355, "loss": 0.8197, "step": 3714 }, { "epoch": 0.8622490425902286, "grad_norm": 0.5624427199363708, "learning_rate": 
0.00018571422603850174, "loss": 0.7985, "step": 3715 }, { "epoch": 0.8624811419287455, "grad_norm": 0.5465355515480042, "learning_rate": 0.0001857067127278549, "loss": 0.8608, "step": 3716 }, { "epoch": 0.8627132412672623, "grad_norm": 0.4480397701263428, "learning_rate": 0.00018569919759405475, "loss": 0.8633, "step": 3717 }, { "epoch": 0.8629453406057793, "grad_norm": 0.4961203932762146, "learning_rate": 0.00018569168063726113, "loss": 0.8566, "step": 3718 }, { "epoch": 0.8631774399442962, "grad_norm": 0.44409114122390747, "learning_rate": 0.00018568416185763405, "loss": 0.8871, "step": 3719 }, { "epoch": 0.863409539282813, "grad_norm": 0.44211405515670776, "learning_rate": 0.00018567664125533335, "loss": 0.9323, "step": 3720 }, { "epoch": 0.8636416386213299, "grad_norm": 0.5025736093521118, "learning_rate": 0.00018566911883051906, "loss": 0.8567, "step": 3721 }, { "epoch": 0.8638737379598468, "grad_norm": 0.4884089529514313, "learning_rate": 0.00018566159458335117, "loss": 0.8826, "step": 3722 }, { "epoch": 0.8641058372983637, "grad_norm": 0.4494381546974182, "learning_rate": 0.00018565406851398978, "loss": 0.9205, "step": 3723 }, { "epoch": 0.8643379366368806, "grad_norm": 0.554206132888794, "learning_rate": 0.0001856465406225949, "loss": 0.8197, "step": 3724 }, { "epoch": 0.8645700359753975, "grad_norm": 0.4827885627746582, "learning_rate": 0.00018563901090932672, "loss": 0.8452, "step": 3725 }, { "epoch": 0.8648021353139144, "grad_norm": 0.5039483308792114, "learning_rate": 0.00018563147937434543, "loss": 0.8622, "step": 3726 }, { "epoch": 0.8650342346524312, "grad_norm": 0.49610593914985657, "learning_rate": 0.0001856239460178112, "loss": 0.8548, "step": 3727 }, { "epoch": 0.8652663339909481, "grad_norm": 0.4589429199695587, "learning_rate": 0.0001856164108398843, "loss": 0.8438, "step": 3728 }, { "epoch": 0.865498433329465, "grad_norm": 0.7688096165657043, "learning_rate": 0.00018560887384072502, "loss": 0.8582, "step": 3729 }, { "epoch": 0.865730532667982, 
"grad_norm": 0.5511279702186584, "learning_rate": 0.00018560133502049372, "loss": 0.8743, "step": 3730 }, { "epoch": 0.8659626320064988, "grad_norm": 0.5168042778968811, "learning_rate": 0.00018559379437935064, "loss": 0.8653, "step": 3731 }, { "epoch": 0.8661947313450157, "grad_norm": 0.39345741271972656, "learning_rate": 0.00018558625191745636, "loss": 0.8344, "step": 3732 }, { "epoch": 0.8664268306835325, "grad_norm": 0.517891526222229, "learning_rate": 0.00018557870763497117, "loss": 0.8245, "step": 3733 }, { "epoch": 0.8666589300220494, "grad_norm": 0.5025557279586792, "learning_rate": 0.00018557116153205568, "loss": 0.8741, "step": 3734 }, { "epoch": 0.8668910293605663, "grad_norm": 0.45522400736808777, "learning_rate": 0.00018556361360887028, "loss": 0.8834, "step": 3735 }, { "epoch": 0.8671231286990833, "grad_norm": 0.470336377620697, "learning_rate": 0.00018555606386557564, "loss": 0.8537, "step": 3736 }, { "epoch": 0.8673552280376001, "grad_norm": 0.42331936955451965, "learning_rate": 0.0001855485123023323, "loss": 0.8578, "step": 3737 }, { "epoch": 0.867587327376117, "grad_norm": 0.41115739941596985, "learning_rate": 0.00018554095891930094, "loss": 0.8409, "step": 3738 }, { "epoch": 0.8678194267146339, "grad_norm": 0.48640328645706177, "learning_rate": 0.00018553340371664216, "loss": 0.7981, "step": 3739 }, { "epoch": 0.8680515260531507, "grad_norm": 0.4436929523944855, "learning_rate": 0.00018552584669451677, "loss": 0.8551, "step": 3740 }, { "epoch": 0.8682836253916676, "grad_norm": 0.39534369111061096, "learning_rate": 0.00018551828785308544, "loss": 0.8231, "step": 3741 }, { "epoch": 0.8685157247301846, "grad_norm": 0.44925758242607117, "learning_rate": 0.000185510727192509, "loss": 0.8281, "step": 3742 }, { "epoch": 0.8687478240687014, "grad_norm": 0.42333513498306274, "learning_rate": 0.0001855031647129483, "loss": 0.8267, "step": 3743 }, { "epoch": 0.8689799234072183, "grad_norm": 0.46047094464302063, "learning_rate": 0.00018549560041456416, 
"loss": 0.8442, "step": 3744 }, { "epoch": 0.8692120227457352, "grad_norm": 0.42237868905067444, "learning_rate": 0.00018548803429751755, "loss": 0.8556, "step": 3745 }, { "epoch": 0.869444122084252, "grad_norm": 0.43160146474838257, "learning_rate": 0.00018548046636196935, "loss": 0.7921, "step": 3746 }, { "epoch": 0.8696762214227689, "grad_norm": 0.5053540468215942, "learning_rate": 0.0001854728966080806, "loss": 0.8341, "step": 3747 }, { "epoch": 0.8699083207612859, "grad_norm": 0.4376460313796997, "learning_rate": 0.0001854653250360123, "loss": 0.8171, "step": 3748 }, { "epoch": 0.8701404200998027, "grad_norm": 0.448899507522583, "learning_rate": 0.0001854577516459255, "loss": 0.8593, "step": 3749 }, { "epoch": 0.8703725194383196, "grad_norm": 0.5649166107177734, "learning_rate": 0.00018545017643798129, "loss": 0.8464, "step": 3750 }, { "epoch": 0.8706046187768365, "grad_norm": 0.4808577299118042, "learning_rate": 0.00018544259941234085, "loss": 0.8261, "step": 3751 }, { "epoch": 0.8708367181153533, "grad_norm": 0.4224403202533722, "learning_rate": 0.00018543502056916536, "loss": 0.8425, "step": 3752 }, { "epoch": 0.8710688174538702, "grad_norm": 0.5509401559829712, "learning_rate": 0.000185427439908616, "loss": 0.7942, "step": 3753 }, { "epoch": 0.8713009167923872, "grad_norm": 0.47354769706726074, "learning_rate": 0.00018541985743085405, "loss": 0.8791, "step": 3754 }, { "epoch": 0.8715330161309041, "grad_norm": 0.48359885811805725, "learning_rate": 0.00018541227313604078, "loss": 0.8611, "step": 3755 }, { "epoch": 0.8717651154694209, "grad_norm": 0.48973050713539124, "learning_rate": 0.00018540468702433758, "loss": 0.8084, "step": 3756 }, { "epoch": 0.8719972148079378, "grad_norm": 0.4059913158416748, "learning_rate": 0.00018539709909590576, "loss": 0.8935, "step": 3757 }, { "epoch": 0.8722293141464547, "grad_norm": 0.44113290309906006, "learning_rate": 0.00018538950935090677, "loss": 0.8441, "step": 3758 }, { "epoch": 0.8724614134849715, "grad_norm": 
0.4337928891181946, "learning_rate": 0.00018538191778950204, "loss": 0.8548, "step": 3759 }, { "epoch": 0.8726935128234885, "grad_norm": 0.41244831681251526, "learning_rate": 0.00018537432441185304, "loss": 0.8356, "step": 3760 }, { "epoch": 0.8729256121620054, "grad_norm": 0.44714102149009705, "learning_rate": 0.00018536672921812134, "loss": 0.8085, "step": 3761 }, { "epoch": 0.8731577115005222, "grad_norm": 0.43154868483543396, "learning_rate": 0.00018535913220846847, "loss": 0.7995, "step": 3762 }, { "epoch": 0.8733898108390391, "grad_norm": 0.4167262017726898, "learning_rate": 0.00018535153338305603, "loss": 0.8501, "step": 3763 }, { "epoch": 0.873621910177556, "grad_norm": 0.398404598236084, "learning_rate": 0.00018534393274204574, "loss": 0.8162, "step": 3764 }, { "epoch": 0.8738540095160728, "grad_norm": 0.3954335153102875, "learning_rate": 0.00018533633028559917, "loss": 0.8697, "step": 3765 }, { "epoch": 0.8740861088545898, "grad_norm": 0.4284425377845764, "learning_rate": 0.00018532872601387807, "loss": 0.8538, "step": 3766 }, { "epoch": 0.8743182081931067, "grad_norm": 0.3925730586051941, "learning_rate": 0.00018532111992704424, "loss": 0.8329, "step": 3767 }, { "epoch": 0.8745503075316235, "grad_norm": 0.42586302757263184, "learning_rate": 0.00018531351202525945, "loss": 0.8452, "step": 3768 }, { "epoch": 0.8747824068701404, "grad_norm": 0.41396793723106384, "learning_rate": 0.00018530590230868556, "loss": 0.84, "step": 3769 }, { "epoch": 0.8750145062086573, "grad_norm": 0.421150267124176, "learning_rate": 0.00018529829077748442, "loss": 0.8413, "step": 3770 }, { "epoch": 0.8752466055471742, "grad_norm": 0.4445338249206543, "learning_rate": 0.00018529067743181793, "loss": 0.8299, "step": 3771 }, { "epoch": 0.8754787048856911, "grad_norm": 0.6780601143836975, "learning_rate": 0.00018528306227184806, "loss": 0.8644, "step": 3772 }, { "epoch": 0.875710804224208, "grad_norm": 0.4485917091369629, "learning_rate": 0.0001852754452977368, "loss": 0.8571, 
"step": 3773 }, { "epoch": 0.8759429035627249, "grad_norm": 0.43445441126823425, "learning_rate": 0.00018526782650964618, "loss": 0.8499, "step": 3774 }, { "epoch": 0.8761750029012417, "grad_norm": 0.43933218717575073, "learning_rate": 0.00018526020590773823, "loss": 0.8504, "step": 3775 }, { "epoch": 0.8764071022397586, "grad_norm": 0.4182621240615845, "learning_rate": 0.0001852525834921751, "loss": 0.8153, "step": 3776 }, { "epoch": 0.8766392015782755, "grad_norm": 0.4337303638458252, "learning_rate": 0.00018524495926311893, "loss": 0.8355, "step": 3777 }, { "epoch": 0.8768713009167924, "grad_norm": 0.45787665247917175, "learning_rate": 0.00018523733322073192, "loss": 0.8477, "step": 3778 }, { "epoch": 0.8771034002553093, "grad_norm": 0.44240859150886536, "learning_rate": 0.00018522970536517623, "loss": 0.8366, "step": 3779 }, { "epoch": 0.8773354995938262, "grad_norm": 0.5221067070960999, "learning_rate": 0.00018522207569661415, "loss": 0.7688, "step": 3780 }, { "epoch": 0.877567598932343, "grad_norm": 0.41580215096473694, "learning_rate": 0.000185214444215208, "loss": 0.8615, "step": 3781 }, { "epoch": 0.8777996982708599, "grad_norm": 0.43873414397239685, "learning_rate": 0.00018520681092112006, "loss": 0.8547, "step": 3782 }, { "epoch": 0.8780317976093768, "grad_norm": 0.37557294964790344, "learning_rate": 0.0001851991758145128, "loss": 0.8184, "step": 3783 }, { "epoch": 0.8782638969478938, "grad_norm": 0.4067942500114441, "learning_rate": 0.00018519153889554854, "loss": 0.8072, "step": 3784 }, { "epoch": 0.8784959962864106, "grad_norm": 0.38345038890838623, "learning_rate": 0.0001851839001643898, "loss": 0.8175, "step": 3785 }, { "epoch": 0.8787280956249275, "grad_norm": 0.4141753613948822, "learning_rate": 0.00018517625962119905, "loss": 0.7874, "step": 3786 }, { "epoch": 0.8789601949634444, "grad_norm": 0.410163015127182, "learning_rate": 0.00018516861726613877, "loss": 0.8337, "step": 3787 }, { "epoch": 0.8791922943019612, "grad_norm": 0.4118984639644623, 
"learning_rate": 0.00018516097309937156, "loss": 0.8398, "step": 3788 }, { "epoch": 0.8794243936404781, "grad_norm": 0.42656370997428894, "learning_rate": 0.0001851533271210601, "loss": 0.8233, "step": 3789 }, { "epoch": 0.8796564929789951, "grad_norm": 0.4185142517089844, "learning_rate": 0.00018514567933136693, "loss": 0.8372, "step": 3790 }, { "epoch": 0.8798885923175119, "grad_norm": 0.4009787440299988, "learning_rate": 0.0001851380297304548, "loss": 0.8194, "step": 3791 }, { "epoch": 0.8801206916560288, "grad_norm": 0.43166425824165344, "learning_rate": 0.00018513037831848639, "loss": 0.8333, "step": 3792 }, { "epoch": 0.8803527909945457, "grad_norm": 0.4240741431713104, "learning_rate": 0.00018512272509562446, "loss": 0.8087, "step": 3793 }, { "epoch": 0.8805848903330625, "grad_norm": 0.4606071710586548, "learning_rate": 0.00018511507006203188, "loss": 0.7864, "step": 3794 }, { "epoch": 0.8808169896715794, "grad_norm": 0.4614354372024536, "learning_rate": 0.0001851074132178714, "loss": 0.8406, "step": 3795 }, { "epoch": 0.8810490890100964, "grad_norm": 0.46533650159835815, "learning_rate": 0.00018509975456330592, "loss": 0.8695, "step": 3796 }, { "epoch": 0.8812811883486132, "grad_norm": 0.46137019991874695, "learning_rate": 0.00018509209409849843, "loss": 0.8355, "step": 3797 }, { "epoch": 0.8815132876871301, "grad_norm": 0.4653560221195221, "learning_rate": 0.00018508443182361175, "loss": 0.8749, "step": 3798 }, { "epoch": 0.881745387025647, "grad_norm": 0.39684346318244934, "learning_rate": 0.00018507676773880897, "loss": 0.8362, "step": 3799 }, { "epoch": 0.8819774863641638, "grad_norm": 0.4175236225128174, "learning_rate": 0.0001850691018442531, "loss": 0.8549, "step": 3800 } ], "logging_steps": 1, "max_steps": 21540, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 50, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, 
"should_training_stop": false }, "attributes": {} } }, "total_flos": 1.6867315219955712e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }