{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 5910, "global_step": 23638, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 4.2304763516371946e-05, "grad_norm": 0.8915818929672241, "learning_rate": 0.001, "loss": 2.2708, "step": 1 }, { "epoch": 8.460952703274389e-05, "grad_norm": 0.4329769015312195, "learning_rate": 0.001, "loss": 2.483, "step": 2 }, { "epoch": 0.00012691429054911582, "grad_norm": 0.45692744851112366, "learning_rate": 0.001, "loss": 3.5579, "step": 3 }, { "epoch": 0.00016921905406548778, "grad_norm": 0.40001946687698364, "learning_rate": 0.001, "loss": 2.4916, "step": 4 }, { "epoch": 0.00021152381758185972, "grad_norm": 0.530651867389679, "learning_rate": 0.001, "loss": 1.7408, "step": 5 }, { "epoch": 0.00025382858109823165, "grad_norm": 1.0012280941009521, "learning_rate": 0.001, "loss": 2.3514, "step": 6 }, { "epoch": 0.0002961333446146036, "grad_norm": 0.3786524534225464, "learning_rate": 0.001, "loss": 2.6244, "step": 7 }, { "epoch": 0.00033843810813097557, "grad_norm": 0.7540691494941711, "learning_rate": 0.001, "loss": 2.4326, "step": 8 }, { "epoch": 0.0003807428716473475, "grad_norm": 0.44959110021591187, "learning_rate": 0.001, "loss": 2.1347, "step": 9 }, { "epoch": 0.00042304763516371943, "grad_norm": 0.3813895881175995, "learning_rate": 0.001, "loss": 2.6791, "step": 10 }, { "epoch": 0.00046535239868009137, "grad_norm": 0.42967715859413147, "learning_rate": 0.001, "loss": 1.9963, "step": 11 }, { "epoch": 0.0005076571621964633, "grad_norm": 0.8154281377792358, "learning_rate": 0.001, "loss": 3.3512, "step": 12 }, { "epoch": 0.0005499619257128352, "grad_norm": 0.4104110896587372, "learning_rate": 0.001, "loss": 1.9401, "step": 13 }, { "epoch": 0.0005922666892292072, "grad_norm": 0.536148190498352, "learning_rate": 0.001, "loss": 2.0377, "step": 14 }, { "epoch": 0.0006345714527455792, "grad_norm": 0.3982076644897461, "learning_rate": 0.001, "loss": 2.6555, "step": 15 }, { "epoch": 0.0006768762162619511, "grad_norm": 0.41156458854675293, "learning_rate": 0.001, "loss": 2.0036, "step": 16 }, { "epoch": 0.0007191809797783231, "grad_norm": 0.5118016004562378, "learning_rate": 0.001, "loss": 2.4408, "step": 17 }, { "epoch": 0.000761485743294695, "grad_norm": 0.44030845165252686, "learning_rate": 0.001, "loss": 3.1642, "step": 18 }, { "epoch": 0.0008037905068110669, "grad_norm": 0.3349159061908722, "learning_rate": 0.001, "loss": 1.947, "step": 19 }, { "epoch": 0.0008460952703274389, "grad_norm": 0.44204267859458923, "learning_rate": 0.001, "loss": 2.3666, "step": 20 }, { "epoch": 0.0008884000338438108, "grad_norm": 0.4936717748641968, "learning_rate": 0.001, "loss": 3.2801, "step": 21 }, { "epoch": 0.0009307047973601827, "grad_norm": 0.48639440536499023, "learning_rate": 0.001, "loss": 2.9606, "step": 22 }, { "epoch": 0.0009730095608765547, "grad_norm": 0.3897702991962433, "learning_rate": 0.001, "loss": 2.0694, "step": 23 }, { "epoch": 0.0010153143243929266, "grad_norm": 0.47153130173683167, "learning_rate": 0.001, "loss": 1.8757, "step": 24 }, { "epoch": 0.0010576190879092986, "grad_norm": 0.7353273034095764, "learning_rate": 0.001, "loss": 3.3964, "step": 25 }, { "epoch": 0.0010999238514256705, "grad_norm": 0.4378684461116791, "learning_rate": 0.001, "loss": 3.3866, "step": 26 }, { "epoch": 0.0011422286149420425, "grad_norm": 0.5399907231330872, "learning_rate": 0.001, "loss": 3.5229, "step": 27 }, { "epoch": 0.0011845333784584143, "grad_norm": 0.4380228519439697, "learning_rate": 0.001, "loss": 3.3026, "step": 28 }, { "epoch": 0.0012268381419747864, "grad_norm": 0.5237672924995422, "learning_rate": 0.001, "loss": 2.7964, "step": 29 }, { "epoch": 0.0012691429054911584, "grad_norm": 0.5567352771759033, "learning_rate": 0.001, "loss": 2.2648, "step": 30 }, { "epoch": 0.0013114476690075302, "grad_norm": 0.4474937915802002, "learning_rate": 0.001, "loss": 2.6151, "step": 31 }, { "epoch": 0.0013537524325239023, "grad_norm": 0.5182416439056396, "learning_rate": 0.001, "loss": 3.4561, "step": 32 }, { "epoch": 0.001396057196040274, "grad_norm": 0.4204736649990082, "learning_rate": 0.001, "loss": 3.0416, "step": 33 }, { "epoch": 0.0014383619595566461, "grad_norm": 0.45871949195861816, "learning_rate": 0.001, "loss": 2.2159, "step": 34 }, { "epoch": 0.001480666723073018, "grad_norm": 0.4581148028373718, "learning_rate": 0.001, "loss": 2.0835, "step": 35 }, { "epoch": 0.00152297148658939, "grad_norm": 0.41536086797714233, "learning_rate": 0.001, "loss": 2.5392, "step": 36 }, { "epoch": 0.0015652762501057618, "grad_norm": 0.48674535751342773, "learning_rate": 0.001, "loss": 2.6758, "step": 37 }, { "epoch": 0.0016075810136221339, "grad_norm": 0.8809964656829834, "learning_rate": 0.001, "loss": 2.8721, "step": 38 }, { "epoch": 0.0016498857771385057, "grad_norm": 0.6416422724723816, "learning_rate": 0.001, "loss": 2.2551, "step": 39 }, { "epoch": 0.0016921905406548777, "grad_norm": 0.8650534152984619, "learning_rate": 0.001, "loss": 2.4497, "step": 40 }, { "epoch": 0.0017344953041712498, "grad_norm": 0.42286792397499084, "learning_rate": 0.001, "loss": 2.813, "step": 41 }, { "epoch": 0.0017768000676876216, "grad_norm": 0.7012924551963806, "learning_rate": 0.001, "loss": 3.2002, "step": 42 }, { "epoch": 0.0018191048312039936, "grad_norm": 0.6307465434074402, "learning_rate": 0.001, "loss": 2.1075, "step": 43 }, { "epoch": 0.0018614095947203655, "grad_norm": 0.3289887309074402, "learning_rate": 0.001, "loss": 1.9497, "step": 44 }, { "epoch": 0.0019037143582367375, "grad_norm": 0.5121589303016663, "learning_rate": 0.001, "loss": 2.9249, "step": 45 }, { "epoch": 0.0019460191217531093, "grad_norm": 0.545623242855072, "learning_rate": 0.001, "loss": 2.364, "step": 46 }, { "epoch": 0.001988323885269481, "grad_norm": 0.427653431892395, "learning_rate": 0.001, "loss": 2.1296, "step": 47 }, { "epoch": 0.002030628648785853, "grad_norm": 0.4887496531009674, "learning_rate": 0.001, "loss": 2.9931, "step": 48 }, { "epoch": 0.0020729334123022252, "grad_norm": 0.5210931897163391, "learning_rate": 0.001, "loss": 2.232, "step": 49 }, { "epoch": 0.0021152381758185973, "grad_norm": 0.4477267861366272, "learning_rate": 0.001, "loss": 2.4165, "step": 50 }, { "epoch": 0.0021575429393349693, "grad_norm": 1.6815810203552246, "learning_rate": 0.001, "loss": 2.0357, "step": 51 }, { "epoch": 0.002199847702851341, "grad_norm": 0.3510444164276123, "learning_rate": 0.001, "loss": 2.9298, "step": 52 }, { "epoch": 0.002242152466367713, "grad_norm": 0.42857974767684937, "learning_rate": 0.001, "loss": 2.8284, "step": 53 }, { "epoch": 0.002284457229884085, "grad_norm": 0.6068593263626099, "learning_rate": 0.001, "loss": 3.6971, "step": 54 }, { "epoch": 0.002326761993400457, "grad_norm": 0.46816545724868774, "learning_rate": 0.001, "loss": 2.0175, "step": 55 }, { "epoch": 0.0023690667569168287, "grad_norm": 0.44248470664024353, "learning_rate": 0.001, "loss": 2.1154, "step": 56 }, { "epoch": 0.0024113715204332007, "grad_norm": 0.4355832636356354, "learning_rate": 0.001, "loss": 2.5438, "step": 57 }, { "epoch": 0.0024536762839495727, "grad_norm": 0.39491739869117737, "learning_rate": 0.001, "loss": 2.5997, "step": 58 }, { "epoch": 0.002495981047465945, "grad_norm": 23.964534759521484, "learning_rate": 0.001, "loss": 2.3666, "step": 59 }, { "epoch": 0.002538285810982317, "grad_norm": 0.5575146079063416, "learning_rate": 0.001, "loss": 2.6327, "step": 60 }, { "epoch": 0.0025805905744986884, "grad_norm": 0.44521743059158325, "learning_rate": 0.001, "loss": 3.0179, "step": 61 }, { "epoch": 0.0026228953380150605, "grad_norm": 0.891205370426178, "learning_rate": 0.001, "loss": 1.837, "step": 62 }, { "epoch": 0.0026652001015314325, "grad_norm": 0.33089524507522583, "learning_rate": 0.001, "loss": 2.9327, "step": 63 }, { "epoch": 0.0027075048650478046, "grad_norm": 0.3559476435184479, "learning_rate": 0.001, "loss": 2.6628, "step": 64 }, { "epoch": 0.002749809628564176, "grad_norm": 0.4208162724971771, "learning_rate": 0.001, "loss": 2.1463, "step": 65 }, { "epoch": 0.002792114392080548, "grad_norm": 0.48232489824295044, "learning_rate": 0.001, "loss": 2.1562, "step": 66 }, { "epoch": 0.0028344191555969202, "grad_norm": 0.44821107387542725, "learning_rate": 0.001, "loss": 2.3234, "step": 67 }, { "epoch": 0.0028767239191132923, "grad_norm": 0.42845988273620605, "learning_rate": 0.001, "loss": 2.1317, "step": 68 }, { "epoch": 0.002919028682629664, "grad_norm": 0.6322043538093567, "learning_rate": 0.001, "loss": 1.8463, "step": 69 }, { "epoch": 0.002961333446146036, "grad_norm": 0.4234910011291504, "learning_rate": 0.001, "loss": 2.8342, "step": 70 }, { "epoch": 0.003003638209662408, "grad_norm": 0.4582638740539551, "learning_rate": 0.001, "loss": 2.1037, "step": 71 }, { "epoch": 0.00304594297317878, "grad_norm": 0.41287997364997864, "learning_rate": 0.001, "loss": 2.4761, "step": 72 }, { "epoch": 0.003088247736695152, "grad_norm": 0.6007514595985413, "learning_rate": 0.001, "loss": 2.221, "step": 73 }, { "epoch": 0.0031305525002115237, "grad_norm": 0.6128891110420227, "learning_rate": 0.001, "loss": 2.847, "step": 74 }, { "epoch": 0.0031728572637278957, "grad_norm": 0.713722288608551, "learning_rate": 0.001, "loss": 2.3765, "step": 75 }, { "epoch": 0.0032151620272442677, "grad_norm": 0.5172343254089355, "learning_rate": 0.001, "loss": 2.3498, "step": 76 }, { "epoch": 0.00325746679076064, "grad_norm": 0.44072654843330383, "learning_rate": 0.001, "loss": 2.6489, "step": 77 }, { "epoch": 0.0032997715542770114, "grad_norm": 0.4492231011390686, "learning_rate": 0.001, "loss": 2.786, "step": 78 }, { "epoch": 0.0033420763177933834, "grad_norm": 0.38566169142723083, "learning_rate": 0.001, "loss": 2.5199, "step": 79 }, { "epoch": 0.0033843810813097555, "grad_norm": 0.44315439462661743, "learning_rate": 0.001, "loss": 3.2877, "step": 80 }, { "epoch": 0.0034266858448261275, "grad_norm": 0.4293360710144043, "learning_rate": 0.001, "loss": 3.0169, "step": 81 }, { "epoch": 0.0034689906083424996, "grad_norm": 0.4374128580093384, "learning_rate": 0.001, "loss": 1.8099, "step": 82 }, { "epoch": 0.003511295371858871, "grad_norm": 0.3986782133579254, "learning_rate": 0.001, "loss": 1.953, "step": 83 }, { "epoch": 0.003553600135375243, "grad_norm": 0.4710613787174225, "learning_rate": 0.001, "loss": 2.3669, "step": 84 }, { "epoch": 0.0035959048988916152, "grad_norm": 0.8579991459846497, "learning_rate": 0.001, "loss": 1.8707, "step": 85 }, { "epoch": 0.0036382096624079873, "grad_norm": 0.5034980177879333, "learning_rate": 0.001, "loss": 3.2651, "step": 86 }, { "epoch": 0.003680514425924359, "grad_norm": 0.8276948928833008, "learning_rate": 0.001, "loss": 2.6157, "step": 87 }, { "epoch": 0.003722819189440731, "grad_norm": 0.4035736322402954, "learning_rate": 0.001, "loss": 2.4765, "step": 88 }, { "epoch": 0.003765123952957103, "grad_norm": 0.6690497994422913, "learning_rate": 0.001, "loss": 3.0138, "step": 89 }, { "epoch": 0.003807428716473475, "grad_norm": 0.3947993814945221, "learning_rate": 0.001, "loss": 2.2625, "step": 90 }, { "epoch": 0.003849733479989847, "grad_norm": 0.7430382966995239, "learning_rate": 0.001, "loss": 1.8479, "step": 91 }, { "epoch": 0.0038920382435062187, "grad_norm": 0.4126277267932892, "learning_rate": 0.001, "loss": 1.9788, "step": 92 }, { "epoch": 0.003934343007022591, "grad_norm": 0.39753755927085876, "learning_rate": 0.001, "loss": 2.1964, "step": 93 }, { "epoch": 0.003976647770538962, "grad_norm": 0.4967736601829529, "learning_rate": 0.001, "loss": 3.2626, "step": 94 }, { "epoch": 0.004018952534055335, "grad_norm": 0.3497101664543152, "learning_rate": 0.001, "loss": 2.0893, "step": 95 }, { "epoch": 0.004061257297571706, "grad_norm": 0.3811435401439667, "learning_rate": 0.001, "loss": 2.0541, "step": 96 }, { "epoch": 0.004103562061088079, "grad_norm": 0.43603935837745667, "learning_rate": 0.001, "loss": 2.3686, "step": 97 }, { "epoch": 0.0041458668246044505, "grad_norm": 0.7152244448661804, "learning_rate": 0.001, "loss": 2.3025, "step": 98 }, { "epoch": 0.004188171588120822, "grad_norm": 0.43975409865379333, "learning_rate": 0.001, "loss": 2.2561, "step": 99 }, { "epoch": 0.0042304763516371946, "grad_norm": 0.41654467582702637, "learning_rate": 0.001, "loss": 2.5949, "step": 100 }, { "epoch": 0.004272781115153566, "grad_norm": 0.5210958123207092, "learning_rate": 0.001, "loss": 2.3369, "step": 101 }, { "epoch": 0.004315085878669939, "grad_norm": 0.35408666729927063, "learning_rate": 0.001, "loss": 2.4174, "step": 102 }, { "epoch": 0.00435739064218631, "grad_norm": 0.38895490765571594, "learning_rate": 0.001, "loss": 2.6246, "step": 103 }, { "epoch": 0.004399695405702682, "grad_norm": 0.34460920095443726, "learning_rate": 0.001, "loss": 1.9521, "step": 104 }, { "epoch": 0.004442000169219054, "grad_norm": 0.3262885510921478, "learning_rate": 0.001, "loss": 2.4733, "step": 105 }, { "epoch": 0.004484304932735426, "grad_norm": 1.2628955841064453, "learning_rate": 0.001, "loss": 3.0509, "step": 106 }, { "epoch": 0.0045266096962517976, "grad_norm": 0.42988988757133484, "learning_rate": 0.001, "loss": 2.5068, "step": 107 }, { "epoch": 0.00456891445976817, "grad_norm": 0.42915594577789307, "learning_rate": 0.001, "loss": 2.6055, "step": 108 }, { "epoch": 0.004611219223284542, "grad_norm": 0.5522227883338928, "learning_rate": 0.001, "loss": 2.6447, "step": 109 }, { "epoch": 0.004653523986800914, "grad_norm": 0.3777415156364441, "learning_rate": 0.001, "loss": 2.4052, "step": 110 }, { "epoch": 0.004695828750317286, "grad_norm": 0.5317783951759338, "learning_rate": 0.001, "loss": 2.5688, "step": 111 }, { "epoch": 0.004738133513833657, "grad_norm": 0.4777368903160095, "learning_rate": 0.001, "loss": 2.3497, "step": 112 }, { "epoch": 0.00478043827735003, "grad_norm": 1.6108906269073486, "learning_rate": 0.001, "loss": 2.4328, "step": 113 }, { "epoch": 0.004822743040866401, "grad_norm": 0.3473729193210602, "learning_rate": 0.001, "loss": 2.3263, "step": 114 }, { "epoch": 0.004865047804382774, "grad_norm": 0.43898317217826843, "learning_rate": 0.001, "loss": 3.2201, "step": 115 }, { "epoch": 0.0049073525678991455, "grad_norm": 0.6918124556541443, "learning_rate": 0.001, "loss": 3.625, "step": 116 }, { "epoch": 0.004949657331415517, "grad_norm": 9.253972053527832, "learning_rate": 0.001, "loss": 2.8214, "step": 117 }, { "epoch": 0.00499196209493189, "grad_norm": 0.8446986079216003, "learning_rate": 0.001, "loss": 2.1835, "step": 118 }, { "epoch": 0.005034266858448261, "grad_norm": 2.1206488609313965, "learning_rate": 0.001, "loss": 2.2383, "step": 119 }, { "epoch": 0.005076571621964634, "grad_norm": 0.4840485453605652, "learning_rate": 0.001, "loss": 2.4211, "step": 120 }, { "epoch": 0.005118876385481005, "grad_norm": 0.39015278220176697, "learning_rate": 0.001, "loss": 2.548, "step": 121 }, { "epoch": 0.005161181148997377, "grad_norm": 0.3702382743358612, "learning_rate": 0.001, "loss": 2.992, "step": 122 }, { "epoch": 0.005203485912513749, "grad_norm": 0.4635108709335327, "learning_rate": 0.001, "loss": 2.5117, "step": 123 }, { "epoch": 0.005245790676030121, "grad_norm": 0.34755992889404297, "learning_rate": 0.001, "loss": 2.3573, "step": 124 }, { "epoch": 0.0052880954395464926, "grad_norm": 0.5302094221115112, "learning_rate": 0.001, "loss": 2.4309, "step": 125 }, { "epoch": 0.005330400203062865, "grad_norm": 0.562074601650238, "learning_rate": 0.001, "loss": 2.0844, "step": 126 }, { "epoch": 0.005372704966579237, "grad_norm": 0.42684710025787354, "learning_rate": 0.001, "loss": 2.7916, "step": 127 }, { "epoch": 0.005415009730095609, "grad_norm": 0.5416858792304993, "learning_rate": 0.001, "loss": 2.5613, "step": 128 }, { "epoch": 0.005457314493611981, "grad_norm": 0.4300224483013153, "learning_rate": 0.001, "loss": 3.3811, "step": 129 }, { "epoch": 0.005499619257128352, "grad_norm": 0.46849384903907776, "learning_rate": 0.001, "loss": 2.5776, "step": 130 }, { "epoch": 0.005541924020644725, "grad_norm": 0.46816426515579224, "learning_rate": 0.001, "loss": 2.3562, "step": 131 }, { "epoch": 0.005584228784161096, "grad_norm": 0.4470241069793701, "learning_rate": 0.001, "loss": 2.3042, "step": 132 }, { "epoch": 0.005626533547677469, "grad_norm": 0.7884194254875183, "learning_rate": 0.001, "loss": 2.8377, "step": 133 }, { "epoch": 0.0056688383111938405, "grad_norm": 0.6185025572776794, "learning_rate": 0.001, "loss": 2.3049, "step": 134 }, { "epoch": 0.005711143074710212, "grad_norm": 1.048901915550232, "learning_rate": 0.001, "loss": 2.1582, "step": 135 }, { "epoch": 0.005753447838226585, "grad_norm": 0.47046688199043274, "learning_rate": 0.001, "loss": 3.4005, "step": 136 }, { "epoch": 0.005795752601742956, "grad_norm": 0.435097873210907, "learning_rate": 0.001, "loss": 2.4624, "step": 137 }, { "epoch": 0.005838057365259328, "grad_norm": 0.41452470421791077, "learning_rate": 0.001, "loss": 2.7871, "step": 138 }, { "epoch": 0.0058803621287757, "grad_norm": 0.8315361142158508, "learning_rate": 0.001, "loss": 2.007, "step": 139 }, { "epoch": 0.005922666892292072, "grad_norm": 1.842594027519226, "learning_rate": 0.001, "loss": 2.3946, "step": 140 }, { "epoch": 0.005964971655808444, "grad_norm": 0.6004571318626404, "learning_rate": 0.001, "loss": 2.3669, "step": 141 }, { "epoch": 0.006007276419324816, "grad_norm": 0.534443199634552, "learning_rate": 0.001, "loss": 2.5264, "step": 142 }, { "epoch": 0.0060495811828411876, "grad_norm": 0.4516150951385498, "learning_rate": 0.001, "loss": 2.3313, "step": 143 }, { "epoch": 0.00609188594635756, "grad_norm": 0.4618544280529022, "learning_rate": 0.001, "loss": 2.4899, "step": 144 }, { "epoch": 0.006134190709873932, "grad_norm": 0.7234168648719788, "learning_rate": 0.001, "loss": 2.6252, "step": 145 }, { "epoch": 0.006176495473390304, "grad_norm": 0.5123864412307739, "learning_rate": 0.001, "loss": 2.9488, "step": 146 }, { "epoch": 0.006218800236906676, "grad_norm": 0.5958865284919739, "learning_rate": 0.001, "loss": 3.4051, "step": 147 }, { "epoch": 0.006261105000423047, "grad_norm": 0.5423961877822876, "learning_rate": 0.001, "loss": 4.0919, "step": 148 }, { "epoch": 0.00630340976393942, "grad_norm": 1.3664882183074951, "learning_rate": 0.001, "loss": 2.0117, "step": 149 }, { "epoch": 0.006345714527455791, "grad_norm": 0.35952356457710266, "learning_rate": 0.001, "loss": 2.8003, "step": 150 }, { "epoch": 0.006388019290972164, "grad_norm": 0.43103012442588806, "learning_rate": 0.001, "loss": 2.4838, "step": 151 }, { "epoch": 0.0064303240544885355, "grad_norm": 0.6071809530258179, "learning_rate": 0.001, "loss": 2.6764, "step": 152 }, { "epoch": 0.006472628818004907, "grad_norm": 0.38204053044319153, "learning_rate": 0.001, "loss": 2.2442, "step": 153 }, { "epoch": 0.00651493358152128, "grad_norm": 0.301871120929718, "learning_rate": 0.001, "loss": 1.855, "step": 154 }, { "epoch": 0.006557238345037651, "grad_norm": 0.4744417667388916, "learning_rate": 0.001, "loss": 2.5978, "step": 155 }, { "epoch": 0.006599543108554023, "grad_norm": 0.5253042578697205, "learning_rate": 0.001, "loss": 1.9776, "step": 156 }, { "epoch": 0.006641847872070395, "grad_norm": 0.611546516418457, "learning_rate": 0.001, "loss": 2.3661, "step": 157 }, { "epoch": 0.006684152635586767, "grad_norm": 0.40985289216041565, "learning_rate": 0.001, "loss": 2.6935, "step": 158 }, { "epoch": 0.006726457399103139, "grad_norm": 0.561894953250885, "learning_rate": 0.001, "loss": 2.7189, "step": 159 }, { "epoch": 0.006768762162619511, "grad_norm": 0.7344470620155334, "learning_rate": 0.001, "loss": 2.0996, "step": 160 }, { "epoch": 0.0068110669261358826, "grad_norm": 0.6005984544754028, "learning_rate": 0.001, "loss": 2.9467, "step": 161 }, { "epoch": 0.006853371689652255, "grad_norm": 1.274715542793274, "learning_rate": 0.001, "loss": 3.6441, "step": 162 }, { "epoch": 0.006895676453168627, "grad_norm": 0.44184061884880066, "learning_rate": 0.001, "loss": 2.4605, "step": 163 }, { "epoch": 0.006937981216684999, "grad_norm": 0.4935331642627716, "learning_rate": 0.001, "loss": 3.2329, "step": 164 }, { "epoch": 0.006980285980201371, "grad_norm": 1.1876336336135864, "learning_rate": 0.001, "loss": 1.9255, "step": 165 }, { "epoch": 0.007022590743717742, "grad_norm": 0.3959547281265259, "learning_rate": 0.001, "loss": 2.0602, "step": 166 }, { "epoch": 0.007064895507234115, "grad_norm": 0.6868635416030884, "learning_rate": 0.001, "loss": 3.2132, "step": 167 }, { "epoch": 0.007107200270750486, "grad_norm": 1.7616485357284546, "learning_rate": 0.001, "loss": 2.568, "step": 168 }, { "epoch": 0.007149505034266858, "grad_norm": 0.7364703416824341, "learning_rate": 0.001, "loss": 3.4709, "step": 169 }, { "epoch": 0.0071918097977832305, "grad_norm": 0.3650343716144562, "learning_rate": 0.001, "loss": 2.5879, "step": 170 }, { "epoch": 0.007234114561299602, "grad_norm": 0.5033729076385498, "learning_rate": 0.001, "loss": 2.3781, "step": 171 }, { "epoch": 0.007276419324815975, "grad_norm": 7.086312294006348, "learning_rate": 0.001, "loss": 3.8916, "step": 172 }, { "epoch": 0.007318724088332346, "grad_norm": 0.6279006600379944, "learning_rate": 0.001, "loss": 2.9944, "step": 173 }, { "epoch": 0.007361028851848718, "grad_norm": 0.4906074106693268, "learning_rate": 0.001, "loss": 3.5258, "step": 174 }, { "epoch": 0.00740333361536509, "grad_norm": 0.3576771318912506, "learning_rate": 0.001, "loss": 1.7132, "step": 175 }, { "epoch": 0.007445638378881462, "grad_norm": 0.39911970496177673, "learning_rate": 0.001, "loss": 3.7563, "step": 176 }, { "epoch": 0.007487943142397834, "grad_norm": 2.3106889724731445, "learning_rate": 0.001, "loss": 3.3329, "step": 177 }, { "epoch": 0.007530247905914206, "grad_norm": 1.1456013917922974, "learning_rate": 0.001, "loss": 1.9892, "step": 178 }, { "epoch": 0.007572552669430578, "grad_norm": 0.457940012216568, "learning_rate": 0.001, "loss": 2.1332, "step": 179 }, { "epoch": 0.00761485743294695, "grad_norm": 0.44287726283073425, "learning_rate": 0.001, "loss": 2.6956, "step": 180 }, { "epoch": 0.007657162196463322, "grad_norm": 0.4767186641693115, "learning_rate": 0.001, "loss": 2.0452, "step": 181 }, { "epoch": 0.007699466959979694, "grad_norm": 26.49453353881836, "learning_rate": 0.001, "loss": 2.0203, "step": 182 }, { "epoch": 0.007741771723496066, "grad_norm": 0.6047090888023376, "learning_rate": 0.001, "loss": 2.731, "step": 183 }, { "epoch": 0.007784076487012437, "grad_norm": 0.8945805430412292, "learning_rate": 0.001, "loss": 2.3029, "step": 184 }, { "epoch": 0.00782638125052881, "grad_norm": 0.8593363761901855, "learning_rate": 0.001, "loss": 2.5228, "step": 185 }, { "epoch": 0.007868686014045181, "grad_norm": 2.482999801635742, "learning_rate": 0.001, "loss": 3.0185, "step": 186 }, { "epoch": 0.007910990777561553, "grad_norm": 0.7577206492424011, "learning_rate": 0.001, "loss": 2.948, "step": 187 }, { "epoch": 0.007953295541077925, "grad_norm": 2.415463924407959, "learning_rate": 0.001, "loss": 3.2312, "step": 188 }, { "epoch": 0.007995600304594298, "grad_norm": 0.9507613182067871, "learning_rate": 0.001, "loss": 2.5924, "step": 189 }, { "epoch": 0.00803790506811067, "grad_norm": 0.7477337121963501, "learning_rate": 0.001, "loss": 3.3504, "step": 190 }, { "epoch": 0.008080209831627041, "grad_norm": 1.864358901977539, "learning_rate": 0.001, "loss": 2.9727, "step": 191 }, { "epoch": 0.008122514595143413, "grad_norm": 1.1556525230407715, "learning_rate": 0.001, "loss": 2.8587, "step": 192 }, { "epoch": 0.008164819358659784, "grad_norm": 1.382063388824463, "learning_rate": 0.001, "loss": 2.4826, "step": 193 }, { "epoch": 0.008207124122176158, "grad_norm": 0.43554991483688354, "learning_rate": 0.001, "loss": 2.5445, "step": 194 }, { "epoch": 0.00824942888569253, "grad_norm": 0.4753492772579193, "learning_rate": 0.001, "loss": 3.262, "step": 195 }, { "epoch": 0.008291733649208901, "grad_norm": 1.1016758680343628, "learning_rate": 0.001, "loss": 2.1801, "step": 196 }, { "epoch": 0.008334038412725273, "grad_norm": 0.6898307800292969, "learning_rate": 0.001, "loss": 2.3774, "step": 197 }, { "epoch": 0.008376343176241644, "grad_norm": 4.887700080871582, "learning_rate": 0.001, "loss": 3.0435, "step": 198 }, { "epoch": 0.008418647939758018, "grad_norm": 1.1673232316970825, "learning_rate": 0.001, "loss": 2.4091, "step": 199 }, { "epoch": 0.008460952703274389, "grad_norm": 0.5722813010215759, "learning_rate": 0.001, "loss": 2.1268, "step": 200 }, { "epoch": 0.00850325746679076, "grad_norm": 0.45928850769996643, "learning_rate": 0.001, "loss": 2.4795, "step": 201 }, { "epoch": 0.008545562230307132, "grad_norm": 0.429862380027771, "learning_rate": 0.001, "loss": 2.8722, "step": 202 }, { "epoch": 0.008587866993823504, "grad_norm": 0.45352330803871155, "learning_rate": 0.001, "loss": 2.2689, "step": 203 }, { "epoch": 0.008630171757339877, "grad_norm": 0.4726592004299164, "learning_rate": 0.001, "loss": 3.3355, "step": 204 }, { "epoch": 0.008672476520856249, "grad_norm": 0.44824856519699097, "learning_rate": 0.001, "loss": 2.5926, "step": 205 }, { "epoch": 0.00871478128437262, "grad_norm": 0.39711594581604004, "learning_rate": 0.001, "loss": 2.2368, "step": 206 }, { "epoch": 0.008757086047888992, "grad_norm": 0.47843319177627563, "learning_rate": 0.001, "loss": 2.983, "step": 207 }, { "epoch": 0.008799390811405364, "grad_norm": 0.33170491456985474, "learning_rate": 0.001, "loss": 2.1638, "step": 208 }, { "epoch": 0.008841695574921735, "grad_norm": 0.4759407937526703, "learning_rate": 0.001, "loss": 2.4175, "step": 209 }, { "epoch": 0.008884000338438109, "grad_norm": 0.5208505392074585, "learning_rate": 0.001, "loss": 3.6394, "step": 210 }, { "epoch": 0.00892630510195448, "grad_norm": 0.5411098599433899, "learning_rate": 0.001, "loss": 3.212, "step": 211 }, { "epoch": 0.008968609865470852, "grad_norm": 0.4449928104877472, "learning_rate": 0.001, "loss": 2.8441, "step": 212 }, { "epoch": 0.009010914628987223, "grad_norm": 0.7975568175315857, "learning_rate": 0.001, "loss": 2.4782, "step": 213 }, { "epoch": 0.009053219392503595, "grad_norm": 1.3413524627685547, "learning_rate": 0.001, "loss": 1.8429, "step": 214 }, { "epoch": 0.009095524156019968, "grad_norm": 1.463435411453247, "learning_rate": 0.001, "loss": 2.138, "step": 215 }, { "epoch": 0.00913782891953634, "grad_norm": 0.5830798745155334, "learning_rate": 0.001, "loss": 2.7381, "step": 216 }, { "epoch": 0.009180133683052712, "grad_norm": 1.1858150959014893, "learning_rate": 0.001, "loss": 2.9576, "step": 217 }, { "epoch": 0.009222438446569083, "grad_norm": 0.5822926163673401, "learning_rate": 0.001, "loss": 2.9898, "step": 218 }, { "epoch": 0.009264743210085455, "grad_norm": 0.38207611441612244, "learning_rate": 0.001, "loss": 2.5692, "step": 219 }, { "epoch": 0.009307047973601828, "grad_norm": 0.3924141228199005, "learning_rate": 0.001, "loss": 2.1596, "step": 220 }, { "epoch": 0.0093493527371182, "grad_norm": 0.3632948398590088, "learning_rate": 0.001, "loss": 1.9336, "step": 221 }, { "epoch": 0.009391657500634571, "grad_norm": 0.6469441652297974, "learning_rate": 0.001, "loss": 3.3174, "step": 222 }, { "epoch": 0.009433962264150943, "grad_norm": 0.4201594293117523, "learning_rate": 0.001, "loss": 2.3425, "step": 223 }, { "epoch": 0.009476267027667315, "grad_norm": 0.4190143942832947, "learning_rate": 0.001, "loss": 2.306, "step": 224 }, { "epoch": 0.009518571791183688, "grad_norm": 0.7149160504341125, "learning_rate": 0.001, "loss": 2.82, "step": 225 }, { "epoch": 0.00956087655470006, "grad_norm": 0.6239504814147949, "learning_rate": 0.001, "loss": 2.1841, "step": 226 }, { "epoch": 0.009603181318216431, "grad_norm": 0.5916045904159546, "learning_rate": 0.001, "loss": 2.1635, "step": 227 }, { "epoch": 0.009645486081732803, "grad_norm": 0.4300955832004547, "learning_rate": 0.001, "loss": 2.3918, "step": 228 }, { "epoch": 0.009687790845249174, "grad_norm": 0.7572110891342163, "learning_rate": 0.001, "loss": 2.1596, "step": 229 }, { "epoch": 0.009730095608765548, "grad_norm": 0.4874956011772156, "learning_rate": 0.001, "loss": 3.3631, "step": 230 }, { "epoch": 0.00977240037228192, "grad_norm": 0.43173378705978394, "learning_rate": 0.001, "loss": 2.446, "step": 231 }, { "epoch": 0.009814705135798291, "grad_norm": 0.6573549509048462, "learning_rate": 0.001, "loss": 3.0706, "step": 232 }, { "epoch": 0.009857009899314663, "grad_norm": 0.42250698804855347, "learning_rate": 0.001, "loss": 2.9873, "step": 233 }, { "epoch": 0.009899314662831034, "grad_norm": 3.47033953666687, "learning_rate": 0.001, "loss": 2.4825, "step": 234 }, { "epoch": 0.009941619426347408, "grad_norm": 0.39495372772216797, "learning_rate": 0.001, "loss": 2.5977, "step": 235 }, { "epoch": 0.00998392418986378, "grad_norm": 0.5418169498443604, "learning_rate": 0.001, "loss": 2.0364, "step": 236 }, { "epoch": 0.01002622895338015, "grad_norm": 0.4567147195339203, "learning_rate": 0.001, "loss": 2.9259, "step": 237 }, { "epoch": 0.010068533716896522, "grad_norm": 0.8719536066055298, "learning_rate": 0.001, "loss": 2.8369, "step": 238 }, { "epoch": 0.010110838480412894, "grad_norm": 0.44655323028564453, "learning_rate": 0.001, "loss": 2.4415, "step": 239 }, { "epoch": 0.010153143243929267, "grad_norm": 0.5208900570869446, "learning_rate": 0.001, "loss": 2.4879, "step": 240 }, { "epoch": 0.010195448007445639, "grad_norm": 0.3061939775943756, "learning_rate": 0.001, "loss": 1.9186, "step": 241 }, { "epoch": 0.01023775277096201, "grad_norm": 0.48679229617118835, "learning_rate": 0.001, "loss": 2.2632, "step": 242 }, { "epoch": 0.010280057534478382, "grad_norm": 0.5101296305656433, "learning_rate": 0.001, "loss": 3.1684, "step": 243 }, { "epoch": 0.010322362297994754, "grad_norm": 1.0889201164245605, "learning_rate": 0.001, "loss": 2.4285, "step": 244 }, { "epoch": 0.010364667061511125, "grad_norm": 0.4110303819179535, "learning_rate": 0.001, "loss": 2.9342, "step": 245 }, { "epoch": 0.010406971825027499, "grad_norm": 0.528121292591095, "learning_rate": 0.001, "loss": 3.7985, "step": 246 }, { "epoch": 0.01044927658854387, "grad_norm": 2.397808074951172, "learning_rate": 0.001, "loss": 3.0319, "step": 247 }, { "epoch": 0.010491581352060242, "grad_norm": 1.5704766511917114, "learning_rate": 0.001, "loss": 2.6339, "step": 248 }, { "epoch": 0.010533886115576614, "grad_norm": 2.5879034996032715, "learning_rate": 0.001, "loss": 2.4711, "step": 249 }, { "epoch": 0.010576190879092985, "grad_norm": 0.5719565153121948, "learning_rate": 0.001, "loss": 2.0952, "step": 250 }, { "epoch": 0.010618495642609358, "grad_norm": 0.48833173513412476, "learning_rate": 0.001, "loss": 1.9318, "step": 251 }, { "epoch": 0.01066080040612573, "grad_norm": 0.4771743416786194, "learning_rate": 0.001, "loss": 2.4739, "step": 252 }, { "epoch": 0.010703105169642102, "grad_norm": 0.6914018392562866, "learning_rate": 0.001, "loss": 3.0228, "step": 253 }, { "epoch": 0.010745409933158473, "grad_norm": 0.655381441116333, "learning_rate": 0.001, "loss": 2.2491, "step": 254 }, { "epoch": 0.010787714696674845, "grad_norm": 0.47917628288269043, "learning_rate": 0.001, "loss": 3.1589, "step": 255 }, { "epoch": 0.010830019460191218, "grad_norm": 0.5288345217704773, "learning_rate": 0.001, "loss": 2.5891, "step": 256 }, { "epoch": 0.01087232422370759, "grad_norm": 0.7679047584533691, "learning_rate": 0.001, "loss": 2.1992, "step": 257 }, { "epoch": 0.010914628987223961, "grad_norm": 0.4910162091255188, "learning_rate": 0.001, "loss": 2.5994, "step": 258 }, { "epoch": 0.010956933750740333, "grad_norm": 0.40495947003364563, "learning_rate": 0.001, "loss": 2.7402, "step": 259 }, { "epoch": 0.010999238514256705, "grad_norm": 0.32625123858451843, "learning_rate": 0.001, "loss": 2.6868, "step": 260 }, { "epoch": 0.011041543277773078, "grad_norm": 0.4822738468647003, "learning_rate": 0.001, "loss": 2.6367, "step": 261 }, { "epoch": 0.01108384804128945, "grad_norm": 0.5915836095809937, "learning_rate": 0.001, "loss": 2.3544, "step": 262 }, { "epoch": 0.011126152804805821, "grad_norm": 0.32710862159729004, "learning_rate": 0.001, "loss": 2.3255, "step": 263 }, { "epoch": 0.011168457568322193, "grad_norm": 0.4875565767288208, "learning_rate": 0.001, "loss": 2.5211, "step": 264 }, { "epoch": 0.011210762331838564, "grad_norm": 1.2803752422332764, "learning_rate": 0.001, "loss": 2.407, "step": 265 }, { "epoch": 0.011253067095354938, "grad_norm": 0.7037876844406128, "learning_rate": 0.001, "loss": 3.2321, "step": 266 }, { "epoch": 0.01129537185887131, "grad_norm": 0.36865493655204773, "learning_rate": 0.001, "loss": 2.4842, "step": 267 }, { "epoch": 0.011337676622387681, "grad_norm": 0.4892154037952423, "learning_rate": 0.001, "loss": 3.5454, "step": 268 }, { "epoch": 0.011379981385904053, "grad_norm": 0.5366520285606384, "learning_rate": 0.001, "loss": 2.1276, "step": 269 }, { "epoch": 0.011422286149420424, "grad_norm": 0.47337809205055237, "learning_rate": 0.001, "loss": 3.3193, "step": 270 }, { "epoch": 0.011464590912936798, "grad_norm": 0.4499555230140686, "learning_rate": 0.001, "loss": 3.5789, "step": 271 }, { "epoch": 0.01150689567645317, "grad_norm": 0.3636587858200073, "learning_rate": 0.001, "loss": 2.3532, "step": 272 }, { "epoch": 0.01154920043996954, "grad_norm": 0.5930542349815369, "learning_rate": 0.001, "loss": 2.3694, "step": 273 }, { "epoch": 0.011591505203485912, "grad_norm": 0.7394574284553528, "learning_rate": 0.001, "loss": 3.1853, "step": 274 }, { "epoch": 0.011633809967002284, "grad_norm": 0.6907839775085449, "learning_rate": 0.001, "loss": 2.5174, "step": 275 }, { "epoch": 0.011676114730518656, "grad_norm": 1.15705406665802, "learning_rate": 0.001, "loss": 2.315, "step": 276 }, { "epoch": 0.011718419494035029, "grad_norm": 0.5256633758544922, "learning_rate": 0.001, "loss": 2.429, "step": 277 }, { "epoch": 0.0117607242575514, "grad_norm": 0.9328014850616455, "learning_rate": 0.001, "loss": 2.4643, "step": 278 }, { "epoch": 0.011803029021067772, "grad_norm": 0.5655497312545776, "learning_rate": 0.001, "loss": 3.344, "step": 279 }, { "epoch": 0.011845333784584144, "grad_norm": 0.6000591516494751, "learning_rate": 0.001, "loss": 3.2783, "step": 280 }, { "epoch": 0.011887638548100515, "grad_norm": 1.3328900337219238, "learning_rate": 0.001, "loss": 4.1927, "step": 281 }, { "epoch": 0.011929943311616889, "grad_norm": 2.8590199947357178, "learning_rate": 0.001, "loss": 2.3905, "step": 282 }, { "epoch": 0.01197224807513326, "grad_norm": 0.7419810891151428, "learning_rate": 0.001, "loss": 2.6781, "step": 283 }, { "epoch": 0.012014552838649632, "grad_norm": 0.6407875418663025, "learning_rate": 0.001, "loss": 2.5239, "step": 284 }, { "epoch": 0.012056857602166004, "grad_norm": 0.7808129191398621, "learning_rate": 0.001, "loss": 2.41, "step": 285 }, { "epoch": 0.012099162365682375, "grad_norm": 0.5127401947975159, "learning_rate": 0.001, "loss": 2.5634, "step": 286 }, { "epoch": 0.012141467129198748, "grad_norm": 0.7659389972686768, "learning_rate": 0.001, "loss": 2.6071, "step": 287 }, { "epoch": 0.01218377189271512, "grad_norm": 0.4947212040424347, "learning_rate": 0.001, "loss": 2.8672, "step": 288 }, { "epoch": 0.012226076656231492, "grad_norm": 0.7338204979896545, "learning_rate": 0.001, "loss": 2.9333, "step": 289 }, { "epoch": 0.012268381419747863, "grad_norm": 0.5832387804985046, "learning_rate": 0.001, "loss": 3.106, "step": 290 }, { "epoch": 0.012310686183264235, "grad_norm": 0.3840453028678894, "learning_rate": 0.001, "loss": 1.9948, "step": 291 }, { "epoch": 0.012352990946780608, "grad_norm": 0.6502733826637268, "learning_rate": 0.001, "loss": 2.5404, "step": 292 }, { "epoch": 0.01239529571029698, "grad_norm": 0.4887555241584778, "learning_rate": 0.001, "loss": 2.4991, "step": 293 }, { "epoch": 0.012437600473813351, "grad_norm": 0.9977436661720276, "learning_rate": 0.001, "loss": 2.0015, "step": 294 }, { "epoch": 0.012479905237329723, "grad_norm": 0.6219228506088257, "learning_rate": 0.001, "loss": 2.7717, "step": 295 }, { "epoch": 0.012522210000846095, "grad_norm": 0.4836757481098175, "learning_rate": 0.001, "loss": 2.207, "step": 296 }, { "epoch": 0.012564514764362468, "grad_norm": 0.43116042017936707, "learning_rate": 0.001, "loss": 2.7609, "step": 297 }, { "epoch": 0.01260681952787884, "grad_norm": 0.990545928478241, "learning_rate": 0.001, "loss": 2.7015, "step": 298 }, { "epoch": 0.012649124291395211, "grad_norm": 0.39893725514411926, "learning_rate": 0.001, "loss": 1.6939, "step": 299 }, { "epoch": 0.012691429054911583, "grad_norm": 0.6461228132247925, "learning_rate": 0.001, "loss": 2.8863, "step": 300 }, { "epoch": 0.012733733818427954, "grad_norm": 1.7560275793075562, "learning_rate": 0.001, "loss": 2.7424, "step": 301 }, { "epoch": 0.012776038581944328, "grad_norm": 0.40187498927116394, "learning_rate": 0.001, "loss": 2.6793, "step": 302 }, { "epoch": 0.0128183433454607, "grad_norm": 1.5104907751083374, "learning_rate": 0.001, "loss": 3.1125, "step": 303 }, { "epoch": 0.012860648108977071, "grad_norm": 0.4994620382785797, "learning_rate": 0.001, "loss": 2.9088, "step": 304 }, { "epoch": 0.012902952872493443, "grad_norm": 0.4981285035610199, "learning_rate": 0.001, "loss": 2.1621, "step": 305 }, { "epoch": 0.012945257636009814, "grad_norm": 0.5565193295478821, "learning_rate": 0.001, "loss": 2.0941, "step": 306 }, { "epoch": 0.012987562399526186, "grad_norm": 0.5400714874267578, "learning_rate": 0.001, "loss": 2.6684, "step": 307 }, { "epoch": 0.01302986716304256, "grad_norm": 0.6254021525382996, "learning_rate": 0.001, "loss": 2.0742, "step": 308 }, { "epoch": 0.01307217192655893, "grad_norm": 0.6768165230751038, "learning_rate": 0.001, "loss": 2.6983, "step": 309 }, { "epoch": 0.013114476690075302, "grad_norm": 0.4339453876018524, "learning_rate": 0.001, "loss": 2.8612, "step": 310 }, { "epoch": 0.013156781453591674, "grad_norm": 0.5363678336143494, "learning_rate": 0.001, "loss": 2.3609, "step": 311 }, { "epoch": 0.013199086217108046, "grad_norm": 0.5207034945487976, "learning_rate": 0.001, "loss": 2.3486, "step": 312 }, { "epoch": 0.013241390980624419, "grad_norm": 0.6771058440208435, "learning_rate": 0.001, "loss": 1.5712, "step": 313 }, { "epoch": 0.01328369574414079, "grad_norm": 0.5648268461227417, "learning_rate": 0.001, "loss": 3.9228, "step": 314 }, { "epoch": 0.013326000507657162, "grad_norm": 0.464399516582489, "learning_rate": 0.001, "loss": 2.0737, "step": 315 }, { "epoch": 0.013368305271173534, "grad_norm": 0.47292885184288025, "learning_rate": 0.001, "loss": 2.1631, "step": 316 }, { "epoch": 0.013410610034689905, "grad_norm": 0.5086976885795593, "learning_rate": 0.001, "loss": 2.0718, "step": 317 }, { "epoch": 0.013452914798206279, "grad_norm": 0.9637677669525146, "learning_rate": 0.001, "loss": 2.6751, "step": 318 }, { "epoch": 0.01349521956172265, "grad_norm": 0.9571125507354736, "learning_rate": 0.001, "loss": 2.147, "step": 319 }, { "epoch": 0.013537524325239022, "grad_norm": 0.37801995873451233, "learning_rate": 0.001, "loss": 1.9653, "step": 320 }, { "epoch": 0.013579829088755394, "grad_norm": 0.5243141651153564, "learning_rate": 0.001, "loss": 2.2679, "step": 321 }, { "epoch": 0.013622133852271765, "grad_norm": 0.6378395557403564, "learning_rate": 0.001, "loss": 1.8723, "step": 322 }, { "epoch": 0.013664438615788138, "grad_norm": 0.45354440808296204, "learning_rate": 0.001, "loss": 2.2213, "step": 323 }, { "epoch": 0.01370674337930451, "grad_norm": 0.976237952709198, "learning_rate": 0.001, "loss": 2.6927, "step": 324 }, { "epoch": 0.013749048142820882, "grad_norm": 0.4840424358844757, "learning_rate": 0.001, "loss": 2.3498, "step": 325 }, { "epoch": 0.013791352906337253, "grad_norm": 0.5081096291542053, "learning_rate": 0.001, "loss": 2.9997, "step": 326 }, { "epoch": 0.013833657669853625, "grad_norm": 0.5560334324836731, "learning_rate": 0.001, "loss": 3.1096, "step": 327 }, { "epoch": 0.013875962433369998, "grad_norm": 0.41798263788223267, "learning_rate": 0.001, "loss": 2.0079, "step": 328 }, { "epoch": 0.01391826719688637, "grad_norm": 0.40997782349586487, "learning_rate": 0.001, "loss": 1.4621, "step": 329 }, { "epoch": 0.013960571960402741, "grad_norm": 0.4821615517139435, "learning_rate": 0.001, "loss": 2.2829, "step": 330 }, { "epoch": 0.014002876723919113, "grad_norm": 0.332916796207428, "learning_rate": 0.001, "loss": 1.9644, "step": 331 }, { "epoch": 0.014045181487435485, "grad_norm": 0.49843454360961914, "learning_rate": 0.001, "loss": 3.7125, "step": 332 }, { "epoch": 0.014087486250951858, "grad_norm": 0.6166870594024658, "learning_rate": 0.001, "loss": 1.9159, "step": 333 }, { "epoch": 0.01412979101446823, "grad_norm": 0.6194549202919006, "learning_rate": 0.001, "loss": 2.4323, "step": 334 }, { "epoch": 0.014172095777984601, "grad_norm": 0.5313667058944702, "learning_rate": 0.001, "loss": 2.9331, "step": 335 }, { "epoch": 0.014214400541500973, "grad_norm": 0.3673984408378601, "learning_rate": 0.001, "loss": 2.2797, "step": 336 }, { "epoch": 0.014256705305017344, "grad_norm": 0.4663550853729248, "learning_rate": 0.001, "loss": 3.1726, "step": 337 }, { "epoch": 0.014299010068533716, "grad_norm": 1.455588459968567, "learning_rate": 0.001, "loss": 1.7567, "step": 338 }, { "epoch": 0.01434131483205009, "grad_norm": 1.234546184539795, "learning_rate": 0.001, "loss": 2.1583, "step": 339 }, { "epoch": 0.014383619595566461, "grad_norm": 2.307666301727295, "learning_rate": 0.001, "loss": 2.2571, "step": 340 }, { "epoch": 0.014425924359082833, "grad_norm": 2.126053810119629, "learning_rate": 0.001, "loss": 2.0934, "step": 341 }, { "epoch": 0.014468229122599204, "grad_norm": 0.5785415768623352, "learning_rate": 0.001, "loss": 2.4343, "step": 342 }, { "epoch": 0.014510533886115576, "grad_norm": 0.5483373999595642, "learning_rate": 0.001, "loss": 1.878, "step": 343 }, { "epoch": 0.01455283864963195, "grad_norm": 0.7590854167938232, "learning_rate": 0.001, "loss": 3.4764, "step": 344 }, { "epoch": 0.01459514341314832, "grad_norm": 1.6678118705749512, "learning_rate": 0.001, "loss": 1.8089, "step": 345 }, { "epoch": 0.014637448176664692, "grad_norm": 0.5652883052825928, "learning_rate": 0.001, "loss": 4.2131, "step": 346 }, { "epoch": 0.014679752940181064, "grad_norm": 0.5262782573699951, "learning_rate": 0.001, "loss": 3.3063, "step": 347 }, { "epoch": 0.014722057703697436, "grad_norm": 0.5519827604293823, "learning_rate": 0.001, "loss": 2.9829, "step": 348 }, { "epoch": 0.014764362467213809, "grad_norm": 0.4387628436088562, "learning_rate": 0.001, "loss": 2.4457, "step": 349 }, { "epoch": 0.01480666723073018, "grad_norm": 0.5566309690475464, "learning_rate": 0.001, "loss": 2.4278, "step": 350 }, { "epoch": 0.014848971994246552, "grad_norm": 0.3900257647037506, "learning_rate": 0.001, "loss": 2.6352, "step": 351 }, { "epoch": 0.014891276757762924, "grad_norm": 0.4080370366573334, "learning_rate": 0.001, "loss": 2.1755, "step": 352 }, { "epoch": 0.014933581521279295, "grad_norm": 0.6504301428794861, "learning_rate": 0.001, "loss": 2.5511, "step": 353 }, { "epoch": 0.014975886284795669, "grad_norm": 0.9981370568275452, "learning_rate": 0.001, "loss": 2.4473, "step": 354 }, { "epoch": 0.01501819104831204, "grad_norm": 1.0626752376556396, "learning_rate": 0.001, "loss": 2.398, "step": 355 }, { "epoch": 0.015060495811828412, "grad_norm": 0.487619549036026, "learning_rate": 0.001, "loss": 2.8647, "step": 356 }, { "epoch": 0.015102800575344784, "grad_norm": 0.47419092059135437, "learning_rate": 0.001, "loss": 2.0267, "step": 357 }, { "epoch": 0.015145105338861155, "grad_norm": 0.6504938006401062, "learning_rate": 0.001, "loss": 2.5824, "step": 358 }, { "epoch": 0.015187410102377528, "grad_norm": 0.5113835334777832, "learning_rate": 0.001, "loss": 3.355, "step": 359 }, { "epoch": 0.0152297148658939, "grad_norm": 0.38182953000068665, "learning_rate": 0.001, "loss": 2.3596, "step": 360 }, { "epoch": 0.015272019629410272, "grad_norm": 0.5668423771858215, "learning_rate": 0.001, "loss": 2.5567, "step": 361 }, { "epoch": 0.015314324392926643, "grad_norm": 0.6734775304794312, "learning_rate": 0.001, "loss": 1.5854, "step": 362 }, { "epoch": 0.015356629156443015, "grad_norm": 3.227863073348999, "learning_rate": 0.001, "loss": 2.8558, "step": 363 }, { "epoch": 0.015398933919959388, "grad_norm": 1.0969175100326538, "learning_rate": 0.001, "loss": 2.2552, "step": 364 }, { "epoch": 0.01544123868347576, "grad_norm": 0.8865289092063904, "learning_rate": 0.001, "loss": 2.6505, "step": 365 }, { "epoch": 0.015483543446992131, "grad_norm": 0.506199836730957, "learning_rate": 0.001, "loss": 2.7035, "step": 366 }, { "epoch": 0.015525848210508503, "grad_norm": 0.4529375731945038, "learning_rate": 0.001, "loss": 3.187, "step": 367 }, { "epoch": 0.015568152974024875, "grad_norm": 0.49568095803260803, "learning_rate": 0.001, "loss": 2.9369, "step": 368 }, { "epoch": 0.015610457737541246, "grad_norm": 3.762782096862793, "learning_rate": 0.001, "loss": 3.8394, "step": 369 }, { "epoch": 0.01565276250105762, "grad_norm": 1.1418230533599854, "learning_rate": 0.001, "loss": 2.9679, "step": 370 }, { "epoch": 0.01569506726457399, "grad_norm": 1.2121808528900146, "learning_rate": 0.001, "loss": 3.15, "step": 371 }, { "epoch": 0.015737372028090363, "grad_norm": 0.39264196157455444, "learning_rate": 0.001, "loss": 2.4917, "step": 372 }, { "epoch": 0.015779676791606734, "grad_norm": 0.44038763642311096, "learning_rate": 0.001, "loss": 3.1301, "step": 373 }, { "epoch": 0.015821981555123106, "grad_norm": 0.5781055688858032, "learning_rate": 0.001, "loss": 2.0814, "step": 374 }, { "epoch": 0.015864286318639478, "grad_norm": 0.4739765226840973, "learning_rate": 0.001, "loss": 3.3641, "step": 375 }, { "epoch": 0.01590659108215585, "grad_norm": 0.45817050337791443, "learning_rate": 0.001, "loss": 2.6101, "step": 376 }, { "epoch": 0.015948895845672224, "grad_norm": 0.45967790484428406, "learning_rate": 0.001, "loss": 2.9429, "step": 377 }, { "epoch": 0.015991200609188596, "grad_norm": 0.512673556804657, "learning_rate": 0.001, "loss": 2.7608, "step": 378 }, { "epoch": 0.016033505372704968, "grad_norm": 0.32649508118629456, "learning_rate": 0.001, "loss": 2.9877, "step": 379 }, { "epoch": 0.01607581013622134, "grad_norm": 0.45146769285202026, "learning_rate": 0.001, "loss": 3.3331, "step": 380 }, { "epoch": 0.01611811489973771, "grad_norm": 0.7151511907577515, "learning_rate": 0.001, "loss": 2.1693, "step": 381 }, { "epoch": 0.016160419663254082, "grad_norm": 0.4157434403896332, "learning_rate": 0.001, "loss": 2.2174, "step": 382 }, { "epoch": 0.016202724426770454, "grad_norm": 0.4276852607727051, "learning_rate": 0.001, "loss": 2.6254, "step": 383 }, { "epoch": 0.016245029190286826, "grad_norm": 1.40143620967865, "learning_rate": 0.001, "loss": 3.6675, "step": 384 }, { "epoch": 0.016287333953803197, "grad_norm": 2.487680673599243, "learning_rate": 0.001, "loss": 1.7132, "step": 385 }, { "epoch": 0.01632963871731957, "grad_norm": 0.7442037463188171, "learning_rate": 0.001, "loss": 2.906, "step": 386 }, { "epoch": 0.01637194348083594, "grad_norm": 0.5867713093757629, "learning_rate": 0.001, "loss": 2.4791, "step": 387 }, { "epoch": 0.016414248244352315, "grad_norm": 3.043729066848755, "learning_rate": 0.001, "loss": 3.2289, "step": 388 }, { "epoch": 0.016456553007868687, "grad_norm": 1.29078209400177, "learning_rate": 0.001, "loss": 3.2969, "step": 389 }, { "epoch": 0.01649885777138506, "grad_norm": 0.46137046813964844, "learning_rate": 0.001, "loss": 2.3551, "step": 390 }, { "epoch": 0.01654116253490143, "grad_norm": 6.1766510009765625, "learning_rate": 0.001, "loss": 2.9738, "step": 391 }, { "epoch": 0.016583467298417802, "grad_norm": 0.5667681694030762, "learning_rate": 0.001, "loss": 2.3875, "step": 392 }, { "epoch": 0.016625772061934174, "grad_norm": 0.6917624473571777, "learning_rate": 0.001, "loss": 2.6381, "step": 393 }, { "epoch": 0.016668076825450545, "grad_norm": 0.7266809344291687, "learning_rate": 0.001, "loss": 1.8559, "step": 394 }, { "epoch": 0.016710381588966917, "grad_norm": 2.0730323791503906, "learning_rate": 0.001, "loss": 2.3098, "step": 395 }, { "epoch": 0.01675268635248329, "grad_norm": 0.48305729031562805, "learning_rate": 0.001, "loss": 2.1544, "step": 396 }, { "epoch": 0.01679499111599966, "grad_norm": 0.7128873467445374, "learning_rate": 0.001, "loss": 2.3416, "step": 397 }, { "epoch": 0.016837295879516035, "grad_norm": 2.950924873352051, "learning_rate": 0.001, "loss": 3.7996, "step": 398 }, { "epoch": 0.016879600643032407, "grad_norm": 0.8031635284423828, "learning_rate": 0.001, "loss": 2.3264, "step": 399 }, { "epoch": 0.016921905406548778, "grad_norm": 0.6791091561317444, "learning_rate": 0.001, "loss": 2.3023, "step": 400 }, { "epoch": 0.01696421017006515, "grad_norm": 0.445100873708725, "learning_rate": 0.001, "loss": 2.1986, "step": 401 }, { "epoch": 0.01700651493358152, "grad_norm": 0.43192756175994873, "learning_rate": 0.001, "loss": 2.4621, "step": 402 }, { "epoch": 0.017048819697097893, "grad_norm": 0.6217234134674072, "learning_rate": 0.001, "loss": 2.6163, "step": 403 }, { "epoch": 0.017091124460614265, "grad_norm": 7.2630791664123535, "learning_rate": 0.001, "loss": 2.2638, "step": 404 }, { "epoch": 0.017133429224130636, "grad_norm": 0.48048147559165955, "learning_rate": 0.001, "loss": 2.6667, "step": 405 }, { "epoch": 0.017175733987647008, "grad_norm": 1.1334668397903442, "learning_rate": 0.001, "loss": 3.1583, "step": 406 }, { "epoch": 0.01721803875116338, "grad_norm": 0.3903160095214844, "learning_rate": 0.001, "loss": 3.0678, "step": 407 }, { "epoch": 0.017260343514679755, "grad_norm": 0.8138815760612488, "learning_rate": 0.001, "loss": 2.5374, "step": 408 }, { "epoch": 0.017302648278196126, "grad_norm": 0.8442886471748352, "learning_rate": 0.001, "loss": 2.5721, "step": 409 }, { "epoch": 0.017344953041712498, "grad_norm": 0.6825190186500549, "learning_rate": 0.001, "loss": 2.7989, "step": 410 }, { "epoch": 0.01738725780522887, "grad_norm": 0.625694215297699, "learning_rate": 0.001, "loss": 3.3783, "step": 411 }, { "epoch": 0.01742956256874524, "grad_norm": 0.5650992393493652, "learning_rate": 0.001, "loss": 3.2683, "step": 412 }, { "epoch": 0.017471867332261613, "grad_norm": 0.5415240526199341, "learning_rate": 0.001, "loss": 3.2396, "step": 413 }, { "epoch": 0.017514172095777984, "grad_norm": 1.5903420448303223, "learning_rate": 0.001, "loss": 3.1021, "step": 414 }, { "epoch": 0.017556476859294356, "grad_norm": 0.4088042676448822, "learning_rate": 0.001, "loss": 2.5221, "step": 415 }, { "epoch": 0.017598781622810727, "grad_norm": 0.367471307516098, "learning_rate": 0.001, "loss": 2.0654, "step": 416 }, { "epoch": 0.0176410863863271, "grad_norm": 0.6564489603042603, "learning_rate": 0.001, "loss": 2.2583, "step": 417 }, { "epoch": 0.01768339114984347, "grad_norm": 0.681338369846344, "learning_rate": 0.001, "loss": 2.0275, "step": 418 }, { "epoch": 0.017725695913359846, "grad_norm": 0.36002209782600403, "learning_rate": 0.001, "loss": 2.3529, "step": 419 }, { "epoch": 0.017768000676876217, "grad_norm": 0.8276366591453552, "learning_rate": 0.001, "loss": 1.9392, "step": 420 }, { "epoch": 0.01781030544039259, "grad_norm": 0.6230726838111877, "learning_rate": 0.001, "loss": 2.0964, "step": 421 }, { "epoch": 0.01785261020390896, "grad_norm": 0.5634045004844666, "learning_rate": 0.001, "loss": 2.8694, "step": 422 }, { "epoch": 0.017894914967425332, "grad_norm": 0.6966080665588379, "learning_rate": 0.001, "loss": 3.0085, "step": 423 }, { "epoch": 0.017937219730941704, "grad_norm": 1.007291316986084, "learning_rate": 0.001, "loss": 2.2582, "step": 424 }, { "epoch": 0.017979524494458075, "grad_norm": 1.6382334232330322, "learning_rate": 0.001, "loss": 3.448, "step": 425 }, { "epoch": 0.018021829257974447, "grad_norm": 0.46437203884124756, "learning_rate": 0.001, "loss": 2.4852, "step": 426 }, { "epoch": 0.01806413402149082, "grad_norm": 0.6570785641670227, "learning_rate": 0.001, "loss": 2.6239, "step": 427 }, { "epoch": 0.01810643878500719, "grad_norm": 0.501592755317688, "learning_rate": 0.001, "loss": 2.7384, "step": 428 }, { "epoch": 0.018148743548523565, "grad_norm": 0.44482702016830444, "learning_rate": 0.001, "loss": 2.3664, "step": 429 }, { "epoch": 0.018191048312039937, "grad_norm": 0.4519723951816559, "learning_rate": 0.001, "loss": 3.4909, "step": 430 }, { "epoch": 0.01823335307555631, "grad_norm": 0.37969911098480225, "learning_rate": 0.001, "loss": 2.9331, "step": 431 }, { "epoch": 0.01827565783907268, "grad_norm": 0.35430458188056946, "learning_rate": 0.001, "loss": 1.9049, "step": 432 }, { "epoch": 0.01831796260258905, "grad_norm": 0.3929635286331177, "learning_rate": 0.001, "loss": 2.3463, "step": 433 }, { "epoch": 0.018360267366105423, "grad_norm": 0.6036363244056702, "learning_rate": 0.001, "loss": 2.2359, "step": 434 }, { "epoch": 0.018402572129621795, "grad_norm": 0.9288585186004639, "learning_rate": 0.001, "loss": 2.1569, "step": 435 }, { "epoch": 0.018444876893138167, "grad_norm": 0.5024868249893188, "learning_rate": 0.001, "loss": 2.3324, "step": 436 }, { "epoch": 0.018487181656654538, "grad_norm": 0.3713514804840088, "learning_rate": 0.001, "loss": 1.9664, "step": 437 }, { "epoch": 0.01852948642017091, "grad_norm": 0.3861111104488373, "learning_rate": 0.001, "loss": 1.807, "step": 438 }, { "epoch": 0.018571791183687285, "grad_norm": 0.42646774649620056, "learning_rate": 0.001, "loss": 2.1702, "step": 439 }, { "epoch": 0.018614095947203656, "grad_norm": 1.0648521184921265, "learning_rate": 0.001, "loss": 2.8879, "step": 440 }, { "epoch": 0.018656400710720028, "grad_norm": 0.5311859846115112, "learning_rate": 0.001, "loss": 2.2553, "step": 441 }, { "epoch": 0.0186987054742364, "grad_norm": 0.4035789966583252, "learning_rate": 0.001, "loss": 2.5759, "step": 442 }, { "epoch": 0.01874101023775277, "grad_norm": 0.4712333083152771, "learning_rate": 0.001, "loss": 3.1349, "step": 443 }, { "epoch": 0.018783315001269143, "grad_norm": 0.3229919373989105, "learning_rate": 0.001, "loss": 2.1652, "step": 444 }, { "epoch": 0.018825619764785514, "grad_norm": 0.3765084147453308, "learning_rate": 0.001, "loss": 2.9515, "step": 445 }, { "epoch": 0.018867924528301886, "grad_norm": 0.33263400197029114, "learning_rate": 0.001, "loss": 2.7202, "step": 446 }, { "epoch": 0.018910229291818258, "grad_norm": 1.0643253326416016, "learning_rate": 0.001, "loss": 2.129, "step": 447 }, { "epoch": 0.01895253405533463, "grad_norm": 0.5070205926895142, "learning_rate": 0.001, "loss": 2.2968, "step": 448 }, { "epoch": 0.018994838818851, "grad_norm": 0.8920692801475525, "learning_rate": 0.001, "loss": 2.2572, "step": 449 }, { "epoch": 0.019037143582367376, "grad_norm": 0.5254008173942566, "learning_rate": 0.001, "loss": 2.6386, "step": 450 }, { "epoch": 0.019079448345883748, "grad_norm": 0.7540894746780396, "learning_rate": 0.001, "loss": 3.8766, "step": 451 }, { "epoch": 0.01912175310940012, "grad_norm": 3.530369281768799, "learning_rate": 0.001, "loss": 2.6652, "step": 452 }, { "epoch": 0.01916405787291649, "grad_norm": 0.385513573884964, "learning_rate": 0.001, "loss": 2.0282, "step": 453 }, { "epoch": 0.019206362636432862, "grad_norm": 0.3878650367259979, "learning_rate": 0.001, "loss": 2.3184, "step": 454 }, { "epoch": 0.019248667399949234, "grad_norm": 0.47612032294273376, "learning_rate": 0.001, "loss": 2.2334, "step": 455 }, { "epoch": 0.019290972163465606, "grad_norm": 0.4401743412017822, "learning_rate": 0.001, "loss": 1.9957, "step": 456 }, { "epoch": 0.019333276926981977, "grad_norm": 0.4817490875720978, "learning_rate": 0.001, "loss": 2.1504, "step": 457 }, { "epoch": 0.01937558169049835, "grad_norm": 0.461186021566391, "learning_rate": 0.001, "loss": 2.0759, "step": 458 }, { "epoch": 0.01941788645401472, "grad_norm": 0.572381317615509, "learning_rate": 0.001, "loss": 2.7651, "step": 459 }, { "epoch": 0.019460191217531096, "grad_norm": 0.4275853931903839, "learning_rate": 0.001, "loss": 2.5997, "step": 460 }, { "epoch": 0.019502495981047467, "grad_norm": 0.4098559021949768, "learning_rate": 0.001, "loss": 2.641, "step": 461 }, { "epoch": 0.01954480074456384, "grad_norm": 0.4118167757987976, "learning_rate": 0.001, "loss": 1.7914, "step": 462 }, { "epoch": 0.01958710550808021, "grad_norm": 0.46635180711746216, "learning_rate": 0.001, "loss": 3.0353, "step": 463 }, { "epoch": 0.019629410271596582, "grad_norm": 1.0905091762542725, "learning_rate": 0.001, "loss": 2.4585, "step": 464 }, { "epoch": 0.019671715035112954, "grad_norm": 0.42049458622932434, "learning_rate": 0.001, "loss": 2.4426, "step": 465 }, { "epoch": 0.019714019798629325, "grad_norm": 0.5391716361045837, "learning_rate": 0.001, "loss": 2.2722, "step": 466 }, { "epoch": 0.019756324562145697, "grad_norm": 1.7512651681900024, "learning_rate": 0.001, "loss": 1.9385, "step": 467 }, { "epoch": 0.01979862932566207, "grad_norm": 0.33571651577949524, "learning_rate": 0.001, "loss": 2.7864, "step": 468 }, { "epoch": 0.01984093408917844, "grad_norm": 0.4144052267074585, "learning_rate": 0.001, "loss": 2.1114, "step": 469 }, { "epoch": 0.019883238852694815, "grad_norm": 0.39613720774650574, "learning_rate": 0.001, "loss": 2.5951, "step": 470 }, { "epoch": 0.019925543616211187, "grad_norm": 0.5358018279075623, "learning_rate": 0.001, "loss": 3.1943, "step": 471 }, { "epoch": 0.01996784837972756, "grad_norm": 0.5452474355697632, "learning_rate": 0.001, "loss": 2.9164, "step": 472 }, { "epoch": 0.02001015314324393, "grad_norm": 0.3648831248283386, "learning_rate": 0.001, "loss": 2.3703, "step": 473 }, { "epoch": 0.0200524579067603, "grad_norm": 0.3464433252811432, "learning_rate": 0.001, "loss": 2.7931, "step": 474 }, { "epoch": 0.020094762670276673, "grad_norm": 1.0535080432891846, "learning_rate": 0.001, "loss": 2.5576, "step": 475 }, { "epoch": 0.020137067433793045, "grad_norm": 0.2924720346927643, "learning_rate": 0.001, "loss": 2.6184, "step": 476 }, { "epoch": 0.020179372197309416, "grad_norm": 0.388220876455307, "learning_rate": 0.001, "loss": 1.9705, "step": 477 }, { "epoch": 0.020221676960825788, "grad_norm": 1.577642560005188, "learning_rate": 0.001, "loss": 2.0881, "step": 478 }, { "epoch": 0.02026398172434216, "grad_norm": 0.795059323310852, "learning_rate": 0.001, "loss": 2.5844, "step": 479 }, { "epoch": 0.020306286487858535, "grad_norm": 0.7602189183235168, "learning_rate": 0.001, "loss": 2.3127, "step": 480 }, { "epoch": 0.020348591251374906, "grad_norm": 0.44378194212913513, "learning_rate": 0.001, "loss": 2.1581, "step": 481 }, { "epoch": 0.020390896014891278, "grad_norm": 0.5162745714187622, "learning_rate": 0.001, "loss": 2.8813, "step": 482 }, { "epoch": 0.02043320077840765, "grad_norm": 0.39076024293899536, "learning_rate": 0.001, "loss": 2.1318, "step": 483 }, { "epoch": 0.02047550554192402, "grad_norm": 0.3666277825832367, "learning_rate": 0.001, "loss": 2.2807, "step": 484 }, { "epoch": 0.020517810305440393, "grad_norm": 0.5351219177246094, "learning_rate": 0.001, "loss": 2.2821, "step": 485 }, { "epoch": 0.020560115068956764, "grad_norm": 0.3654989004135132, "learning_rate": 0.001, "loss": 2.062, "step": 486 }, { "epoch": 0.020602419832473136, "grad_norm": 0.3425626754760742, "learning_rate": 0.001, "loss": 1.9772, "step": 487 }, { "epoch": 0.020644724595989507, "grad_norm": 6.2479166984558105, "learning_rate": 0.001, "loss": 1.9888, "step": 488 }, { "epoch": 0.02068702935950588, "grad_norm": 0.381218820810318, "learning_rate": 0.001, "loss": 2.1181, "step": 489 }, { "epoch": 0.02072933412302225, "grad_norm": 0.37633004784584045, "learning_rate": 0.001, "loss": 1.9825, "step": 490 }, { "epoch": 0.020771638886538626, "grad_norm": 0.5049352049827576, "learning_rate": 0.001, "loss": 2.3675, "step": 491 }, { "epoch": 0.020813943650054997, "grad_norm": 0.3255545496940613, "learning_rate": 0.001, "loss": 1.4964, "step": 492 }, { "epoch": 0.02085624841357137, "grad_norm": 0.43696948885917664, "learning_rate": 0.001, "loss": 2.4114, "step": 493 }, { "epoch": 0.02089855317708774, "grad_norm": 0.37952399253845215, "learning_rate": 0.001, "loss": 1.6125, "step": 494 }, { "epoch": 0.020940857940604112, "grad_norm": 0.41570955514907837, "learning_rate": 0.001, "loss": 3.0489, "step": 495 }, { "epoch": 0.020983162704120484, "grad_norm": 0.46523749828338623, "learning_rate": 0.001, "loss": 2.5591, "step": 496 }, { "epoch": 0.021025467467636855, "grad_norm": 0.8702875971794128, "learning_rate": 0.001, "loss": 2.686, "step": 497 }, { "epoch": 0.021067772231153227, "grad_norm": 2.071568012237549, "learning_rate": 0.001, "loss": 2.4348, "step": 498 }, { "epoch": 0.0211100769946696, "grad_norm": 0.8501281142234802, "learning_rate": 0.001, "loss": 1.8937, "step": 499 }, { "epoch": 0.02115238175818597, "grad_norm": 22.378358840942383, "learning_rate": 0.001, "loss": 2.139, "step": 500 }, { "epoch": 0.021194686521702345, "grad_norm": 56.75684356689453, "learning_rate": 0.001, "loss": 3.3663, "step": 501 }, { "epoch": 0.021236991285218717, "grad_norm": 0.6316158771514893, "learning_rate": 0.001, "loss": 2.0616, "step": 502 }, { "epoch": 0.02127929604873509, "grad_norm": 0.6567540764808655, "learning_rate": 0.001, "loss": 3.1767, "step": 503 }, { "epoch": 0.02132160081225146, "grad_norm": 3.0959877967834473, "learning_rate": 0.001, "loss": 3.7518, "step": 504 }, { "epoch": 0.02136390557576783, "grad_norm": 0.4583059549331665, "learning_rate": 0.001, "loss": 2.7923, "step": 505 }, { "epoch": 0.021406210339284203, "grad_norm": 0.7765421271324158, "learning_rate": 0.001, "loss": 3.0666, "step": 506 }, { "epoch": 0.021448515102800575, "grad_norm": 0.5920457243919373, "learning_rate": 0.001, "loss": 2.7377, "step": 507 }, { "epoch": 0.021490819866316947, "grad_norm": 0.5121840834617615, "learning_rate": 0.001, "loss": 2.056, "step": 508 }, { "epoch": 0.021533124629833318, "grad_norm": 0.3588508367538452, "learning_rate": 0.001, "loss": 2.4136, "step": 509 }, { "epoch": 0.02157542939334969, "grad_norm": 0.5358933806419373, "learning_rate": 0.001, "loss": 2.65, "step": 510 }, { "epoch": 0.021617734156866065, "grad_norm": 0.45659172534942627, "learning_rate": 0.001, "loss": 3.4225, "step": 511 }, { "epoch": 0.021660038920382436, "grad_norm": 0.4603763520717621, "learning_rate": 0.001, "loss": 2.3112, "step": 512 }, { "epoch": 0.021702343683898808, "grad_norm": 0.4111917316913605, "learning_rate": 0.001, "loss": 2.4298, "step": 513 }, { "epoch": 0.02174464844741518, "grad_norm": 0.3395131826400757, "learning_rate": 0.001, "loss": 2.235, "step": 514 }, { "epoch": 0.02178695321093155, "grad_norm": 0.4213927388191223, "learning_rate": 0.001, "loss": 2.3075, "step": 515 }, { "epoch": 0.021829257974447923, "grad_norm": 0.6425489187240601, "learning_rate": 0.001, "loss": 3.7484, "step": 516 }, { "epoch": 0.021871562737964294, "grad_norm": 0.7505455613136292, "learning_rate": 0.001, "loss": 1.9084, "step": 517 }, { "epoch": 0.021913867501480666, "grad_norm": 0.35921424627304077, "learning_rate": 0.001, "loss": 2.0054, "step": 518 }, { "epoch": 0.021956172264997038, "grad_norm": 0.31863313913345337, "learning_rate": 0.001, "loss": 2.5181, "step": 519 }, { "epoch": 0.02199847702851341, "grad_norm": 0.34935393929481506, "learning_rate": 0.001, "loss": 2.407, "step": 520 }, { "epoch": 0.02204078179202978, "grad_norm": 0.34255489706993103, "learning_rate": 0.001, "loss": 2.0617, "step": 521 }, { "epoch": 0.022083086555546156, "grad_norm": 0.3920708894729614, "learning_rate": 0.001, "loss": 2.9539, "step": 522 }, { "epoch": 0.022125391319062528, "grad_norm": 0.3370257318019867, "learning_rate": 0.001, "loss": 1.8662, "step": 523 }, { "epoch": 0.0221676960825789, "grad_norm": 0.4249412417411804, "learning_rate": 0.001, "loss": 1.8416, "step": 524 }, { "epoch": 0.02221000084609527, "grad_norm": 0.35336267948150635, "learning_rate": 0.001, "loss": 2.1076, "step": 525 }, { "epoch": 0.022252305609611642, "grad_norm": 0.3903971016407013, "learning_rate": 0.001, "loss": 2.2353, "step": 526 }, { "epoch": 0.022294610373128014, "grad_norm": 0.3893096446990967, "learning_rate": 0.001, "loss": 2.6471, "step": 527 }, { "epoch": 0.022336915136644386, "grad_norm": 0.3965369462966919, "learning_rate": 0.001, "loss": 2.8242, "step": 528 }, { "epoch": 0.022379219900160757, "grad_norm": 0.5321797728538513, "learning_rate": 0.001, "loss": 2.8859, "step": 529 }, { "epoch": 0.02242152466367713, "grad_norm": 0.4475747346878052, "learning_rate": 0.001, "loss": 3.02, "step": 530 }, { "epoch": 0.0224638294271935, "grad_norm": 1.6769299507141113, "learning_rate": 0.001, "loss": 3.5111, "step": 531 }, { "epoch": 0.022506134190709876, "grad_norm": 0.40092411637306213, "learning_rate": 0.001, "loss": 1.5887, "step": 532 }, { "epoch": 0.022548438954226247, "grad_norm": 1.476195216178894, "learning_rate": 0.001, "loss": 2.5106, "step": 533 }, { "epoch": 0.02259074371774262, "grad_norm": 1.381837248802185, "learning_rate": 0.001, "loss": 1.9169, "step": 534 }, { "epoch": 0.02263304848125899, "grad_norm": 0.8170017004013062, "learning_rate": 0.001, "loss": 1.8514, "step": 535 }, { "epoch": 0.022675353244775362, "grad_norm": 0.6372575163841248, "learning_rate": 0.001, "loss": 2.8408, "step": 536 }, { "epoch": 0.022717658008291734, "grad_norm": 0.5463494658470154, "learning_rate": 0.001, "loss": 2.3254, "step": 537 }, { "epoch": 0.022759962771808105, "grad_norm": 0.5595137476921082, "learning_rate": 0.001, "loss": 3.0805, "step": 538 }, { "epoch": 0.022802267535324477, "grad_norm": 0.7195737361907959, "learning_rate": 0.001, "loss": 2.4751, "step": 539 }, { "epoch": 0.02284457229884085, "grad_norm": 0.3829958438873291, "learning_rate": 0.001, "loss": 2.0523, "step": 540 }, { "epoch": 0.02288687706235722, "grad_norm": 1.4294495582580566, "learning_rate": 0.001, "loss": 2.5157, "step": 541 }, { "epoch": 0.022929181825873595, "grad_norm": 0.5000084638595581, "learning_rate": 0.001, "loss": 2.1641, "step": 542 }, { "epoch": 0.022971486589389967, "grad_norm": 0.8625162243843079, "learning_rate": 0.001, "loss": 3.6346, "step": 543 }, { "epoch": 0.02301379135290634, "grad_norm": 0.5158907771110535, "learning_rate": 0.001, "loss": 2.6786, "step": 544 }, { "epoch": 0.02305609611642271, "grad_norm": 1.403883457183838, "learning_rate": 0.001, "loss": 2.3591, "step": 545 }, { "epoch": 0.02309840087993908, "grad_norm": 1.1025398969650269, "learning_rate": 0.001, "loss": 4.0568, "step": 546 }, { "epoch": 0.023140705643455453, "grad_norm": 0.7397220730781555, "learning_rate": 0.001, "loss": 2.6037, "step": 547 }, { "epoch": 0.023183010406971825, "grad_norm": 1.0023000240325928, "learning_rate": 0.001, "loss": 2.5716, "step": 548 }, { "epoch": 0.023225315170488196, "grad_norm": 0.9285343885421753, "learning_rate": 0.001, "loss": 2.373, "step": 549 }, { "epoch": 0.023267619934004568, "grad_norm": 0.5404331088066101, "learning_rate": 0.001, "loss": 3.0031, "step": 550 }, { "epoch": 0.02330992469752094, "grad_norm": 0.6443062424659729, "learning_rate": 0.001, "loss": 2.2552, "step": 551 }, { "epoch": 0.02335222946103731, "grad_norm": 0.4823873043060303, "learning_rate": 0.001, "loss": 3.3305, "step": 552 }, { "epoch": 0.023394534224553686, "grad_norm": 0.37984201312065125, "learning_rate": 0.001, "loss": 2.3388, "step": 553 }, { "epoch": 0.023436838988070058, "grad_norm": 2.326221466064453, "learning_rate": 0.001, "loss": 2.7738, "step": 554 }, { "epoch": 0.02347914375158643, "grad_norm": 0.5138685703277588, "learning_rate": 0.001, "loss": 2.2619, "step": 555 }, { "epoch": 0.0235214485151028, "grad_norm": 1.178184151649475, "learning_rate": 0.001, "loss": 2.28, "step": 556 }, { "epoch": 0.023563753278619173, "grad_norm": 0.4544781446456909, "learning_rate": 0.001, "loss": 2.7337, "step": 557 }, { "epoch": 0.023606058042135544, "grad_norm": 0.589658260345459, "learning_rate": 0.001, "loss": 3.867, "step": 558 }, { "epoch": 0.023648362805651916, "grad_norm": 0.7427922487258911, "learning_rate": 0.001, "loss": 2.7503, "step": 559 }, { "epoch": 0.023690667569168287, "grad_norm": 0.3987663686275482, "learning_rate": 0.001, "loss": 1.9502, "step": 560 }, { "epoch": 0.02373297233268466, "grad_norm": 0.45180320739746094, "learning_rate": 0.001, "loss": 2.6439, "step": 561 }, { "epoch": 0.02377527709620103, "grad_norm": 0.677946150302887, "learning_rate": 0.001, "loss": 2.3775, "step": 562 }, { "epoch": 0.023817581859717406, "grad_norm": 0.6633649468421936, "learning_rate": 0.001, "loss": 3.2441, "step": 563 }, { "epoch": 0.023859886623233777, "grad_norm": 0.44053682684898376, "learning_rate": 0.001, "loss": 2.1561, "step": 564 }, { "epoch": 0.02390219138675015, "grad_norm": 0.47400352358818054, "learning_rate": 0.001, "loss": 2.6964, "step": 565 }, { "epoch": 0.02394449615026652, "grad_norm": 0.394731730222702, "learning_rate": 0.001, "loss": 2.7092, "step": 566 }, { "epoch": 0.023986800913782892, "grad_norm": 0.34284764528274536, "learning_rate": 0.001, "loss": 2.6047, "step": 567 }, { "epoch": 0.024029105677299264, "grad_norm": 0.8231601119041443, "learning_rate": 0.001, "loss": 2.5821, "step": 568 }, { "epoch": 0.024071410440815635, "grad_norm": 0.47505271434783936, "learning_rate": 0.001, "loss": 2.4732, "step": 569 }, { "epoch": 0.024113715204332007, "grad_norm": 0.40913763642311096, "learning_rate": 0.001, "loss": 2.8477, "step": 570 }, { "epoch": 0.02415601996784838, "grad_norm": 0.3416093587875366, "learning_rate": 0.001, "loss": 2.3009, "step": 571 }, { "epoch": 0.02419832473136475, "grad_norm": 0.3416256010532379, "learning_rate": 0.001, "loss": 2.4531, "step": 572 }, { "epoch": 0.024240629494881125, "grad_norm": 0.355182409286499, "learning_rate": 0.001, "loss": 2.9286, "step": 573 }, { "epoch": 0.024282934258397497, "grad_norm": 0.5212019085884094, "learning_rate": 0.001, "loss": 2.0053, "step": 574 }, { "epoch": 0.02432523902191387, "grad_norm": 0.4017347991466522, "learning_rate": 0.001, "loss": 3.4724, "step": 575 }, { "epoch": 0.02436754378543024, "grad_norm": 0.34895554184913635, "learning_rate": 0.001, "loss": 2.7992, "step": 576 }, { "epoch": 0.024409848548946612, "grad_norm": 0.3911406993865967, "learning_rate": 0.001, "loss": 2.4038, "step": 577 }, { "epoch": 0.024452153312462983, "grad_norm": 0.38320183753967285, "learning_rate": 0.001, "loss": 3.2096, "step": 578 }, { "epoch": 0.024494458075979355, "grad_norm": 0.4434884786605835, "learning_rate": 0.001, "loss": 2.6099, "step": 579 }, { "epoch": 0.024536762839495727, "grad_norm": 0.34895849227905273, "learning_rate": 0.001, "loss": 2.2875, "step": 580 }, { "epoch": 0.024579067603012098, "grad_norm": 0.38560599088668823, "learning_rate": 0.001, "loss": 2.8941, "step": 581 }, { "epoch": 0.02462137236652847, "grad_norm": 1.215502381324768, "learning_rate": 0.001, "loss": 3.2749, "step": 582 }, { "epoch": 0.02466367713004484, "grad_norm": 0.38922634720802307, "learning_rate": 0.001, "loss": 3.0296, "step": 583 }, { "epoch": 0.024705981893561216, "grad_norm": 0.39207783341407776, "learning_rate": 0.001, "loss": 2.1744, "step": 584 }, { "epoch": 0.024748286657077588, "grad_norm": 0.4072285294532776, "learning_rate": 0.001, "loss": 3.4887, "step": 585 }, { "epoch": 0.02479059142059396, "grad_norm": 0.4496638774871826, "learning_rate": 0.001, "loss": 2.5288, "step": 586 }, { "epoch": 0.02483289618411033, "grad_norm": 0.6576820015907288, "learning_rate": 0.001, "loss": 2.7665, "step": 587 }, { "epoch": 0.024875200947626703, "grad_norm": 0.32960543036460876, "learning_rate": 0.001, "loss": 2.256, "step": 588 }, { "epoch": 0.024917505711143074, "grad_norm": 0.44357311725616455, "learning_rate": 0.001, "loss": 3.0581, "step": 589 }, { "epoch": 0.024959810474659446, "grad_norm": 0.5914167165756226, "learning_rate": 0.001, "loss": 2.8782, "step": 590 }, { "epoch": 0.025002115238175818, "grad_norm": 0.6511535048484802, "learning_rate": 0.001, "loss": 3.7855, "step": 591 }, { "epoch": 0.02504442000169219, "grad_norm": 0.42436838150024414, "learning_rate": 0.001, "loss": 1.9582, "step": 592 }, { "epoch": 0.02508672476520856, "grad_norm": 0.38704079389572144, "learning_rate": 0.001, "loss": 3.0496, "step": 593 }, { "epoch": 0.025129029528724936, "grad_norm": 13.88198184967041, "learning_rate": 0.001, "loss": 2.714, "step": 594 }, { "epoch": 0.025171334292241308, "grad_norm": 0.7542757987976074, "learning_rate": 0.001, "loss": 2.4161, "step": 595 }, { "epoch": 0.02521363905575768, "grad_norm": 0.475299209356308, "learning_rate": 0.001, "loss": 2.1213, "step": 596 }, { "epoch": 0.02525594381927405, "grad_norm": 0.599808931350708, "learning_rate": 0.001, "loss": 2.5554, "step": 597 }, { "epoch": 0.025298248582790422, "grad_norm": 0.45108288526535034, "learning_rate": 0.001, "loss": 2.0113, "step": 598 }, { "epoch": 0.025340553346306794, "grad_norm": 0.471336305141449, "learning_rate": 0.001, "loss": 2.5406, "step": 599 }, { "epoch": 0.025382858109823166, "grad_norm": 0.5292516946792603, "learning_rate": 0.001, "loss": 2.4529, "step": 600 }, { "epoch": 0.025425162873339537, "grad_norm": 0.7199414968490601, "learning_rate": 0.001, "loss": 2.9323, "step": 601 }, { "epoch": 0.02546746763685591, "grad_norm": 0.48330041766166687, "learning_rate": 0.001, "loss": 2.0881, "step": 602 }, { "epoch": 0.02550977240037228, "grad_norm": 0.3391055464744568, "learning_rate": 0.001, "loss": 1.897, "step": 603 }, { "epoch": 0.025552077163888656, "grad_norm": 0.4490862190723419, "learning_rate": 0.001, "loss": 2.6384, "step": 604 }, { "epoch": 0.025594381927405027, "grad_norm": 0.7531641125679016, "learning_rate": 0.001, "loss": 3.3676, "step": 605 }, { "epoch": 0.0256366866909214, "grad_norm": 0.3735019266605377, "learning_rate": 0.001, "loss": 2.1751, "step": 606 }, { "epoch": 0.02567899145443777, "grad_norm": 0.5445840954780579, "learning_rate": 0.001, "loss": 2.6547, "step": 607 }, { "epoch": 0.025721296217954142, "grad_norm": 0.4098416566848755, "learning_rate": 0.001, "loss": 2.2004, "step": 608 }, { "epoch": 0.025763600981470514, "grad_norm": 1.7501581907272339, "learning_rate": 0.001, "loss": 3.0225, "step": 609 }, { "epoch": 0.025805905744986885, "grad_norm": 0.3755500912666321, "learning_rate": 0.001, "loss": 3.3205, "step": 610 }, { "epoch": 0.025848210508503257, "grad_norm": 0.37451812624931335, "learning_rate": 0.001, "loss": 2.1556, "step": 611 }, { "epoch": 0.02589051527201963, "grad_norm": 0.3815903663635254, "learning_rate": 0.001, "loss": 2.7517, "step": 612 }, { "epoch": 0.025932820035536, "grad_norm": 0.327603280544281, "learning_rate": 0.001, "loss": 2.0281, "step": 613 }, { "epoch": 0.02597512479905237, "grad_norm": 0.40311387181282043, "learning_rate": 0.001, "loss": 2.5982, "step": 614 }, { "epoch": 0.026017429562568747, "grad_norm": 0.412945032119751, "learning_rate": 0.001, "loss": 2.4766, "step": 615 }, { "epoch": 0.02605973432608512, "grad_norm": 0.7048538327217102, "learning_rate": 0.001, "loss": 2.8419, "step": 616 }, { "epoch": 0.02610203908960149, "grad_norm": 0.3429849147796631, "learning_rate": 0.001, "loss": 2.3492, "step": 617 }, { "epoch": 0.02614434385311786, "grad_norm": 0.39611124992370605, "learning_rate": 0.001, "loss": 2.307, "step": 618 }, { "epoch": 0.026186648616634233, "grad_norm": 0.8335956931114197, "learning_rate": 0.001, "loss": 1.8694, "step": 619 }, { "epoch": 0.026228953380150605, "grad_norm": 0.4806004762649536, "learning_rate": 0.001, "loss": 4.0374, "step": 620 }, { "epoch": 0.026271258143666976, "grad_norm": 0.5001326203346252, "learning_rate": 0.001, "loss": 2.9919, "step": 621 }, { "epoch": 0.026313562907183348, "grad_norm": 0.32230761647224426, "learning_rate": 0.001, "loss": 1.7651, "step": 622 }, { "epoch": 0.02635586767069972, "grad_norm": 0.351005494594574, "learning_rate": 0.001, "loss": 2.1334, "step": 623 }, { "epoch": 0.02639817243421609, "grad_norm": 0.3018239438533783, "learning_rate": 0.001, "loss": 2.0244, "step": 624 }, { "epoch": 0.026440477197732466, "grad_norm": 0.4218401610851288, "learning_rate": 0.001, "loss": 2.0751, "step": 625 }, { "epoch": 0.026482781961248838, "grad_norm": 0.4317582845687866, "learning_rate": 0.001, "loss": 3.2093, "step": 626 }, { "epoch": 0.02652508672476521, "grad_norm": 0.604067325592041, "learning_rate": 0.001, "loss": 1.9582, "step": 627 }, { "epoch": 0.02656739148828158, "grad_norm": 0.7366513013839722, "learning_rate": 0.001, "loss": 2.2498, "step": 628 }, { "epoch": 0.026609696251797953, "grad_norm": 0.4882150888442993, "learning_rate": 0.001, "loss": 2.1566, "step": 629 }, { "epoch": 0.026652001015314324, "grad_norm": 0.4116131663322449, "learning_rate": 0.001, "loss": 2.4123, "step": 630 }, { "epoch": 0.026694305778830696, "grad_norm": 0.3892541527748108, "learning_rate": 0.001, "loss": 2.4224, "step": 631 }, { "epoch": 0.026736610542347067, "grad_norm": 1.0133605003356934, "learning_rate": 0.001, "loss": 2.1201, "step": 632 }, { "epoch": 0.02677891530586344, "grad_norm": 0.4348303973674774, "learning_rate": 0.001, "loss": 2.1447, "step": 633 }, { "epoch": 0.02682122006937981, "grad_norm": 0.5337515473365784, "learning_rate": 0.001, "loss": 2.2251, "step": 634 }, { "epoch": 0.026863524832896186, "grad_norm": 0.696890652179718, "learning_rate": 0.001, "loss": 2.4125, "step": 635 }, { "epoch": 0.026905829596412557, "grad_norm": 0.4275985360145569, "learning_rate": 0.001, "loss": 2.661, "step": 636 }, { "epoch": 0.02694813435992893, "grad_norm": 0.6423033475875854, "learning_rate": 0.001, "loss": 2.4138, "step": 637 }, { "epoch": 0.0269904391234453, "grad_norm": 0.39524486660957336, "learning_rate": 0.001, "loss": 2.1282, "step": 638 }, { "epoch": 0.027032743886961672, "grad_norm": 0.37805479764938354, "learning_rate": 0.001, "loss": 2.4532, "step": 639 }, { "epoch": 0.027075048650478044, "grad_norm": 0.38140320777893066, "learning_rate": 0.001, "loss": 2.911, "step": 640 }, { "epoch": 0.027117353413994415, "grad_norm": 0.33297136425971985, "learning_rate": 0.001, "loss": 1.9244, "step": 641 }, { "epoch": 0.027159658177510787, "grad_norm": 0.5293309688568115, "learning_rate": 0.001, "loss": 2.1008, "step": 642 }, { "epoch": 0.02720196294102716, "grad_norm": 0.7821404933929443, "learning_rate": 0.001, "loss": 2.258, "step": 643 }, { "epoch": 0.02724426770454353, "grad_norm": 0.3198223412036896, "learning_rate": 0.001, "loss": 2.4415, "step": 644 }, { "epoch": 0.027286572468059902, "grad_norm": 0.38337278366088867, "learning_rate": 0.001, "loss": 3.3935, "step": 645 }, { "epoch": 0.027328877231576277, "grad_norm": 0.34149906039237976, "learning_rate": 0.001, "loss": 2.4166, "step": 646 }, { "epoch": 0.02737118199509265, "grad_norm": 0.3562192916870117, "learning_rate": 0.001, "loss": 1.8025, "step": 647 }, { "epoch": 0.02741348675860902, "grad_norm": 0.42415452003479004, "learning_rate": 0.001, "loss": 4.2058, "step": 648 }, { "epoch": 0.027455791522125392, "grad_norm": 0.5879824757575989, "learning_rate": 0.001, "loss": 1.9211, "step": 649 }, { "epoch": 0.027498096285641763, "grad_norm": 0.27752256393432617, "learning_rate": 0.001, "loss": 2.2914, "step": 650 }, { "epoch": 0.027540401049158135, "grad_norm": 0.399371474981308, "learning_rate": 0.001, "loss": 2.4115, "step": 651 }, { "epoch": 0.027582705812674507, "grad_norm": 0.3457944691181183, "learning_rate": 0.001, "loss": 3.1942, "step": 652 }, { "epoch": 0.027625010576190878, "grad_norm": 0.30785703659057617, "learning_rate": 0.001, "loss": 2.3755, "step": 653 }, { "epoch": 0.02766731533970725, "grad_norm": 2.2121496200561523, "learning_rate": 0.001, "loss": 3.7683, "step": 654 }, { "epoch": 0.02770962010322362, "grad_norm": 0.5387346148490906, "learning_rate": 0.001, "loss": 3.003, "step": 655 }, { "epoch": 0.027751924866739996, "grad_norm": 0.3397753834724426, "learning_rate": 0.001, "loss": 2.1727, "step": 656 }, { "epoch": 0.027794229630256368, "grad_norm": 0.5060691237449646, "learning_rate": 0.001, "loss": 1.9395, "step": 657 }, { "epoch": 0.02783653439377274, "grad_norm": 0.8157616853713989, "learning_rate": 0.001, "loss": 2.0495, "step": 658 }, { "epoch": 0.02787883915728911, "grad_norm": 0.2972865402698517, "learning_rate": 0.001, "loss": 2.1643, "step": 659 }, { "epoch": 0.027921143920805483, "grad_norm": 0.3544447124004364, "learning_rate": 0.001, "loss": 1.8471, "step": 660 }, { "epoch": 0.027963448684321855, "grad_norm": 0.4713522791862488, "learning_rate": 0.001, "loss": 2.3587, "step": 661 }, { "epoch": 0.028005753447838226, "grad_norm": 0.6973052024841309, "learning_rate": 0.001, "loss": 2.1898, "step": 662 }, { "epoch": 0.028048058211354598, "grad_norm": 0.30767449736595154, "learning_rate": 0.001, "loss": 1.8533, "step": 663 }, { "epoch": 0.02809036297487097, "grad_norm": 0.4573628902435303, "learning_rate": 0.001, "loss": 2.3476, "step": 664 }, { "epoch": 0.02813266773838734, "grad_norm": 0.3506143391132355, "learning_rate": 0.001, "loss": 2.35, "step": 665 }, { "epoch": 0.028174972501903716, "grad_norm": 0.3929474353790283, "learning_rate": 0.001, "loss": 2.7286, "step": 666 }, { "epoch": 0.028217277265420088, "grad_norm": 0.5329930782318115, "learning_rate": 0.001, "loss": 2.3529, "step": 667 }, { "epoch": 0.02825958202893646, "grad_norm": 0.41064202785491943, "learning_rate": 0.001, "loss": 2.9625, "step": 668 }, { "epoch": 0.02830188679245283, "grad_norm": 13.079408645629883, "learning_rate": 0.001, "loss": 1.974, "step": 669 }, { "epoch": 0.028344191555969202, "grad_norm": 0.3811262845993042, "learning_rate": 0.001, "loss": 3.0237, "step": 670 }, { "epoch": 0.028386496319485574, "grad_norm": 0.48839476704597473, "learning_rate": 0.001, "loss": 2.8337, "step": 671 }, { "epoch": 0.028428801083001946, "grad_norm": 0.6662212014198303, "learning_rate": 0.001, "loss": 2.888, "step": 672 }, { "epoch": 0.028471105846518317, "grad_norm": 0.3799011707305908, "learning_rate": 0.001, "loss": 3.3166, "step": 673 }, { "epoch": 0.02851341061003469, "grad_norm": 0.4989493191242218, "learning_rate": 0.001, "loss": 2.6335, "step": 674 }, { "epoch": 0.02855571537355106, "grad_norm": 0.3663240075111389, "learning_rate": 0.001, "loss": 2.1542, "step": 675 }, { "epoch": 0.028598020137067432, "grad_norm": 0.3251192271709442, "learning_rate": 0.001, "loss": 2.1317, "step": 676 }, { "epoch": 0.028640324900583807, "grad_norm": 0.36757683753967285, "learning_rate": 0.001, "loss": 2.2327, "step": 677 }, { "epoch": 0.02868262966410018, "grad_norm": 0.34744536876678467, "learning_rate": 0.001, "loss": 2.2255, "step": 678 }, { "epoch": 0.02872493442761655, "grad_norm": 0.6469220519065857, "learning_rate": 0.001, "loss": 2.9052, "step": 679 }, { "epoch": 0.028767239191132922, "grad_norm": 0.3472583591938019, "learning_rate": 0.001, "loss": 2.1404, "step": 680 }, { "epoch": 0.028809543954649294, "grad_norm": 0.3611651062965393, "learning_rate": 0.001, "loss": 2.7106, "step": 681 }, { "epoch": 0.028851848718165665, "grad_norm": 1.0853683948516846, "learning_rate": 0.001, "loss": 2.0889, "step": 682 }, { "epoch": 0.028894153481682037, "grad_norm": 0.5070203542709351, "learning_rate": 0.001, "loss": 2.1454, "step": 683 }, { "epoch": 0.02893645824519841, "grad_norm": 1.9084913730621338, "learning_rate": 0.001, "loss": 2.6574, "step": 684 }, { "epoch": 0.02897876300871478, "grad_norm": 0.4699901044368744, "learning_rate": 0.001, "loss": 2.2962, "step": 685 }, { "epoch": 0.02902106777223115, "grad_norm": 0.5868147611618042, "learning_rate": 0.001, "loss": 1.9462, "step": 686 }, { "epoch": 0.029063372535747527, "grad_norm": 0.4815481901168823, "learning_rate": 0.001, "loss": 2.3967, "step": 687 }, { "epoch": 0.0291056772992639, "grad_norm": 1.597350001335144, "learning_rate": 0.001, "loss": 2.6351, "step": 688 }, { "epoch": 0.02914798206278027, "grad_norm": 0.3565700352191925, "learning_rate": 0.001, "loss": 2.237, "step": 689 }, { "epoch": 0.02919028682629664, "grad_norm": 0.362229585647583, "learning_rate": 0.001, "loss": 2.5553, "step": 690 }, { "epoch": 0.029232591589813013, "grad_norm": 0.3516606390476227, "learning_rate": 0.001, "loss": 2.0371, "step": 691 }, { "epoch": 0.029274896353329385, "grad_norm": 0.3809257447719574, "learning_rate": 0.001, "loss": 2.0524, "step": 692 }, { "epoch": 0.029317201116845756, "grad_norm": 0.3532337248325348, "learning_rate": 0.001, "loss": 2.1929, "step": 693 }, { "epoch": 0.029359505880362128, "grad_norm": 0.5409374833106995, "learning_rate": 0.001, "loss": 2.7353, "step": 694 }, { "epoch": 0.0294018106438785, "grad_norm": 0.3616213798522949, "learning_rate": 0.001, "loss": 1.8789, "step": 695 }, { "epoch": 0.02944411540739487, "grad_norm": 0.31754282116889954, "learning_rate": 0.001, "loss": 2.617, "step": 696 }, { "epoch": 0.029486420170911246, "grad_norm": 0.4599725604057312, "learning_rate": 0.001, "loss": 2.535, "step": 697 }, { "epoch": 0.029528724934427618, "grad_norm": 1.1604878902435303, "learning_rate": 0.001, "loss": 2.6166, "step": 698 }, { "epoch": 0.02957102969794399, "grad_norm": 0.3580074906349182, "learning_rate": 0.001, "loss": 2.5489, "step": 699 }, { "epoch": 0.02961333446146036, "grad_norm": 0.3962548077106476, "learning_rate": 0.001, "loss": 1.9984, "step": 700 }, { "epoch": 0.029655639224976733, "grad_norm": 0.342292845249176, "learning_rate": 0.001, "loss": 2.9691, "step": 701 }, { "epoch": 0.029697943988493104, "grad_norm": 0.4738942086696625, "learning_rate": 0.001, "loss": 2.2661, "step": 702 }, { "epoch": 0.029740248752009476, "grad_norm": 0.4526740610599518, "learning_rate": 0.001, "loss": 2.084, "step": 703 }, { "epoch": 0.029782553515525848, "grad_norm": 0.37444889545440674, "learning_rate": 0.001, "loss": 2.6041, "step": 704 }, { "epoch": 0.02982485827904222, "grad_norm": 0.3665851950645447, "learning_rate": 0.001, "loss": 2.6698, "step": 705 }, { "epoch": 0.02986716304255859, "grad_norm": 0.3661838471889496, "learning_rate": 0.001, "loss": 2.8261, "step": 706 }, { "epoch": 0.029909467806074962, "grad_norm": 9.27115249633789, "learning_rate": 0.001, "loss": 2.6095, "step": 707 }, { "epoch": 0.029951772569591337, "grad_norm": 0.7346689701080322, "learning_rate": 0.001, "loss": 2.503, "step": 708 }, { "epoch": 0.02999407733310771, "grad_norm": 45.74531555175781, "learning_rate": 0.001, "loss": 2.5024, "step": 709 }, { "epoch": 0.03003638209662408, "grad_norm": 0.5024700164794922, "learning_rate": 0.001, "loss": 2.0991, "step": 710 }, { "epoch": 0.030078686860140452, "grad_norm": 0.44415217638015747, "learning_rate": 0.001, "loss": 3.5278, "step": 711 }, { "epoch": 0.030120991623656824, "grad_norm": 0.3363064229488373, "learning_rate": 0.001, "loss": 2.9116, "step": 712 }, { "epoch": 0.030163296387173195, "grad_norm": 0.4409061670303345, "learning_rate": 0.001, "loss": 2.5084, "step": 713 }, { "epoch": 0.030205601150689567, "grad_norm": 0.9301056265830994, "learning_rate": 0.001, "loss": 3.2272, "step": 714 }, { "epoch": 0.03024790591420594, "grad_norm": 0.3383950889110565, "learning_rate": 0.001, "loss": 2.6067, "step": 715 }, { "epoch": 0.03029021067772231, "grad_norm": 2.455920934677124, "learning_rate": 0.001, "loss": 3.4956, "step": 716 }, { "epoch": 0.030332515441238682, "grad_norm": 0.31969428062438965, "learning_rate": 0.001, "loss": 2.7223, "step": 717 }, { "epoch": 0.030374820204755057, "grad_norm": 0.4326886534690857, "learning_rate": 0.001, "loss": 2.7745, "step": 718 }, { "epoch": 0.03041712496827143, "grad_norm": 0.2943534553050995, "learning_rate": 0.001, "loss": 1.881, "step": 719 }, { "epoch": 0.0304594297317878, "grad_norm": 0.31899815797805786, "learning_rate": 0.001, "loss": 1.6605, "step": 720 }, { "epoch": 0.030501734495304172, "grad_norm": 1.0850050449371338, "learning_rate": 0.001, "loss": 2.5878, "step": 721 }, { "epoch": 0.030544039258820543, "grad_norm": 0.4966890215873718, "learning_rate": 0.001, "loss": 3.5874, "step": 722 }, { "epoch": 0.030586344022336915, "grad_norm": 0.44982099533081055, "learning_rate": 0.001, "loss": 2.1516, "step": 723 }, { "epoch": 0.030628648785853287, "grad_norm": 0.5142163038253784, "learning_rate": 0.001, "loss": 2.5352, "step": 724 }, { "epoch": 0.030670953549369658, "grad_norm": 0.4751744270324707, "learning_rate": 0.001, "loss": 2.1874, "step": 725 }, { "epoch": 0.03071325831288603, "grad_norm": 0.5114376544952393, "learning_rate": 0.001, "loss": 2.2583, "step": 726 }, { "epoch": 0.0307555630764024, "grad_norm": 0.4771519601345062, "learning_rate": 0.001, "loss": 2.4049, "step": 727 }, { "epoch": 0.030797867839918776, "grad_norm": 0.3221670389175415, "learning_rate": 0.001, "loss": 1.871, "step": 728 }, { "epoch": 0.030840172603435148, "grad_norm": 0.6503878235816956, "learning_rate": 0.001, "loss": 2.5809, "step": 729 }, { "epoch": 0.03088247736695152, "grad_norm": 0.6677407622337341, "learning_rate": 0.001, "loss": 3.2025, "step": 730 }, { "epoch": 0.03092478213046789, "grad_norm": 0.46367889642715454, "learning_rate": 0.001, "loss": 2.6524, "step": 731 }, { "epoch": 0.030967086893984263, "grad_norm": 0.5044416189193726, "learning_rate": 0.001, "loss": 2.579, "step": 732 }, { "epoch": 0.031009391657500635, "grad_norm": 1.4938032627105713, "learning_rate": 0.001, "loss": 2.0139, "step": 733 }, { "epoch": 0.031051696421017006, "grad_norm": 0.6672400236129761, "learning_rate": 0.001, "loss": 2.7173, "step": 734 }, { "epoch": 0.031094001184533378, "grad_norm": 1.8434799909591675, "learning_rate": 0.001, "loss": 2.4786, "step": 735 }, { "epoch": 0.03113630594804975, "grad_norm": 0.4245956540107727, "learning_rate": 0.001, "loss": 2.2883, "step": 736 }, { "epoch": 0.03117861071156612, "grad_norm": 0.6225207448005676, "learning_rate": 0.001, "loss": 2.9485, "step": 737 }, { "epoch": 0.031220915475082493, "grad_norm": 1.0561782121658325, "learning_rate": 0.001, "loss": 2.7129, "step": 738 }, { "epoch": 0.031263220238598864, "grad_norm": 0.46243247389793396, "learning_rate": 0.001, "loss": 2.3125, "step": 739 }, { "epoch": 0.03130552500211524, "grad_norm": 0.8395326137542725, "learning_rate": 0.001, "loss": 2.0963, "step": 740 }, { "epoch": 0.03134782976563161, "grad_norm": 0.49101972579956055, "learning_rate": 0.001, "loss": 2.9097, "step": 741 }, { "epoch": 0.03139013452914798, "grad_norm": 1.3258086442947388, "learning_rate": 0.001, "loss": 2.8042, "step": 742 }, { "epoch": 0.03143243929266435, "grad_norm": 3.282763719558716, "learning_rate": 0.001, "loss": 2.2387, "step": 743 }, { "epoch": 0.031474744056180726, "grad_norm": 1.5577962398529053, "learning_rate": 0.001, "loss": 3.241, "step": 744 }, { "epoch": 0.0315170488196971, "grad_norm": 0.40975457429885864, "learning_rate": 0.001, "loss": 2.8257, "step": 745 }, { "epoch": 0.03155935358321347, "grad_norm": 0.9148398041725159, "learning_rate": 0.001, "loss": 3.0109, "step": 746 }, { "epoch": 0.031601658346729844, "grad_norm": 0.45257365703582764, "learning_rate": 0.001, "loss": 2.0356, "step": 747 }, { "epoch": 0.03164396311024621, "grad_norm": 1.465282917022705, "learning_rate": 0.001, "loss": 2.6872, "step": 748 }, { "epoch": 0.03168626787376259, "grad_norm": 0.41279497742652893, "learning_rate": 0.001, "loss": 2.452, "step": 749 }, { "epoch": 0.031728572637278955, "grad_norm": 0.48798638582229614, "learning_rate": 0.001, "loss": 2.6751, "step": 750 }, { "epoch": 0.03177087740079533, "grad_norm": 7.742496967315674, "learning_rate": 0.001, "loss": 1.8392, "step": 751 }, { "epoch": 0.0318131821643117, "grad_norm": 0.8577462434768677, "learning_rate": 0.001, "loss": 2.1078, "step": 752 }, { "epoch": 0.031855486927828074, "grad_norm": 0.454903781414032, "learning_rate": 0.001, "loss": 2.7662, "step": 753 }, { "epoch": 0.03189779169134445, "grad_norm": 0.7064248919487, "learning_rate": 0.001, "loss": 2.8522, "step": 754 }, { "epoch": 0.03194009645486082, "grad_norm": 2.0568008422851562, "learning_rate": 0.001, "loss": 2.7169, "step": 755 }, { "epoch": 0.03198240121837719, "grad_norm": 0.5723927617073059, "learning_rate": 0.001, "loss": 2.5269, "step": 756 }, { "epoch": 0.03202470598189356, "grad_norm": 0.37855952978134155, "learning_rate": 0.001, "loss": 1.8934, "step": 757 }, { "epoch": 0.032067010745409935, "grad_norm": 0.6945350170135498, "learning_rate": 0.001, "loss": 2.7209, "step": 758 }, { "epoch": 0.0321093155089263, "grad_norm": 0.45397865772247314, "learning_rate": 0.001, "loss": 2.3489, "step": 759 }, { "epoch": 0.03215162027244268, "grad_norm": 0.36017417907714844, "learning_rate": 0.001, "loss": 1.7156, "step": 760 }, { "epoch": 0.032193925035959046, "grad_norm": 0.5965908765792847, "learning_rate": 0.001, "loss": 2.2643, "step": 761 }, { "epoch": 0.03223622979947542, "grad_norm": 0.4590226709842682, "learning_rate": 0.001, "loss": 2.5763, "step": 762 }, { "epoch": 0.03227853456299179, "grad_norm": 1.0792022943496704, "learning_rate": 0.001, "loss": 2.0132, "step": 763 }, { "epoch": 0.032320839326508165, "grad_norm": 0.4761914908885956, "learning_rate": 0.001, "loss": 1.9762, "step": 764 }, { "epoch": 0.03236314409002454, "grad_norm": 0.5108850598335266, "learning_rate": 0.001, "loss": 2.6186, "step": 765 }, { "epoch": 0.03240544885354091, "grad_norm": 0.47809892892837524, "learning_rate": 0.001, "loss": 2.1778, "step": 766 }, { "epoch": 0.03244775361705728, "grad_norm": 1.9831953048706055, "learning_rate": 0.001, "loss": 3.3741, "step": 767 }, { "epoch": 0.03249005838057365, "grad_norm": 1.5907195806503296, "learning_rate": 0.001, "loss": 2.9347, "step": 768 }, { "epoch": 0.032532363144090026, "grad_norm": 0.43444883823394775, "learning_rate": 0.001, "loss": 1.9747, "step": 769 }, { "epoch": 0.032574667907606394, "grad_norm": 0.4542893171310425, "learning_rate": 0.001, "loss": 2.4121, "step": 770 }, { "epoch": 0.03261697267112277, "grad_norm": 0.41902869939804077, "learning_rate": 0.001, "loss": 2.1587, "step": 771 }, { "epoch": 0.03265927743463914, "grad_norm": 5.790309906005859, "learning_rate": 0.001, "loss": 2.8927, "step": 772 }, { "epoch": 0.03270158219815551, "grad_norm": 0.6397790908813477, "learning_rate": 0.001, "loss": 2.8784, "step": 773 }, { "epoch": 0.03274388696167188, "grad_norm": 1.084983468055725, "learning_rate": 0.001, "loss": 3.2401, "step": 774 }, { "epoch": 0.032786191725188256, "grad_norm": 0.4556136429309845, "learning_rate": 0.001, "loss": 2.6491, "step": 775 }, { "epoch": 0.03282849648870463, "grad_norm": 1.9562443494796753, "learning_rate": 0.001, "loss": 3.3097, "step": 776 }, { "epoch": 0.032870801252221, "grad_norm": 0.3327184319496155, "learning_rate": 0.001, "loss": 2.3438, "step": 777 }, { "epoch": 0.032913106015737374, "grad_norm": 0.29970988631248474, "learning_rate": 0.001, "loss": 1.7802, "step": 778 }, { "epoch": 0.03295541077925374, "grad_norm": 3.424224615097046, "learning_rate": 0.001, "loss": 3.0777, "step": 779 }, { "epoch": 0.03299771554277012, "grad_norm": 0.7678899168968201, "learning_rate": 0.001, "loss": 3.1746, "step": 780 }, { "epoch": 0.033040020306286486, "grad_norm": 11.198646545410156, "learning_rate": 0.001, "loss": 4.0651, "step": 781 }, { "epoch": 0.03308232506980286, "grad_norm": 0.5121546387672424, "learning_rate": 0.001, "loss": 2.7573, "step": 782 }, { "epoch": 0.03312462983331923, "grad_norm": 0.449379563331604, "learning_rate": 0.001, "loss": 2.4772, "step": 783 }, { "epoch": 0.033166934596835604, "grad_norm": 1.0964363813400269, "learning_rate": 0.001, "loss": 2.0905, "step": 784 }, { "epoch": 0.03320923936035198, "grad_norm": 60.26259994506836, "learning_rate": 0.001, "loss": 2.596, "step": 785 }, { "epoch": 0.03325154412386835, "grad_norm": 0.7746313810348511, "learning_rate": 0.001, "loss": 3.0838, "step": 786 }, { "epoch": 0.03329384888738472, "grad_norm": 0.37214627861976624, "learning_rate": 0.001, "loss": 2.0005, "step": 787 }, { "epoch": 0.03333615365090109, "grad_norm": 0.42488759756088257, "learning_rate": 0.001, "loss": 2.6669, "step": 788 }, { "epoch": 0.033378458414417465, "grad_norm": 0.41743898391723633, "learning_rate": 0.001, "loss": 2.7365, "step": 789 }, { "epoch": 0.033420763177933833, "grad_norm": 0.3555799424648285, "learning_rate": 0.001, "loss": 2.8075, "step": 790 }, { "epoch": 0.03346306794145021, "grad_norm": 0.4383140504360199, "learning_rate": 0.001, "loss": 2.7863, "step": 791 }, { "epoch": 0.03350537270496658, "grad_norm": 0.38277480006217957, "learning_rate": 0.001, "loss": 1.9109, "step": 792 }, { "epoch": 0.03354767746848295, "grad_norm": 0.3921913802623749, "learning_rate": 0.001, "loss": 2.5242, "step": 793 }, { "epoch": 0.03358998223199932, "grad_norm": 0.6960230469703674, "learning_rate": 0.001, "loss": 2.7836, "step": 794 }, { "epoch": 0.033632286995515695, "grad_norm": 0.38849663734436035, "learning_rate": 0.001, "loss": 2.4461, "step": 795 }, { "epoch": 0.03367459175903207, "grad_norm": 0.37054547667503357, "learning_rate": 0.001, "loss": 2.7659, "step": 796 }, { "epoch": 0.03371689652254844, "grad_norm": 0.3550194501876831, "learning_rate": 0.001, "loss": 2.3983, "step": 797 }, { "epoch": 0.03375920128606481, "grad_norm": 0.48828840255737305, "learning_rate": 0.001, "loss": 2.2206, "step": 798 }, { "epoch": 0.03380150604958118, "grad_norm": 1.516994833946228, "learning_rate": 0.001, "loss": 1.6824, "step": 799 }, { "epoch": 0.033843810813097557, "grad_norm": 0.5133375525474548, "learning_rate": 0.001, "loss": 2.385, "step": 800 }, { "epoch": 0.033886115576613925, "grad_norm": 1.752416729927063, "learning_rate": 0.001, "loss": 2.2985, "step": 801 }, { "epoch": 0.0339284203401303, "grad_norm": 0.3678469657897949, "learning_rate": 0.001, "loss": 2.3368, "step": 802 }, { "epoch": 0.03397072510364667, "grad_norm": 0.5483198165893555, "learning_rate": 0.001, "loss": 2.8708, "step": 803 }, { "epoch": 0.03401302986716304, "grad_norm": 0.7874423265457153, "learning_rate": 0.001, "loss": 2.8457, "step": 804 }, { "epoch": 0.03405533463067941, "grad_norm": 1.40812087059021, "learning_rate": 0.001, "loss": 2.5861, "step": 805 }, { "epoch": 0.034097639394195786, "grad_norm": 1.082838535308838, "learning_rate": 0.001, "loss": 1.8972, "step": 806 }, { "epoch": 0.03413994415771216, "grad_norm": 1.259365200996399, "learning_rate": 0.001, "loss": 3.0101, "step": 807 }, { "epoch": 0.03418224892122853, "grad_norm": 2.4589128494262695, "learning_rate": 0.001, "loss": 2.2503, "step": 808 }, { "epoch": 0.034224553684744904, "grad_norm": 0.39903524518013, "learning_rate": 0.001, "loss": 3.306, "step": 809 }, { "epoch": 0.03426685844826127, "grad_norm": 1.4496986865997314, "learning_rate": 0.001, "loss": 2.5615, "step": 810 }, { "epoch": 0.03430916321177765, "grad_norm": 1.3750876188278198, "learning_rate": 0.001, "loss": 3.1478, "step": 811 }, { "epoch": 0.034351467975294016, "grad_norm": 0.6277172565460205, "learning_rate": 0.001, "loss": 3.0823, "step": 812 }, { "epoch": 0.03439377273881039, "grad_norm": 1.379515290260315, "learning_rate": 0.001, "loss": 2.5736, "step": 813 }, { "epoch": 0.03443607750232676, "grad_norm": 0.5735925436019897, "learning_rate": 0.001, "loss": 2.4369, "step": 814 }, { "epoch": 0.034478382265843134, "grad_norm": 0.3423369228839874, "learning_rate": 0.001, "loss": 2.274, "step": 815 }, { "epoch": 0.03452068702935951, "grad_norm": 1.0366361141204834, "learning_rate": 0.001, "loss": 2.4531, "step": 816 }, { "epoch": 0.03456299179287588, "grad_norm": 0.40095871686935425, "learning_rate": 0.001, "loss": 1.8635, "step": 817 }, { "epoch": 0.03460529655639225, "grad_norm": 0.44527915120124817, "learning_rate": 0.001, "loss": 2.0943, "step": 818 }, { "epoch": 0.03464760131990862, "grad_norm": 0.6049659848213196, "learning_rate": 0.001, "loss": 1.9373, "step": 819 }, { "epoch": 0.034689906083424996, "grad_norm": 0.3163706064224243, "learning_rate": 0.001, "loss": 2.163, "step": 820 }, { "epoch": 0.034732210846941364, "grad_norm": 0.7788311243057251, "learning_rate": 0.001, "loss": 2.3464, "step": 821 }, { "epoch": 0.03477451561045774, "grad_norm": 0.46176034212112427, "learning_rate": 0.001, "loss": 3.2508, "step": 822 }, { "epoch": 0.03481682037397411, "grad_norm": 0.3383532464504242, "learning_rate": 0.001, "loss": 1.9256, "step": 823 }, { "epoch": 0.03485912513749048, "grad_norm": 0.4552175998687744, "learning_rate": 0.001, "loss": 2.0759, "step": 824 }, { "epoch": 0.03490142990100685, "grad_norm": 1.0574527978897095, "learning_rate": 0.001, "loss": 2.2718, "step": 825 }, { "epoch": 0.034943734664523225, "grad_norm": 0.3664693236351013, "learning_rate": 0.001, "loss": 2.5472, "step": 826 }, { "epoch": 0.0349860394280396, "grad_norm": 1.357131838798523, "learning_rate": 0.001, "loss": 2.9158, "step": 827 }, { "epoch": 0.03502834419155597, "grad_norm": 0.5285866260528564, "learning_rate": 0.001, "loss": 2.1292, "step": 828 }, { "epoch": 0.035070648955072344, "grad_norm": 0.3371189534664154, "learning_rate": 0.001, "loss": 2.5495, "step": 829 }, { "epoch": 0.03511295371858871, "grad_norm": 0.33508920669555664, "learning_rate": 0.001, "loss": 1.6964, "step": 830 }, { "epoch": 0.03515525848210509, "grad_norm": 4.2542643547058105, "learning_rate": 0.001, "loss": 2.4585, "step": 831 }, { "epoch": 0.035197563245621455, "grad_norm": 0.29248476028442383, "learning_rate": 0.001, "loss": 2.0451, "step": 832 }, { "epoch": 0.03523986800913783, "grad_norm": 0.2993159592151642, "learning_rate": 0.001, "loss": 2.3614, "step": 833 }, { "epoch": 0.0352821727726542, "grad_norm": 0.34885692596435547, "learning_rate": 0.001, "loss": 2.5053, "step": 834 }, { "epoch": 0.03532447753617057, "grad_norm": 0.3291875422000885, "learning_rate": 0.001, "loss": 2.3983, "step": 835 }, { "epoch": 0.03536678229968694, "grad_norm": 0.34914126992225647, "learning_rate": 0.001, "loss": 2.0887, "step": 836 }, { "epoch": 0.035409087063203316, "grad_norm": 0.38519594073295593, "learning_rate": 0.001, "loss": 2.7274, "step": 837 }, { "epoch": 0.03545139182671969, "grad_norm": 5.37352991104126, "learning_rate": 0.001, "loss": 2.5118, "step": 838 }, { "epoch": 0.03549369659023606, "grad_norm": 0.6151306629180908, "learning_rate": 0.001, "loss": 2.654, "step": 839 }, { "epoch": 0.035536001353752435, "grad_norm": 0.504226803779602, "learning_rate": 0.001, "loss": 2.674, "step": 840 }, { "epoch": 0.0355783061172688, "grad_norm": 0.3421787619590759, "learning_rate": 0.001, "loss": 2.526, "step": 841 }, { "epoch": 0.03562061088078518, "grad_norm": 0.3042963445186615, "learning_rate": 0.001, "loss": 3.2472, "step": 842 }, { "epoch": 0.035662915644301546, "grad_norm": 0.3438011705875397, "learning_rate": 0.001, "loss": 2.2514, "step": 843 }, { "epoch": 0.03570522040781792, "grad_norm": 0.3311542570590973, "learning_rate": 0.001, "loss": 2.3859, "step": 844 }, { "epoch": 0.03574752517133429, "grad_norm": 0.5320329666137695, "learning_rate": 0.001, "loss": 1.6051, "step": 845 }, { "epoch": 0.035789829934850664, "grad_norm": 0.3768901526927948, "learning_rate": 0.001, "loss": 2.8369, "step": 846 }, { "epoch": 0.03583213469836704, "grad_norm": 1.0556464195251465, "learning_rate": 0.001, "loss": 2.1928, "step": 847 }, { "epoch": 0.03587443946188341, "grad_norm": 0.40488889813423157, "learning_rate": 0.001, "loss": 1.6987, "step": 848 }, { "epoch": 0.03591674422539978, "grad_norm": 2.706275224685669, "learning_rate": 0.001, "loss": 2.6072, "step": 849 }, { "epoch": 0.03595904898891615, "grad_norm": 0.8498132824897766, "learning_rate": 0.001, "loss": 3.2416, "step": 850 }, { "epoch": 0.036001353752432526, "grad_norm": 0.38959065079689026, "learning_rate": 0.001, "loss": 2.3458, "step": 851 }, { "epoch": 0.036043658515948894, "grad_norm": 0.41819003224372864, "learning_rate": 0.001, "loss": 2.1264, "step": 852 }, { "epoch": 0.03608596327946527, "grad_norm": 0.48070141673088074, "learning_rate": 0.001, "loss": 2.6504, "step": 853 }, { "epoch": 0.03612826804298164, "grad_norm": 0.5976713299751282, "learning_rate": 0.001, "loss": 3.0266, "step": 854 }, { "epoch": 0.03617057280649801, "grad_norm": 0.3724801242351532, "learning_rate": 0.001, "loss": 3.0966, "step": 855 }, { "epoch": 0.03621287757001438, "grad_norm": 0.35969069600105286, "learning_rate": 0.001, "loss": 3.1912, "step": 856 }, { "epoch": 0.036255182333530755, "grad_norm": 4.334183692932129, "learning_rate": 0.001, "loss": 3.7079, "step": 857 }, { "epoch": 0.03629748709704713, "grad_norm": 1.282253384590149, "learning_rate": 0.001, "loss": 2.2486, "step": 858 }, { "epoch": 0.0363397918605635, "grad_norm": 0.44701820611953735, "learning_rate": 0.001, "loss": 2.8982, "step": 859 }, { "epoch": 0.036382096624079874, "grad_norm": 0.37352272868156433, "learning_rate": 0.001, "loss": 3.1632, "step": 860 }, { "epoch": 0.03642440138759624, "grad_norm": 0.33013972640037537, "learning_rate": 0.001, "loss": 2.0044, "step": 861 }, { "epoch": 0.03646670615111262, "grad_norm": 0.37367743253707886, "learning_rate": 0.001, "loss": 3.3361, "step": 862 }, { "epoch": 0.036509010914628985, "grad_norm": 0.5262033343315125, "learning_rate": 0.001, "loss": 4.0203, "step": 863 }, { "epoch": 0.03655131567814536, "grad_norm": 0.8196743726730347, "learning_rate": 0.001, "loss": 1.9911, "step": 864 }, { "epoch": 0.03659362044166173, "grad_norm": 1.1237256526947021, "learning_rate": 0.001, "loss": 3.1662, "step": 865 }, { "epoch": 0.0366359252051781, "grad_norm": 2.1621487140655518, "learning_rate": 0.001, "loss": 1.6604, "step": 866 }, { "epoch": 0.03667822996869447, "grad_norm": 0.5566956400871277, "learning_rate": 0.001, "loss": 2.1015, "step": 867 }, { "epoch": 0.03672053473221085, "grad_norm": 0.35224634408950806, "learning_rate": 0.001, "loss": 2.6242, "step": 868 }, { "epoch": 0.03676283949572722, "grad_norm": 0.33982589840888977, "learning_rate": 0.001, "loss": 2.6346, "step": 869 }, { "epoch": 0.03680514425924359, "grad_norm": 0.9707451462745667, "learning_rate": 0.001, "loss": 2.7817, "step": 870 }, { "epoch": 0.036847449022759965, "grad_norm": 0.32144203782081604, "learning_rate": 0.001, "loss": 1.9405, "step": 871 }, { "epoch": 0.03688975378627633, "grad_norm": 0.31683671474456787, "learning_rate": 0.001, "loss": 2.9229, "step": 872 }, { "epoch": 0.03693205854979271, "grad_norm": 0.36165180802345276, "learning_rate": 0.001, "loss": 3.1064, "step": 873 }, { "epoch": 0.036974363313309076, "grad_norm": 0.311084508895874, "learning_rate": 0.001, "loss": 2.1846, "step": 874 }, { "epoch": 0.03701666807682545, "grad_norm": 0.3572933077812195, "learning_rate": 0.001, "loss": 2.8808, "step": 875 }, { "epoch": 0.03705897284034182, "grad_norm": 1.0521661043167114, "learning_rate": 0.001, "loss": 2.187, "step": 876 }, { "epoch": 0.037101277603858195, "grad_norm": 0.41594183444976807, "learning_rate": 0.001, "loss": 2.4503, "step": 877 }, { "epoch": 0.03714358236737457, "grad_norm": 1.126168131828308, "learning_rate": 0.001, "loss": 1.9875, "step": 878 }, { "epoch": 0.03718588713089094, "grad_norm": 0.3451598882675171, "learning_rate": 0.001, "loss": 2.6444, "step": 879 }, { "epoch": 0.03722819189440731, "grad_norm": 0.7697860598564148, "learning_rate": 0.001, "loss": 2.1816, "step": 880 }, { "epoch": 0.03727049665792368, "grad_norm": 0.7067566514015198, "learning_rate": 0.001, "loss": 2.5282, "step": 881 }, { "epoch": 0.037312801421440056, "grad_norm": 0.4576680064201355, "learning_rate": 0.001, "loss": 2.2394, "step": 882 }, { "epoch": 0.037355106184956424, "grad_norm": 0.47805696725845337, "learning_rate": 0.001, "loss": 2.2489, "step": 883 }, { "epoch": 0.0373974109484728, "grad_norm": 18.085206985473633, "learning_rate": 0.001, "loss": 3.4886, "step": 884 }, { "epoch": 0.03743971571198917, "grad_norm": 0.35168933868408203, "learning_rate": 0.001, "loss": 2.0473, "step": 885 }, { "epoch": 0.03748202047550554, "grad_norm": 0.4354495108127594, "learning_rate": 0.001, "loss": 2.42, "step": 886 }, { "epoch": 0.03752432523902191, "grad_norm": 0.4347662329673767, "learning_rate": 0.001, "loss": 2.7022, "step": 887 }, { "epoch": 0.037566630002538286, "grad_norm": 2.830768346786499, "learning_rate": 0.001, "loss": 2.2454, "step": 888 }, { "epoch": 0.03760893476605466, "grad_norm": 0.7625808119773865, "learning_rate": 0.001, "loss": 2.8905, "step": 889 }, { "epoch": 0.03765123952957103, "grad_norm": 0.4779190719127655, "learning_rate": 0.001, "loss": 2.4054, "step": 890 }, { "epoch": 0.037693544293087404, "grad_norm": 0.3955821692943573, "learning_rate": 0.001, "loss": 2.4408, "step": 891 }, { "epoch": 0.03773584905660377, "grad_norm": 1.995564579963684, "learning_rate": 0.001, "loss": 1.9623, "step": 892 }, { "epoch": 0.03777815382012015, "grad_norm": 1.702072024345398, "learning_rate": 0.001, "loss": 2.2569, "step": 893 }, { "epoch": 0.037820458583636515, "grad_norm": 0.8360947370529175, "learning_rate": 0.001, "loss": 2.1785, "step": 894 }, { "epoch": 0.03786276334715289, "grad_norm": 0.4631440043449402, "learning_rate": 0.001, "loss": 3.6477, "step": 895 }, { "epoch": 0.03790506811066926, "grad_norm": 0.4883536100387573, "learning_rate": 0.001, "loss": 2.1303, "step": 896 }, { "epoch": 0.037947372874185634, "grad_norm": 1.4146642684936523, "learning_rate": 0.001, "loss": 2.8125, "step": 897 }, { "epoch": 0.037989677637702, "grad_norm": 33.48441696166992, "learning_rate": 0.001, "loss": 2.3088, "step": 898 }, { "epoch": 0.03803198240121838, "grad_norm": 1.0009665489196777, "learning_rate": 0.001, "loss": 3.2976, "step": 899 }, { "epoch": 0.03807428716473475, "grad_norm": 0.6075432896614075, "learning_rate": 0.001, "loss": 3.0491, "step": 900 }, { "epoch": 0.03811659192825112, "grad_norm": 0.6014549136161804, "learning_rate": 0.001, "loss": 2.6115, "step": 901 }, { "epoch": 0.038158896691767495, "grad_norm": 4.742966175079346, "learning_rate": 0.001, "loss": 3.1309, "step": 902 }, { "epoch": 0.03820120145528386, "grad_norm": 1.2808791399002075, "learning_rate": 0.001, "loss": 2.7616, "step": 903 }, { "epoch": 0.03824350621880024, "grad_norm": 0.6155859231948853, "learning_rate": 0.001, "loss": 3.0479, "step": 904 }, { "epoch": 0.038285810982316607, "grad_norm": 0.4951868951320648, "learning_rate": 0.001, "loss": 2.4222, "step": 905 }, { "epoch": 0.03832811574583298, "grad_norm": 3.6468491554260254, "learning_rate": 0.001, "loss": 2.7734, "step": 906 }, { "epoch": 0.03837042050934935, "grad_norm": 0.6409814953804016, "learning_rate": 0.001, "loss": 2.7391, "step": 907 }, { "epoch": 0.038412725272865725, "grad_norm": 1.037394642829895, "learning_rate": 0.001, "loss": 3.3225, "step": 908 }, { "epoch": 0.0384550300363821, "grad_norm": 0.7472317218780518, "learning_rate": 0.001, "loss": 4.0019, "step": 909 }, { "epoch": 0.03849733479989847, "grad_norm": 0.5227680802345276, "learning_rate": 0.001, "loss": 2.0158, "step": 910 }, { "epoch": 0.03853963956341484, "grad_norm": 0.5070827603340149, "learning_rate": 0.001, "loss": 2.1145, "step": 911 }, { "epoch": 0.03858194432693121, "grad_norm": 1.9357705116271973, "learning_rate": 0.001, "loss": 2.4728, "step": 912 }, { "epoch": 0.038624249090447586, "grad_norm": 3.635646104812622, "learning_rate": 0.001, "loss": 2.7857, "step": 913 }, { "epoch": 0.038666553853963954, "grad_norm": 0.9932013154029846, "learning_rate": 0.001, "loss": 2.1222, "step": 914 }, { "epoch": 0.03870885861748033, "grad_norm": 1.2177910804748535, "learning_rate": 0.001, "loss": 2.5434, "step": 915 }, { "epoch": 0.0387511633809967, "grad_norm": 0.6072919964790344, "learning_rate": 0.001, "loss": 2.2818, "step": 916 }, { "epoch": 0.03879346814451307, "grad_norm": 1.0168566703796387, "learning_rate": 0.001, "loss": 3.5772, "step": 917 }, { "epoch": 0.03883577290802944, "grad_norm": 12.94654655456543, "learning_rate": 0.001, "loss": 3.1531, "step": 918 }, { "epoch": 0.038878077671545816, "grad_norm": 0.9608505964279175, "learning_rate": 0.001, "loss": 4.1271, "step": 919 }, { "epoch": 0.03892038243506219, "grad_norm": 0.9271876215934753, "learning_rate": 0.001, "loss": 3.0042, "step": 920 }, { "epoch": 0.03896268719857856, "grad_norm": 0.7582124471664429, "learning_rate": 0.001, "loss": 2.4802, "step": 921 }, { "epoch": 0.039004991962094934, "grad_norm": 3.8157336711883545, "learning_rate": 0.001, "loss": 4.0794, "step": 922 }, { "epoch": 0.0390472967256113, "grad_norm": 0.4885561764240265, "learning_rate": 0.001, "loss": 3.57, "step": 923 }, { "epoch": 0.03908960148912768, "grad_norm": 0.5436772704124451, "learning_rate": 0.001, "loss": 2.4098, "step": 924 }, { "epoch": 0.039131906252644046, "grad_norm": 0.5220960378646851, "learning_rate": 0.001, "loss": 2.5934, "step": 925 }, { "epoch": 0.03917421101616042, "grad_norm": 0.4250609278678894, "learning_rate": 0.001, "loss": 2.1337, "step": 926 }, { "epoch": 0.03921651577967679, "grad_norm": 0.45072197914123535, "learning_rate": 0.001, "loss": 2.2017, "step": 927 }, { "epoch": 0.039258820543193164, "grad_norm": 0.5734637379646301, "learning_rate": 0.001, "loss": 3.231, "step": 928 }, { "epoch": 0.03930112530670953, "grad_norm": 2.7498257160186768, "learning_rate": 0.001, "loss": 2.0577, "step": 929 }, { "epoch": 0.03934343007022591, "grad_norm": 1.0663455724716187, "learning_rate": 0.001, "loss": 2.2796, "step": 930 }, { "epoch": 0.03938573483374228, "grad_norm": 0.4455733895301819, "learning_rate": 0.001, "loss": 2.7131, "step": 931 }, { "epoch": 0.03942803959725865, "grad_norm": 10.824166297912598, "learning_rate": 0.001, "loss": 3.0632, "step": 932 }, { "epoch": 0.039470344360775025, "grad_norm": 0.9566563367843628, "learning_rate": 0.001, "loss": 2.4747, "step": 933 }, { "epoch": 0.039512649124291394, "grad_norm": 0.694932222366333, "learning_rate": 0.001, "loss": 3.1625, "step": 934 }, { "epoch": 0.03955495388780777, "grad_norm": 2.0650901794433594, "learning_rate": 0.001, "loss": 2.1686, "step": 935 }, { "epoch": 0.03959725865132414, "grad_norm": 0.47802698612213135, "learning_rate": 0.001, "loss": 3.7587, "step": 936 }, { "epoch": 0.03963956341484051, "grad_norm": 1.5605882406234741, "learning_rate": 0.001, "loss": 2.1477, "step": 937 }, { "epoch": 0.03968186817835688, "grad_norm": 3.1594226360321045, "learning_rate": 0.001, "loss": 2.8316, "step": 938 }, { "epoch": 0.039724172941873255, "grad_norm": 1.2251648902893066, "learning_rate": 0.001, "loss": 3.0162, "step": 939 }, { "epoch": 0.03976647770538963, "grad_norm": 0.4009862542152405, "learning_rate": 0.001, "loss": 2.5269, "step": 940 }, { "epoch": 0.039808782468906, "grad_norm": 0.426503449678421, "learning_rate": 0.001, "loss": 2.6667, "step": 941 }, { "epoch": 0.03985108723242237, "grad_norm": 0.5413312315940857, "learning_rate": 0.001, "loss": 2.9823, "step": 942 }, { "epoch": 0.03989339199593874, "grad_norm": 0.4892127811908722, "learning_rate": 0.001, "loss": 1.9651, "step": 943 }, { "epoch": 0.03993569675945512, "grad_norm": 0.47731220722198486, "learning_rate": 0.001, "loss": 1.7261, "step": 944 }, { "epoch": 0.039978001522971485, "grad_norm": 0.44389474391937256, "learning_rate": 0.001, "loss": 2.0669, "step": 945 }, { "epoch": 0.04002030628648786, "grad_norm": 0.6225860714912415, "learning_rate": 0.001, "loss": 2.501, "step": 946 }, { "epoch": 0.04006261105000423, "grad_norm": 0.5539319515228271, "learning_rate": 0.001, "loss": 2.2267, "step": 947 }, { "epoch": 0.0401049158135206, "grad_norm": 0.34385305643081665, "learning_rate": 0.001, "loss": 2.3557, "step": 948 }, { "epoch": 0.04014722057703697, "grad_norm": 0.41491398215293884, "learning_rate": 0.001, "loss": 2.2824, "step": 949 }, { "epoch": 0.040189525340553346, "grad_norm": 0.8437483906745911, "learning_rate": 0.001, "loss": 3.2014, "step": 950 }, { "epoch": 0.04023183010406972, "grad_norm": 0.3619152307510376, "learning_rate": 0.001, "loss": 2.7257, "step": 951 }, { "epoch": 0.04027413486758609, "grad_norm": 0.8494651317596436, "learning_rate": 0.001, "loss": 2.5558, "step": 952 }, { "epoch": 0.040316439631102464, "grad_norm": 1.8171448707580566, "learning_rate": 0.001, "loss": 3.2215, "step": 953 }, { "epoch": 0.04035874439461883, "grad_norm": 1.0728228092193604, "learning_rate": 0.001, "loss": 3.5815, "step": 954 }, { "epoch": 0.04040104915813521, "grad_norm": 3.6907944679260254, "learning_rate": 0.001, "loss": 3.3357, "step": 955 }, { "epoch": 0.040443353921651576, "grad_norm": 0.7766746282577515, "learning_rate": 0.001, "loss": 3.0304, "step": 956 }, { "epoch": 0.04048565868516795, "grad_norm": 1.010837197303772, "learning_rate": 0.001, "loss": 2.4535, "step": 957 }, { "epoch": 0.04052796344868432, "grad_norm": 0.6098260879516602, "learning_rate": 0.001, "loss": 3.0176, "step": 958 }, { "epoch": 0.040570268212200694, "grad_norm": 1.3032490015029907, "learning_rate": 0.001, "loss": 2.7713, "step": 959 }, { "epoch": 0.04061257297571707, "grad_norm": 6.024738788604736, "learning_rate": 0.001, "loss": 2.4266, "step": 960 }, { "epoch": 0.04065487773923344, "grad_norm": 0.8786823749542236, "learning_rate": 0.001, "loss": 2.61, "step": 961 }, { "epoch": 0.04069718250274981, "grad_norm": 0.5354873538017273, "learning_rate": 0.001, "loss": 2.3966, "step": 962 }, { "epoch": 0.04073948726626618, "grad_norm": 0.8180838823318481, "learning_rate": 0.001, "loss": 3.0533, "step": 963 }, { "epoch": 0.040781792029782556, "grad_norm": 0.6282289028167725, "learning_rate": 0.001, "loss": 3.1841, "step": 964 }, { "epoch": 0.040824096793298924, "grad_norm": 0.783220648765564, "learning_rate": 0.001, "loss": 2.6245, "step": 965 }, { "epoch": 0.0408664015568153, "grad_norm": 0.8866094946861267, "learning_rate": 0.001, "loss": 3.3324, "step": 966 }, { "epoch": 0.04090870632033167, "grad_norm": 0.9526342153549194, "learning_rate": 0.001, "loss": 2.7561, "step": 967 }, { "epoch": 0.04095101108384804, "grad_norm": 0.7338128685951233, "learning_rate": 0.001, "loss": 3.8083, "step": 968 }, { "epoch": 0.04099331584736441, "grad_norm": 0.7096972465515137, "learning_rate": 0.001, "loss": 2.2787, "step": 969 }, { "epoch": 0.041035620610880785, "grad_norm": 0.459326833486557, "learning_rate": 0.001, "loss": 2.6205, "step": 970 }, { "epoch": 0.04107792537439716, "grad_norm": 0.5549113154411316, "learning_rate": 0.001, "loss": 3.9624, "step": 971 }, { "epoch": 0.04112023013791353, "grad_norm": 0.5863713026046753, "learning_rate": 0.001, "loss": 2.6585, "step": 972 }, { "epoch": 0.041162534901429904, "grad_norm": 0.435687780380249, "learning_rate": 0.001, "loss": 3.875, "step": 973 }, { "epoch": 0.04120483966494627, "grad_norm": 0.43353310227394104, "learning_rate": 0.001, "loss": 3.5305, "step": 974 }, { "epoch": 0.04124714442846265, "grad_norm": 0.3369213342666626, "learning_rate": 0.001, "loss": 3.0293, "step": 975 }, { "epoch": 0.041289449191979015, "grad_norm": 0.37502485513687134, "learning_rate": 0.001, "loss": 2.6536, "step": 976 }, { "epoch": 0.04133175395549539, "grad_norm": 3.277416229248047, "learning_rate": 0.001, "loss": 2.3418, "step": 977 }, { "epoch": 0.04137405871901176, "grad_norm": 0.30327439308166504, "learning_rate": 0.001, "loss": 2.6483, "step": 978 }, { "epoch": 0.04141636348252813, "grad_norm": 0.3673608601093292, "learning_rate": 0.001, "loss": 2.884, "step": 979 }, { "epoch": 0.0414586682460445, "grad_norm": 0.49414071440696716, "learning_rate": 0.001, "loss": 2.3366, "step": 980 }, { "epoch": 0.041500973009560876, "grad_norm": 2.8874402046203613, "learning_rate": 0.001, "loss": 2.0754, "step": 981 }, { "epoch": 0.04154327777307725, "grad_norm": 0.5763828754425049, "learning_rate": 0.001, "loss": 3.3845, "step": 982 }, { "epoch": 0.04158558253659362, "grad_norm": 0.5612279772758484, "learning_rate": 0.001, "loss": 2.315, "step": 983 }, { "epoch": 0.041627887300109995, "grad_norm": 1.2401258945465088, "learning_rate": 0.001, "loss": 3.435, "step": 984 }, { "epoch": 0.04167019206362636, "grad_norm": 0.9611271023750305, "learning_rate": 0.001, "loss": 2.7392, "step": 985 }, { "epoch": 0.04171249682714274, "grad_norm": 0.4988934397697449, "learning_rate": 0.001, "loss": 2.2294, "step": 986 }, { "epoch": 0.041754801590659106, "grad_norm": 1.0643762350082397, "learning_rate": 0.001, "loss": 2.4758, "step": 987 }, { "epoch": 0.04179710635417548, "grad_norm": 0.3355838656425476, "learning_rate": 0.001, "loss": 2.1455, "step": 988 }, { "epoch": 0.04183941111769185, "grad_norm": 17.996070861816406, "learning_rate": 0.001, "loss": 3.2964, "step": 989 }, { "epoch": 0.041881715881208224, "grad_norm": 4.5193610191345215, "learning_rate": 0.001, "loss": 2.0008, "step": 990 }, { "epoch": 0.0419240206447246, "grad_norm": 0.4277860224246979, "learning_rate": 0.001, "loss": 3.1608, "step": 991 }, { "epoch": 0.04196632540824097, "grad_norm": 0.47893789410591125, "learning_rate": 0.001, "loss": 2.6076, "step": 992 }, { "epoch": 0.04200863017175734, "grad_norm": 0.5489811301231384, "learning_rate": 0.001, "loss": 2.1761, "step": 993 }, { "epoch": 0.04205093493527371, "grad_norm": 0.5169845819473267, "learning_rate": 0.001, "loss": 2.3756, "step": 994 }, { "epoch": 0.042093239698790086, "grad_norm": 1.397851586341858, "learning_rate": 0.001, "loss": 3.6065, "step": 995 }, { "epoch": 0.042135544462306454, "grad_norm": 0.43019843101501465, "learning_rate": 0.001, "loss": 2.4526, "step": 996 }, { "epoch": 0.04217784922582283, "grad_norm": 0.45522528886795044, "learning_rate": 0.001, "loss": 1.9483, "step": 997 }, { "epoch": 0.0422201539893392, "grad_norm": 0.405224472284317, "learning_rate": 0.001, "loss": 2.6085, "step": 998 }, { "epoch": 0.04226245875285557, "grad_norm": 0.6403412818908691, "learning_rate": 0.001, "loss": 3.2959, "step": 999 }, { "epoch": 0.04230476351637194, "grad_norm": 0.4327405095100403, "learning_rate": 0.001, "loss": 2.4858, "step": 1000 }, { "epoch": 0.042347068279888316, "grad_norm": 1.4853562116622925, "learning_rate": 0.001, "loss": 2.0621, "step": 1001 }, { "epoch": 0.04238937304340469, "grad_norm": 0.42528292536735535, "learning_rate": 0.001, "loss": 2.4629, "step": 1002 }, { "epoch": 0.04243167780692106, "grad_norm": 0.30595675110816956, "learning_rate": 0.001, "loss": 2.1818, "step": 1003 }, { "epoch": 0.042473982570437434, "grad_norm": 0.4422406554222107, "learning_rate": 0.001, "loss": 1.949, "step": 1004 }, { "epoch": 0.0425162873339538, "grad_norm": 0.3010110557079315, "learning_rate": 0.001, "loss": 2.3222, "step": 1005 }, { "epoch": 0.04255859209747018, "grad_norm": 12.802899360656738, "learning_rate": 0.001, "loss": 2.388, "step": 1006 }, { "epoch": 0.042600896860986545, "grad_norm": 0.438760906457901, "learning_rate": 0.001, "loss": 2.7597, "step": 1007 }, { "epoch": 0.04264320162450292, "grad_norm": 0.9350125789642334, "learning_rate": 0.001, "loss": 2.3453, "step": 1008 }, { "epoch": 0.04268550638801929, "grad_norm": 0.4918479919433594, "learning_rate": 0.001, "loss": 2.1911, "step": 1009 }, { "epoch": 0.04272781115153566, "grad_norm": 0.47994211316108704, "learning_rate": 0.001, "loss": 2.7534, "step": 1010 }, { "epoch": 0.04277011591505203, "grad_norm": 0.34078550338745117, "learning_rate": 0.001, "loss": 2.1767, "step": 1011 }, { "epoch": 0.04281242067856841, "grad_norm": 0.6441370844841003, "learning_rate": 0.001, "loss": 2.1742, "step": 1012 }, { "epoch": 0.04285472544208478, "grad_norm": 1.00253427028656, "learning_rate": 0.001, "loss": 2.6111, "step": 1013 }, { "epoch": 0.04289703020560115, "grad_norm": 0.42675742506980896, "learning_rate": 0.001, "loss": 3.7831, "step": 1014 }, { "epoch": 0.042939334969117525, "grad_norm": 0.5508003830909729, "learning_rate": 0.001, "loss": 2.6718, "step": 1015 }, { "epoch": 0.04298163973263389, "grad_norm": 0.5830764174461365, "learning_rate": 0.001, "loss": 3.2418, "step": 1016 }, { "epoch": 0.04302394449615027, "grad_norm": 0.5782317519187927, "learning_rate": 0.001, "loss": 2.5268, "step": 1017 }, { "epoch": 0.043066249259666636, "grad_norm": 0.39523276686668396, "learning_rate": 0.001, "loss": 2.3105, "step": 1018 }, { "epoch": 0.04310855402318301, "grad_norm": 0.5414249300956726, "learning_rate": 0.001, "loss": 2.5811, "step": 1019 }, { "epoch": 0.04315085878669938, "grad_norm": 0.5326202511787415, "learning_rate": 0.001, "loss": 2.5664, "step": 1020 }, { "epoch": 0.043193163550215755, "grad_norm": 0.3412122428417206, "learning_rate": 0.001, "loss": 2.6221, "step": 1021 }, { "epoch": 0.04323546831373213, "grad_norm": 0.3936321437358856, "learning_rate": 0.001, "loss": 2.1314, "step": 1022 }, { "epoch": 0.0432777730772485, "grad_norm": 1.0190694332122803, "learning_rate": 0.001, "loss": 2.6642, "step": 1023 }, { "epoch": 0.04332007784076487, "grad_norm": 2.318169116973877, "learning_rate": 0.001, "loss": 3.3849, "step": 1024 }, { "epoch": 0.04336238260428124, "grad_norm": 0.29711923003196716, "learning_rate": 0.001, "loss": 2.2662, "step": 1025 }, { "epoch": 0.043404687367797616, "grad_norm": 0.4354766011238098, "learning_rate": 0.001, "loss": 2.5792, "step": 1026 }, { "epoch": 0.043446992131313984, "grad_norm": 0.3557169735431671, "learning_rate": 0.001, "loss": 3.1672, "step": 1027 }, { "epoch": 0.04348929689483036, "grad_norm": 0.48224884271621704, "learning_rate": 0.001, "loss": 2.0798, "step": 1028 }, { "epoch": 0.04353160165834673, "grad_norm": 0.3722802698612213, "learning_rate": 0.001, "loss": 2.6505, "step": 1029 }, { "epoch": 0.0435739064218631, "grad_norm": 0.40177568793296814, "learning_rate": 0.001, "loss": 2.6731, "step": 1030 }, { "epoch": 0.04361621118537947, "grad_norm": 0.34859174489974976, "learning_rate": 0.001, "loss": 2.0711, "step": 1031 }, { "epoch": 0.043658515948895846, "grad_norm": 0.35582149028778076, "learning_rate": 0.001, "loss": 2.5389, "step": 1032 }, { "epoch": 0.04370082071241222, "grad_norm": 0.35008880496025085, "learning_rate": 0.001, "loss": 2.0354, "step": 1033 }, { "epoch": 0.04374312547592859, "grad_norm": 0.7123366594314575, "learning_rate": 0.001, "loss": 2.6122, "step": 1034 }, { "epoch": 0.043785430239444964, "grad_norm": 0.6655288338661194, "learning_rate": 0.001, "loss": 2.3297, "step": 1035 }, { "epoch": 0.04382773500296133, "grad_norm": 2.140918493270874, "learning_rate": 0.001, "loss": 2.2363, "step": 1036 }, { "epoch": 0.04387003976647771, "grad_norm": 1.8651494979858398, "learning_rate": 0.001, "loss": 3.1583, "step": 1037 }, { "epoch": 0.043912344529994075, "grad_norm": 0.5514461994171143, "learning_rate": 0.001, "loss": 3.5841, "step": 1038 }, { "epoch": 0.04395464929351045, "grad_norm": 0.48088452219963074, "learning_rate": 0.001, "loss": 2.3557, "step": 1039 }, { "epoch": 0.04399695405702682, "grad_norm": 1.1052600145339966, "learning_rate": 0.001, "loss": 2.5092, "step": 1040 }, { "epoch": 0.044039258820543194, "grad_norm": 0.4182952344417572, "learning_rate": 0.001, "loss": 3.016, "step": 1041 }, { "epoch": 0.04408156358405956, "grad_norm": 0.30450159311294556, "learning_rate": 0.001, "loss": 2.5603, "step": 1042 }, { "epoch": 0.04412386834757594, "grad_norm": 0.2754423916339874, "learning_rate": 0.001, "loss": 1.9932, "step": 1043 }, { "epoch": 0.04416617311109231, "grad_norm": 2.453713893890381, "learning_rate": 0.001, "loss": 3.1293, "step": 1044 }, { "epoch": 0.04420847787460868, "grad_norm": 0.606838583946228, "learning_rate": 0.001, "loss": 3.1959, "step": 1045 }, { "epoch": 0.044250782638125055, "grad_norm": 0.580777645111084, "learning_rate": 0.001, "loss": 2.0586, "step": 1046 }, { "epoch": 0.04429308740164142, "grad_norm": 5.384679317474365, "learning_rate": 0.001, "loss": 2.4947, "step": 1047 }, { "epoch": 0.0443353921651578, "grad_norm": 0.3704220652580261, "learning_rate": 0.001, "loss": 2.4293, "step": 1048 }, { "epoch": 0.04437769692867417, "grad_norm": 0.3065285086631775, "learning_rate": 0.001, "loss": 2.7632, "step": 1049 }, { "epoch": 0.04442000169219054, "grad_norm": 0.3605315089225769, "learning_rate": 0.001, "loss": 2.3277, "step": 1050 }, { "epoch": 0.04446230645570691, "grad_norm": 0.46234673261642456, "learning_rate": 0.001, "loss": 2.1531, "step": 1051 }, { "epoch": 0.044504611219223285, "grad_norm": 0.43919745087623596, "learning_rate": 0.001, "loss": 2.3706, "step": 1052 }, { "epoch": 0.04454691598273966, "grad_norm": 0.37155017256736755, "learning_rate": 0.001, "loss": 3.1104, "step": 1053 }, { "epoch": 0.04458922074625603, "grad_norm": 0.31615880131721497, "learning_rate": 0.001, "loss": 3.1019, "step": 1054 }, { "epoch": 0.0446315255097724, "grad_norm": 1.206450343132019, "learning_rate": 0.001, "loss": 3.4134, "step": 1055 }, { "epoch": 0.04467383027328877, "grad_norm": 0.6831628084182739, "learning_rate": 0.001, "loss": 2.5709, "step": 1056 }, { "epoch": 0.044716135036805146, "grad_norm": 0.4207130968570709, "learning_rate": 0.001, "loss": 2.2903, "step": 1057 }, { "epoch": 0.044758439800321514, "grad_norm": 0.3084408938884735, "learning_rate": 0.001, "loss": 2.8131, "step": 1058 }, { "epoch": 0.04480074456383789, "grad_norm": 0.6155023574829102, "learning_rate": 0.001, "loss": 2.3744, "step": 1059 }, { "epoch": 0.04484304932735426, "grad_norm": 0.6356950998306274, "learning_rate": 0.001, "loss": 2.5759, "step": 1060 }, { "epoch": 0.04488535409087063, "grad_norm": 0.41177189350128174, "learning_rate": 0.001, "loss": 3.0073, "step": 1061 }, { "epoch": 0.044927658854387, "grad_norm": 0.30051976442337036, "learning_rate": 0.001, "loss": 2.8997, "step": 1062 }, { "epoch": 0.044969963617903376, "grad_norm": 0.28739258646965027, "learning_rate": 0.001, "loss": 1.8987, "step": 1063 }, { "epoch": 0.04501226838141975, "grad_norm": 0.950710117816925, "learning_rate": 0.001, "loss": 2.3666, "step": 1064 }, { "epoch": 0.04505457314493612, "grad_norm": 0.5854257941246033, "learning_rate": 0.001, "loss": 2.323, "step": 1065 }, { "epoch": 0.045096877908452494, "grad_norm": 0.3358246088027954, "learning_rate": 0.001, "loss": 2.5187, "step": 1066 }, { "epoch": 0.04513918267196886, "grad_norm": 1.6332861185073853, "learning_rate": 0.001, "loss": 2.3758, "step": 1067 }, { "epoch": 0.04518148743548524, "grad_norm": 1.7713162899017334, "learning_rate": 0.001, "loss": 2.3417, "step": 1068 }, { "epoch": 0.045223792199001606, "grad_norm": 0.3794574439525604, "learning_rate": 0.001, "loss": 2.0666, "step": 1069 }, { "epoch": 0.04526609696251798, "grad_norm": 0.2956782281398773, "learning_rate": 0.001, "loss": 2.8441, "step": 1070 }, { "epoch": 0.04530840172603435, "grad_norm": 0.2897772192955017, "learning_rate": 0.001, "loss": 2.3111, "step": 1071 }, { "epoch": 0.045350706489550724, "grad_norm": 4.40648078918457, "learning_rate": 0.001, "loss": 2.0011, "step": 1072 }, { "epoch": 0.04539301125306709, "grad_norm": 7.493185520172119, "learning_rate": 0.001, "loss": 3.1528, "step": 1073 }, { "epoch": 0.04543531601658347, "grad_norm": 0.34881994128227234, "learning_rate": 0.001, "loss": 2.0333, "step": 1074 }, { "epoch": 0.04547762078009984, "grad_norm": 0.9052633047103882, "learning_rate": 0.001, "loss": 2.4499, "step": 1075 }, { "epoch": 0.04551992554361621, "grad_norm": 0.7866429090499878, "learning_rate": 0.001, "loss": 2.5423, "step": 1076 }, { "epoch": 0.045562230307132585, "grad_norm": 0.8085727691650391, "learning_rate": 0.001, "loss": 2.8811, "step": 1077 }, { "epoch": 0.045604535070648954, "grad_norm": 0.517099142074585, "learning_rate": 0.001, "loss": 2.1502, "step": 1078 }, { "epoch": 0.04564683983416533, "grad_norm": 0.5327157378196716, "learning_rate": 0.001, "loss": 3.1626, "step": 1079 }, { "epoch": 0.0456891445976817, "grad_norm": 0.28011730313301086, "learning_rate": 0.001, "loss": 1.6698, "step": 1080 }, { "epoch": 0.04573144936119807, "grad_norm": 2.291832208633423, "learning_rate": 0.001, "loss": 2.2235, "step": 1081 }, { "epoch": 0.04577375412471444, "grad_norm": 0.5037325024604797, "learning_rate": 0.001, "loss": 2.2568, "step": 1082 }, { "epoch": 0.045816058888230815, "grad_norm": 0.7182853817939758, "learning_rate": 0.001, "loss": 2.6144, "step": 1083 }, { "epoch": 0.04585836365174719, "grad_norm": 0.3494111895561218, "learning_rate": 0.001, "loss": 3.3147, "step": 1084 }, { "epoch": 0.04590066841526356, "grad_norm": 0.35580259561538696, "learning_rate": 0.001, "loss": 3.1962, "step": 1085 }, { "epoch": 0.04594297317877993, "grad_norm": 0.5036289691925049, "learning_rate": 0.001, "loss": 2.1262, "step": 1086 }, { "epoch": 0.0459852779422963, "grad_norm": 0.5229755640029907, "learning_rate": 0.001, "loss": 1.7954, "step": 1087 }, { "epoch": 0.04602758270581268, "grad_norm": 0.5959165096282959, "learning_rate": 0.001, "loss": 3.3961, "step": 1088 }, { "epoch": 0.046069887469329045, "grad_norm": 0.43716666102409363, "learning_rate": 0.001, "loss": 2.0928, "step": 1089 }, { "epoch": 0.04611219223284542, "grad_norm": 0.38793283700942993, "learning_rate": 0.001, "loss": 2.1611, "step": 1090 }, { "epoch": 0.04615449699636179, "grad_norm": 0.3964957892894745, "learning_rate": 0.001, "loss": 2.4315, "step": 1091 }, { "epoch": 0.04619680175987816, "grad_norm": 0.7259196043014526, "learning_rate": 0.001, "loss": 2.5655, "step": 1092 }, { "epoch": 0.04623910652339453, "grad_norm": 20.704673767089844, "learning_rate": 0.001, "loss": 2.1808, "step": 1093 }, { "epoch": 0.046281411286910906, "grad_norm": 1.8599133491516113, "learning_rate": 0.001, "loss": 3.0483, "step": 1094 }, { "epoch": 0.04632371605042728, "grad_norm": 0.3958137035369873, "learning_rate": 0.001, "loss": 2.8665, "step": 1095 }, { "epoch": 0.04636602081394365, "grad_norm": 0.46354708075523376, "learning_rate": 0.001, "loss": 2.1897, "step": 1096 }, { "epoch": 0.046408325577460025, "grad_norm": 0.5653846263885498, "learning_rate": 0.001, "loss": 2.2344, "step": 1097 }, { "epoch": 0.04645063034097639, "grad_norm": 0.3511918783187866, "learning_rate": 0.001, "loss": 1.8777, "step": 1098 }, { "epoch": 0.04649293510449277, "grad_norm": 0.4230622351169586, "learning_rate": 0.001, "loss": 2.2688, "step": 1099 }, { "epoch": 0.046535239868009136, "grad_norm": 0.5649506449699402, "learning_rate": 0.001, "loss": 1.8348, "step": 1100 }, { "epoch": 0.04657754463152551, "grad_norm": 0.3206387162208557, "learning_rate": 0.001, "loss": 2.2039, "step": 1101 }, { "epoch": 0.04661984939504188, "grad_norm": 0.3505328297615051, "learning_rate": 0.001, "loss": 2.8421, "step": 1102 }, { "epoch": 0.046662154158558254, "grad_norm": 0.8784352540969849, "learning_rate": 0.001, "loss": 3.6394, "step": 1103 }, { "epoch": 0.04670445892207462, "grad_norm": 0.3867740035057068, "learning_rate": 0.001, "loss": 2.2488, "step": 1104 }, { "epoch": 0.046746763685591, "grad_norm": 0.3754059374332428, "learning_rate": 0.001, "loss": 2.4647, "step": 1105 }, { "epoch": 0.04678906844910737, "grad_norm": 0.3429076671600342, "learning_rate": 0.001, "loss": 2.658, "step": 1106 }, { "epoch": 0.04683137321262374, "grad_norm": 0.35917940735816956, "learning_rate": 0.001, "loss": 2.8441, "step": 1107 }, { "epoch": 0.046873677976140116, "grad_norm": 0.4225330948829651, "learning_rate": 0.001, "loss": 3.4419, "step": 1108 }, { "epoch": 0.046915982739656484, "grad_norm": 0.2866974174976349, "learning_rate": 0.001, "loss": 1.7171, "step": 1109 }, { "epoch": 0.04695828750317286, "grad_norm": 0.655272364616394, "learning_rate": 0.001, "loss": 2.1575, "step": 1110 }, { "epoch": 0.04700059226668923, "grad_norm": 0.32389822602272034, "learning_rate": 0.001, "loss": 2.9878, "step": 1111 }, { "epoch": 0.0470428970302056, "grad_norm": 0.33824819326400757, "learning_rate": 0.001, "loss": 2.6459, "step": 1112 }, { "epoch": 0.04708520179372197, "grad_norm": 0.2966809570789337, "learning_rate": 0.001, "loss": 2.6308, "step": 1113 }, { "epoch": 0.047127506557238345, "grad_norm": 0.3179897665977478, "learning_rate": 0.001, "loss": 2.525, "step": 1114 }, { "epoch": 0.04716981132075472, "grad_norm": 1.4565528631210327, "learning_rate": 0.001, "loss": 2.2061, "step": 1115 }, { "epoch": 0.04721211608427109, "grad_norm": 0.4614794850349426, "learning_rate": 0.001, "loss": 3.2028, "step": 1116 }, { "epoch": 0.047254420847787464, "grad_norm": 0.3581281900405884, "learning_rate": 0.001, "loss": 2.3384, "step": 1117 }, { "epoch": 0.04729672561130383, "grad_norm": 0.3326326310634613, "learning_rate": 0.001, "loss": 4.0398, "step": 1118 }, { "epoch": 0.04733903037482021, "grad_norm": 0.3846263289451599, "learning_rate": 0.001, "loss": 2.437, "step": 1119 }, { "epoch": 0.047381335138336575, "grad_norm": 0.3471648395061493, "learning_rate": 0.001, "loss": 2.8839, "step": 1120 }, { "epoch": 0.04742363990185295, "grad_norm": 0.8793920874595642, "learning_rate": 0.001, "loss": 2.2851, "step": 1121 }, { "epoch": 0.04746594466536932, "grad_norm": 0.4244942367076874, "learning_rate": 0.001, "loss": 2.1305, "step": 1122 }, { "epoch": 0.04750824942888569, "grad_norm": 0.272441565990448, "learning_rate": 0.001, "loss": 2.3609, "step": 1123 }, { "epoch": 0.04755055419240206, "grad_norm": 0.5901132225990295, "learning_rate": 0.001, "loss": 2.5462, "step": 1124 }, { "epoch": 0.047592858955918436, "grad_norm": 1.15731680393219, "learning_rate": 0.001, "loss": 2.0628, "step": 1125 }, { "epoch": 0.04763516371943481, "grad_norm": 0.35768887400627136, "learning_rate": 0.001, "loss": 2.415, "step": 1126 }, { "epoch": 0.04767746848295118, "grad_norm": 0.5913015604019165, "learning_rate": 0.001, "loss": 2.6869, "step": 1127 }, { "epoch": 0.047719773246467555, "grad_norm": 0.7240321040153503, "learning_rate": 0.001, "loss": 2.7932, "step": 1128 }, { "epoch": 0.04776207800998392, "grad_norm": 0.29713061451911926, "learning_rate": 0.001, "loss": 1.7194, "step": 1129 }, { "epoch": 0.0478043827735003, "grad_norm": 0.30949392914772034, "learning_rate": 0.001, "loss": 2.3119, "step": 1130 }, { "epoch": 0.047846687537016666, "grad_norm": 0.7915238738059998, "learning_rate": 0.001, "loss": 2.2115, "step": 1131 }, { "epoch": 0.04788899230053304, "grad_norm": 0.33516794443130493, "learning_rate": 0.001, "loss": 2.4067, "step": 1132 }, { "epoch": 0.04793129706404941, "grad_norm": 0.4222777783870697, "learning_rate": 0.001, "loss": 2.3102, "step": 1133 }, { "epoch": 0.047973601827565784, "grad_norm": 0.3512939512729645, "learning_rate": 0.001, "loss": 4.4249, "step": 1134 }, { "epoch": 0.04801590659108215, "grad_norm": 0.7207156419754028, "learning_rate": 0.001, "loss": 2.2423, "step": 1135 }, { "epoch": 0.04805821135459853, "grad_norm": 0.3659185767173767, "learning_rate": 0.001, "loss": 2.3806, "step": 1136 }, { "epoch": 0.0481005161181149, "grad_norm": 0.30134308338165283, "learning_rate": 0.001, "loss": 2.3715, "step": 1137 }, { "epoch": 0.04814282088163127, "grad_norm": 0.5804815888404846, "learning_rate": 0.001, "loss": 4.0196, "step": 1138 }, { "epoch": 0.048185125645147646, "grad_norm": 0.2848808169364929, "learning_rate": 0.001, "loss": 2.0329, "step": 1139 }, { "epoch": 0.048227430408664014, "grad_norm": 0.5376827716827393, "learning_rate": 0.001, "loss": 2.5122, "step": 1140 }, { "epoch": 0.04826973517218039, "grad_norm": 0.393577516078949, "learning_rate": 0.001, "loss": 3.0434, "step": 1141 }, { "epoch": 0.04831203993569676, "grad_norm": 1.210608959197998, "learning_rate": 0.001, "loss": 2.1217, "step": 1142 }, { "epoch": 0.04835434469921313, "grad_norm": 0.9917371273040771, "learning_rate": 0.001, "loss": 4.2548, "step": 1143 }, { "epoch": 0.0483966494627295, "grad_norm": 4.173092365264893, "learning_rate": 0.001, "loss": 2.1519, "step": 1144 }, { "epoch": 0.048438954226245876, "grad_norm": 0.4106460213661194, "learning_rate": 0.001, "loss": 3.6214, "step": 1145 }, { "epoch": 0.04848125898976225, "grad_norm": 1.5029926300048828, "learning_rate": 0.001, "loss": 3.2724, "step": 1146 }, { "epoch": 0.04852356375327862, "grad_norm": 0.5879022479057312, "learning_rate": 0.001, "loss": 1.8398, "step": 1147 }, { "epoch": 0.048565868516794994, "grad_norm": 0.36776503920555115, "learning_rate": 0.001, "loss": 2.2025, "step": 1148 }, { "epoch": 0.04860817328031136, "grad_norm": 0.9697319269180298, "learning_rate": 0.001, "loss": 2.4534, "step": 1149 }, { "epoch": 0.04865047804382774, "grad_norm": 0.640369176864624, "learning_rate": 0.001, "loss": 4.1821, "step": 1150 }, { "epoch": 0.048692782807344105, "grad_norm": 0.5271949768066406, "learning_rate": 0.001, "loss": 2.5097, "step": 1151 }, { "epoch": 0.04873508757086048, "grad_norm": 0.498152494430542, "learning_rate": 0.001, "loss": 2.097, "step": 1152 }, { "epoch": 0.04877739233437685, "grad_norm": 0.45384323596954346, "learning_rate": 0.001, "loss": 2.8774, "step": 1153 }, { "epoch": 0.048819697097893223, "grad_norm": 0.6459994912147522, "learning_rate": 0.001, "loss": 2.267, "step": 1154 }, { "epoch": 0.04886200186140959, "grad_norm": 0.8564043045043945, "learning_rate": 0.001, "loss": 2.7848, "step": 1155 }, { "epoch": 0.04890430662492597, "grad_norm": 0.33357423543930054, "learning_rate": 0.001, "loss": 2.5382, "step": 1156 }, { "epoch": 0.04894661138844234, "grad_norm": 1.739807367324829, "learning_rate": 0.001, "loss": 2.3483, "step": 1157 }, { "epoch": 0.04898891615195871, "grad_norm": 0.6629720330238342, "learning_rate": 0.001, "loss": 2.345, "step": 1158 }, { "epoch": 0.049031220915475085, "grad_norm": 0.42855751514434814, "learning_rate": 0.001, "loss": 2.7333, "step": 1159 }, { "epoch": 0.04907352567899145, "grad_norm": 0.5585654973983765, "learning_rate": 0.001, "loss": 2.7187, "step": 1160 }, { "epoch": 0.04911583044250783, "grad_norm": 1.3134695291519165, "learning_rate": 0.001, "loss": 2.2319, "step": 1161 }, { "epoch": 0.049158135206024196, "grad_norm": 16.80616569519043, "learning_rate": 0.001, "loss": 3.13, "step": 1162 }, { "epoch": 0.04920043996954057, "grad_norm": 0.4731372594833374, "learning_rate": 0.001, "loss": 2.9317, "step": 1163 }, { "epoch": 0.04924274473305694, "grad_norm": 0.7382914423942566, "learning_rate": 0.001, "loss": 3.251, "step": 1164 }, { "epoch": 0.049285049496573315, "grad_norm": 1.2850393056869507, "learning_rate": 0.001, "loss": 2.4139, "step": 1165 }, { "epoch": 0.04932735426008968, "grad_norm": 0.45804908871650696, "learning_rate": 0.001, "loss": 2.1216, "step": 1166 }, { "epoch": 0.04936965902360606, "grad_norm": 0.41841673851013184, "learning_rate": 0.001, "loss": 2.3593, "step": 1167 }, { "epoch": 0.04941196378712243, "grad_norm": 0.3579320013523102, "learning_rate": 0.001, "loss": 2.733, "step": 1168 }, { "epoch": 0.0494542685506388, "grad_norm": 0.460112988948822, "learning_rate": 0.001, "loss": 1.9814, "step": 1169 }, { "epoch": 0.049496573314155176, "grad_norm": 0.6798352599143982, "learning_rate": 0.001, "loss": 2.1811, "step": 1170 }, { "epoch": 0.049538878077671544, "grad_norm": 13.641300201416016, "learning_rate": 0.001, "loss": 2.4548, "step": 1171 }, { "epoch": 0.04958118284118792, "grad_norm": 1.915924310684204, "learning_rate": 0.001, "loss": 2.3664, "step": 1172 }, { "epoch": 0.04962348760470429, "grad_norm": 0.34269511699676514, "learning_rate": 0.001, "loss": 2.571, "step": 1173 }, { "epoch": 0.04966579236822066, "grad_norm": 1.606256127357483, "learning_rate": 0.001, "loss": 1.7933, "step": 1174 }, { "epoch": 0.04970809713173703, "grad_norm": 0.6439388394355774, "learning_rate": 0.001, "loss": 2.7446, "step": 1175 }, { "epoch": 0.049750401895253406, "grad_norm": 0.554309606552124, "learning_rate": 0.001, "loss": 2.9953, "step": 1176 }, { "epoch": 0.04979270665876978, "grad_norm": 0.36114075779914856, "learning_rate": 0.001, "loss": 2.1207, "step": 1177 }, { "epoch": 0.04983501142228615, "grad_norm": 0.36802929639816284, "learning_rate": 0.001, "loss": 2.7589, "step": 1178 }, { "epoch": 0.049877316185802524, "grad_norm": 0.38982877135276794, "learning_rate": 0.001, "loss": 2.7034, "step": 1179 }, { "epoch": 0.04991962094931889, "grad_norm": 0.46610337495803833, "learning_rate": 0.001, "loss": 2.2733, "step": 1180 }, { "epoch": 0.04996192571283527, "grad_norm": 1.6383532285690308, "learning_rate": 0.001, "loss": 2.6442, "step": 1181 }, { "epoch": 0.050004230476351635, "grad_norm": 0.3796882927417755, "learning_rate": 0.001, "loss": 2.8773, "step": 1182 }, { "epoch": 0.05004653523986801, "grad_norm": 0.3811698257923126, "learning_rate": 0.001, "loss": 2.2252, "step": 1183 }, { "epoch": 0.05008884000338438, "grad_norm": 0.6414541602134705, "learning_rate": 0.001, "loss": 2.8175, "step": 1184 }, { "epoch": 0.050131144766900754, "grad_norm": 0.8355805277824402, "learning_rate": 0.001, "loss": 2.3944, "step": 1185 }, { "epoch": 0.05017344953041712, "grad_norm": 1.5542443990707397, "learning_rate": 0.001, "loss": 2.544, "step": 1186 }, { "epoch": 0.0502157542939335, "grad_norm": 1.7193444967269897, "learning_rate": 0.001, "loss": 2.0804, "step": 1187 }, { "epoch": 0.05025805905744987, "grad_norm": 1.0862737894058228, "learning_rate": 0.001, "loss": 2.5055, "step": 1188 }, { "epoch": 0.05030036382096624, "grad_norm": 0.4850480258464813, "learning_rate": 0.001, "loss": 3.1904, "step": 1189 }, { "epoch": 0.050342668584482615, "grad_norm": 0.5566414594650269, "learning_rate": 0.001, "loss": 1.9222, "step": 1190 }, { "epoch": 0.05038497334799898, "grad_norm": 7.770880699157715, "learning_rate": 0.001, "loss": 3.1213, "step": 1191 }, { "epoch": 0.05042727811151536, "grad_norm": 0.9301654100418091, "learning_rate": 0.001, "loss": 2.1625, "step": 1192 }, { "epoch": 0.05046958287503173, "grad_norm": 0.28750085830688477, "learning_rate": 0.001, "loss": 2.0768, "step": 1193 }, { "epoch": 0.0505118876385481, "grad_norm": 0.33357667922973633, "learning_rate": 0.001, "loss": 1.2852, "step": 1194 }, { "epoch": 0.05055419240206447, "grad_norm": 0.4574757218360901, "learning_rate": 0.001, "loss": 1.9372, "step": 1195 }, { "epoch": 0.050596497165580845, "grad_norm": 2.3129217624664307, "learning_rate": 0.001, "loss": 3.4428, "step": 1196 }, { "epoch": 0.05063880192909721, "grad_norm": 11.059386253356934, "learning_rate": 0.001, "loss": 2.063, "step": 1197 }, { "epoch": 0.05068110669261359, "grad_norm": 0.5615996718406677, "learning_rate": 0.001, "loss": 2.9518, "step": 1198 }, { "epoch": 0.05072341145612996, "grad_norm": 1.5756652355194092, "learning_rate": 0.001, "loss": 2.7033, "step": 1199 }, { "epoch": 0.05076571621964633, "grad_norm": 0.5411480069160461, "learning_rate": 0.001, "loss": 2.3017, "step": 1200 }, { "epoch": 0.050808020983162706, "grad_norm": 0.41354984045028687, "learning_rate": 0.001, "loss": 3.1848, "step": 1201 }, { "epoch": 0.050850325746679075, "grad_norm": 1.6077523231506348, "learning_rate": 0.001, "loss": 3.4731, "step": 1202 }, { "epoch": 0.05089263051019545, "grad_norm": 0.8017259240150452, "learning_rate": 0.001, "loss": 1.9944, "step": 1203 }, { "epoch": 0.05093493527371182, "grad_norm": 1.3641940355300903, "learning_rate": 0.001, "loss": 3.2556, "step": 1204 }, { "epoch": 0.05097724003722819, "grad_norm": 0.9652554392814636, "learning_rate": 0.001, "loss": 3.2552, "step": 1205 }, { "epoch": 0.05101954480074456, "grad_norm": 0.44206491112709045, "learning_rate": 0.001, "loss": 2.1191, "step": 1206 }, { "epoch": 0.051061849564260936, "grad_norm": 0.7324578762054443, "learning_rate": 0.001, "loss": 3.5191, "step": 1207 }, { "epoch": 0.05110415432777731, "grad_norm": 1.9216930866241455, "learning_rate": 0.001, "loss": 3.5364, "step": 1208 }, { "epoch": 0.05114645909129368, "grad_norm": 0.8697239756584167, "learning_rate": 0.001, "loss": 2.362, "step": 1209 }, { "epoch": 0.051188763854810054, "grad_norm": 0.6998451948165894, "learning_rate": 0.001, "loss": 2.7623, "step": 1210 }, { "epoch": 0.05123106861832642, "grad_norm": 0.9253522753715515, "learning_rate": 0.001, "loss": 3.4004, "step": 1211 }, { "epoch": 0.0512733733818428, "grad_norm": 0.9361282587051392, "learning_rate": 0.001, "loss": 3.1784, "step": 1212 }, { "epoch": 0.051315678145359166, "grad_norm": 0.38276004791259766, "learning_rate": 0.001, "loss": 2.448, "step": 1213 }, { "epoch": 0.05135798290887554, "grad_norm": 0.5177932977676392, "learning_rate": 0.001, "loss": 3.3615, "step": 1214 }, { "epoch": 0.05140028767239191, "grad_norm": 1.2303301095962524, "learning_rate": 0.001, "loss": 2.7344, "step": 1215 }, { "epoch": 0.051442592435908284, "grad_norm": 25.516271591186523, "learning_rate": 0.001, "loss": 2.5343, "step": 1216 }, { "epoch": 0.05148489719942465, "grad_norm": 0.7592980861663818, "learning_rate": 0.001, "loss": 3.7762, "step": 1217 }, { "epoch": 0.05152720196294103, "grad_norm": 0.9871262311935425, "learning_rate": 0.001, "loss": 2.7732, "step": 1218 }, { "epoch": 0.0515695067264574, "grad_norm": 0.6601408123970032, "learning_rate": 0.001, "loss": 2.7274, "step": 1219 }, { "epoch": 0.05161181148997377, "grad_norm": 0.8926149606704712, "learning_rate": 0.001, "loss": 3.5338, "step": 1220 }, { "epoch": 0.051654116253490145, "grad_norm": 0.6787442564964294, "learning_rate": 0.001, "loss": 2.4065, "step": 1221 }, { "epoch": 0.051696421017006514, "grad_norm": 0.8640452027320862, "learning_rate": 0.001, "loss": 2.5657, "step": 1222 }, { "epoch": 0.05173872578052289, "grad_norm": 1.1135860681533813, "learning_rate": 0.001, "loss": 2.0519, "step": 1223 }, { "epoch": 0.05178103054403926, "grad_norm": 0.481350302696228, "learning_rate": 0.001, "loss": 2.7644, "step": 1224 }, { "epoch": 0.05182333530755563, "grad_norm": 0.4704929292201996, "learning_rate": 0.001, "loss": 2.403, "step": 1225 }, { "epoch": 0.051865640071072, "grad_norm": 6.178280353546143, "learning_rate": 0.001, "loss": 2.2048, "step": 1226 }, { "epoch": 0.051907944834588375, "grad_norm": 0.5791310667991638, "learning_rate": 0.001, "loss": 3.0232, "step": 1227 }, { "epoch": 0.05195024959810474, "grad_norm": 2.9677908420562744, "learning_rate": 0.001, "loss": 3.8193, "step": 1228 }, { "epoch": 0.05199255436162112, "grad_norm": 1.5064809322357178, "learning_rate": 0.001, "loss": 3.0974, "step": 1229 }, { "epoch": 0.05203485912513749, "grad_norm": 0.3698960542678833, "learning_rate": 0.001, "loss": 3.0401, "step": 1230 }, { "epoch": 0.05207716388865386, "grad_norm": 0.5180457830429077, "learning_rate": 0.001, "loss": 3.2249, "step": 1231 }, { "epoch": 0.05211946865217024, "grad_norm": 0.3810598850250244, "learning_rate": 0.001, "loss": 2.5986, "step": 1232 }, { "epoch": 0.052161773415686605, "grad_norm": 0.33539965748786926, "learning_rate": 0.001, "loss": 3.7299, "step": 1233 }, { "epoch": 0.05220407817920298, "grad_norm": 0.5705146789550781, "learning_rate": 0.001, "loss": 3.6431, "step": 1234 }, { "epoch": 0.05224638294271935, "grad_norm": 0.4359552562236786, "learning_rate": 0.001, "loss": 3.0, "step": 1235 }, { "epoch": 0.05228868770623572, "grad_norm": 0.9745590686798096, "learning_rate": 0.001, "loss": 2.4393, "step": 1236 }, { "epoch": 0.05233099246975209, "grad_norm": 0.30867111682891846, "learning_rate": 0.001, "loss": 2.227, "step": 1237 }, { "epoch": 0.052373297233268466, "grad_norm": 0.335039883852005, "learning_rate": 0.001, "loss": 2.2415, "step": 1238 }, { "epoch": 0.05241560199678484, "grad_norm": 0.6804436445236206, "learning_rate": 0.001, "loss": 2.7192, "step": 1239 }, { "epoch": 0.05245790676030121, "grad_norm": 0.41609200835227966, "learning_rate": 0.001, "loss": 3.3984, "step": 1240 }, { "epoch": 0.052500211523817585, "grad_norm": 0.3207823932170868, "learning_rate": 0.001, "loss": 2.5688, "step": 1241 }, { "epoch": 0.05254251628733395, "grad_norm": 0.33397310972213745, "learning_rate": 0.001, "loss": 2.5408, "step": 1242 }, { "epoch": 0.05258482105085033, "grad_norm": 0.38078972697257996, "learning_rate": 0.001, "loss": 2.1729, "step": 1243 }, { "epoch": 0.052627125814366696, "grad_norm": 0.34104421734809875, "learning_rate": 0.001, "loss": 3.0408, "step": 1244 }, { "epoch": 0.05266943057788307, "grad_norm": 0.32771986722946167, "learning_rate": 0.001, "loss": 2.7228, "step": 1245 }, { "epoch": 0.05271173534139944, "grad_norm": 0.8869002461433411, "learning_rate": 0.001, "loss": 3.8551, "step": 1246 }, { "epoch": 0.052754040104915814, "grad_norm": 0.2952006459236145, "learning_rate": 0.001, "loss": 2.3158, "step": 1247 }, { "epoch": 0.05279634486843218, "grad_norm": 0.8608886003494263, "learning_rate": 0.001, "loss": 2.2941, "step": 1248 }, { "epoch": 0.05283864963194856, "grad_norm": 0.5748282074928284, "learning_rate": 0.001, "loss": 2.9898, "step": 1249 }, { "epoch": 0.05288095439546493, "grad_norm": 0.6410783529281616, "learning_rate": 0.001, "loss": 1.971, "step": 1250 }, { "epoch": 0.0529232591589813, "grad_norm": 1.4829449653625488, "learning_rate": 0.001, "loss": 3.4999, "step": 1251 }, { "epoch": 0.052965563922497676, "grad_norm": 1.1416040658950806, "learning_rate": 0.001, "loss": 2.4466, "step": 1252 }, { "epoch": 0.053007868686014044, "grad_norm": 1.1560173034667969, "learning_rate": 0.001, "loss": 2.3162, "step": 1253 }, { "epoch": 0.05305017344953042, "grad_norm": 1.0749090909957886, "learning_rate": 0.001, "loss": 3.186, "step": 1254 }, { "epoch": 0.05309247821304679, "grad_norm": 0.3261003792285919, "learning_rate": 0.001, "loss": 2.3392, "step": 1255 }, { "epoch": 0.05313478297656316, "grad_norm": 0.4178333282470703, "learning_rate": 0.001, "loss": 1.8991, "step": 1256 }, { "epoch": 0.05317708774007953, "grad_norm": 0.3390306234359741, "learning_rate": 0.001, "loss": 2.6713, "step": 1257 }, { "epoch": 0.053219392503595905, "grad_norm": 0.46924909949302673, "learning_rate": 0.001, "loss": 3.4529, "step": 1258 }, { "epoch": 0.053261697267112273, "grad_norm": 0.45819172263145447, "learning_rate": 0.001, "loss": 2.3935, "step": 1259 }, { "epoch": 0.05330400203062865, "grad_norm": 0.36325275897979736, "learning_rate": 0.001, "loss": 1.8038, "step": 1260 }, { "epoch": 0.053346306794145024, "grad_norm": 0.4765758216381073, "learning_rate": 0.001, "loss": 2.941, "step": 1261 }, { "epoch": 0.05338861155766139, "grad_norm": 0.5178735256195068, "learning_rate": 0.001, "loss": 2.8948, "step": 1262 }, { "epoch": 0.05343091632117777, "grad_norm": 0.3561374545097351, "learning_rate": 0.001, "loss": 3.3945, "step": 1263 }, { "epoch": 0.053473221084694135, "grad_norm": 0.29234689474105835, "learning_rate": 0.001, "loss": 1.8964, "step": 1264 }, { "epoch": 0.05351552584821051, "grad_norm": 1.8673244714736938, "learning_rate": 0.001, "loss": 2.2186, "step": 1265 }, { "epoch": 0.05355783061172688, "grad_norm": 1.1225836277008057, "learning_rate": 0.001, "loss": 1.8668, "step": 1266 }, { "epoch": 0.05360013537524325, "grad_norm": 0.8716728091239929, "learning_rate": 0.001, "loss": 2.8729, "step": 1267 }, { "epoch": 0.05364244013875962, "grad_norm": 2.1635050773620605, "learning_rate": 0.001, "loss": 2.645, "step": 1268 }, { "epoch": 0.053684744902275996, "grad_norm": 0.38754305243492126, "learning_rate": 0.001, "loss": 3.3657, "step": 1269 }, { "epoch": 0.05372704966579237, "grad_norm": 0.35894379019737244, "learning_rate": 0.001, "loss": 1.9849, "step": 1270 }, { "epoch": 0.05376935442930874, "grad_norm": 0.45211154222488403, "learning_rate": 0.001, "loss": 3.4621, "step": 1271 }, { "epoch": 0.053811659192825115, "grad_norm": 1.4661130905151367, "learning_rate": 0.001, "loss": 2.2278, "step": 1272 }, { "epoch": 0.05385396395634148, "grad_norm": 0.3701130151748657, "learning_rate": 0.001, "loss": 1.8248, "step": 1273 }, { "epoch": 0.05389626871985786, "grad_norm": 0.3744787871837616, "learning_rate": 0.001, "loss": 2.5166, "step": 1274 }, { "epoch": 0.053938573483374226, "grad_norm": 0.6254411935806274, "learning_rate": 0.001, "loss": 2.1034, "step": 1275 }, { "epoch": 0.0539808782468906, "grad_norm": 0.5083110928535461, "learning_rate": 0.001, "loss": 2.5448, "step": 1276 }, { "epoch": 0.05402318301040697, "grad_norm": 0.4066379964351654, "learning_rate": 0.001, "loss": 2.726, "step": 1277 }, { "epoch": 0.054065487773923344, "grad_norm": 0.326869398355484, "learning_rate": 0.001, "loss": 2.1421, "step": 1278 }, { "epoch": 0.05410779253743971, "grad_norm": 0.51470547914505, "learning_rate": 0.001, "loss": 3.0139, "step": 1279 }, { "epoch": 0.05415009730095609, "grad_norm": 1.5484551191329956, "learning_rate": 0.001, "loss": 2.947, "step": 1280 }, { "epoch": 0.05419240206447246, "grad_norm": 0.4610329270362854, "learning_rate": 0.001, "loss": 2.0814, "step": 1281 }, { "epoch": 0.05423470682798883, "grad_norm": 1.545684576034546, "learning_rate": 0.001, "loss": 4.4303, "step": 1282 }, { "epoch": 0.054277011591505206, "grad_norm": 1.627662181854248, "learning_rate": 0.001, "loss": 2.4313, "step": 1283 }, { "epoch": 0.054319316355021574, "grad_norm": 0.37047141790390015, "learning_rate": 0.001, "loss": 2.1884, "step": 1284 }, { "epoch": 0.05436162111853795, "grad_norm": 0.46785968542099, "learning_rate": 0.001, "loss": 3.1745, "step": 1285 }, { "epoch": 0.05440392588205432, "grad_norm": 0.3685908615589142, "learning_rate": 0.001, "loss": 2.1659, "step": 1286 }, { "epoch": 0.05444623064557069, "grad_norm": 0.8433362245559692, "learning_rate": 0.001, "loss": 2.7851, "step": 1287 }, { "epoch": 0.05448853540908706, "grad_norm": 1.0135656595230103, "learning_rate": 0.001, "loss": 2.2148, "step": 1288 }, { "epoch": 0.054530840172603436, "grad_norm": 0.9696979522705078, "learning_rate": 0.001, "loss": 2.5305, "step": 1289 }, { "epoch": 0.054573144936119804, "grad_norm": 0.38324859738349915, "learning_rate": 0.001, "loss": 2.2144, "step": 1290 }, { "epoch": 0.05461544969963618, "grad_norm": 0.4518929421901703, "learning_rate": 0.001, "loss": 2.0715, "step": 1291 }, { "epoch": 0.054657754463152554, "grad_norm": 0.29709556698799133, "learning_rate": 0.001, "loss": 1.878, "step": 1292 }, { "epoch": 0.05470005922666892, "grad_norm": 0.41647666692733765, "learning_rate": 0.001, "loss": 3.2047, "step": 1293 }, { "epoch": 0.0547423639901853, "grad_norm": 0.40319982171058655, "learning_rate": 0.001, "loss": 2.8011, "step": 1294 }, { "epoch": 0.054784668753701665, "grad_norm": 1.4222816228866577, "learning_rate": 0.001, "loss": 2.4923, "step": 1295 }, { "epoch": 0.05482697351721804, "grad_norm": 0.6435485482215881, "learning_rate": 0.001, "loss": 2.7776, "step": 1296 }, { "epoch": 0.05486927828073441, "grad_norm": 0.3867262899875641, "learning_rate": 0.001, "loss": 2.9806, "step": 1297 }, { "epoch": 0.054911583044250784, "grad_norm": 0.32293111085891724, "learning_rate": 0.001, "loss": 2.2782, "step": 1298 }, { "epoch": 0.05495388780776715, "grad_norm": 1.7177414894104004, "learning_rate": 0.001, "loss": 2.5676, "step": 1299 }, { "epoch": 0.05499619257128353, "grad_norm": 4.7346272468566895, "learning_rate": 0.001, "loss": 2.7508, "step": 1300 }, { "epoch": 0.0550384973347999, "grad_norm": 3.0460281372070312, "learning_rate": 0.001, "loss": 2.946, "step": 1301 }, { "epoch": 0.05508080209831627, "grad_norm": 3.609449863433838, "learning_rate": 0.001, "loss": 2.9066, "step": 1302 }, { "epoch": 0.055123106861832645, "grad_norm": 0.6072207689285278, "learning_rate": 0.001, "loss": 2.9887, "step": 1303 }, { "epoch": 0.05516541162534901, "grad_norm": 0.3145444691181183, "learning_rate": 0.001, "loss": 2.0579, "step": 1304 }, { "epoch": 0.05520771638886539, "grad_norm": 0.4488667845726013, "learning_rate": 0.001, "loss": 3.0136, "step": 1305 }, { "epoch": 0.055250021152381756, "grad_norm": 2.8112523555755615, "learning_rate": 0.001, "loss": 2.0162, "step": 1306 }, { "epoch": 0.05529232591589813, "grad_norm": 0.3468235731124878, "learning_rate": 0.001, "loss": 2.5913, "step": 1307 }, { "epoch": 0.0553346306794145, "grad_norm": 0.32054612040519714, "learning_rate": 0.001, "loss": 2.9339, "step": 1308 }, { "epoch": 0.055376935442930875, "grad_norm": 1.1245250701904297, "learning_rate": 0.001, "loss": 2.1141, "step": 1309 }, { "epoch": 0.05541924020644724, "grad_norm": 0.8368834257125854, "learning_rate": 0.001, "loss": 2.7699, "step": 1310 }, { "epoch": 0.05546154496996362, "grad_norm": 0.5362788438796997, "learning_rate": 0.001, "loss": 3.1062, "step": 1311 }, { "epoch": 0.05550384973347999, "grad_norm": 0.3507515490055084, "learning_rate": 0.001, "loss": 2.5279, "step": 1312 }, { "epoch": 0.05554615449699636, "grad_norm": 0.4399697184562683, "learning_rate": 0.001, "loss": 2.6847, "step": 1313 }, { "epoch": 0.055588459260512736, "grad_norm": 0.693962812423706, "learning_rate": 0.001, "loss": 1.86, "step": 1314 }, { "epoch": 0.055630764024029104, "grad_norm": 0.5763394236564636, "learning_rate": 0.001, "loss": 2.8907, "step": 1315 }, { "epoch": 0.05567306878754548, "grad_norm": 0.3348402678966522, "learning_rate": 0.001, "loss": 2.6682, "step": 1316 }, { "epoch": 0.05571537355106185, "grad_norm": 2.0695788860321045, "learning_rate": 0.001, "loss": 2.2931, "step": 1317 }, { "epoch": 0.05575767831457822, "grad_norm": 1.0537198781967163, "learning_rate": 0.001, "loss": 2.7975, "step": 1318 }, { "epoch": 0.05579998307809459, "grad_norm": 0.38374683260917664, "learning_rate": 0.001, "loss": 3.1988, "step": 1319 }, { "epoch": 0.055842287841610966, "grad_norm": 16.446990966796875, "learning_rate": 0.001, "loss": 2.2754, "step": 1320 }, { "epoch": 0.055884592605127334, "grad_norm": 0.378416508436203, "learning_rate": 0.001, "loss": 3.6752, "step": 1321 }, { "epoch": 0.05592689736864371, "grad_norm": 3.8668839931488037, "learning_rate": 0.001, "loss": 3.5075, "step": 1322 }, { "epoch": 0.055969202132160084, "grad_norm": 0.40051063895225525, "learning_rate": 0.001, "loss": 1.935, "step": 1323 }, { "epoch": 0.05601150689567645, "grad_norm": 0.3199704587459564, "learning_rate": 0.001, "loss": 2.8692, "step": 1324 }, { "epoch": 0.05605381165919283, "grad_norm": 0.39655637741088867, "learning_rate": 0.001, "loss": 2.1795, "step": 1325 }, { "epoch": 0.056096116422709195, "grad_norm": 0.32414257526397705, "learning_rate": 0.001, "loss": 2.3473, "step": 1326 }, { "epoch": 0.05613842118622557, "grad_norm": 0.30689844489097595, "learning_rate": 0.001, "loss": 1.9908, "step": 1327 }, { "epoch": 0.05618072594974194, "grad_norm": 0.3849036693572998, "learning_rate": 0.001, "loss": 2.2868, "step": 1328 }, { "epoch": 0.056223030713258314, "grad_norm": 0.7097468376159668, "learning_rate": 0.001, "loss": 2.9331, "step": 1329 }, { "epoch": 0.05626533547677468, "grad_norm": 0.3276157081127167, "learning_rate": 0.001, "loss": 2.3295, "step": 1330 }, { "epoch": 0.05630764024029106, "grad_norm": 0.30598339438438416, "learning_rate": 0.001, "loss": 2.8663, "step": 1331 }, { "epoch": 0.05634994500380743, "grad_norm": 5.709721565246582, "learning_rate": 0.001, "loss": 2.2997, "step": 1332 }, { "epoch": 0.0563922497673238, "grad_norm": 1.1615700721740723, "learning_rate": 0.001, "loss": 2.2019, "step": 1333 }, { "epoch": 0.056434554530840175, "grad_norm": 0.8189941644668579, "learning_rate": 0.001, "loss": 1.9522, "step": 1334 }, { "epoch": 0.05647685929435654, "grad_norm": 0.4313448965549469, "learning_rate": 0.001, "loss": 2.1067, "step": 1335 }, { "epoch": 0.05651916405787292, "grad_norm": 0.38705217838287354, "learning_rate": 0.001, "loss": 2.9772, "step": 1336 }, { "epoch": 0.05656146882138929, "grad_norm": 0.4954543709754944, "learning_rate": 0.001, "loss": 2.298, "step": 1337 }, { "epoch": 0.05660377358490566, "grad_norm": 0.5692545771598816, "learning_rate": 0.001, "loss": 2.3607, "step": 1338 }, { "epoch": 0.05664607834842203, "grad_norm": 1.1173979043960571, "learning_rate": 0.001, "loss": 2.9379, "step": 1339 }, { "epoch": 0.056688383111938405, "grad_norm": 2.1702587604522705, "learning_rate": 0.001, "loss": 2.4834, "step": 1340 }, { "epoch": 0.05673068787545477, "grad_norm": 3.090153694152832, "learning_rate": 0.001, "loss": 1.9018, "step": 1341 }, { "epoch": 0.05677299263897115, "grad_norm": 0.7734284996986389, "learning_rate": 0.001, "loss": 2.3851, "step": 1342 }, { "epoch": 0.05681529740248752, "grad_norm": 0.2983352243900299, "learning_rate": 0.001, "loss": 2.5298, "step": 1343 }, { "epoch": 0.05685760216600389, "grad_norm": 0.8630567193031311, "learning_rate": 0.001, "loss": 3.0804, "step": 1344 }, { "epoch": 0.056899906929520266, "grad_norm": 0.3336477279663086, "learning_rate": 0.001, "loss": 2.2696, "step": 1345 }, { "epoch": 0.056942211693036635, "grad_norm": 0.30996400117874146, "learning_rate": 0.001, "loss": 2.3269, "step": 1346 }, { "epoch": 0.05698451645655301, "grad_norm": 0.38334864377975464, "learning_rate": 0.001, "loss": 2.4821, "step": 1347 }, { "epoch": 0.05702682122006938, "grad_norm": 0.29219967126846313, "learning_rate": 0.001, "loss": 2.192, "step": 1348 }, { "epoch": 0.05706912598358575, "grad_norm": 1.8894317150115967, "learning_rate": 0.001, "loss": 2.2862, "step": 1349 }, { "epoch": 0.05711143074710212, "grad_norm": 3.3716726303100586, "learning_rate": 0.001, "loss": 2.0416, "step": 1350 }, { "epoch": 0.057153735510618496, "grad_norm": 0.3227848708629608, "learning_rate": 0.001, "loss": 2.7895, "step": 1351 }, { "epoch": 0.057196040274134864, "grad_norm": 0.31334635615348816, "learning_rate": 0.001, "loss": 2.7555, "step": 1352 }, { "epoch": 0.05723834503765124, "grad_norm": 0.32888343930244446, "learning_rate": 0.001, "loss": 2.6685, "step": 1353 }, { "epoch": 0.057280649801167614, "grad_norm": 1.3726975917816162, "learning_rate": 0.001, "loss": 2.4858, "step": 1354 }, { "epoch": 0.05732295456468398, "grad_norm": 0.296441912651062, "learning_rate": 0.001, "loss": 2.4981, "step": 1355 }, { "epoch": 0.05736525932820036, "grad_norm": 0.40023407340049744, "learning_rate": 0.001, "loss": 2.3818, "step": 1356 }, { "epoch": 0.057407564091716726, "grad_norm": 0.23572492599487305, "learning_rate": 0.001, "loss": 1.7868, "step": 1357 }, { "epoch": 0.0574498688552331, "grad_norm": 0.3053801357746124, "learning_rate": 0.001, "loss": 2.115, "step": 1358 }, { "epoch": 0.05749217361874947, "grad_norm": 0.4453347623348236, "learning_rate": 0.001, "loss": 2.3336, "step": 1359 }, { "epoch": 0.057534478382265844, "grad_norm": 0.29795461893081665, "learning_rate": 0.001, "loss": 2.3624, "step": 1360 }, { "epoch": 0.05757678314578221, "grad_norm": 0.3462240993976593, "learning_rate": 0.001, "loss": 2.4219, "step": 1361 }, { "epoch": 0.05761908790929859, "grad_norm": 0.7424085140228271, "learning_rate": 0.001, "loss": 2.0359, "step": 1362 }, { "epoch": 0.05766139267281496, "grad_norm": 0.33206912875175476, "learning_rate": 0.001, "loss": 2.3683, "step": 1363 }, { "epoch": 0.05770369743633133, "grad_norm": 0.5105282068252563, "learning_rate": 0.001, "loss": 2.5832, "step": 1364 }, { "epoch": 0.057746002199847705, "grad_norm": 0.29361414909362793, "learning_rate": 0.001, "loss": 1.5788, "step": 1365 }, { "epoch": 0.057788306963364074, "grad_norm": 0.7224523425102234, "learning_rate": 0.001, "loss": 1.6817, "step": 1366 }, { "epoch": 0.05783061172688045, "grad_norm": 0.33219602704048157, "learning_rate": 0.001, "loss": 2.8996, "step": 1367 }, { "epoch": 0.05787291649039682, "grad_norm": 0.6146803498268127, "learning_rate": 0.001, "loss": 2.8432, "step": 1368 }, { "epoch": 0.05791522125391319, "grad_norm": 0.4107002019882202, "learning_rate": 0.001, "loss": 2.1509, "step": 1369 }, { "epoch": 0.05795752601742956, "grad_norm": 0.7845795750617981, "learning_rate": 0.001, "loss": 2.5862, "step": 1370 }, { "epoch": 0.057999830780945935, "grad_norm": 0.2706119120121002, "learning_rate": 0.001, "loss": 2.4746, "step": 1371 }, { "epoch": 0.0580421355444623, "grad_norm": 0.26624995470046997, "learning_rate": 0.001, "loss": 2.1325, "step": 1372 }, { "epoch": 0.05808444030797868, "grad_norm": 1.2387701272964478, "learning_rate": 0.001, "loss": 3.0806, "step": 1373 }, { "epoch": 0.05812674507149505, "grad_norm": 0.30188238620758057, "learning_rate": 0.001, "loss": 2.6137, "step": 1374 }, { "epoch": 0.05816904983501142, "grad_norm": 0.47407734394073486, "learning_rate": 0.001, "loss": 2.949, "step": 1375 }, { "epoch": 0.0582113545985278, "grad_norm": 13.492250442504883, "learning_rate": 0.001, "loss": 2.3764, "step": 1376 }, { "epoch": 0.058253659362044165, "grad_norm": 1.4784618616104126, "learning_rate": 0.001, "loss": 2.6514, "step": 1377 }, { "epoch": 0.05829596412556054, "grad_norm": 0.5186235904693604, "learning_rate": 0.001, "loss": 2.5127, "step": 1378 }, { "epoch": 0.05833826888907691, "grad_norm": 0.33871689438819885, "learning_rate": 0.001, "loss": 1.9724, "step": 1379 }, { "epoch": 0.05838057365259328, "grad_norm": 0.6185868382453918, "learning_rate": 0.001, "loss": 2.2309, "step": 1380 }, { "epoch": 0.05842287841610965, "grad_norm": 0.3206985890865326, "learning_rate": 0.001, "loss": 2.2691, "step": 1381 }, { "epoch": 0.058465183179626026, "grad_norm": 0.34144043922424316, "learning_rate": 0.001, "loss": 2.1018, "step": 1382 }, { "epoch": 0.058507487943142394, "grad_norm": 2.2303543090820312, "learning_rate": 0.001, "loss": 2.0798, "step": 1383 }, { "epoch": 0.05854979270665877, "grad_norm": 0.36561813950538635, "learning_rate": 0.001, "loss": 2.8894, "step": 1384 }, { "epoch": 0.058592097470175145, "grad_norm": 0.5910583138465881, "learning_rate": 0.001, "loss": 2.6603, "step": 1385 }, { "epoch": 0.05863440223369151, "grad_norm": 0.3511381149291992, "learning_rate": 0.001, "loss": 3.2612, "step": 1386 }, { "epoch": 0.05867670699720789, "grad_norm": 1.0417660474777222, "learning_rate": 0.001, "loss": 2.0464, "step": 1387 }, { "epoch": 0.058719011760724256, "grad_norm": 0.3064709007740021, "learning_rate": 0.001, "loss": 1.7901, "step": 1388 }, { "epoch": 0.05876131652424063, "grad_norm": 0.6421626210212708, "learning_rate": 0.001, "loss": 2.4181, "step": 1389 }, { "epoch": 0.058803621287757, "grad_norm": 0.5801759958267212, "learning_rate": 0.001, "loss": 1.9541, "step": 1390 }, { "epoch": 0.058845926051273374, "grad_norm": 0.32130327820777893, "learning_rate": 0.001, "loss": 2.5807, "step": 1391 }, { "epoch": 0.05888823081478974, "grad_norm": 0.31312790513038635, "learning_rate": 0.001, "loss": 3.0668, "step": 1392 }, { "epoch": 0.05893053557830612, "grad_norm": 0.5199546813964844, "learning_rate": 0.001, "loss": 2.9197, "step": 1393 }, { "epoch": 0.05897284034182249, "grad_norm": 0.45629438757896423, "learning_rate": 0.001, "loss": 1.9592, "step": 1394 }, { "epoch": 0.05901514510533886, "grad_norm": 0.2890334129333496, "learning_rate": 0.001, "loss": 2.716, "step": 1395 }, { "epoch": 0.059057449868855236, "grad_norm": 0.47239354252815247, "learning_rate": 0.001, "loss": 2.2804, "step": 1396 }, { "epoch": 0.059099754632371604, "grad_norm": 0.989652693271637, "learning_rate": 0.001, "loss": 2.0827, "step": 1397 }, { "epoch": 0.05914205939588798, "grad_norm": 0.40251368284225464, "learning_rate": 0.001, "loss": 1.9777, "step": 1398 }, { "epoch": 0.05918436415940435, "grad_norm": 0.2854582965373993, "learning_rate": 0.001, "loss": 1.8564, "step": 1399 }, { "epoch": 0.05922666892292072, "grad_norm": 0.4429548382759094, "learning_rate": 0.001, "loss": 3.2216, "step": 1400 }, { "epoch": 0.05926897368643709, "grad_norm": 0.3628709316253662, "learning_rate": 0.001, "loss": 1.898, "step": 1401 }, { "epoch": 0.059311278449953465, "grad_norm": 0.37840116024017334, "learning_rate": 0.001, "loss": 3.3639, "step": 1402 }, { "epoch": 0.059353583213469834, "grad_norm": 1.245588779449463, "learning_rate": 0.001, "loss": 3.1546, "step": 1403 }, { "epoch": 0.05939588797698621, "grad_norm": 0.304123193025589, "learning_rate": 0.001, "loss": 3.7022, "step": 1404 }, { "epoch": 0.059438192740502584, "grad_norm": 0.4658697843551636, "learning_rate": 0.001, "loss": 2.8618, "step": 1405 }, { "epoch": 0.05948049750401895, "grad_norm": 0.3281252682209015, "learning_rate": 0.001, "loss": 2.5643, "step": 1406 }, { "epoch": 0.05952280226753533, "grad_norm": 0.28439781069755554, "learning_rate": 0.001, "loss": 3.376, "step": 1407 }, { "epoch": 0.059565107031051695, "grad_norm": 0.6219123601913452, "learning_rate": 0.001, "loss": 4.5267, "step": 1408 }, { "epoch": 0.05960741179456807, "grad_norm": 0.30366063117980957, "learning_rate": 0.001, "loss": 2.8077, "step": 1409 }, { "epoch": 0.05964971655808444, "grad_norm": 0.2643471658229828, "learning_rate": 0.001, "loss": 2.5821, "step": 1410 }, { "epoch": 0.05969202132160081, "grad_norm": 0.5798410177230835, "learning_rate": 0.001, "loss": 2.406, "step": 1411 }, { "epoch": 0.05973432608511718, "grad_norm": 0.2849283218383789, "learning_rate": 0.001, "loss": 1.9369, "step": 1412 }, { "epoch": 0.059776630848633557, "grad_norm": 0.35845881700515747, "learning_rate": 0.001, "loss": 2.6967, "step": 1413 }, { "epoch": 0.059818935612149925, "grad_norm": 1.3240238428115845, "learning_rate": 0.001, "loss": 3.4985, "step": 1414 }, { "epoch": 0.0598612403756663, "grad_norm": 0.2925819754600525, "learning_rate": 0.001, "loss": 2.6216, "step": 1415 }, { "epoch": 0.059903545139182675, "grad_norm": 0.8406017422676086, "learning_rate": 0.001, "loss": 2.1828, "step": 1416 }, { "epoch": 0.05994584990269904, "grad_norm": 0.344137966632843, "learning_rate": 0.001, "loss": 2.3818, "step": 1417 }, { "epoch": 0.05998815466621542, "grad_norm": 0.3151387870311737, "learning_rate": 0.001, "loss": 2.53, "step": 1418 }, { "epoch": 0.060030459429731786, "grad_norm": 0.3660660982131958, "learning_rate": 0.001, "loss": 2.4767, "step": 1419 }, { "epoch": 0.06007276419324816, "grad_norm": 3.185765266418457, "learning_rate": 0.001, "loss": 1.7922, "step": 1420 }, { "epoch": 0.06011506895676453, "grad_norm": 0.4721246659755707, "learning_rate": 0.001, "loss": 2.1482, "step": 1421 }, { "epoch": 0.060157373720280904, "grad_norm": 0.281838983297348, "learning_rate": 0.001, "loss": 2.1035, "step": 1422 }, { "epoch": 0.06019967848379727, "grad_norm": 0.5046917796134949, "learning_rate": 0.001, "loss": 2.3257, "step": 1423 }, { "epoch": 0.06024198324731365, "grad_norm": 0.36857858300209045, "learning_rate": 0.001, "loss": 2.5842, "step": 1424 }, { "epoch": 0.06028428801083002, "grad_norm": 0.6424447894096375, "learning_rate": 0.001, "loss": 2.5619, "step": 1425 }, { "epoch": 0.06032659277434639, "grad_norm": 0.3190594017505646, "learning_rate": 0.001, "loss": 2.6482, "step": 1426 }, { "epoch": 0.060368897537862766, "grad_norm": 0.3126913905143738, "learning_rate": 0.001, "loss": 3.0516, "step": 1427 }, { "epoch": 0.060411202301379134, "grad_norm": 0.41238340735435486, "learning_rate": 0.001, "loss": 3.3109, "step": 1428 }, { "epoch": 0.06045350706489551, "grad_norm": 0.44219493865966797, "learning_rate": 0.001, "loss": 3.6026, "step": 1429 }, { "epoch": 0.06049581182841188, "grad_norm": 0.32209640741348267, "learning_rate": 0.001, "loss": 2.6428, "step": 1430 }, { "epoch": 0.06053811659192825, "grad_norm": 0.39168989658355713, "learning_rate": 0.001, "loss": 3.429, "step": 1431 }, { "epoch": 0.06058042135544462, "grad_norm": 0.27055805921554565, "learning_rate": 0.001, "loss": 3.8608, "step": 1432 }, { "epoch": 0.060622726118960996, "grad_norm": 0.49823957681655884, "learning_rate": 0.001, "loss": 3.4317, "step": 1433 }, { "epoch": 0.060665030882477364, "grad_norm": 0.32492002844810486, "learning_rate": 0.001, "loss": 2.4563, "step": 1434 }, { "epoch": 0.06070733564599374, "grad_norm": 0.34951508045196533, "learning_rate": 0.001, "loss": 3.2943, "step": 1435 }, { "epoch": 0.060749640409510114, "grad_norm": 0.4263649880886078, "learning_rate": 0.001, "loss": 2.6012, "step": 1436 }, { "epoch": 0.06079194517302648, "grad_norm": 1.0415583848953247, "learning_rate": 0.001, "loss": 2.7861, "step": 1437 }, { "epoch": 0.06083424993654286, "grad_norm": 0.331173837184906, "learning_rate": 0.001, "loss": 2.4296, "step": 1438 }, { "epoch": 0.060876554700059225, "grad_norm": 0.88392573595047, "learning_rate": 0.001, "loss": 2.3683, "step": 1439 }, { "epoch": 0.0609188594635756, "grad_norm": 1.8237016201019287, "learning_rate": 0.001, "loss": 2.9384, "step": 1440 }, { "epoch": 0.06096116422709197, "grad_norm": 0.40476563572883606, "learning_rate": 0.001, "loss": 3.0706, "step": 1441 }, { "epoch": 0.061003468990608344, "grad_norm": 2.381662607192993, "learning_rate": 0.001, "loss": 2.1478, "step": 1442 }, { "epoch": 0.06104577375412471, "grad_norm": 0.3319908082485199, "learning_rate": 0.001, "loss": 3.0221, "step": 1443 }, { "epoch": 0.06108807851764109, "grad_norm": 1.0969241857528687, "learning_rate": 0.001, "loss": 2.5479, "step": 1444 }, { "epoch": 0.061130383281157455, "grad_norm": 0.510847806930542, "learning_rate": 0.001, "loss": 1.8381, "step": 1445 }, { "epoch": 0.06117268804467383, "grad_norm": 0.5893344879150391, "learning_rate": 0.001, "loss": 2.661, "step": 1446 }, { "epoch": 0.061214992808190205, "grad_norm": 0.36038708686828613, "learning_rate": 0.001, "loss": 3.1335, "step": 1447 }, { "epoch": 0.06125729757170657, "grad_norm": 0.3727112412452698, "learning_rate": 0.001, "loss": 3.2217, "step": 1448 }, { "epoch": 0.06129960233522295, "grad_norm": 0.31264758110046387, "learning_rate": 0.001, "loss": 2.0928, "step": 1449 }, { "epoch": 0.061341907098739316, "grad_norm": 0.3144207298755646, "learning_rate": 0.001, "loss": 2.5247, "step": 1450 }, { "epoch": 0.06138421186225569, "grad_norm": 0.5916879773139954, "learning_rate": 0.001, "loss": 3.4771, "step": 1451 }, { "epoch": 0.06142651662577206, "grad_norm": 0.34228265285491943, "learning_rate": 0.001, "loss": 1.9271, "step": 1452 }, { "epoch": 0.061468821389288435, "grad_norm": 0.31869953870773315, "learning_rate": 0.001, "loss": 1.9089, "step": 1453 }, { "epoch": 0.0615111261528048, "grad_norm": 0.2902565598487854, "learning_rate": 0.001, "loss": 1.9665, "step": 1454 }, { "epoch": 0.06155343091632118, "grad_norm": 0.8417028188705444, "learning_rate": 0.001, "loss": 2.1853, "step": 1455 }, { "epoch": 0.06159573567983755, "grad_norm": 0.29936543107032776, "learning_rate": 0.001, "loss": 2.2901, "step": 1456 }, { "epoch": 0.06163804044335392, "grad_norm": 1.0548635721206665, "learning_rate": 0.001, "loss": 3.0753, "step": 1457 }, { "epoch": 0.061680345206870296, "grad_norm": 0.5437618494033813, "learning_rate": 0.001, "loss": 2.3487, "step": 1458 }, { "epoch": 0.061722649970386664, "grad_norm": 0.35127514600753784, "learning_rate": 0.001, "loss": 2.761, "step": 1459 }, { "epoch": 0.06176495473390304, "grad_norm": 0.3495043218135834, "learning_rate": 0.001, "loss": 2.1056, "step": 1460 }, { "epoch": 0.06180725949741941, "grad_norm": 0.3617175817489624, "learning_rate": 0.001, "loss": 2.226, "step": 1461 }, { "epoch": 0.06184956426093578, "grad_norm": 2.682561159133911, "learning_rate": 0.001, "loss": 2.2108, "step": 1462 }, { "epoch": 0.06189186902445215, "grad_norm": 0.6340761184692383, "learning_rate": 0.001, "loss": 3.0965, "step": 1463 }, { "epoch": 0.061934173787968526, "grad_norm": 0.43284621834754944, "learning_rate": 0.001, "loss": 3.3314, "step": 1464 }, { "epoch": 0.061976478551484894, "grad_norm": 0.9071413278579712, "learning_rate": 0.001, "loss": 1.9998, "step": 1465 }, { "epoch": 0.06201878331500127, "grad_norm": 0.32309386134147644, "learning_rate": 0.001, "loss": 1.6943, "step": 1466 }, { "epoch": 0.062061088078517644, "grad_norm": 1.5545170307159424, "learning_rate": 0.001, "loss": 2.5362, "step": 1467 }, { "epoch": 0.06210339284203401, "grad_norm": 0.4889759123325348, "learning_rate": 0.001, "loss": 2.4579, "step": 1468 }, { "epoch": 0.06214569760555039, "grad_norm": 0.3919735252857208, "learning_rate": 0.001, "loss": 2.5643, "step": 1469 }, { "epoch": 0.062188002369066755, "grad_norm": 0.8115111589431763, "learning_rate": 0.001, "loss": 2.6917, "step": 1470 }, { "epoch": 0.06223030713258313, "grad_norm": 1.6906379461288452, "learning_rate": 0.001, "loss": 1.7673, "step": 1471 }, { "epoch": 0.0622726118960995, "grad_norm": 0.33348721265792847, "learning_rate": 0.001, "loss": 1.9239, "step": 1472 }, { "epoch": 0.062314916659615874, "grad_norm": 0.5403802990913391, "learning_rate": 0.001, "loss": 2.6563, "step": 1473 }, { "epoch": 0.06235722142313224, "grad_norm": 0.6966021060943604, "learning_rate": 0.001, "loss": 1.9165, "step": 1474 }, { "epoch": 0.06239952618664862, "grad_norm": 0.720427930355072, "learning_rate": 0.001, "loss": 2.4165, "step": 1475 }, { "epoch": 0.062441830950164985, "grad_norm": 0.617408275604248, "learning_rate": 0.001, "loss": 3.6807, "step": 1476 }, { "epoch": 0.06248413571368136, "grad_norm": 0.5452759265899658, "learning_rate": 0.001, "loss": 3.4443, "step": 1477 }, { "epoch": 0.06252644047719773, "grad_norm": 0.4260646402835846, "learning_rate": 0.001, "loss": 3.0329, "step": 1478 }, { "epoch": 0.06256874524071411, "grad_norm": 0.3713223338127136, "learning_rate": 0.001, "loss": 3.1337, "step": 1479 }, { "epoch": 0.06261105000423048, "grad_norm": 0.4795995056629181, "learning_rate": 0.001, "loss": 2.6778, "step": 1480 }, { "epoch": 0.06265335476774685, "grad_norm": 1.03213369846344, "learning_rate": 0.001, "loss": 2.7868, "step": 1481 }, { "epoch": 0.06269565953126321, "grad_norm": 0.7824976444244385, "learning_rate": 0.001, "loss": 2.982, "step": 1482 }, { "epoch": 0.0627379642947796, "grad_norm": 1.2638541460037231, "learning_rate": 0.001, "loss": 3.0771, "step": 1483 }, { "epoch": 0.06278026905829596, "grad_norm": 1.2298089265823364, "learning_rate": 0.001, "loss": 2.0408, "step": 1484 }, { "epoch": 0.06282257382181233, "grad_norm": 0.8441680073738098, "learning_rate": 0.001, "loss": 2.4082, "step": 1485 }, { "epoch": 0.0628648785853287, "grad_norm": 0.34452885389328003, "learning_rate": 0.001, "loss": 2.2197, "step": 1486 }, { "epoch": 0.06290718334884508, "grad_norm": 0.39541852474212646, "learning_rate": 0.001, "loss": 2.9002, "step": 1487 }, { "epoch": 0.06294948811236145, "grad_norm": 0.49740156531333923, "learning_rate": 0.001, "loss": 2.4369, "step": 1488 }, { "epoch": 0.06299179287587782, "grad_norm": 1.0628348588943481, "learning_rate": 0.001, "loss": 2.9808, "step": 1489 }, { "epoch": 0.0630340976393942, "grad_norm": 0.3743760883808136, "learning_rate": 0.001, "loss": 2.8068, "step": 1490 }, { "epoch": 0.06307640240291057, "grad_norm": 0.6638221740722656, "learning_rate": 0.001, "loss": 2.0364, "step": 1491 }, { "epoch": 0.06311870716642694, "grad_norm": 0.34382185339927673, "learning_rate": 0.001, "loss": 2.6025, "step": 1492 }, { "epoch": 0.0631610119299433, "grad_norm": 0.378578782081604, "learning_rate": 0.001, "loss": 2.762, "step": 1493 }, { "epoch": 0.06320331669345969, "grad_norm": 0.38604044914245605, "learning_rate": 0.001, "loss": 2.0935, "step": 1494 }, { "epoch": 0.06324562145697606, "grad_norm": 1.129136562347412, "learning_rate": 0.001, "loss": 2.8483, "step": 1495 }, { "epoch": 0.06328792622049242, "grad_norm": 0.30852529406547546, "learning_rate": 0.001, "loss": 2.3716, "step": 1496 }, { "epoch": 0.0633302309840088, "grad_norm": 2.0638482570648193, "learning_rate": 0.001, "loss": 2.6199, "step": 1497 }, { "epoch": 0.06337253574752517, "grad_norm": 0.25943711400032043, "learning_rate": 0.001, "loss": 2.2946, "step": 1498 }, { "epoch": 0.06341484051104154, "grad_norm": 0.31075453758239746, "learning_rate": 0.001, "loss": 2.1379, "step": 1499 }, { "epoch": 0.06345714527455791, "grad_norm": 0.34476733207702637, "learning_rate": 0.001, "loss": 3.0886, "step": 1500 }, { "epoch": 0.06349945003807429, "grad_norm": 0.3541569709777832, "learning_rate": 0.001, "loss": 2.0473, "step": 1501 }, { "epoch": 0.06354175480159066, "grad_norm": 0.6460628509521484, "learning_rate": 0.001, "loss": 2.4141, "step": 1502 }, { "epoch": 0.06358405956510703, "grad_norm": 0.30732765793800354, "learning_rate": 0.001, "loss": 2.4192, "step": 1503 }, { "epoch": 0.0636263643286234, "grad_norm": 0.3573780655860901, "learning_rate": 0.001, "loss": 3.1885, "step": 1504 }, { "epoch": 0.06366866909213978, "grad_norm": 0.26040375232696533, "learning_rate": 0.001, "loss": 1.8493, "step": 1505 }, { "epoch": 0.06371097385565615, "grad_norm": 0.2959687411785126, "learning_rate": 0.001, "loss": 2.4594, "step": 1506 }, { "epoch": 0.06375327861917252, "grad_norm": 0.3453410267829895, "learning_rate": 0.001, "loss": 3.036, "step": 1507 }, { "epoch": 0.0637955833826889, "grad_norm": 0.35461366176605225, "learning_rate": 0.001, "loss": 3.1924, "step": 1508 }, { "epoch": 0.06383788814620527, "grad_norm": 0.34828194975852966, "learning_rate": 0.001, "loss": 2.1851, "step": 1509 }, { "epoch": 0.06388019290972163, "grad_norm": 2.283674955368042, "learning_rate": 0.001, "loss": 2.278, "step": 1510 }, { "epoch": 0.063922497673238, "grad_norm": 0.3838748335838318, "learning_rate": 0.001, "loss": 2.2081, "step": 1511 }, { "epoch": 0.06396480243675438, "grad_norm": 0.2536729574203491, "learning_rate": 0.001, "loss": 1.9966, "step": 1512 }, { "epoch": 0.06400710720027075, "grad_norm": 3.2230656147003174, "learning_rate": 0.001, "loss": 2.9543, "step": 1513 }, { "epoch": 0.06404941196378712, "grad_norm": 0.2882685661315918, "learning_rate": 0.001, "loss": 2.0072, "step": 1514 }, { "epoch": 0.06409171672730349, "grad_norm": 0.34128835797309875, "learning_rate": 0.001, "loss": 2.4368, "step": 1515 }, { "epoch": 0.06413402149081987, "grad_norm": 0.326494425535202, "learning_rate": 0.001, "loss": 2.5767, "step": 1516 }, { "epoch": 0.06417632625433624, "grad_norm": 0.4839684069156647, "learning_rate": 0.001, "loss": 2.0286, "step": 1517 }, { "epoch": 0.0642186310178526, "grad_norm": 0.43224430084228516, "learning_rate": 0.001, "loss": 3.4441, "step": 1518 }, { "epoch": 0.06426093578136899, "grad_norm": 0.41439104080200195, "learning_rate": 0.001, "loss": 2.9004, "step": 1519 }, { "epoch": 0.06430324054488536, "grad_norm": 0.35360193252563477, "learning_rate": 0.001, "loss": 2.659, "step": 1520 }, { "epoch": 0.06434554530840172, "grad_norm": 0.25587019324302673, "learning_rate": 0.001, "loss": 2.0744, "step": 1521 }, { "epoch": 0.06438785007191809, "grad_norm": 0.2605937719345093, "learning_rate": 0.001, "loss": 2.4958, "step": 1522 }, { "epoch": 0.06443015483543447, "grad_norm": 0.26113244891166687, "learning_rate": 0.001, "loss": 2.1607, "step": 1523 }, { "epoch": 0.06447245959895084, "grad_norm": 0.3792099356651306, "learning_rate": 0.001, "loss": 2.3428, "step": 1524 }, { "epoch": 0.06451476436246721, "grad_norm": 0.44862857460975647, "learning_rate": 0.001, "loss": 2.1672, "step": 1525 }, { "epoch": 0.06455706912598358, "grad_norm": 0.2584918737411499, "learning_rate": 0.001, "loss": 2.2928, "step": 1526 }, { "epoch": 0.06459937388949996, "grad_norm": 0.2768254280090332, "learning_rate": 0.001, "loss": 1.6838, "step": 1527 }, { "epoch": 0.06464167865301633, "grad_norm": 0.30833518505096436, "learning_rate": 0.001, "loss": 1.8115, "step": 1528 }, { "epoch": 0.0646839834165327, "grad_norm": 0.8284555077552795, "learning_rate": 0.001, "loss": 2.5659, "step": 1529 }, { "epoch": 0.06472628818004908, "grad_norm": 0.5380930304527283, "learning_rate": 0.001, "loss": 2.6105, "step": 1530 }, { "epoch": 0.06476859294356545, "grad_norm": 0.5618930459022522, "learning_rate": 0.001, "loss": 2.2628, "step": 1531 }, { "epoch": 0.06481089770708182, "grad_norm": 0.27346742153167725, "learning_rate": 0.001, "loss": 2.1463, "step": 1532 }, { "epoch": 0.06485320247059818, "grad_norm": 0.30352386832237244, "learning_rate": 0.001, "loss": 3.6644, "step": 1533 }, { "epoch": 0.06489550723411457, "grad_norm": 0.31220725178718567, "learning_rate": 0.001, "loss": 2.6265, "step": 1534 }, { "epoch": 0.06493781199763093, "grad_norm": 0.27084285020828247, "learning_rate": 0.001, "loss": 2.3142, "step": 1535 }, { "epoch": 0.0649801167611473, "grad_norm": 0.5739223957061768, "learning_rate": 0.001, "loss": 2.2806, "step": 1536 }, { "epoch": 0.06502242152466367, "grad_norm": 0.7535676956176758, "learning_rate": 0.001, "loss": 2.3798, "step": 1537 }, { "epoch": 0.06506472628818005, "grad_norm": 0.27977368235588074, "learning_rate": 0.001, "loss": 2.7091, "step": 1538 }, { "epoch": 0.06510703105169642, "grad_norm": 0.6584524512290955, "learning_rate": 0.001, "loss": 2.9503, "step": 1539 }, { "epoch": 0.06514933581521279, "grad_norm": 0.34777507185935974, "learning_rate": 0.001, "loss": 2.8689, "step": 1540 }, { "epoch": 0.06519164057872917, "grad_norm": 0.5156388282775879, "learning_rate": 0.001, "loss": 2.4906, "step": 1541 }, { "epoch": 0.06523394534224554, "grad_norm": 0.28216788172721863, "learning_rate": 0.001, "loss": 1.8898, "step": 1542 }, { "epoch": 0.06527625010576191, "grad_norm": 1.3255605697631836, "learning_rate": 0.001, "loss": 1.7489, "step": 1543 }, { "epoch": 0.06531855486927828, "grad_norm": 0.28111153841018677, "learning_rate": 0.001, "loss": 2.2489, "step": 1544 }, { "epoch": 0.06536085963279466, "grad_norm": 0.2950698435306549, "learning_rate": 0.001, "loss": 1.6588, "step": 1545 }, { "epoch": 0.06540316439631103, "grad_norm": 0.2627398371696472, "learning_rate": 0.001, "loss": 2.2555, "step": 1546 }, { "epoch": 0.0654454691598274, "grad_norm": 0.545285701751709, "learning_rate": 0.001, "loss": 2.238, "step": 1547 }, { "epoch": 0.06548777392334376, "grad_norm": 0.2821153402328491, "learning_rate": 0.001, "loss": 2.5748, "step": 1548 }, { "epoch": 0.06553007868686014, "grad_norm": 0.32898885011672974, "learning_rate": 0.001, "loss": 2.6032, "step": 1549 }, { "epoch": 0.06557238345037651, "grad_norm": 0.6069597601890564, "learning_rate": 0.001, "loss": 2.3282, "step": 1550 }, { "epoch": 0.06561468821389288, "grad_norm": 0.33791178464889526, "learning_rate": 0.001, "loss": 2.3444, "step": 1551 }, { "epoch": 0.06565699297740926, "grad_norm": 0.630935788154602, "learning_rate": 0.001, "loss": 2.574, "step": 1552 }, { "epoch": 0.06569929774092563, "grad_norm": 0.35226690769195557, "learning_rate": 0.001, "loss": 2.3944, "step": 1553 }, { "epoch": 0.065741602504442, "grad_norm": 0.254507452249527, "learning_rate": 0.001, "loss": 2.4492, "step": 1554 }, { "epoch": 0.06578390726795837, "grad_norm": 1.1583757400512695, "learning_rate": 0.001, "loss": 2.0208, "step": 1555 }, { "epoch": 0.06582621203147475, "grad_norm": 0.27121299505233765, "learning_rate": 0.001, "loss": 2.2889, "step": 1556 }, { "epoch": 0.06586851679499112, "grad_norm": 0.6531930565834045, "learning_rate": 0.001, "loss": 3.3624, "step": 1557 }, { "epoch": 0.06591082155850748, "grad_norm": 0.42395731806755066, "learning_rate": 0.001, "loss": 2.3959, "step": 1558 }, { "epoch": 0.06595312632202387, "grad_norm": 0.3243398368358612, "learning_rate": 0.001, "loss": 2.0876, "step": 1559 }, { "epoch": 0.06599543108554023, "grad_norm": 0.3197888135910034, "learning_rate": 0.001, "loss": 2.2143, "step": 1560 }, { "epoch": 0.0660377358490566, "grad_norm": 0.33776190876960754, "learning_rate": 0.001, "loss": 2.6401, "step": 1561 }, { "epoch": 0.06608004061257297, "grad_norm": 0.34774449467658997, "learning_rate": 0.001, "loss": 2.5507, "step": 1562 }, { "epoch": 0.06612234537608935, "grad_norm": 0.5661643147468567, "learning_rate": 0.001, "loss": 2.9336, "step": 1563 }, { "epoch": 0.06616465013960572, "grad_norm": 0.3465413749217987, "learning_rate": 0.001, "loss": 1.8256, "step": 1564 }, { "epoch": 0.06620695490312209, "grad_norm": 0.3140278458595276, "learning_rate": 0.001, "loss": 2.3683, "step": 1565 }, { "epoch": 0.06624925966663846, "grad_norm": 0.6140979528427124, "learning_rate": 0.001, "loss": 2.4981, "step": 1566 }, { "epoch": 0.06629156443015484, "grad_norm": 0.36587056517601013, "learning_rate": 0.001, "loss": 2.1976, "step": 1567 }, { "epoch": 0.06633386919367121, "grad_norm": 0.2791414260864258, "learning_rate": 0.001, "loss": 2.1657, "step": 1568 }, { "epoch": 0.06637617395718758, "grad_norm": 0.974829375743866, "learning_rate": 0.001, "loss": 2.604, "step": 1569 }, { "epoch": 0.06641847872070396, "grad_norm": 0.3969157934188843, "learning_rate": 0.001, "loss": 3.1704, "step": 1570 }, { "epoch": 0.06646078348422033, "grad_norm": 0.31570157408714294, "learning_rate": 0.001, "loss": 2.6029, "step": 1571 }, { "epoch": 0.0665030882477367, "grad_norm": 0.5296083688735962, "learning_rate": 0.001, "loss": 2.375, "step": 1572 }, { "epoch": 0.06654539301125306, "grad_norm": 0.8200310468673706, "learning_rate": 0.001, "loss": 1.9998, "step": 1573 }, { "epoch": 0.06658769777476944, "grad_norm": 0.3001185953617096, "learning_rate": 0.001, "loss": 2.5948, "step": 1574 }, { "epoch": 0.06663000253828581, "grad_norm": 0.7405486106872559, "learning_rate": 0.001, "loss": 3.2701, "step": 1575 }, { "epoch": 0.06667230730180218, "grad_norm": 1.4987202882766724, "learning_rate": 0.001, "loss": 3.5723, "step": 1576 }, { "epoch": 0.06671461206531855, "grad_norm": 0.7644462585449219, "learning_rate": 0.001, "loss": 2.1558, "step": 1577 }, { "epoch": 0.06675691682883493, "grad_norm": 0.3527809977531433, "learning_rate": 0.001, "loss": 2.5948, "step": 1578 }, { "epoch": 0.0667992215923513, "grad_norm": 1.6607075929641724, "learning_rate": 0.001, "loss": 2.2302, "step": 1579 }, { "epoch": 0.06684152635586767, "grad_norm": 0.42727428674697876, "learning_rate": 0.001, "loss": 2.4521, "step": 1580 }, { "epoch": 0.06688383111938405, "grad_norm": 0.5329534411430359, "learning_rate": 0.001, "loss": 2.3643, "step": 1581 }, { "epoch": 0.06692613588290042, "grad_norm": 0.32826146483421326, "learning_rate": 0.001, "loss": 3.578, "step": 1582 }, { "epoch": 0.06696844064641679, "grad_norm": 0.5298377275466919, "learning_rate": 0.001, "loss": 2.7983, "step": 1583 }, { "epoch": 0.06701074540993315, "grad_norm": 0.3430579900741577, "learning_rate": 0.001, "loss": 1.9132, "step": 1584 }, { "epoch": 0.06705305017344954, "grad_norm": 0.31380248069763184, "learning_rate": 0.001, "loss": 2.7339, "step": 1585 }, { "epoch": 0.0670953549369659, "grad_norm": 0.4660479724407196, "learning_rate": 0.001, "loss": 2.183, "step": 1586 }, { "epoch": 0.06713765970048227, "grad_norm": 1.5428110361099243, "learning_rate": 0.001, "loss": 2.3808, "step": 1587 }, { "epoch": 0.06717996446399864, "grad_norm": 1.2535374164581299, "learning_rate": 0.001, "loss": 2.1735, "step": 1588 }, { "epoch": 0.06722226922751502, "grad_norm": 0.438996285200119, "learning_rate": 0.001, "loss": 2.3571, "step": 1589 }, { "epoch": 0.06726457399103139, "grad_norm": 0.3860979378223419, "learning_rate": 0.001, "loss": 3.1402, "step": 1590 }, { "epoch": 0.06730687875454776, "grad_norm": 0.32492074370384216, "learning_rate": 0.001, "loss": 2.5053, "step": 1591 }, { "epoch": 0.06734918351806414, "grad_norm": 0.4713243246078491, "learning_rate": 0.001, "loss": 3.3592, "step": 1592 }, { "epoch": 0.06739148828158051, "grad_norm": 0.30050158500671387, "learning_rate": 0.001, "loss": 2.1694, "step": 1593 }, { "epoch": 0.06743379304509688, "grad_norm": 0.34924885630607605, "learning_rate": 0.001, "loss": 1.9499, "step": 1594 }, { "epoch": 0.06747609780861324, "grad_norm": 0.4567183256149292, "learning_rate": 0.001, "loss": 2.2653, "step": 1595 }, { "epoch": 0.06751840257212963, "grad_norm": 4.099420070648193, "learning_rate": 0.001, "loss": 2.9475, "step": 1596 }, { "epoch": 0.067560707335646, "grad_norm": 0.3680780827999115, "learning_rate": 0.001, "loss": 2.3772, "step": 1597 }, { "epoch": 0.06760301209916236, "grad_norm": 3.2965519428253174, "learning_rate": 0.001, "loss": 1.8148, "step": 1598 }, { "epoch": 0.06764531686267873, "grad_norm": 0.4864022433757782, "learning_rate": 0.001, "loss": 2.3886, "step": 1599 }, { "epoch": 0.06768762162619511, "grad_norm": 0.5703839063644409, "learning_rate": 0.001, "loss": 2.1813, "step": 1600 }, { "epoch": 0.06772992638971148, "grad_norm": 0.28068217635154724, "learning_rate": 0.001, "loss": 2.4237, "step": 1601 }, { "epoch": 0.06777223115322785, "grad_norm": 0.27317923307418823, "learning_rate": 0.001, "loss": 2.2093, "step": 1602 }, { "epoch": 0.06781453591674423, "grad_norm": 0.3868054151535034, "learning_rate": 0.001, "loss": 2.4154, "step": 1603 }, { "epoch": 0.0678568406802606, "grad_norm": 0.36695969104766846, "learning_rate": 0.001, "loss": 2.3344, "step": 1604 }, { "epoch": 0.06789914544377697, "grad_norm": 0.37811872363090515, "learning_rate": 0.001, "loss": 1.7114, "step": 1605 }, { "epoch": 0.06794145020729334, "grad_norm": 0.3852631449699402, "learning_rate": 0.001, "loss": 3.0529, "step": 1606 }, { "epoch": 0.06798375497080972, "grad_norm": 0.9019683003425598, "learning_rate": 0.001, "loss": 2.0123, "step": 1607 }, { "epoch": 0.06802605973432609, "grad_norm": 1.809870958328247, "learning_rate": 0.001, "loss": 2.5859, "step": 1608 }, { "epoch": 0.06806836449784245, "grad_norm": 0.31933197379112244, "learning_rate": 0.001, "loss": 2.0505, "step": 1609 }, { "epoch": 0.06811066926135882, "grad_norm": 0.37392449378967285, "learning_rate": 0.001, "loss": 3.3611, "step": 1610 }, { "epoch": 0.0681529740248752, "grad_norm": 0.3455560803413391, "learning_rate": 0.001, "loss": 2.3174, "step": 1611 }, { "epoch": 0.06819527878839157, "grad_norm": 0.5688546299934387, "learning_rate": 0.001, "loss": 2.2655, "step": 1612 }, { "epoch": 0.06823758355190794, "grad_norm": 0.6909453868865967, "learning_rate": 0.001, "loss": 1.8641, "step": 1613 }, { "epoch": 0.06827988831542432, "grad_norm": 0.31672239303588867, "learning_rate": 0.001, "loss": 1.9912, "step": 1614 }, { "epoch": 0.06832219307894069, "grad_norm": 0.8048245310783386, "learning_rate": 0.001, "loss": 2.0888, "step": 1615 }, { "epoch": 0.06836449784245706, "grad_norm": 0.28497985005378723, "learning_rate": 0.001, "loss": 2.5377, "step": 1616 }, { "epoch": 0.06840680260597343, "grad_norm": 0.8506115674972534, "learning_rate": 0.001, "loss": 2.3879, "step": 1617 }, { "epoch": 0.06844910736948981, "grad_norm": 1.2071772813796997, "learning_rate": 0.001, "loss": 2.7813, "step": 1618 }, { "epoch": 0.06849141213300618, "grad_norm": 0.37529999017715454, "learning_rate": 0.001, "loss": 2.6804, "step": 1619 }, { "epoch": 0.06853371689652255, "grad_norm": 0.5811559557914734, "learning_rate": 0.001, "loss": 2.3751, "step": 1620 }, { "epoch": 0.06857602166003893, "grad_norm": 1.8456511497497559, "learning_rate": 0.001, "loss": 2.3837, "step": 1621 }, { "epoch": 0.0686183264235553, "grad_norm": 0.7621955871582031, "learning_rate": 0.001, "loss": 2.696, "step": 1622 }, { "epoch": 0.06866063118707166, "grad_norm": 0.8707615733146667, "learning_rate": 0.001, "loss": 2.1565, "step": 1623 }, { "epoch": 0.06870293595058803, "grad_norm": 0.6202014684677124, "learning_rate": 0.001, "loss": 1.9679, "step": 1624 }, { "epoch": 0.06874524071410441, "grad_norm": 0.38461774587631226, "learning_rate": 0.001, "loss": 2.1663, "step": 1625 }, { "epoch": 0.06878754547762078, "grad_norm": 0.3248027265071869, "learning_rate": 0.001, "loss": 2.6196, "step": 1626 }, { "epoch": 0.06882985024113715, "grad_norm": 0.5045840740203857, "learning_rate": 0.001, "loss": 4.0164, "step": 1627 }, { "epoch": 0.06887215500465352, "grad_norm": 0.31521186232566833, "learning_rate": 0.001, "loss": 1.885, "step": 1628 }, { "epoch": 0.0689144597681699, "grad_norm": 0.3150392770767212, "learning_rate": 0.001, "loss": 2.2383, "step": 1629 }, { "epoch": 0.06895676453168627, "grad_norm": 0.45550379157066345, "learning_rate": 0.001, "loss": 2.2817, "step": 1630 }, { "epoch": 0.06899906929520264, "grad_norm": 0.30408960580825806, "learning_rate": 0.001, "loss": 2.213, "step": 1631 }, { "epoch": 0.06904137405871902, "grad_norm": 0.36207836866378784, "learning_rate": 0.001, "loss": 2.593, "step": 1632 }, { "epoch": 0.06908367882223539, "grad_norm": 0.37449923157691956, "learning_rate": 0.001, "loss": 2.0836, "step": 1633 }, { "epoch": 0.06912598358575175, "grad_norm": 0.29214954376220703, "learning_rate": 0.001, "loss": 2.2552, "step": 1634 }, { "epoch": 0.06916828834926812, "grad_norm": 0.8513506650924683, "learning_rate": 0.001, "loss": 1.975, "step": 1635 }, { "epoch": 0.0692105931127845, "grad_norm": 0.5883151888847351, "learning_rate": 0.001, "loss": 3.0927, "step": 1636 }, { "epoch": 0.06925289787630087, "grad_norm": 0.350691020488739, "learning_rate": 0.001, "loss": 2.0645, "step": 1637 }, { "epoch": 0.06929520263981724, "grad_norm": 0.8605084419250488, "learning_rate": 0.001, "loss": 1.8086, "step": 1638 }, { "epoch": 0.06933750740333361, "grad_norm": 1.2016950845718384, "learning_rate": 0.001, "loss": 1.9914, "step": 1639 }, { "epoch": 0.06937981216684999, "grad_norm": 0.2884148955345154, "learning_rate": 0.001, "loss": 2.1365, "step": 1640 }, { "epoch": 0.06942211693036636, "grad_norm": 0.337169349193573, "learning_rate": 0.001, "loss": 2.0075, "step": 1641 }, { "epoch": 0.06946442169388273, "grad_norm": 0.6522495150566101, "learning_rate": 0.001, "loss": 3.0513, "step": 1642 }, { "epoch": 0.06950672645739911, "grad_norm": 0.44939300417900085, "learning_rate": 0.001, "loss": 2.8989, "step": 1643 }, { "epoch": 0.06954903122091548, "grad_norm": 0.30578479170799255, "learning_rate": 0.001, "loss": 3.2959, "step": 1644 }, { "epoch": 0.06959133598443185, "grad_norm": 45.22916793823242, "learning_rate": 0.001, "loss": 2.4688, "step": 1645 }, { "epoch": 0.06963364074794821, "grad_norm": 0.2697926461696625, "learning_rate": 0.001, "loss": 3.0138, "step": 1646 }, { "epoch": 0.0696759455114646, "grad_norm": 0.32543253898620605, "learning_rate": 0.001, "loss": 2.884, "step": 1647 }, { "epoch": 0.06971825027498096, "grad_norm": 1.5244373083114624, "learning_rate": 0.001, "loss": 2.605, "step": 1648 }, { "epoch": 0.06976055503849733, "grad_norm": 0.2659820318222046, "learning_rate": 0.001, "loss": 2.1016, "step": 1649 }, { "epoch": 0.0698028598020137, "grad_norm": 0.30054983496665955, "learning_rate": 0.001, "loss": 2.4681, "step": 1650 }, { "epoch": 0.06984516456553008, "grad_norm": 0.3234836161136627, "learning_rate": 0.001, "loss": 1.9912, "step": 1651 }, { "epoch": 0.06988746932904645, "grad_norm": 0.27745023369789124, "learning_rate": 0.001, "loss": 2.2214, "step": 1652 }, { "epoch": 0.06992977409256282, "grad_norm": 0.6211910247802734, "learning_rate": 0.001, "loss": 2.1174, "step": 1653 }, { "epoch": 0.0699720788560792, "grad_norm": 6.255242824554443, "learning_rate": 0.001, "loss": 2.5594, "step": 1654 }, { "epoch": 0.07001438361959557, "grad_norm": 0.3245622217655182, "learning_rate": 0.001, "loss": 1.9634, "step": 1655 }, { "epoch": 0.07005668838311194, "grad_norm": 0.4616556167602539, "learning_rate": 0.001, "loss": 2.2543, "step": 1656 }, { "epoch": 0.0700989931466283, "grad_norm": 0.2637854516506195, "learning_rate": 0.001, "loss": 2.29, "step": 1657 }, { "epoch": 0.07014129791014469, "grad_norm": 0.9866844415664673, "learning_rate": 0.001, "loss": 2.8994, "step": 1658 }, { "epoch": 0.07018360267366106, "grad_norm": 0.7608810663223267, "learning_rate": 0.001, "loss": 2.2228, "step": 1659 }, { "epoch": 0.07022590743717742, "grad_norm": 0.31685671210289, "learning_rate": 0.001, "loss": 2.2006, "step": 1660 }, { "epoch": 0.07026821220069379, "grad_norm": 1.3476111888885498, "learning_rate": 0.001, "loss": 3.0012, "step": 1661 }, { "epoch": 0.07031051696421017, "grad_norm": 5.591330051422119, "learning_rate": 0.001, "loss": 2.1398, "step": 1662 }, { "epoch": 0.07035282172772654, "grad_norm": 0.6403492093086243, "learning_rate": 0.001, "loss": 3.7632, "step": 1663 }, { "epoch": 0.07039512649124291, "grad_norm": 0.4311082661151886, "learning_rate": 0.001, "loss": 2.0528, "step": 1664 }, { "epoch": 0.07043743125475929, "grad_norm": 0.9901355504989624, "learning_rate": 0.001, "loss": 2.5533, "step": 1665 }, { "epoch": 0.07047973601827566, "grad_norm": 2.0582873821258545, "learning_rate": 0.001, "loss": 3.0359, "step": 1666 }, { "epoch": 0.07052204078179203, "grad_norm": 0.428562194108963, "learning_rate": 0.001, "loss": 3.0728, "step": 1667 }, { "epoch": 0.0705643455453084, "grad_norm": 0.2768668532371521, "learning_rate": 0.001, "loss": 1.7863, "step": 1668 }, { "epoch": 0.07060665030882478, "grad_norm": 0.593871533870697, "learning_rate": 0.001, "loss": 2.8978, "step": 1669 }, { "epoch": 0.07064895507234115, "grad_norm": 0.41889962553977966, "learning_rate": 0.001, "loss": 2.3901, "step": 1670 }, { "epoch": 0.07069125983585751, "grad_norm": 0.8739703297615051, "learning_rate": 0.001, "loss": 2.9089, "step": 1671 }, { "epoch": 0.07073356459937388, "grad_norm": 0.3771756589412689, "learning_rate": 0.001, "loss": 2.338, "step": 1672 }, { "epoch": 0.07077586936289026, "grad_norm": 0.29926592111587524, "learning_rate": 0.001, "loss": 3.1151, "step": 1673 }, { "epoch": 0.07081817412640663, "grad_norm": 0.43917933106422424, "learning_rate": 0.001, "loss": 2.3074, "step": 1674 }, { "epoch": 0.070860478889923, "grad_norm": 4.316728591918945, "learning_rate": 0.001, "loss": 2.0206, "step": 1675 }, { "epoch": 0.07090278365343938, "grad_norm": 0.5438099503517151, "learning_rate": 0.001, "loss": 2.57, "step": 1676 }, { "epoch": 0.07094508841695575, "grad_norm": 0.4939629137516022, "learning_rate": 0.001, "loss": 2.4069, "step": 1677 }, { "epoch": 0.07098739318047212, "grad_norm": 0.4457460343837738, "learning_rate": 0.001, "loss": 2.7809, "step": 1678 }, { "epoch": 0.07102969794398849, "grad_norm": 0.45648688077926636, "learning_rate": 0.001, "loss": 2.7326, "step": 1679 }, { "epoch": 0.07107200270750487, "grad_norm": 0.3010053038597107, "learning_rate": 0.001, "loss": 2.2902, "step": 1680 }, { "epoch": 0.07111430747102124, "grad_norm": 0.42142006754875183, "learning_rate": 0.001, "loss": 1.9107, "step": 1681 }, { "epoch": 0.0711566122345376, "grad_norm": 0.3928402364253998, "learning_rate": 0.001, "loss": 3.0004, "step": 1682 }, { "epoch": 0.07119891699805399, "grad_norm": 0.28071898221969604, "learning_rate": 0.001, "loss": 2.289, "step": 1683 }, { "epoch": 0.07124122176157036, "grad_norm": 0.3247803747653961, "learning_rate": 0.001, "loss": 2.2083, "step": 1684 }, { "epoch": 0.07128352652508672, "grad_norm": 0.5170036554336548, "learning_rate": 0.001, "loss": 2.1761, "step": 1685 }, { "epoch": 0.07132583128860309, "grad_norm": 1.5233675241470337, "learning_rate": 0.001, "loss": 2.3741, "step": 1686 }, { "epoch": 0.07136813605211947, "grad_norm": 0.33674490451812744, "learning_rate": 0.001, "loss": 2.2987, "step": 1687 }, { "epoch": 0.07141044081563584, "grad_norm": 3.1619656085968018, "learning_rate": 0.001, "loss": 1.9981, "step": 1688 }, { "epoch": 0.07145274557915221, "grad_norm": 0.3328041732311249, "learning_rate": 0.001, "loss": 3.4872, "step": 1689 }, { "epoch": 0.07149505034266858, "grad_norm": 0.3385929465293884, "learning_rate": 0.001, "loss": 2.4917, "step": 1690 }, { "epoch": 0.07153735510618496, "grad_norm": 0.3292979300022125, "learning_rate": 0.001, "loss": 1.7646, "step": 1691 }, { "epoch": 0.07157965986970133, "grad_norm": 9.468561172485352, "learning_rate": 0.001, "loss": 3.2305, "step": 1692 }, { "epoch": 0.0716219646332177, "grad_norm": 0.339837908744812, "learning_rate": 0.001, "loss": 4.1912, "step": 1693 }, { "epoch": 0.07166426939673408, "grad_norm": 0.7992456555366516, "learning_rate": 0.001, "loss": 1.7365, "step": 1694 }, { "epoch": 0.07170657416025045, "grad_norm": 0.3533322513103485, "learning_rate": 0.001, "loss": 1.6018, "step": 1695 }, { "epoch": 0.07174887892376682, "grad_norm": 0.4883526563644409, "learning_rate": 0.001, "loss": 2.5816, "step": 1696 }, { "epoch": 0.07179118368728318, "grad_norm": 0.3907789885997772, "learning_rate": 0.001, "loss": 1.906, "step": 1697 }, { "epoch": 0.07183348845079957, "grad_norm": 0.5203762054443359, "learning_rate": 0.001, "loss": 2.2438, "step": 1698 }, { "epoch": 0.07187579321431593, "grad_norm": 1.2718244791030884, "learning_rate": 0.001, "loss": 2.1247, "step": 1699 }, { "epoch": 0.0719180979778323, "grad_norm": 0.3142852485179901, "learning_rate": 0.001, "loss": 2.0239, "step": 1700 }, { "epoch": 0.07196040274134867, "grad_norm": 0.4279821515083313, "learning_rate": 0.001, "loss": 2.2428, "step": 1701 }, { "epoch": 0.07200270750486505, "grad_norm": 0.6683199405670166, "learning_rate": 0.001, "loss": 2.4512, "step": 1702 }, { "epoch": 0.07204501226838142, "grad_norm": 2.1548545360565186, "learning_rate": 0.001, "loss": 2.6076, "step": 1703 }, { "epoch": 0.07208731703189779, "grad_norm": 0.5022167563438416, "learning_rate": 0.001, "loss": 2.6147, "step": 1704 }, { "epoch": 0.07212962179541417, "grad_norm": 4.610467910766602, "learning_rate": 0.001, "loss": 2.8257, "step": 1705 }, { "epoch": 0.07217192655893054, "grad_norm": 0.35856351256370544, "learning_rate": 0.001, "loss": 2.0853, "step": 1706 }, { "epoch": 0.0722142313224469, "grad_norm": 0.45485618710517883, "learning_rate": 0.001, "loss": 2.5217, "step": 1707 }, { "epoch": 0.07225653608596327, "grad_norm": 0.3743135333061218, "learning_rate": 0.001, "loss": 2.2795, "step": 1708 }, { "epoch": 0.07229884084947966, "grad_norm": 0.286580890417099, "learning_rate": 0.001, "loss": 2.0651, "step": 1709 }, { "epoch": 0.07234114561299602, "grad_norm": 0.574466347694397, "learning_rate": 0.001, "loss": 2.1261, "step": 1710 }, { "epoch": 0.07238345037651239, "grad_norm": 0.35179415345191956, "learning_rate": 0.001, "loss": 2.6743, "step": 1711 }, { "epoch": 0.07242575514002876, "grad_norm": 0.3282983601093292, "learning_rate": 0.001, "loss": 2.7438, "step": 1712 }, { "epoch": 0.07246805990354514, "grad_norm": 0.41283783316612244, "learning_rate": 0.001, "loss": 2.4672, "step": 1713 }, { "epoch": 0.07251036466706151, "grad_norm": 0.4331008791923523, "learning_rate": 0.001, "loss": 2.4509, "step": 1714 }, { "epoch": 0.07255266943057788, "grad_norm": 0.4134253263473511, "learning_rate": 0.001, "loss": 2.7329, "step": 1715 }, { "epoch": 0.07259497419409426, "grad_norm": 0.3073793649673462, "learning_rate": 0.001, "loss": 2.2086, "step": 1716 }, { "epoch": 0.07263727895761063, "grad_norm": 0.3595843017101288, "learning_rate": 0.001, "loss": 2.2972, "step": 1717 }, { "epoch": 0.072679583721127, "grad_norm": 1.8754022121429443, "learning_rate": 0.001, "loss": 2.6782, "step": 1718 }, { "epoch": 0.07272188848464337, "grad_norm": 0.2694728374481201, "learning_rate": 0.001, "loss": 2.5075, "step": 1719 }, { "epoch": 0.07276419324815975, "grad_norm": 0.266966849565506, "learning_rate": 0.001, "loss": 2.7838, "step": 1720 }, { "epoch": 0.07280649801167612, "grad_norm": 0.6708036661148071, "learning_rate": 0.001, "loss": 2.5704, "step": 1721 }, { "epoch": 0.07284880277519248, "grad_norm": 0.32971417903900146, "learning_rate": 0.001, "loss": 2.1911, "step": 1722 }, { "epoch": 0.07289110753870885, "grad_norm": 0.5026300549507141, "learning_rate": 0.001, "loss": 2.1365, "step": 1723 }, { "epoch": 0.07293341230222523, "grad_norm": 0.6025309562683105, "learning_rate": 0.001, "loss": 2.7638, "step": 1724 }, { "epoch": 0.0729757170657416, "grad_norm": 0.35378944873809814, "learning_rate": 0.001, "loss": 2.5012, "step": 1725 }, { "epoch": 0.07301802182925797, "grad_norm": 0.2955608665943146, "learning_rate": 0.001, "loss": 3.3105, "step": 1726 }, { "epoch": 0.07306032659277435, "grad_norm": 1.2950598001480103, "learning_rate": 0.001, "loss": 1.4412, "step": 1727 }, { "epoch": 0.07310263135629072, "grad_norm": 0.34854191541671753, "learning_rate": 0.001, "loss": 2.1458, "step": 1728 }, { "epoch": 0.07314493611980709, "grad_norm": 0.2604203224182129, "learning_rate": 0.001, "loss": 2.5409, "step": 1729 }, { "epoch": 0.07318724088332346, "grad_norm": 0.28911593556404114, "learning_rate": 0.001, "loss": 2.3813, "step": 1730 }, { "epoch": 0.07322954564683984, "grad_norm": 0.4041121304035187, "learning_rate": 0.001, "loss": 2.1355, "step": 1731 }, { "epoch": 0.0732718504103562, "grad_norm": 3.5361766815185547, "learning_rate": 0.001, "loss": 2.6676, "step": 1732 }, { "epoch": 0.07331415517387257, "grad_norm": 0.38924169540405273, "learning_rate": 0.001, "loss": 2.1344, "step": 1733 }, { "epoch": 0.07335645993738894, "grad_norm": 0.36835405230522156, "learning_rate": 0.001, "loss": 2.3473, "step": 1734 }, { "epoch": 0.07339876470090533, "grad_norm": 2.3047337532043457, "learning_rate": 0.001, "loss": 2.1468, "step": 1735 }, { "epoch": 0.0734410694644217, "grad_norm": 0.47445395588874817, "learning_rate": 0.001, "loss": 2.6338, "step": 1736 }, { "epoch": 0.07348337422793806, "grad_norm": 0.5361562967300415, "learning_rate": 0.001, "loss": 2.8039, "step": 1737 }, { "epoch": 0.07352567899145444, "grad_norm": 0.362362802028656, "learning_rate": 0.001, "loss": 1.9508, "step": 1738 }, { "epoch": 0.07356798375497081, "grad_norm": 1.4122487306594849, "learning_rate": 0.001, "loss": 2.8017, "step": 1739 }, { "epoch": 0.07361028851848718, "grad_norm": 0.5565959215164185, "learning_rate": 0.001, "loss": 2.2053, "step": 1740 }, { "epoch": 0.07365259328200355, "grad_norm": 0.3117307722568512, "learning_rate": 0.001, "loss": 2.1428, "step": 1741 }, { "epoch": 0.07369489804551993, "grad_norm": 0.4470238983631134, "learning_rate": 0.001, "loss": 3.3104, "step": 1742 }, { "epoch": 0.0737372028090363, "grad_norm": 0.31840646266937256, "learning_rate": 0.001, "loss": 3.3071, "step": 1743 }, { "epoch": 0.07377950757255267, "grad_norm": 0.29003921151161194, "learning_rate": 0.001, "loss": 1.8832, "step": 1744 }, { "epoch": 0.07382181233606905, "grad_norm": 0.2651999890804291, "learning_rate": 0.001, "loss": 2.1535, "step": 1745 }, { "epoch": 0.07386411709958542, "grad_norm": 0.34940871596336365, "learning_rate": 0.001, "loss": 2.5327, "step": 1746 }, { "epoch": 0.07390642186310178, "grad_norm": 0.6055050492286682, "learning_rate": 0.001, "loss": 2.8376, "step": 1747 }, { "epoch": 0.07394872662661815, "grad_norm": 0.372227281332016, "learning_rate": 0.001, "loss": 2.1704, "step": 1748 }, { "epoch": 0.07399103139013453, "grad_norm": 0.31508174538612366, "learning_rate": 0.001, "loss": 2.3462, "step": 1749 }, { "epoch": 0.0740333361536509, "grad_norm": 0.40334755182266235, "learning_rate": 0.001, "loss": 2.2007, "step": 1750 }, { "epoch": 0.07407564091716727, "grad_norm": 0.23412242531776428, "learning_rate": 0.001, "loss": 1.9882, "step": 1751 }, { "epoch": 0.07411794568068364, "grad_norm": 0.47561314702033997, "learning_rate": 0.001, "loss": 2.3195, "step": 1752 }, { "epoch": 0.07416025044420002, "grad_norm": 0.3126150965690613, "learning_rate": 0.001, "loss": 3.2531, "step": 1753 }, { "epoch": 0.07420255520771639, "grad_norm": 0.6425087451934814, "learning_rate": 0.001, "loss": 1.549, "step": 1754 }, { "epoch": 0.07424485997123276, "grad_norm": 0.3294941484928131, "learning_rate": 0.001, "loss": 3.2919, "step": 1755 }, { "epoch": 0.07428716473474914, "grad_norm": 0.32722482085227966, "learning_rate": 0.001, "loss": 2.3305, "step": 1756 }, { "epoch": 0.07432946949826551, "grad_norm": 0.30937889218330383, "learning_rate": 0.001, "loss": 1.9746, "step": 1757 }, { "epoch": 0.07437177426178188, "grad_norm": 1.4851760864257812, "learning_rate": 0.001, "loss": 2.4941, "step": 1758 }, { "epoch": 0.07441407902529824, "grad_norm": 0.2669662833213806, "learning_rate": 0.001, "loss": 2.0385, "step": 1759 }, { "epoch": 0.07445638378881463, "grad_norm": 0.44635874032974243, "learning_rate": 0.001, "loss": 2.5292, "step": 1760 }, { "epoch": 0.074498688552331, "grad_norm": 0.28977322578430176, "learning_rate": 0.001, "loss": 2.7165, "step": 1761 }, { "epoch": 0.07454099331584736, "grad_norm": 0.25588658452033997, "learning_rate": 0.001, "loss": 2.1741, "step": 1762 }, { "epoch": 0.07458329807936373, "grad_norm": 0.28512755036354065, "learning_rate": 0.001, "loss": 2.8152, "step": 1763 }, { "epoch": 0.07462560284288011, "grad_norm": 0.8499886393547058, "learning_rate": 0.001, "loss": 2.7018, "step": 1764 }, { "epoch": 0.07466790760639648, "grad_norm": 0.5321717262268066, "learning_rate": 0.001, "loss": 2.3566, "step": 1765 }, { "epoch": 0.07471021236991285, "grad_norm": 0.3150945007801056, "learning_rate": 0.001, "loss": 2.2778, "step": 1766 }, { "epoch": 0.07475251713342923, "grad_norm": 1.8098750114440918, "learning_rate": 0.001, "loss": 2.2745, "step": 1767 }, { "epoch": 0.0747948218969456, "grad_norm": 0.2686639726161957, "learning_rate": 0.001, "loss": 3.0812, "step": 1768 }, { "epoch": 0.07483712666046197, "grad_norm": 0.3387921154499054, "learning_rate": 0.001, "loss": 2.8251, "step": 1769 }, { "epoch": 0.07487943142397833, "grad_norm": 0.5623756647109985, "learning_rate": 0.001, "loss": 2.115, "step": 1770 }, { "epoch": 0.07492173618749472, "grad_norm": 0.2617107927799225, "learning_rate": 0.001, "loss": 2.468, "step": 1771 }, { "epoch": 0.07496404095101109, "grad_norm": 0.6408830285072327, "learning_rate": 0.001, "loss": 2.4926, "step": 1772 }, { "epoch": 0.07500634571452745, "grad_norm": 0.38034066557884216, "learning_rate": 0.001, "loss": 2.867, "step": 1773 }, { "epoch": 0.07504865047804382, "grad_norm": 0.39489755034446716, "learning_rate": 0.001, "loss": 3.4283, "step": 1774 }, { "epoch": 0.0750909552415602, "grad_norm": 2.249427318572998, "learning_rate": 0.001, "loss": 2.2082, "step": 1775 }, { "epoch": 0.07513326000507657, "grad_norm": 4.038248062133789, "learning_rate": 0.001, "loss": 3.7159, "step": 1776 }, { "epoch": 0.07517556476859294, "grad_norm": 0.28461596369743347, "learning_rate": 0.001, "loss": 2.4496, "step": 1777 }, { "epoch": 0.07521786953210932, "grad_norm": 0.3818615674972534, "learning_rate": 0.001, "loss": 3.797, "step": 1778 }, { "epoch": 0.07526017429562569, "grad_norm": 0.3321216404438019, "learning_rate": 0.001, "loss": 2.7859, "step": 1779 }, { "epoch": 0.07530247905914206, "grad_norm": 0.394345223903656, "learning_rate": 0.001, "loss": 2.7472, "step": 1780 }, { "epoch": 0.07534478382265843, "grad_norm": 0.32292380928993225, "learning_rate": 0.001, "loss": 2.2914, "step": 1781 }, { "epoch": 0.07538708858617481, "grad_norm": 0.24881044030189514, "learning_rate": 0.001, "loss": 1.5237, "step": 1782 }, { "epoch": 0.07542939334969118, "grad_norm": 0.2507427930831909, "learning_rate": 0.001, "loss": 2.339, "step": 1783 }, { "epoch": 0.07547169811320754, "grad_norm": 0.340909868478775, "learning_rate": 0.001, "loss": 3.373, "step": 1784 }, { "epoch": 0.07551400287672391, "grad_norm": 0.32472652196884155, "learning_rate": 0.001, "loss": 2.0487, "step": 1785 }, { "epoch": 0.0755563076402403, "grad_norm": 0.3497520983219147, "learning_rate": 0.001, "loss": 3.3913, "step": 1786 }, { "epoch": 0.07559861240375666, "grad_norm": 1.4078129529953003, "learning_rate": 0.001, "loss": 2.1573, "step": 1787 }, { "epoch": 0.07564091716727303, "grad_norm": 0.5234339237213135, "learning_rate": 0.001, "loss": 3.5763, "step": 1788 }, { "epoch": 0.07568322193078941, "grad_norm": 0.32184237241744995, "learning_rate": 0.001, "loss": 2.3567, "step": 1789 }, { "epoch": 0.07572552669430578, "grad_norm": 0.32312852144241333, "learning_rate": 0.001, "loss": 1.9989, "step": 1790 }, { "epoch": 0.07576783145782215, "grad_norm": 0.47937512397766113, "learning_rate": 0.001, "loss": 3.2448, "step": 1791 }, { "epoch": 0.07581013622133852, "grad_norm": 0.6073363423347473, "learning_rate": 0.001, "loss": 2.1826, "step": 1792 }, { "epoch": 0.0758524409848549, "grad_norm": 0.24768002331256866, "learning_rate": 0.001, "loss": 2.2067, "step": 1793 }, { "epoch": 0.07589474574837127, "grad_norm": 0.7116523385047913, "learning_rate": 0.001, "loss": 2.6321, "step": 1794 }, { "epoch": 0.07593705051188764, "grad_norm": 0.3589756488800049, "learning_rate": 0.001, "loss": 3.1839, "step": 1795 }, { "epoch": 0.075979355275404, "grad_norm": 0.3133421540260315, "learning_rate": 0.001, "loss": 2.097, "step": 1796 }, { "epoch": 0.07602166003892039, "grad_norm": 0.28984275460243225, "learning_rate": 0.001, "loss": 3.0274, "step": 1797 }, { "epoch": 0.07606396480243675, "grad_norm": 0.3593312203884125, "learning_rate": 0.001, "loss": 2.0219, "step": 1798 }, { "epoch": 0.07610626956595312, "grad_norm": 0.4126046299934387, "learning_rate": 0.001, "loss": 1.9161, "step": 1799 }, { "epoch": 0.0761485743294695, "grad_norm": 0.7341791987419128, "learning_rate": 0.001, "loss": 2.4186, "step": 1800 }, { "epoch": 0.07619087909298587, "grad_norm": 0.27145180106163025, "learning_rate": 0.001, "loss": 1.9538, "step": 1801 }, { "epoch": 0.07623318385650224, "grad_norm": 0.5113809108734131, "learning_rate": 0.001, "loss": 2.2766, "step": 1802 }, { "epoch": 0.07627548862001861, "grad_norm": 0.27664855122566223, "learning_rate": 0.001, "loss": 2.3126, "step": 1803 }, { "epoch": 0.07631779338353499, "grad_norm": 0.23161768913269043, "learning_rate": 0.001, "loss": 1.7487, "step": 1804 }, { "epoch": 0.07636009814705136, "grad_norm": 1.3314400911331177, "learning_rate": 0.001, "loss": 2.0632, "step": 1805 }, { "epoch": 0.07640240291056773, "grad_norm": 0.3629261553287506, "learning_rate": 0.001, "loss": 2.7344, "step": 1806 }, { "epoch": 0.07644470767408411, "grad_norm": 0.2677980363368988, "learning_rate": 0.001, "loss": 1.9242, "step": 1807 }, { "epoch": 0.07648701243760048, "grad_norm": 0.26410120725631714, "learning_rate": 0.001, "loss": 2.2882, "step": 1808 }, { "epoch": 0.07652931720111684, "grad_norm": 1.109229326248169, "learning_rate": 0.001, "loss": 2.2232, "step": 1809 }, { "epoch": 0.07657162196463321, "grad_norm": 0.533427357673645, "learning_rate": 0.001, "loss": 2.5592, "step": 1810 }, { "epoch": 0.0766139267281496, "grad_norm": 0.32029929757118225, "learning_rate": 0.001, "loss": 2.2293, "step": 1811 }, { "epoch": 0.07665623149166596, "grad_norm": 0.3942636251449585, "learning_rate": 0.001, "loss": 2.5888, "step": 1812 }, { "epoch": 0.07669853625518233, "grad_norm": 0.28876644372940063, "learning_rate": 0.001, "loss": 2.1704, "step": 1813 }, { "epoch": 0.0767408410186987, "grad_norm": 0.30077075958251953, "learning_rate": 0.001, "loss": 3.1774, "step": 1814 }, { "epoch": 0.07678314578221508, "grad_norm": 0.6673346161842346, "learning_rate": 0.001, "loss": 2.055, "step": 1815 }, { "epoch": 0.07682545054573145, "grad_norm": 0.2500164806842804, "learning_rate": 0.001, "loss": 2.6555, "step": 1816 }, { "epoch": 0.07686775530924782, "grad_norm": 0.2818383574485779, "learning_rate": 0.001, "loss": 2.6336, "step": 1817 }, { "epoch": 0.0769100600727642, "grad_norm": 0.4379955232143402, "learning_rate": 0.001, "loss": 1.9602, "step": 1818 }, { "epoch": 0.07695236483628057, "grad_norm": 0.27242425084114075, "learning_rate": 0.001, "loss": 2.0259, "step": 1819 }, { "epoch": 0.07699466959979694, "grad_norm": 0.2450658231973648, "learning_rate": 0.001, "loss": 2.2079, "step": 1820 }, { "epoch": 0.0770369743633133, "grad_norm": 0.3834247291088104, "learning_rate": 0.001, "loss": 1.8011, "step": 1821 }, { "epoch": 0.07707927912682969, "grad_norm": 0.3078935742378235, "learning_rate": 0.001, "loss": 2.2373, "step": 1822 }, { "epoch": 0.07712158389034605, "grad_norm": 0.31686580181121826, "learning_rate": 0.001, "loss": 2.4674, "step": 1823 }, { "epoch": 0.07716388865386242, "grad_norm": 0.28138643503189087, "learning_rate": 0.001, "loss": 1.9822, "step": 1824 }, { "epoch": 0.07720619341737879, "grad_norm": 0.6008328795433044, "learning_rate": 0.001, "loss": 2.6718, "step": 1825 }, { "epoch": 0.07724849818089517, "grad_norm": 0.3610805869102478, "learning_rate": 0.001, "loss": 3.121, "step": 1826 }, { "epoch": 0.07729080294441154, "grad_norm": 0.44791126251220703, "learning_rate": 0.001, "loss": 2.4597, "step": 1827 }, { "epoch": 0.07733310770792791, "grad_norm": 0.6357278823852539, "learning_rate": 0.001, "loss": 2.6391, "step": 1828 }, { "epoch": 0.07737541247144429, "grad_norm": 0.47252053022384644, "learning_rate": 0.001, "loss": 2.4176, "step": 1829 }, { "epoch": 0.07741771723496066, "grad_norm": 0.3132396340370178, "learning_rate": 0.001, "loss": 2.105, "step": 1830 }, { "epoch": 0.07746002199847703, "grad_norm": 0.2682265341281891, "learning_rate": 0.001, "loss": 2.9813, "step": 1831 }, { "epoch": 0.0775023267619934, "grad_norm": 0.44940343499183655, "learning_rate": 0.001, "loss": 1.8856, "step": 1832 }, { "epoch": 0.07754463152550978, "grad_norm": 0.2868255376815796, "learning_rate": 0.001, "loss": 2.9346, "step": 1833 }, { "epoch": 0.07758693628902615, "grad_norm": 0.3130701780319214, "learning_rate": 0.001, "loss": 2.0693, "step": 1834 }, { "epoch": 0.07762924105254251, "grad_norm": 0.3155618906021118, "learning_rate": 0.001, "loss": 2.8602, "step": 1835 }, { "epoch": 0.07767154581605888, "grad_norm": 0.3293514847755432, "learning_rate": 0.001, "loss": 2.2591, "step": 1836 }, { "epoch": 0.07771385057957526, "grad_norm": 0.2850898504257202, "learning_rate": 0.001, "loss": 2.0444, "step": 1837 }, { "epoch": 0.07775615534309163, "grad_norm": 0.8841787576675415, "learning_rate": 0.001, "loss": 2.7438, "step": 1838 }, { "epoch": 0.077798460106608, "grad_norm": 0.4035090506076813, "learning_rate": 0.001, "loss": 2.5178, "step": 1839 }, { "epoch": 0.07784076487012438, "grad_norm": 0.2477097362279892, "learning_rate": 0.001, "loss": 2.2061, "step": 1840 }, { "epoch": 0.07788306963364075, "grad_norm": 0.32553189992904663, "learning_rate": 0.001, "loss": 2.4148, "step": 1841 }, { "epoch": 0.07792537439715712, "grad_norm": 0.3204468786716461, "learning_rate": 0.001, "loss": 3.0719, "step": 1842 }, { "epoch": 0.07796767916067349, "grad_norm": 0.30373045802116394, "learning_rate": 0.001, "loss": 2.8824, "step": 1843 }, { "epoch": 0.07800998392418987, "grad_norm": 2.558303117752075, "learning_rate": 0.001, "loss": 3.2411, "step": 1844 }, { "epoch": 0.07805228868770624, "grad_norm": 0.3434048295021057, "learning_rate": 0.001, "loss": 2.8942, "step": 1845 }, { "epoch": 0.0780945934512226, "grad_norm": 0.30965694785118103, "learning_rate": 0.001, "loss": 3.0761, "step": 1846 }, { "epoch": 0.07813689821473897, "grad_norm": 0.3516073226928711, "learning_rate": 0.001, "loss": 2.8424, "step": 1847 }, { "epoch": 0.07817920297825535, "grad_norm": 0.3779698312282562, "learning_rate": 0.001, "loss": 2.8813, "step": 1848 }, { "epoch": 0.07822150774177172, "grad_norm": 0.30136117339134216, "learning_rate": 0.001, "loss": 1.8542, "step": 1849 }, { "epoch": 0.07826381250528809, "grad_norm": 0.25570622086524963, "learning_rate": 0.001, "loss": 2.0118, "step": 1850 }, { "epoch": 0.07830611726880447, "grad_norm": 0.45675748586654663, "learning_rate": 0.001, "loss": 2.1994, "step": 1851 }, { "epoch": 0.07834842203232084, "grad_norm": 0.5405208468437195, "learning_rate": 0.001, "loss": 2.8306, "step": 1852 }, { "epoch": 0.07839072679583721, "grad_norm": 0.28319215774536133, "learning_rate": 0.001, "loss": 2.2087, "step": 1853 }, { "epoch": 0.07843303155935358, "grad_norm": 0.5920930504798889, "learning_rate": 0.001, "loss": 2.1219, "step": 1854 }, { "epoch": 0.07847533632286996, "grad_norm": 0.6683090329170227, "learning_rate": 0.001, "loss": 2.048, "step": 1855 }, { "epoch": 0.07851764108638633, "grad_norm": 0.28934597969055176, "learning_rate": 0.001, "loss": 2.4109, "step": 1856 }, { "epoch": 0.0785599458499027, "grad_norm": 1.1490424871444702, "learning_rate": 0.001, "loss": 2.8251, "step": 1857 }, { "epoch": 0.07860225061341906, "grad_norm": 0.5859717130661011, "learning_rate": 0.001, "loss": 2.2749, "step": 1858 }, { "epoch": 0.07864455537693545, "grad_norm": 0.614921510219574, "learning_rate": 0.001, "loss": 2.7844, "step": 1859 }, { "epoch": 0.07868686014045181, "grad_norm": 0.3503010869026184, "learning_rate": 0.001, "loss": 2.4979, "step": 1860 }, { "epoch": 0.07872916490396818, "grad_norm": 0.42067447304725647, "learning_rate": 0.001, "loss": 2.9724, "step": 1861 }, { "epoch": 0.07877146966748456, "grad_norm": 0.45878085494041443, "learning_rate": 0.001, "loss": 1.98, "step": 1862 }, { "epoch": 0.07881377443100093, "grad_norm": 1.4618866443634033, "learning_rate": 0.001, "loss": 1.9614, "step": 1863 }, { "epoch": 0.0788560791945173, "grad_norm": 0.33975115418434143, "learning_rate": 0.001, "loss": 2.7163, "step": 1864 }, { "epoch": 0.07889838395803367, "grad_norm": 27.44309425354004, "learning_rate": 0.001, "loss": 3.4932, "step": 1865 }, { "epoch": 0.07894068872155005, "grad_norm": 0.4054664373397827, "learning_rate": 0.001, "loss": 2.5312, "step": 1866 }, { "epoch": 0.07898299348506642, "grad_norm": 0.3030809462070465, "learning_rate": 0.001, "loss": 2.5558, "step": 1867 }, { "epoch": 0.07902529824858279, "grad_norm": 0.40705251693725586, "learning_rate": 0.001, "loss": 3.12, "step": 1868 }, { "epoch": 0.07906760301209917, "grad_norm": 16.77686882019043, "learning_rate": 0.001, "loss": 3.2163, "step": 1869 }, { "epoch": 0.07910990777561554, "grad_norm": 1.4358597993850708, "learning_rate": 0.001, "loss": 3.0532, "step": 1870 }, { "epoch": 0.0791522125391319, "grad_norm": 0.3366057872772217, "learning_rate": 0.001, "loss": 2.595, "step": 1871 }, { "epoch": 0.07919451730264827, "grad_norm": 0.3896106779575348, "learning_rate": 0.001, "loss": 2.9058, "step": 1872 }, { "epoch": 0.07923682206616466, "grad_norm": 0.34666091203689575, "learning_rate": 0.001, "loss": 1.9996, "step": 1873 }, { "epoch": 0.07927912682968102, "grad_norm": 0.34630462527275085, "learning_rate": 0.001, "loss": 2.6526, "step": 1874 }, { "epoch": 0.07932143159319739, "grad_norm": 0.43369296193122864, "learning_rate": 0.001, "loss": 3.2299, "step": 1875 }, { "epoch": 0.07936373635671376, "grad_norm": 0.323746919631958, "learning_rate": 0.001, "loss": 2.6891, "step": 1876 }, { "epoch": 0.07940604112023014, "grad_norm": 3.8398473262786865, "learning_rate": 0.001, "loss": 4.0965, "step": 1877 }, { "epoch": 0.07944834588374651, "grad_norm": 0.5422288775444031, "learning_rate": 0.001, "loss": 2.9482, "step": 1878 }, { "epoch": 0.07949065064726288, "grad_norm": 0.701977014541626, "learning_rate": 0.001, "loss": 2.8954, "step": 1879 }, { "epoch": 0.07953295541077926, "grad_norm": 0.7025458216667175, "learning_rate": 0.001, "loss": 2.7868, "step": 1880 }, { "epoch": 0.07957526017429563, "grad_norm": 0.8017560243606567, "learning_rate": 0.001, "loss": 1.9359, "step": 1881 }, { "epoch": 0.079617564937812, "grad_norm": 0.45160236954689026, "learning_rate": 0.001, "loss": 2.2321, "step": 1882 }, { "epoch": 0.07965986970132836, "grad_norm": 0.4654178023338318, "learning_rate": 0.001, "loss": 3.4246, "step": 1883 }, { "epoch": 0.07970217446484475, "grad_norm": 0.3444591164588928, "learning_rate": 0.001, "loss": 2.8039, "step": 1884 }, { "epoch": 0.07974447922836111, "grad_norm": 1.3339020013809204, "learning_rate": 0.001, "loss": 3.6179, "step": 1885 }, { "epoch": 0.07978678399187748, "grad_norm": 0.23645636439323425, "learning_rate": 0.001, "loss": 1.7768, "step": 1886 }, { "epoch": 0.07982908875539385, "grad_norm": 0.3817785680294037, "learning_rate": 0.001, "loss": 2.7453, "step": 1887 }, { "epoch": 0.07987139351891023, "grad_norm": 0.41465362906455994, "learning_rate": 0.001, "loss": 3.0675, "step": 1888 }, { "epoch": 0.0799136982824266, "grad_norm": 0.37037500739097595, "learning_rate": 0.001, "loss": 2.3173, "step": 1889 }, { "epoch": 0.07995600304594297, "grad_norm": 0.31565260887145996, "learning_rate": 0.001, "loss": 2.4285, "step": 1890 }, { "epoch": 0.07999830780945935, "grad_norm": 6.897204399108887, "learning_rate": 0.001, "loss": 2.5709, "step": 1891 }, { "epoch": 0.08004061257297572, "grad_norm": 1.544342279434204, "learning_rate": 0.001, "loss": 2.1807, "step": 1892 }, { "epoch": 0.08008291733649209, "grad_norm": 1.1234506368637085, "learning_rate": 0.001, "loss": 2.368, "step": 1893 }, { "epoch": 0.08012522210000846, "grad_norm": 0.2884044051170349, "learning_rate": 0.001, "loss": 1.9447, "step": 1894 }, { "epoch": 0.08016752686352484, "grad_norm": 0.38709041476249695, "learning_rate": 0.001, "loss": 2.1735, "step": 1895 }, { "epoch": 0.0802098316270412, "grad_norm": 41.802032470703125, "learning_rate": 0.001, "loss": 1.8604, "step": 1896 }, { "epoch": 0.08025213639055757, "grad_norm": 0.44042840600013733, "learning_rate": 0.001, "loss": 1.6504, "step": 1897 }, { "epoch": 0.08029444115407394, "grad_norm": 1.7440584897994995, "learning_rate": 0.001, "loss": 2.5165, "step": 1898 }, { "epoch": 0.08033674591759032, "grad_norm": 1.517181396484375, "learning_rate": 0.001, "loss": 2.1762, "step": 1899 }, { "epoch": 0.08037905068110669, "grad_norm": 0.5725038051605225, "learning_rate": 0.001, "loss": 2.5455, "step": 1900 }, { "epoch": 0.08042135544462306, "grad_norm": 0.45639267563819885, "learning_rate": 0.001, "loss": 2.379, "step": 1901 }, { "epoch": 0.08046366020813944, "grad_norm": 1.4812275171279907, "learning_rate": 0.001, "loss": 2.4805, "step": 1902 }, { "epoch": 0.08050596497165581, "grad_norm": 0.3487553000450134, "learning_rate": 0.001, "loss": 1.9803, "step": 1903 }, { "epoch": 0.08054826973517218, "grad_norm": 0.36132708191871643, "learning_rate": 0.001, "loss": 2.0808, "step": 1904 }, { "epoch": 0.08059057449868855, "grad_norm": 0.3573780357837677, "learning_rate": 0.001, "loss": 1.9821, "step": 1905 }, { "epoch": 0.08063287926220493, "grad_norm": 2.551344156265259, "learning_rate": 0.001, "loss": 2.4851, "step": 1906 }, { "epoch": 0.0806751840257213, "grad_norm": 1.2544894218444824, "learning_rate": 0.001, "loss": 2.0348, "step": 1907 }, { "epoch": 0.08071748878923767, "grad_norm": 0.3534647524356842, "learning_rate": 0.001, "loss": 4.1648, "step": 1908 }, { "epoch": 0.08075979355275403, "grad_norm": 0.5673227310180664, "learning_rate": 0.001, "loss": 2.953, "step": 1909 }, { "epoch": 0.08080209831627042, "grad_norm": 5.431023120880127, "learning_rate": 0.001, "loss": 2.5128, "step": 1910 }, { "epoch": 0.08084440307978678, "grad_norm": 15.899325370788574, "learning_rate": 0.001, "loss": 2.9833, "step": 1911 }, { "epoch": 0.08088670784330315, "grad_norm": 0.5933341383934021, "learning_rate": 0.001, "loss": 2.0336, "step": 1912 }, { "epoch": 0.08092901260681953, "grad_norm": 0.572313129901886, "learning_rate": 0.001, "loss": 2.8636, "step": 1913 }, { "epoch": 0.0809713173703359, "grad_norm": 2.6062185764312744, "learning_rate": 0.001, "loss": 2.3718, "step": 1914 }, { "epoch": 0.08101362213385227, "grad_norm": 0.769607424736023, "learning_rate": 0.001, "loss": 2.6415, "step": 1915 }, { "epoch": 0.08105592689736864, "grad_norm": 0.4920905828475952, "learning_rate": 0.001, "loss": 2.1552, "step": 1916 }, { "epoch": 0.08109823166088502, "grad_norm": 0.5874459147453308, "learning_rate": 0.001, "loss": 2.376, "step": 1917 }, { "epoch": 0.08114053642440139, "grad_norm": 0.48925819993019104, "learning_rate": 0.001, "loss": 1.7734, "step": 1918 }, { "epoch": 0.08118284118791776, "grad_norm": 0.7119834423065186, "learning_rate": 0.001, "loss": 2.6034, "step": 1919 }, { "epoch": 0.08122514595143414, "grad_norm": 3.461428165435791, "learning_rate": 0.001, "loss": 2.6797, "step": 1920 }, { "epoch": 0.0812674507149505, "grad_norm": 0.7845181822776794, "learning_rate": 0.001, "loss": 4.2376, "step": 1921 }, { "epoch": 0.08130975547846687, "grad_norm": 0.5165002942085266, "learning_rate": 0.001, "loss": 3.3205, "step": 1922 }, { "epoch": 0.08135206024198324, "grad_norm": 1.0320379734039307, "learning_rate": 0.001, "loss": 2.7671, "step": 1923 }, { "epoch": 0.08139436500549962, "grad_norm": 7.306519031524658, "learning_rate": 0.001, "loss": 3.0783, "step": 1924 }, { "epoch": 0.08143666976901599, "grad_norm": 0.51955646276474, "learning_rate": 0.001, "loss": 2.2582, "step": 1925 }, { "epoch": 0.08147897453253236, "grad_norm": 0.405947744846344, "learning_rate": 0.001, "loss": 3.2719, "step": 1926 }, { "epoch": 0.08152127929604873, "grad_norm": 2.2772488594055176, "learning_rate": 0.001, "loss": 2.8051, "step": 1927 }, { "epoch": 0.08156358405956511, "grad_norm": 0.4286925494670868, "learning_rate": 0.001, "loss": 2.4886, "step": 1928 }, { "epoch": 0.08160588882308148, "grad_norm": 0.3987908661365509, "learning_rate": 0.001, "loss": 2.1242, "step": 1929 }, { "epoch": 0.08164819358659785, "grad_norm": 0.42939138412475586, "learning_rate": 0.001, "loss": 3.0869, "step": 1930 }, { "epoch": 0.08169049835011423, "grad_norm": 0.7904039025306702, "learning_rate": 0.001, "loss": 2.6297, "step": 1931 }, { "epoch": 0.0817328031136306, "grad_norm": 0.2795187532901764, "learning_rate": 0.001, "loss": 1.8094, "step": 1932 }, { "epoch": 0.08177510787714697, "grad_norm": 0.7740998268127441, "learning_rate": 0.001, "loss": 2.6684, "step": 1933 }, { "epoch": 0.08181741264066333, "grad_norm": 0.5167595148086548, "learning_rate": 0.001, "loss": 2.8719, "step": 1934 }, { "epoch": 0.08185971740417972, "grad_norm": 0.354657381772995, "learning_rate": 0.001, "loss": 2.6624, "step": 1935 }, { "epoch": 0.08190202216769608, "grad_norm": 0.35921430587768555, "learning_rate": 0.001, "loss": 2.4581, "step": 1936 }, { "epoch": 0.08194432693121245, "grad_norm": 0.6091867089271545, "learning_rate": 0.001, "loss": 2.978, "step": 1937 }, { "epoch": 0.08198663169472882, "grad_norm": 0.3409866988658905, "learning_rate": 0.001, "loss": 2.5926, "step": 1938 }, { "epoch": 0.0820289364582452, "grad_norm": 0.5260420441627502, "learning_rate": 0.001, "loss": 3.2647, "step": 1939 }, { "epoch": 0.08207124122176157, "grad_norm": 0.2958446741104126, "learning_rate": 0.001, "loss": 2.509, "step": 1940 }, { "epoch": 0.08211354598527794, "grad_norm": 0.2943398058414459, "learning_rate": 0.001, "loss": 2.0493, "step": 1941 }, { "epoch": 0.08215585074879432, "grad_norm": 0.3319268226623535, "learning_rate": 0.001, "loss": 2.3304, "step": 1942 }, { "epoch": 0.08219815551231069, "grad_norm": 0.5134836435317993, "learning_rate": 0.001, "loss": 2.693, "step": 1943 }, { "epoch": 0.08224046027582706, "grad_norm": 0.3512554168701172, "learning_rate": 0.001, "loss": 3.3824, "step": 1944 }, { "epoch": 0.08228276503934343, "grad_norm": 1.8263537883758545, "learning_rate": 0.001, "loss": 2.3778, "step": 1945 }, { "epoch": 0.08232506980285981, "grad_norm": 0.3825913667678833, "learning_rate": 0.001, "loss": 2.2277, "step": 1946 }, { "epoch": 0.08236737456637618, "grad_norm": 0.3037860691547394, "learning_rate": 0.001, "loss": 2.3199, "step": 1947 }, { "epoch": 0.08240967932989254, "grad_norm": 0.4019972085952759, "learning_rate": 0.001, "loss": 2.3972, "step": 1948 }, { "epoch": 0.08245198409340891, "grad_norm": 1.4788627624511719, "learning_rate": 0.001, "loss": 2.4204, "step": 1949 }, { "epoch": 0.0824942888569253, "grad_norm": 0.5321111083030701, "learning_rate": 0.001, "loss": 2.8231, "step": 1950 }, { "epoch": 0.08253659362044166, "grad_norm": 0.5626177191734314, "learning_rate": 0.001, "loss": 3.8786, "step": 1951 }, { "epoch": 0.08257889838395803, "grad_norm": 0.5114405751228333, "learning_rate": 0.001, "loss": 2.941, "step": 1952 }, { "epoch": 0.08262120314747441, "grad_norm": 0.48595815896987915, "learning_rate": 0.001, "loss": 2.6246, "step": 1953 }, { "epoch": 0.08266350791099078, "grad_norm": 0.28523850440979004, "learning_rate": 0.001, "loss": 2.7918, "step": 1954 }, { "epoch": 0.08270581267450715, "grad_norm": 0.7027163505554199, "learning_rate": 0.001, "loss": 2.7862, "step": 1955 }, { "epoch": 0.08274811743802352, "grad_norm": 0.2511918544769287, "learning_rate": 0.001, "loss": 1.811, "step": 1956 }, { "epoch": 0.0827904222015399, "grad_norm": 0.2575523555278778, "learning_rate": 0.001, "loss": 3.1751, "step": 1957 }, { "epoch": 0.08283272696505627, "grad_norm": 0.3014431297779083, "learning_rate": 0.001, "loss": 2.4499, "step": 1958 }, { "epoch": 0.08287503172857263, "grad_norm": 0.24527455866336823, "learning_rate": 0.001, "loss": 2.0457, "step": 1959 }, { "epoch": 0.082917336492089, "grad_norm": 0.312063992023468, "learning_rate": 0.001, "loss": 2.063, "step": 1960 }, { "epoch": 0.08295964125560538, "grad_norm": 1.2686574459075928, "learning_rate": 0.001, "loss": 3.0543, "step": 1961 }, { "epoch": 0.08300194601912175, "grad_norm": 0.3444327712059021, "learning_rate": 0.001, "loss": 2.5528, "step": 1962 }, { "epoch": 0.08304425078263812, "grad_norm": 0.5791609287261963, "learning_rate": 0.001, "loss": 3.4888, "step": 1963 }, { "epoch": 0.0830865555461545, "grad_norm": 1.6436971426010132, "learning_rate": 0.001, "loss": 2.6672, "step": 1964 }, { "epoch": 0.08312886030967087, "grad_norm": 0.25417083501815796, "learning_rate": 0.001, "loss": 2.1457, "step": 1965 }, { "epoch": 0.08317116507318724, "grad_norm": 0.3741660714149475, "learning_rate": 0.001, "loss": 1.8138, "step": 1966 }, { "epoch": 0.08321346983670361, "grad_norm": 0.3511454164981842, "learning_rate": 0.001, "loss": 1.9674, "step": 1967 }, { "epoch": 0.08325577460021999, "grad_norm": 0.3857306241989136, "learning_rate": 0.001, "loss": 3.0311, "step": 1968 }, { "epoch": 0.08329807936373636, "grad_norm": 0.6367977261543274, "learning_rate": 0.001, "loss": 2.3158, "step": 1969 }, { "epoch": 0.08334038412725273, "grad_norm": 0.30183205008506775, "learning_rate": 0.001, "loss": 2.4227, "step": 1970 }, { "epoch": 0.0833826888907691, "grad_norm": 0.3112057149410248, "learning_rate": 0.001, "loss": 2.2976, "step": 1971 }, { "epoch": 0.08342499365428548, "grad_norm": 0.3398885726928711, "learning_rate": 0.001, "loss": 2.5514, "step": 1972 }, { "epoch": 0.08346729841780184, "grad_norm": 0.5639985799789429, "learning_rate": 0.001, "loss": 3.3706, "step": 1973 }, { "epoch": 0.08350960318131821, "grad_norm": 0.2720838785171509, "learning_rate": 0.001, "loss": 2.0734, "step": 1974 }, { "epoch": 0.0835519079448346, "grad_norm": 0.3382203280925751, "learning_rate": 0.001, "loss": 2.1143, "step": 1975 }, { "epoch": 0.08359421270835096, "grad_norm": 0.9592907428741455, "learning_rate": 0.001, "loss": 1.722, "step": 1976 }, { "epoch": 0.08363651747186733, "grad_norm": 0.4691549241542816, "learning_rate": 0.001, "loss": 3.0425, "step": 1977 }, { "epoch": 0.0836788222353837, "grad_norm": 0.4217078983783722, "learning_rate": 0.001, "loss": 2.8026, "step": 1978 }, { "epoch": 0.08372112699890008, "grad_norm": 0.9768161773681641, "learning_rate": 0.001, "loss": 3.152, "step": 1979 }, { "epoch": 0.08376343176241645, "grad_norm": 0.4018516540527344, "learning_rate": 0.001, "loss": 1.9038, "step": 1980 }, { "epoch": 0.08380573652593282, "grad_norm": 0.3423294126987457, "learning_rate": 0.001, "loss": 3.554, "step": 1981 }, { "epoch": 0.0838480412894492, "grad_norm": 4.468573093414307, "learning_rate": 0.001, "loss": 1.7724, "step": 1982 }, { "epoch": 0.08389034605296557, "grad_norm": 2.0579144954681396, "learning_rate": 0.001, "loss": 2.9118, "step": 1983 }, { "epoch": 0.08393265081648194, "grad_norm": 0.3974030911922455, "learning_rate": 0.001, "loss": 1.8146, "step": 1984 }, { "epoch": 0.0839749555799983, "grad_norm": 2.071157693862915, "learning_rate": 0.001, "loss": 3.3286, "step": 1985 }, { "epoch": 0.08401726034351469, "grad_norm": 0.8951895236968994, "learning_rate": 0.001, "loss": 2.1807, "step": 1986 }, { "epoch": 0.08405956510703105, "grad_norm": 0.3309991955757141, "learning_rate": 0.001, "loss": 2.9089, "step": 1987 }, { "epoch": 0.08410186987054742, "grad_norm": 0.3316001892089844, "learning_rate": 0.001, "loss": 2.4278, "step": 1988 }, { "epoch": 0.08414417463406379, "grad_norm": 0.4614253342151642, "learning_rate": 0.001, "loss": 1.8007, "step": 1989 }, { "epoch": 0.08418647939758017, "grad_norm": 0.4860163629055023, "learning_rate": 0.001, "loss": 2.394, "step": 1990 }, { "epoch": 0.08422878416109654, "grad_norm": 0.30347874760627747, "learning_rate": 0.001, "loss": 2.4783, "step": 1991 }, { "epoch": 0.08427108892461291, "grad_norm": 0.3198263943195343, "learning_rate": 0.001, "loss": 2.4848, "step": 1992 }, { "epoch": 0.08431339368812929, "grad_norm": 6.526590824127197, "learning_rate": 0.001, "loss": 2.6791, "step": 1993 }, { "epoch": 0.08435569845164566, "grad_norm": 0.4787849485874176, "learning_rate": 0.001, "loss": 3.7781, "step": 1994 }, { "epoch": 0.08439800321516203, "grad_norm": 0.34780532121658325, "learning_rate": 0.001, "loss": 2.3788, "step": 1995 }, { "epoch": 0.0844403079786784, "grad_norm": 0.30479609966278076, "learning_rate": 0.001, "loss": 2.1992, "step": 1996 }, { "epoch": 0.08448261274219478, "grad_norm": 1.0337961912155151, "learning_rate": 0.001, "loss": 2.4048, "step": 1997 }, { "epoch": 0.08452491750571114, "grad_norm": 0.5477790832519531, "learning_rate": 0.001, "loss": 2.2986, "step": 1998 }, { "epoch": 0.08456722226922751, "grad_norm": 0.7585234045982361, "learning_rate": 0.001, "loss": 2.7247, "step": 1999 }, { "epoch": 0.08460952703274388, "grad_norm": 12.078346252441406, "learning_rate": 0.001, "loss": 1.9748, "step": 2000 }, { "epoch": 0.08465183179626026, "grad_norm": 0.26165372133255005, "learning_rate": 0.001, "loss": 2.2865, "step": 2001 }, { "epoch": 0.08469413655977663, "grad_norm": 0.21518190205097198, "learning_rate": 0.001, "loss": 1.9516, "step": 2002 }, { "epoch": 0.084736441323293, "grad_norm": 1.5820046663284302, "learning_rate": 0.001, "loss": 2.2892, "step": 2003 }, { "epoch": 0.08477874608680938, "grad_norm": 0.3572734296321869, "learning_rate": 0.001, "loss": 3.4287, "step": 2004 }, { "epoch": 0.08482105085032575, "grad_norm": 3.6497509479522705, "learning_rate": 0.001, "loss": 3.2188, "step": 2005 }, { "epoch": 0.08486335561384212, "grad_norm": 0.38023465871810913, "learning_rate": 0.001, "loss": 2.0518, "step": 2006 }, { "epoch": 0.08490566037735849, "grad_norm": 0.24291670322418213, "learning_rate": 0.001, "loss": 2.0684, "step": 2007 }, { "epoch": 0.08494796514087487, "grad_norm": 0.3124212622642517, "learning_rate": 0.001, "loss": 2.3747, "step": 2008 }, { "epoch": 0.08499026990439124, "grad_norm": 1.313674807548523, "learning_rate": 0.001, "loss": 1.7892, "step": 2009 }, { "epoch": 0.0850325746679076, "grad_norm": 0.5036904215812683, "learning_rate": 0.001, "loss": 2.5027, "step": 2010 }, { "epoch": 0.08507487943142397, "grad_norm": 0.4840260148048401, "learning_rate": 0.001, "loss": 2.3056, "step": 2011 }, { "epoch": 0.08511718419494035, "grad_norm": 0.48675820231437683, "learning_rate": 0.001, "loss": 2.6466, "step": 2012 }, { "epoch": 0.08515948895845672, "grad_norm": 6.131993770599365, "learning_rate": 0.001, "loss": 2.6512, "step": 2013 }, { "epoch": 0.08520179372197309, "grad_norm": 0.37686774134635925, "learning_rate": 0.001, "loss": 3.5639, "step": 2014 }, { "epoch": 0.08524409848548947, "grad_norm": 0.6002863645553589, "learning_rate": 0.001, "loss": 3.1445, "step": 2015 }, { "epoch": 0.08528640324900584, "grad_norm": 0.46876654028892517, "learning_rate": 0.001, "loss": 2.5458, "step": 2016 }, { "epoch": 0.08532870801252221, "grad_norm": 0.38201484084129333, "learning_rate": 0.001, "loss": 2.5112, "step": 2017 }, { "epoch": 0.08537101277603858, "grad_norm": 0.31709811091423035, "learning_rate": 0.001, "loss": 1.8442, "step": 2018 }, { "epoch": 0.08541331753955496, "grad_norm": 3.5930492877960205, "learning_rate": 0.001, "loss": 2.654, "step": 2019 }, { "epoch": 0.08545562230307133, "grad_norm": 0.48756277561187744, "learning_rate": 0.001, "loss": 2.5555, "step": 2020 }, { "epoch": 0.0854979270665877, "grad_norm": 0.3280019164085388, "learning_rate": 0.001, "loss": 3.0868, "step": 2021 }, { "epoch": 0.08554023183010406, "grad_norm": 3.57021164894104, "learning_rate": 0.001, "loss": 2.7273, "step": 2022 }, { "epoch": 0.08558253659362045, "grad_norm": 2.2349965572357178, "learning_rate": 0.001, "loss": 3.3427, "step": 2023 }, { "epoch": 0.08562484135713681, "grad_norm": 0.333631694316864, "learning_rate": 0.001, "loss": 2.8953, "step": 2024 }, { "epoch": 0.08566714612065318, "grad_norm": 0.31778064370155334, "learning_rate": 0.001, "loss": 2.4624, "step": 2025 }, { "epoch": 0.08570945088416956, "grad_norm": 0.3415403664112091, "learning_rate": 0.001, "loss": 2.1366, "step": 2026 }, { "epoch": 0.08575175564768593, "grad_norm": 3.173630952835083, "learning_rate": 0.001, "loss": 2.8478, "step": 2027 }, { "epoch": 0.0857940604112023, "grad_norm": 0.27932387590408325, "learning_rate": 0.001, "loss": 2.1341, "step": 2028 }, { "epoch": 0.08583636517471867, "grad_norm": 0.4122964143753052, "learning_rate": 0.001, "loss": 2.9483, "step": 2029 }, { "epoch": 0.08587866993823505, "grad_norm": 2.471703052520752, "learning_rate": 0.001, "loss": 3.5857, "step": 2030 }, { "epoch": 0.08592097470175142, "grad_norm": 0.3559750020503998, "learning_rate": 0.001, "loss": 3.2138, "step": 2031 }, { "epoch": 0.08596327946526779, "grad_norm": 0.4111155867576599, "learning_rate": 0.001, "loss": 2.3594, "step": 2032 }, { "epoch": 0.08600558422878415, "grad_norm": 0.2714095115661621, "learning_rate": 0.001, "loss": 2.0511, "step": 2033 }, { "epoch": 0.08604788899230054, "grad_norm": 0.313900887966156, "learning_rate": 0.001, "loss": 2.274, "step": 2034 }, { "epoch": 0.0860901937558169, "grad_norm": 0.2859230637550354, "learning_rate": 0.001, "loss": 2.1988, "step": 2035 }, { "epoch": 0.08613249851933327, "grad_norm": 0.323250949382782, "learning_rate": 0.001, "loss": 2.0196, "step": 2036 }, { "epoch": 0.08617480328284965, "grad_norm": 0.2705003321170807, "learning_rate": 0.001, "loss": 1.9217, "step": 2037 }, { "epoch": 0.08621710804636602, "grad_norm": 0.25685760378837585, "learning_rate": 0.001, "loss": 1.9932, "step": 2038 }, { "epoch": 0.08625941280988239, "grad_norm": 0.2940180003643036, "learning_rate": 0.001, "loss": 3.1947, "step": 2039 }, { "epoch": 0.08630171757339876, "grad_norm": 0.23758481442928314, "learning_rate": 0.001, "loss": 2.1624, "step": 2040 }, { "epoch": 0.08634402233691514, "grad_norm": 0.2667909264564514, "learning_rate": 0.001, "loss": 2.5975, "step": 2041 }, { "epoch": 0.08638632710043151, "grad_norm": 0.23966266214847565, "learning_rate": 0.001, "loss": 2.7902, "step": 2042 }, { "epoch": 0.08642863186394788, "grad_norm": 0.3106350898742676, "learning_rate": 0.001, "loss": 1.6657, "step": 2043 }, { "epoch": 0.08647093662746426, "grad_norm": 0.25842878222465515, "learning_rate": 0.001, "loss": 1.9604, "step": 2044 }, { "epoch": 0.08651324139098063, "grad_norm": 0.6965135931968689, "learning_rate": 0.001, "loss": 2.3328, "step": 2045 }, { "epoch": 0.086555546154497, "grad_norm": 0.30815958976745605, "learning_rate": 0.001, "loss": 1.7124, "step": 2046 }, { "epoch": 0.08659785091801336, "grad_norm": 0.32658496499061584, "learning_rate": 0.001, "loss": 3.9796, "step": 2047 }, { "epoch": 0.08664015568152975, "grad_norm": 0.25080788135528564, "learning_rate": 0.001, "loss": 2.4049, "step": 2048 }, { "epoch": 0.08668246044504611, "grad_norm": 0.31165841221809387, "learning_rate": 0.001, "loss": 2.0898, "step": 2049 }, { "epoch": 0.08672476520856248, "grad_norm": 1.0614737272262573, "learning_rate": 0.001, "loss": 1.7963, "step": 2050 }, { "epoch": 0.08676706997207885, "grad_norm": 0.44003593921661377, "learning_rate": 0.001, "loss": 3.2018, "step": 2051 }, { "epoch": 0.08680937473559523, "grad_norm": 0.44519808888435364, "learning_rate": 0.001, "loss": 2.6672, "step": 2052 }, { "epoch": 0.0868516794991116, "grad_norm": 1.5413126945495605, "learning_rate": 0.001, "loss": 1.6514, "step": 2053 }, { "epoch": 0.08689398426262797, "grad_norm": 1.6876548528671265, "learning_rate": 0.001, "loss": 2.9417, "step": 2054 }, { "epoch": 0.08693628902614435, "grad_norm": 0.26920443773269653, "learning_rate": 0.001, "loss": 2.1858, "step": 2055 }, { "epoch": 0.08697859378966072, "grad_norm": 0.4876200556755066, "learning_rate": 0.001, "loss": 2.9616, "step": 2056 }, { "epoch": 0.08702089855317709, "grad_norm": 0.32026639580726624, "learning_rate": 0.001, "loss": 2.398, "step": 2057 }, { "epoch": 0.08706320331669345, "grad_norm": 0.34720298647880554, "learning_rate": 0.001, "loss": 2.5501, "step": 2058 }, { "epoch": 0.08710550808020984, "grad_norm": 0.3236474394798279, "learning_rate": 0.001, "loss": 1.9898, "step": 2059 }, { "epoch": 0.0871478128437262, "grad_norm": 0.4026321470737457, "learning_rate": 0.001, "loss": 2.4051, "step": 2060 }, { "epoch": 0.08719011760724257, "grad_norm": 0.9391548037528992, "learning_rate": 0.001, "loss": 2.2811, "step": 2061 }, { "epoch": 0.08723242237075894, "grad_norm": 0.29237544536590576, "learning_rate": 0.001, "loss": 2.1954, "step": 2062 }, { "epoch": 0.08727472713427532, "grad_norm": 0.26706722378730774, "learning_rate": 0.001, "loss": 2.0118, "step": 2063 }, { "epoch": 0.08731703189779169, "grad_norm": 0.6832917928695679, "learning_rate": 0.001, "loss": 2.0665, "step": 2064 }, { "epoch": 0.08735933666130806, "grad_norm": 3.363959312438965, "learning_rate": 0.001, "loss": 2.8937, "step": 2065 }, { "epoch": 0.08740164142482444, "grad_norm": 0.6167427897453308, "learning_rate": 0.001, "loss": 2.4164, "step": 2066 }, { "epoch": 0.08744394618834081, "grad_norm": 0.40449997782707214, "learning_rate": 0.001, "loss": 2.503, "step": 2067 }, { "epoch": 0.08748625095185718, "grad_norm": 0.77544105052948, "learning_rate": 0.001, "loss": 3.1617, "step": 2068 }, { "epoch": 0.08752855571537355, "grad_norm": 0.6678179502487183, "learning_rate": 0.001, "loss": 2.4034, "step": 2069 }, { "epoch": 0.08757086047888993, "grad_norm": 0.46908748149871826, "learning_rate": 0.001, "loss": 2.4388, "step": 2070 }, { "epoch": 0.0876131652424063, "grad_norm": 0.3415198028087616, "learning_rate": 0.001, "loss": 2.1139, "step": 2071 }, { "epoch": 0.08765547000592266, "grad_norm": 0.3351193070411682, "learning_rate": 0.001, "loss": 2.3951, "step": 2072 }, { "epoch": 0.08769777476943903, "grad_norm": 0.27987435460090637, "learning_rate": 0.001, "loss": 2.0981, "step": 2073 }, { "epoch": 0.08774007953295541, "grad_norm": 1.4388349056243896, "learning_rate": 0.001, "loss": 1.9596, "step": 2074 }, { "epoch": 0.08778238429647178, "grad_norm": 2.4498531818389893, "learning_rate": 0.001, "loss": 2.667, "step": 2075 }, { "epoch": 0.08782468905998815, "grad_norm": 0.259954035282135, "learning_rate": 0.001, "loss": 2.7094, "step": 2076 }, { "epoch": 0.08786699382350453, "grad_norm": 0.24030877649784088, "learning_rate": 0.001, "loss": 2.7154, "step": 2077 }, { "epoch": 0.0879092985870209, "grad_norm": 4.155751705169678, "learning_rate": 0.001, "loss": 1.8017, "step": 2078 }, { "epoch": 0.08795160335053727, "grad_norm": 0.30022862553596497, "learning_rate": 0.001, "loss": 2.6318, "step": 2079 }, { "epoch": 0.08799390811405364, "grad_norm": 4.097969055175781, "learning_rate": 0.001, "loss": 2.7386, "step": 2080 }, { "epoch": 0.08803621287757002, "grad_norm": 0.8870471715927124, "learning_rate": 0.001, "loss": 2.6592, "step": 2081 }, { "epoch": 0.08807851764108639, "grad_norm": 0.27012670040130615, "learning_rate": 0.001, "loss": 2.5062, "step": 2082 }, { "epoch": 0.08812082240460276, "grad_norm": 0.28547555208206177, "learning_rate": 0.001, "loss": 2.3858, "step": 2083 }, { "epoch": 0.08816312716811912, "grad_norm": 0.34155458211898804, "learning_rate": 0.001, "loss": 3.2342, "step": 2084 }, { "epoch": 0.0882054319316355, "grad_norm": 0.26611489057540894, "learning_rate": 0.001, "loss": 2.0016, "step": 2085 }, { "epoch": 0.08824773669515187, "grad_norm": 0.35170432925224304, "learning_rate": 0.001, "loss": 2.7927, "step": 2086 }, { "epoch": 0.08829004145866824, "grad_norm": 0.6946236491203308, "learning_rate": 0.001, "loss": 1.982, "step": 2087 }, { "epoch": 0.08833234622218462, "grad_norm": 0.8609383702278137, "learning_rate": 0.001, "loss": 1.992, "step": 2088 }, { "epoch": 0.08837465098570099, "grad_norm": 0.2939896583557129, "learning_rate": 0.001, "loss": 2.2666, "step": 2089 }, { "epoch": 0.08841695574921736, "grad_norm": 0.26682353019714355, "learning_rate": 0.001, "loss": 2.5193, "step": 2090 }, { "epoch": 0.08845926051273373, "grad_norm": 0.9872138500213623, "learning_rate": 0.001, "loss": 2.6831, "step": 2091 }, { "epoch": 0.08850156527625011, "grad_norm": 0.9422023296356201, "learning_rate": 0.001, "loss": 3.6744, "step": 2092 }, { "epoch": 0.08854387003976648, "grad_norm": 0.4777209758758545, "learning_rate": 0.001, "loss": 3.7379, "step": 2093 }, { "epoch": 0.08858617480328285, "grad_norm": 104.47978210449219, "learning_rate": 0.001, "loss": 1.8196, "step": 2094 }, { "epoch": 0.08862847956679921, "grad_norm": 17.303163528442383, "learning_rate": 0.001, "loss": 3.1286, "step": 2095 }, { "epoch": 0.0886707843303156, "grad_norm": 0.3178851008415222, "learning_rate": 0.001, "loss": 3.5087, "step": 2096 }, { "epoch": 0.08871308909383196, "grad_norm": 0.3228803873062134, "learning_rate": 0.001, "loss": 2.0294, "step": 2097 }, { "epoch": 0.08875539385734833, "grad_norm": 0.6258364319801331, "learning_rate": 0.001, "loss": 2.5332, "step": 2098 }, { "epoch": 0.08879769862086472, "grad_norm": 0.7965203523635864, "learning_rate": 0.001, "loss": 2.7295, "step": 2099 }, { "epoch": 0.08884000338438108, "grad_norm": 0.4302770793437958, "learning_rate": 0.001, "loss": 2.1668, "step": 2100 }, { "epoch": 0.08888230814789745, "grad_norm": 0.5029553771018982, "learning_rate": 0.001, "loss": 2.9496, "step": 2101 }, { "epoch": 0.08892461291141382, "grad_norm": 0.3942144215106964, "learning_rate": 0.001, "loss": 2.313, "step": 2102 }, { "epoch": 0.0889669176749302, "grad_norm": 0.43013280630111694, "learning_rate": 0.001, "loss": 1.9479, "step": 2103 }, { "epoch": 0.08900922243844657, "grad_norm": 1.0882439613342285, "learning_rate": 0.001, "loss": 2.8823, "step": 2104 }, { "epoch": 0.08905152720196294, "grad_norm": 0.3308344781398773, "learning_rate": 0.001, "loss": 2.302, "step": 2105 }, { "epoch": 0.08909383196547932, "grad_norm": 0.29911816120147705, "learning_rate": 0.001, "loss": 3.0091, "step": 2106 }, { "epoch": 0.08913613672899569, "grad_norm": 1.754325270652771, "learning_rate": 0.001, "loss": 3.21, "step": 2107 }, { "epoch": 0.08917844149251206, "grad_norm": 2.3082916736602783, "learning_rate": 0.001, "loss": 2.3856, "step": 2108 }, { "epoch": 0.08922074625602842, "grad_norm": 0.6127236485481262, "learning_rate": 0.001, "loss": 2.2969, "step": 2109 }, { "epoch": 0.0892630510195448, "grad_norm": 0.41946130990982056, "learning_rate": 0.001, "loss": 2.8269, "step": 2110 }, { "epoch": 0.08930535578306117, "grad_norm": 6.0178327560424805, "learning_rate": 0.001, "loss": 2.7265, "step": 2111 }, { "epoch": 0.08934766054657754, "grad_norm": 0.38182544708251953, "learning_rate": 0.001, "loss": 3.1223, "step": 2112 }, { "epoch": 0.08938996531009391, "grad_norm": 1.5331692695617676, "learning_rate": 0.001, "loss": 2.4643, "step": 2113 }, { "epoch": 0.08943227007361029, "grad_norm": 0.29090458154678345, "learning_rate": 0.001, "loss": 1.958, "step": 2114 }, { "epoch": 0.08947457483712666, "grad_norm": 0.3298831284046173, "learning_rate": 0.001, "loss": 2.4861, "step": 2115 }, { "epoch": 0.08951687960064303, "grad_norm": 0.9520841240882874, "learning_rate": 0.001, "loss": 2.2431, "step": 2116 }, { "epoch": 0.08955918436415941, "grad_norm": 0.3201279938220978, "learning_rate": 0.001, "loss": 1.9121, "step": 2117 }, { "epoch": 0.08960148912767578, "grad_norm": 0.3153676390647888, "learning_rate": 0.001, "loss": 2.2245, "step": 2118 }, { "epoch": 0.08964379389119215, "grad_norm": 0.425874263048172, "learning_rate": 0.001, "loss": 3.1083, "step": 2119 }, { "epoch": 0.08968609865470852, "grad_norm": 0.6781908869743347, "learning_rate": 0.001, "loss": 3.0335, "step": 2120 }, { "epoch": 0.0897284034182249, "grad_norm": 0.38351890444755554, "learning_rate": 0.001, "loss": 3.6367, "step": 2121 }, { "epoch": 0.08977070818174127, "grad_norm": 0.26305243372917175, "learning_rate": 0.001, "loss": 1.9731, "step": 2122 }, { "epoch": 0.08981301294525763, "grad_norm": 6.707549571990967, "learning_rate": 0.001, "loss": 2.2013, "step": 2123 }, { "epoch": 0.089855317708774, "grad_norm": 0.31812310218811035, "learning_rate": 0.001, "loss": 1.9723, "step": 2124 }, { "epoch": 0.08989762247229038, "grad_norm": 0.30152562260627747, "learning_rate": 0.001, "loss": 2.6349, "step": 2125 }, { "epoch": 0.08993992723580675, "grad_norm": 0.33307674527168274, "learning_rate": 0.001, "loss": 2.295, "step": 2126 }, { "epoch": 0.08998223199932312, "grad_norm": 0.26768437027931213, "learning_rate": 0.001, "loss": 3.2849, "step": 2127 }, { "epoch": 0.0900245367628395, "grad_norm": 0.39628762006759644, "learning_rate": 0.001, "loss": 3.1855, "step": 2128 }, { "epoch": 0.09006684152635587, "grad_norm": 0.3016011416912079, "learning_rate": 0.001, "loss": 3.3723, "step": 2129 }, { "epoch": 0.09010914628987224, "grad_norm": 0.6254957318305969, "learning_rate": 0.001, "loss": 4.1078, "step": 2130 }, { "epoch": 0.0901514510533886, "grad_norm": 0.23875102400779724, "learning_rate": 0.001, "loss": 1.7937, "step": 2131 }, { "epoch": 0.09019375581690499, "grad_norm": 0.2940853536128998, "learning_rate": 0.001, "loss": 2.0315, "step": 2132 }, { "epoch": 0.09023606058042136, "grad_norm": 2.5391368865966797, "learning_rate": 0.001, "loss": 2.5625, "step": 2133 }, { "epoch": 0.09027836534393772, "grad_norm": 5.3973388671875, "learning_rate": 0.001, "loss": 1.8511, "step": 2134 }, { "epoch": 0.09032067010745409, "grad_norm": 0.25178593397140503, "learning_rate": 0.001, "loss": 2.3371, "step": 2135 }, { "epoch": 0.09036297487097047, "grad_norm": 0.31066349148750305, "learning_rate": 0.001, "loss": 2.7473, "step": 2136 }, { "epoch": 0.09040527963448684, "grad_norm": 0.42618098855018616, "learning_rate": 0.001, "loss": 3.0467, "step": 2137 }, { "epoch": 0.09044758439800321, "grad_norm": 0.29961666464805603, "learning_rate": 0.001, "loss": 2.192, "step": 2138 }, { "epoch": 0.0904898891615196, "grad_norm": 0.31987568736076355, "learning_rate": 0.001, "loss": 2.7925, "step": 2139 }, { "epoch": 0.09053219392503596, "grad_norm": 0.309462308883667, "learning_rate": 0.001, "loss": 2.0587, "step": 2140 }, { "epoch": 0.09057449868855233, "grad_norm": 0.3607901930809021, "learning_rate": 0.001, "loss": 2.1792, "step": 2141 }, { "epoch": 0.0906168034520687, "grad_norm": 0.281773179769516, "learning_rate": 0.001, "loss": 1.8093, "step": 2142 }, { "epoch": 0.09065910821558508, "grad_norm": 0.5456468462944031, "learning_rate": 0.001, "loss": 3.7337, "step": 2143 }, { "epoch": 0.09070141297910145, "grad_norm": 0.25825467705726624, "learning_rate": 0.001, "loss": 1.9948, "step": 2144 }, { "epoch": 0.09074371774261782, "grad_norm": 0.9076284766197205, "learning_rate": 0.001, "loss": 2.3712, "step": 2145 }, { "epoch": 0.09078602250613418, "grad_norm": 0.41249004006385803, "learning_rate": 0.001, "loss": 2.3084, "step": 2146 }, { "epoch": 0.09082832726965057, "grad_norm": 1.1865334510803223, "learning_rate": 0.001, "loss": 3.0776, "step": 2147 }, { "epoch": 0.09087063203316693, "grad_norm": 0.3349679410457611, "learning_rate": 0.001, "loss": 2.3653, "step": 2148 }, { "epoch": 0.0909129367966833, "grad_norm": 0.4010538160800934, "learning_rate": 0.001, "loss": 2.3073, "step": 2149 }, { "epoch": 0.09095524156019968, "grad_norm": 0.38532817363739014, "learning_rate": 0.001, "loss": 2.9012, "step": 2150 }, { "epoch": 0.09099754632371605, "grad_norm": 0.33620357513427734, "learning_rate": 0.001, "loss": 2.9892, "step": 2151 }, { "epoch": 0.09103985108723242, "grad_norm": 5.379061222076416, "learning_rate": 0.001, "loss": 2.203, "step": 2152 }, { "epoch": 0.09108215585074879, "grad_norm": 8.182388305664062, "learning_rate": 0.001, "loss": 3.635, "step": 2153 }, { "epoch": 0.09112446061426517, "grad_norm": 0.23541434109210968, "learning_rate": 0.001, "loss": 2.343, "step": 2154 }, { "epoch": 0.09116676537778154, "grad_norm": 0.27592089772224426, "learning_rate": 0.001, "loss": 2.7213, "step": 2155 }, { "epoch": 0.09120907014129791, "grad_norm": 0.30198514461517334, "learning_rate": 0.001, "loss": 3.0081, "step": 2156 }, { "epoch": 0.09125137490481428, "grad_norm": 0.26055535674095154, "learning_rate": 0.001, "loss": 2.4518, "step": 2157 }, { "epoch": 0.09129367966833066, "grad_norm": 0.2565707564353943, "learning_rate": 0.001, "loss": 2.2259, "step": 2158 }, { "epoch": 0.09133598443184703, "grad_norm": 0.2991856634616852, "learning_rate": 0.001, "loss": 2.2095, "step": 2159 }, { "epoch": 0.0913782891953634, "grad_norm": 0.6168603897094727, "learning_rate": 0.001, "loss": 2.6562, "step": 2160 }, { "epoch": 0.09142059395887978, "grad_norm": 1.3440738916397095, "learning_rate": 0.001, "loss": 2.2589, "step": 2161 }, { "epoch": 0.09146289872239614, "grad_norm": 0.2948399782180786, "learning_rate": 0.001, "loss": 2.4666, "step": 2162 }, { "epoch": 0.09150520348591251, "grad_norm": 0.3604365885257721, "learning_rate": 0.001, "loss": 2.1146, "step": 2163 }, { "epoch": 0.09154750824942888, "grad_norm": 0.33632999658584595, "learning_rate": 0.001, "loss": 2.7935, "step": 2164 }, { "epoch": 0.09158981301294526, "grad_norm": 0.49976199865341187, "learning_rate": 0.001, "loss": 2.0044, "step": 2165 }, { "epoch": 0.09163211777646163, "grad_norm": 3.4067957401275635, "learning_rate": 0.001, "loss": 2.8011, "step": 2166 }, { "epoch": 0.091674422539978, "grad_norm": 0.3377350866794586, "learning_rate": 0.001, "loss": 3.0546, "step": 2167 }, { "epoch": 0.09171672730349438, "grad_norm": 0.3675040304660797, "learning_rate": 0.001, "loss": 2.6498, "step": 2168 }, { "epoch": 0.09175903206701075, "grad_norm": 0.30007094144821167, "learning_rate": 0.001, "loss": 3.8178, "step": 2169 }, { "epoch": 0.09180133683052712, "grad_norm": 0.27063828706741333, "learning_rate": 0.001, "loss": 3.6035, "step": 2170 }, { "epoch": 0.09184364159404348, "grad_norm": 0.23841188848018646, "learning_rate": 0.001, "loss": 1.9008, "step": 2171 }, { "epoch": 0.09188594635755987, "grad_norm": 1.4631080627441406, "learning_rate": 0.001, "loss": 2.8047, "step": 2172 }, { "epoch": 0.09192825112107623, "grad_norm": 0.26148828864097595, "learning_rate": 0.001, "loss": 1.7367, "step": 2173 }, { "epoch": 0.0919705558845926, "grad_norm": 3.1010782718658447, "learning_rate": 0.001, "loss": 2.1477, "step": 2174 }, { "epoch": 0.09201286064810897, "grad_norm": 1.398153305053711, "learning_rate": 0.001, "loss": 2.5751, "step": 2175 }, { "epoch": 0.09205516541162535, "grad_norm": 1.0668129920959473, "learning_rate": 0.001, "loss": 2.3153, "step": 2176 }, { "epoch": 0.09209747017514172, "grad_norm": 0.2531263828277588, "learning_rate": 0.001, "loss": 3.1233, "step": 2177 }, { "epoch": 0.09213977493865809, "grad_norm": 0.26637500524520874, "learning_rate": 0.001, "loss": 3.5828, "step": 2178 }, { "epoch": 0.09218207970217447, "grad_norm": 0.2811260223388672, "learning_rate": 0.001, "loss": 2.9139, "step": 2179 }, { "epoch": 0.09222438446569084, "grad_norm": 0.39359262585639954, "learning_rate": 0.001, "loss": 3.4906, "step": 2180 }, { "epoch": 0.09226668922920721, "grad_norm": 0.3012666702270508, "learning_rate": 0.001, "loss": 1.9615, "step": 2181 }, { "epoch": 0.09230899399272358, "grad_norm": 0.5200200080871582, "learning_rate": 0.001, "loss": 2.8205, "step": 2182 }, { "epoch": 0.09235129875623996, "grad_norm": 0.3044484257698059, "learning_rate": 0.001, "loss": 2.9967, "step": 2183 }, { "epoch": 0.09239360351975633, "grad_norm": 0.3814965784549713, "learning_rate": 0.001, "loss": 2.1881, "step": 2184 }, { "epoch": 0.0924359082832727, "grad_norm": 0.3739076554775238, "learning_rate": 0.001, "loss": 2.8411, "step": 2185 }, { "epoch": 0.09247821304678906, "grad_norm": 0.3986753225326538, "learning_rate": 0.001, "loss": 2.6639, "step": 2186 }, { "epoch": 0.09252051781030544, "grad_norm": 0.35847851634025574, "learning_rate": 0.001, "loss": 3.6791, "step": 2187 }, { "epoch": 0.09256282257382181, "grad_norm": 0.3173019289970398, "learning_rate": 0.001, "loss": 2.2659, "step": 2188 }, { "epoch": 0.09260512733733818, "grad_norm": 0.25434383749961853, "learning_rate": 0.001, "loss": 1.8084, "step": 2189 }, { "epoch": 0.09264743210085456, "grad_norm": 0.2687484323978424, "learning_rate": 0.001, "loss": 2.9027, "step": 2190 }, { "epoch": 0.09268973686437093, "grad_norm": 0.29788804054260254, "learning_rate": 0.001, "loss": 3.081, "step": 2191 }, { "epoch": 0.0927320416278873, "grad_norm": 2.9321951866149902, "learning_rate": 0.001, "loss": 4.4009, "step": 2192 }, { "epoch": 0.09277434639140367, "grad_norm": 0.29507976770401, "learning_rate": 0.001, "loss": 2.3275, "step": 2193 }, { "epoch": 0.09281665115492005, "grad_norm": 0.29394271969795227, "learning_rate": 0.001, "loss": 2.6158, "step": 2194 }, { "epoch": 0.09285895591843642, "grad_norm": 1.7109583616256714, "learning_rate": 0.001, "loss": 2.0686, "step": 2195 }, { "epoch": 0.09290126068195279, "grad_norm": 0.28951358795166016, "learning_rate": 0.001, "loss": 2.8997, "step": 2196 }, { "epoch": 0.09294356544546915, "grad_norm": 0.49977269768714905, "learning_rate": 0.001, "loss": 2.8217, "step": 2197 }, { "epoch": 0.09298587020898554, "grad_norm": 0.35717108845710754, "learning_rate": 0.001, "loss": 2.0279, "step": 2198 }, { "epoch": 0.0930281749725019, "grad_norm": 0.22373569011688232, "learning_rate": 0.001, "loss": 1.7682, "step": 2199 }, { "epoch": 0.09307047973601827, "grad_norm": 0.25105729699134827, "learning_rate": 0.001, "loss": 2.2288, "step": 2200 }, { "epoch": 0.09311278449953465, "grad_norm": 0.24033679068088531, "learning_rate": 0.001, "loss": 2.2573, "step": 2201 }, { "epoch": 0.09315508926305102, "grad_norm": 0.3561880588531494, "learning_rate": 0.001, "loss": 3.0528, "step": 2202 }, { "epoch": 0.09319739402656739, "grad_norm": 0.26465436816215515, "learning_rate": 0.001, "loss": 3.0083, "step": 2203 }, { "epoch": 0.09323969879008376, "grad_norm": 1.4494379758834839, "learning_rate": 0.001, "loss": 2.5106, "step": 2204 }, { "epoch": 0.09328200355360014, "grad_norm": 0.28080251812934875, "learning_rate": 0.001, "loss": 2.9088, "step": 2205 }, { "epoch": 0.09332430831711651, "grad_norm": 0.6943084597587585, "learning_rate": 0.001, "loss": 2.1316, "step": 2206 }, { "epoch": 0.09336661308063288, "grad_norm": 0.2942868173122406, "learning_rate": 0.001, "loss": 2.6873, "step": 2207 }, { "epoch": 0.09340891784414924, "grad_norm": 0.4104631245136261, "learning_rate": 0.001, "loss": 2.3427, "step": 2208 }, { "epoch": 0.09345122260766563, "grad_norm": 0.22013962268829346, "learning_rate": 0.001, "loss": 1.803, "step": 2209 }, { "epoch": 0.093493527371182, "grad_norm": 0.24783039093017578, "learning_rate": 0.001, "loss": 2.1961, "step": 2210 }, { "epoch": 0.09353583213469836, "grad_norm": 0.34850063920021057, "learning_rate": 0.001, "loss": 2.5243, "step": 2211 }, { "epoch": 0.09357813689821474, "grad_norm": 0.35803404450416565, "learning_rate": 0.001, "loss": 2.6485, "step": 2212 }, { "epoch": 0.09362044166173111, "grad_norm": 4.5896124839782715, "learning_rate": 0.001, "loss": 2.4434, "step": 2213 }, { "epoch": 0.09366274642524748, "grad_norm": 0.34777218103408813, "learning_rate": 0.001, "loss": 2.0321, "step": 2214 }, { "epoch": 0.09370505118876385, "grad_norm": 0.3306042551994324, "learning_rate": 0.001, "loss": 2.4836, "step": 2215 }, { "epoch": 0.09374735595228023, "grad_norm": 0.7642030715942383, "learning_rate": 0.001, "loss": 2.1946, "step": 2216 }, { "epoch": 0.0937896607157966, "grad_norm": 0.37406593561172485, "learning_rate": 0.001, "loss": 3.6855, "step": 2217 }, { "epoch": 0.09383196547931297, "grad_norm": 0.6254957318305969, "learning_rate": 0.001, "loss": 3.1232, "step": 2218 }, { "epoch": 0.09387427024282934, "grad_norm": 0.31491386890411377, "learning_rate": 0.001, "loss": 2.757, "step": 2219 }, { "epoch": 0.09391657500634572, "grad_norm": 0.37096840143203735, "learning_rate": 0.001, "loss": 2.4591, "step": 2220 }, { "epoch": 0.09395887976986209, "grad_norm": 0.30345040559768677, "learning_rate": 0.001, "loss": 2.2993, "step": 2221 }, { "epoch": 0.09400118453337845, "grad_norm": 0.8886005282402039, "learning_rate": 0.001, "loss": 2.9874, "step": 2222 }, { "epoch": 0.09404348929689484, "grad_norm": 0.5755462646484375, "learning_rate": 0.001, "loss": 2.5191, "step": 2223 }, { "epoch": 0.0940857940604112, "grad_norm": 0.28378450870513916, "learning_rate": 0.001, "loss": 2.1818, "step": 2224 }, { "epoch": 0.09412809882392757, "grad_norm": 0.4646637737751007, "learning_rate": 0.001, "loss": 2.155, "step": 2225 }, { "epoch": 0.09417040358744394, "grad_norm": 0.28239914774894714, "learning_rate": 0.001, "loss": 3.0008, "step": 2226 }, { "epoch": 0.09421270835096032, "grad_norm": 0.3244176506996155, "learning_rate": 0.001, "loss": 2.2789, "step": 2227 }, { "epoch": 0.09425501311447669, "grad_norm": 0.25214463472366333, "learning_rate": 0.001, "loss": 1.8638, "step": 2228 }, { "epoch": 0.09429731787799306, "grad_norm": 0.29388561844825745, "learning_rate": 0.001, "loss": 2.6425, "step": 2229 }, { "epoch": 0.09433962264150944, "grad_norm": 0.7651423811912537, "learning_rate": 0.001, "loss": 2.5595, "step": 2230 }, { "epoch": 0.09438192740502581, "grad_norm": 0.7599115967750549, "learning_rate": 0.001, "loss": 2.6142, "step": 2231 }, { "epoch": 0.09442423216854218, "grad_norm": 0.28916943073272705, "learning_rate": 0.001, "loss": 1.9283, "step": 2232 }, { "epoch": 0.09446653693205855, "grad_norm": 0.2711436152458191, "learning_rate": 0.001, "loss": 2.0485, "step": 2233 }, { "epoch": 0.09450884169557493, "grad_norm": 0.3689520061016083, "learning_rate": 0.001, "loss": 2.4108, "step": 2234 }, { "epoch": 0.0945511464590913, "grad_norm": 0.3966144621372223, "learning_rate": 0.001, "loss": 2.4453, "step": 2235 }, { "epoch": 0.09459345122260766, "grad_norm": 0.8278962969779968, "learning_rate": 0.001, "loss": 2.281, "step": 2236 }, { "epoch": 0.09463575598612403, "grad_norm": 0.29549941420555115, "learning_rate": 0.001, "loss": 1.8836, "step": 2237 }, { "epoch": 0.09467806074964041, "grad_norm": 0.563795268535614, "learning_rate": 0.001, "loss": 3.243, "step": 2238 }, { "epoch": 0.09472036551315678, "grad_norm": 0.2910638749599457, "learning_rate": 0.001, "loss": 2.7178, "step": 2239 }, { "epoch": 0.09476267027667315, "grad_norm": 1.0017096996307373, "learning_rate": 0.001, "loss": 2.5647, "step": 2240 }, { "epoch": 0.09480497504018953, "grad_norm": 0.2674773037433624, "learning_rate": 0.001, "loss": 2.4521, "step": 2241 }, { "epoch": 0.0948472798037059, "grad_norm": 0.33499273657798767, "learning_rate": 0.001, "loss": 1.9591, "step": 2242 }, { "epoch": 0.09488958456722227, "grad_norm": 0.28287482261657715, "learning_rate": 0.001, "loss": 1.8664, "step": 2243 }, { "epoch": 0.09493188933073864, "grad_norm": 0.3287140130996704, "learning_rate": 0.001, "loss": 2.2835, "step": 2244 }, { "epoch": 0.09497419409425502, "grad_norm": 0.28710776567459106, "learning_rate": 0.001, "loss": 2.1672, "step": 2245 }, { "epoch": 0.09501649885777139, "grad_norm": 0.5307938456535339, "learning_rate": 0.001, "loss": 3.1055, "step": 2246 }, { "epoch": 0.09505880362128775, "grad_norm": 0.2794020473957062, "learning_rate": 0.001, "loss": 1.4943, "step": 2247 }, { "epoch": 0.09510110838480412, "grad_norm": 0.483715295791626, "learning_rate": 0.001, "loss": 1.7752, "step": 2248 }, { "epoch": 0.0951434131483205, "grad_norm": 0.8411474227905273, "learning_rate": 0.001, "loss": 2.9864, "step": 2249 }, { "epoch": 0.09518571791183687, "grad_norm": 0.30003300309181213, "learning_rate": 0.001, "loss": 4.0293, "step": 2250 }, { "epoch": 0.09522802267535324, "grad_norm": 0.2558903396129608, "learning_rate": 0.001, "loss": 2.1845, "step": 2251 }, { "epoch": 0.09527032743886962, "grad_norm": 0.30151990056037903, "learning_rate": 0.001, "loss": 2.3792, "step": 2252 }, { "epoch": 0.09531263220238599, "grad_norm": 0.29656192660331726, "learning_rate": 0.001, "loss": 3.2587, "step": 2253 }, { "epoch": 0.09535493696590236, "grad_norm": 0.8995860815048218, "learning_rate": 0.001, "loss": 2.6727, "step": 2254 }, { "epoch": 0.09539724172941873, "grad_norm": 0.3521322011947632, "learning_rate": 0.001, "loss": 2.4839, "step": 2255 }, { "epoch": 0.09543954649293511, "grad_norm": 0.2579616904258728, "learning_rate": 0.001, "loss": 2.0373, "step": 2256 }, { "epoch": 0.09548185125645148, "grad_norm": 2.846085548400879, "learning_rate": 0.001, "loss": 2.3873, "step": 2257 }, { "epoch": 0.09552415601996785, "grad_norm": 0.28332656621932983, "learning_rate": 0.001, "loss": 2.426, "step": 2258 }, { "epoch": 0.09556646078348421, "grad_norm": 1.6657432317733765, "learning_rate": 0.001, "loss": 2.0307, "step": 2259 }, { "epoch": 0.0956087655470006, "grad_norm": 0.3576366901397705, "learning_rate": 0.001, "loss": 2.6188, "step": 2260 }, { "epoch": 0.09565107031051696, "grad_norm": 0.8456769585609436, "learning_rate": 0.001, "loss": 2.3674, "step": 2261 }, { "epoch": 0.09569337507403333, "grad_norm": 0.3482474088668823, "learning_rate": 0.001, "loss": 3.4504, "step": 2262 }, { "epoch": 0.09573567983754971, "grad_norm": 0.42225900292396545, "learning_rate": 0.001, "loss": 2.8715, "step": 2263 }, { "epoch": 0.09577798460106608, "grad_norm": 1.451746940612793, "learning_rate": 0.001, "loss": 2.8526, "step": 2264 }, { "epoch": 0.09582028936458245, "grad_norm": 0.3721246123313904, "learning_rate": 0.001, "loss": 2.4929, "step": 2265 }, { "epoch": 0.09586259412809882, "grad_norm": 0.9242255091667175, "learning_rate": 0.001, "loss": 2.1786, "step": 2266 }, { "epoch": 0.0959048988916152, "grad_norm": 0.4310086965560913, "learning_rate": 0.001, "loss": 3.3944, "step": 2267 }, { "epoch": 0.09594720365513157, "grad_norm": 14.419943809509277, "learning_rate": 0.001, "loss": 1.8957, "step": 2268 }, { "epoch": 0.09598950841864794, "grad_norm": 0.373689740896225, "learning_rate": 0.001, "loss": 2.3684, "step": 2269 }, { "epoch": 0.0960318131821643, "grad_norm": 0.3977186381816864, "learning_rate": 0.001, "loss": 1.9556, "step": 2270 }, { "epoch": 0.09607411794568069, "grad_norm": 0.6978753209114075, "learning_rate": 0.001, "loss": 2.1005, "step": 2271 }, { "epoch": 0.09611642270919706, "grad_norm": 0.3750711679458618, "learning_rate": 0.001, "loss": 2.1204, "step": 2272 }, { "epoch": 0.09615872747271342, "grad_norm": 0.5839630365371704, "learning_rate": 0.001, "loss": 2.1901, "step": 2273 }, { "epoch": 0.0962010322362298, "grad_norm": 0.27384617924690247, "learning_rate": 0.001, "loss": 3.4993, "step": 2274 }, { "epoch": 0.09624333699974617, "grad_norm": 0.3717498183250427, "learning_rate": 0.001, "loss": 1.9125, "step": 2275 }, { "epoch": 0.09628564176326254, "grad_norm": 0.34626662731170654, "learning_rate": 0.001, "loss": 2.2712, "step": 2276 }, { "epoch": 0.09632794652677891, "grad_norm": 1.6960554122924805, "learning_rate": 0.001, "loss": 2.3714, "step": 2277 }, { "epoch": 0.09637025129029529, "grad_norm": 0.8217235803604126, "learning_rate": 0.001, "loss": 2.2524, "step": 2278 }, { "epoch": 0.09641255605381166, "grad_norm": 0.2900844216346741, "learning_rate": 0.001, "loss": 2.7666, "step": 2279 }, { "epoch": 0.09645486081732803, "grad_norm": 0.47249212861061096, "learning_rate": 0.001, "loss": 2.5579, "step": 2280 }, { "epoch": 0.0964971655808444, "grad_norm": 0.2934323847293854, "learning_rate": 0.001, "loss": 2.2327, "step": 2281 }, { "epoch": 0.09653947034436078, "grad_norm": 0.7934165596961975, "learning_rate": 0.001, "loss": 2.5456, "step": 2282 }, { "epoch": 0.09658177510787715, "grad_norm": 0.442386269569397, "learning_rate": 0.001, "loss": 2.7316, "step": 2283 }, { "epoch": 0.09662407987139351, "grad_norm": 0.3879510760307312, "learning_rate": 0.001, "loss": 1.8956, "step": 2284 }, { "epoch": 0.0966663846349099, "grad_norm": 0.42023125290870667, "learning_rate": 0.001, "loss": 3.0717, "step": 2285 }, { "epoch": 0.09670868939842626, "grad_norm": 0.21355478465557098, "learning_rate": 0.001, "loss": 2.0076, "step": 2286 }, { "epoch": 0.09675099416194263, "grad_norm": 10.379256248474121, "learning_rate": 0.001, "loss": 2.8718, "step": 2287 }, { "epoch": 0.096793298925459, "grad_norm": 0.3485308289527893, "learning_rate": 0.001, "loss": 1.9105, "step": 2288 }, { "epoch": 0.09683560368897538, "grad_norm": 1.0859586000442505, "learning_rate": 0.001, "loss": 1.7195, "step": 2289 }, { "epoch": 0.09687790845249175, "grad_norm": 0.43755942583084106, "learning_rate": 0.001, "loss": 2.1547, "step": 2290 }, { "epoch": 0.09692021321600812, "grad_norm": 0.26755252480506897, "learning_rate": 0.001, "loss": 1.9872, "step": 2291 }, { "epoch": 0.0969625179795245, "grad_norm": 0.3288552463054657, "learning_rate": 0.001, "loss": 3.2151, "step": 2292 }, { "epoch": 0.09700482274304087, "grad_norm": 0.3707546293735504, "learning_rate": 0.001, "loss": 2.7954, "step": 2293 }, { "epoch": 0.09704712750655724, "grad_norm": 0.33741122484207153, "learning_rate": 0.001, "loss": 2.5806, "step": 2294 }, { "epoch": 0.0970894322700736, "grad_norm": 0.29366111755371094, "learning_rate": 0.001, "loss": 2.161, "step": 2295 }, { "epoch": 0.09713173703358999, "grad_norm": 0.29789918661117554, "learning_rate": 0.001, "loss": 2.6224, "step": 2296 }, { "epoch": 0.09717404179710636, "grad_norm": 2.258840322494507, "learning_rate": 0.001, "loss": 1.8646, "step": 2297 }, { "epoch": 0.09721634656062272, "grad_norm": 0.2928341329097748, "learning_rate": 0.001, "loss": 2.6383, "step": 2298 }, { "epoch": 0.09725865132413909, "grad_norm": 0.42597708106040955, "learning_rate": 0.001, "loss": 2.1843, "step": 2299 }, { "epoch": 0.09730095608765547, "grad_norm": 2.620967149734497, "learning_rate": 0.001, "loss": 2.0323, "step": 2300 }, { "epoch": 0.09734326085117184, "grad_norm": 0.5456410646438599, "learning_rate": 0.001, "loss": 1.7493, "step": 2301 }, { "epoch": 0.09738556561468821, "grad_norm": 0.28042513132095337, "learning_rate": 0.001, "loss": 2.5266, "step": 2302 }, { "epoch": 0.09742787037820459, "grad_norm": 0.23970133066177368, "learning_rate": 0.001, "loss": 1.9479, "step": 2303 }, { "epoch": 0.09747017514172096, "grad_norm": 0.40894538164138794, "learning_rate": 0.001, "loss": 1.8509, "step": 2304 }, { "epoch": 0.09751247990523733, "grad_norm": 0.23067353665828705, "learning_rate": 0.001, "loss": 2.7972, "step": 2305 }, { "epoch": 0.0975547846687537, "grad_norm": 0.36500483751296997, "learning_rate": 0.001, "loss": 2.0178, "step": 2306 }, { "epoch": 0.09759708943227008, "grad_norm": 0.4092254936695099, "learning_rate": 0.001, "loss": 2.9206, "step": 2307 }, { "epoch": 0.09763939419578645, "grad_norm": 0.49489879608154297, "learning_rate": 0.001, "loss": 2.1519, "step": 2308 }, { "epoch": 0.09768169895930282, "grad_norm": 0.39644548296928406, "learning_rate": 0.001, "loss": 2.3865, "step": 2309 }, { "epoch": 0.09772400372281918, "grad_norm": 0.4900401830673218, "learning_rate": 0.001, "loss": 1.6001, "step": 2310 }, { "epoch": 0.09776630848633557, "grad_norm": 0.2798866629600525, "learning_rate": 0.001, "loss": 2.7082, "step": 2311 }, { "epoch": 0.09780861324985193, "grad_norm": 0.23535476624965668, "learning_rate": 0.001, "loss": 1.938, "step": 2312 }, { "epoch": 0.0978509180133683, "grad_norm": 0.27713868021965027, "learning_rate": 0.001, "loss": 2.7315, "step": 2313 }, { "epoch": 0.09789322277688468, "grad_norm": 0.3740001618862152, "learning_rate": 0.001, "loss": 2.2113, "step": 2314 }, { "epoch": 0.09793552754040105, "grad_norm": 0.23348340392112732, "learning_rate": 0.001, "loss": 2.2677, "step": 2315 }, { "epoch": 0.09797783230391742, "grad_norm": 2.2800018787384033, "learning_rate": 0.001, "loss": 2.1095, "step": 2316 }, { "epoch": 0.09802013706743379, "grad_norm": 0.2242593616247177, "learning_rate": 0.001, "loss": 2.0112, "step": 2317 }, { "epoch": 0.09806244183095017, "grad_norm": 0.40007051825523376, "learning_rate": 0.001, "loss": 2.9323, "step": 2318 }, { "epoch": 0.09810474659446654, "grad_norm": 0.32281118631362915, "learning_rate": 0.001, "loss": 2.8329, "step": 2319 }, { "epoch": 0.0981470513579829, "grad_norm": 0.32091641426086426, "learning_rate": 0.001, "loss": 2.1386, "step": 2320 }, { "epoch": 0.09818935612149927, "grad_norm": 0.372270792722702, "learning_rate": 0.001, "loss": 3.7813, "step": 2321 }, { "epoch": 0.09823166088501566, "grad_norm": 0.23315389454364777, "learning_rate": 0.001, "loss": 1.7683, "step": 2322 }, { "epoch": 0.09827396564853202, "grad_norm": 0.287507027387619, "learning_rate": 0.001, "loss": 2.4763, "step": 2323 }, { "epoch": 0.09831627041204839, "grad_norm": 0.33321303129196167, "learning_rate": 0.001, "loss": 2.5295, "step": 2324 }, { "epoch": 0.09835857517556477, "grad_norm": 0.40260010957717896, "learning_rate": 0.001, "loss": 2.4879, "step": 2325 }, { "epoch": 0.09840087993908114, "grad_norm": 0.5056272745132446, "learning_rate": 0.001, "loss": 3.2216, "step": 2326 }, { "epoch": 0.09844318470259751, "grad_norm": 6.566803455352783, "learning_rate": 0.001, "loss": 2.4137, "step": 2327 }, { "epoch": 0.09848548946611388, "grad_norm": 0.37313172221183777, "learning_rate": 0.001, "loss": 2.8742, "step": 2328 }, { "epoch": 0.09852779422963026, "grad_norm": 0.7859897613525391, "learning_rate": 0.001, "loss": 1.96, "step": 2329 }, { "epoch": 0.09857009899314663, "grad_norm": 0.2522431015968323, "learning_rate": 0.001, "loss": 2.2632, "step": 2330 }, { "epoch": 0.098612403756663, "grad_norm": 8.351020812988281, "learning_rate": 0.001, "loss": 3.1826, "step": 2331 }, { "epoch": 0.09865470852017937, "grad_norm": 0.4760158360004425, "learning_rate": 0.001, "loss": 2.1825, "step": 2332 }, { "epoch": 0.09869701328369575, "grad_norm": 0.27335453033447266, "learning_rate": 0.001, "loss": 1.8718, "step": 2333 }, { "epoch": 0.09873931804721212, "grad_norm": 0.3000699281692505, "learning_rate": 0.001, "loss": 2.1485, "step": 2334 }, { "epoch": 0.09878162281072848, "grad_norm": 0.2664180099964142, "learning_rate": 0.001, "loss": 2.0335, "step": 2335 }, { "epoch": 0.09882392757424487, "grad_norm": 1.341559886932373, "learning_rate": 0.001, "loss": 1.9941, "step": 2336 }, { "epoch": 0.09886623233776123, "grad_norm": 0.2919972836971283, "learning_rate": 0.001, "loss": 2.7002, "step": 2337 }, { "epoch": 0.0989085371012776, "grad_norm": 0.3330998718738556, "learning_rate": 0.001, "loss": 2.831, "step": 2338 }, { "epoch": 0.09895084186479397, "grad_norm": 2.2501633167266846, "learning_rate": 0.001, "loss": 2.2368, "step": 2339 }, { "epoch": 0.09899314662831035, "grad_norm": 0.7335495352745056, "learning_rate": 0.001, "loss": 3.2086, "step": 2340 }, { "epoch": 0.09903545139182672, "grad_norm": 0.36334070563316345, "learning_rate": 0.001, "loss": 3.0316, "step": 2341 }, { "epoch": 0.09907775615534309, "grad_norm": 0.35820385813713074, "learning_rate": 0.001, "loss": 2.6684, "step": 2342 }, { "epoch": 0.09912006091885946, "grad_norm": 0.5417790412902832, "learning_rate": 0.001, "loss": 3.4955, "step": 2343 }, { "epoch": 0.09916236568237584, "grad_norm": 0.6243907809257507, "learning_rate": 0.001, "loss": 2.7038, "step": 2344 }, { "epoch": 0.0992046704458922, "grad_norm": 0.4269777834415436, "learning_rate": 0.001, "loss": 3.0679, "step": 2345 }, { "epoch": 0.09924697520940857, "grad_norm": 0.45058220624923706, "learning_rate": 0.001, "loss": 2.0214, "step": 2346 }, { "epoch": 0.09928927997292496, "grad_norm": 0.35213160514831543, "learning_rate": 0.001, "loss": 1.6954, "step": 2347 }, { "epoch": 0.09933158473644133, "grad_norm": 0.36422258615493774, "learning_rate": 0.001, "loss": 2.605, "step": 2348 }, { "epoch": 0.0993738894999577, "grad_norm": 0.4111798405647278, "learning_rate": 0.001, "loss": 2.9701, "step": 2349 }, { "epoch": 0.09941619426347406, "grad_norm": 0.2559814453125, "learning_rate": 0.001, "loss": 2.0841, "step": 2350 }, { "epoch": 0.09945849902699044, "grad_norm": 0.4313149154186249, "learning_rate": 0.001, "loss": 2.1517, "step": 2351 }, { "epoch": 0.09950080379050681, "grad_norm": 0.7786276340484619, "learning_rate": 0.001, "loss": 2.1795, "step": 2352 }, { "epoch": 0.09954310855402318, "grad_norm": 0.30976659059524536, "learning_rate": 0.001, "loss": 2.1551, "step": 2353 }, { "epoch": 0.09958541331753956, "grad_norm": 0.2895033061504364, "learning_rate": 0.001, "loss": 2.9111, "step": 2354 }, { "epoch": 0.09962771808105593, "grad_norm": 0.28255394101142883, "learning_rate": 0.001, "loss": 2.9254, "step": 2355 }, { "epoch": 0.0996700228445723, "grad_norm": 1.1467900276184082, "learning_rate": 0.001, "loss": 2.8531, "step": 2356 }, { "epoch": 0.09971232760808867, "grad_norm": 0.2727132737636566, "learning_rate": 0.001, "loss": 2.8811, "step": 2357 }, { "epoch": 0.09975463237160505, "grad_norm": 0.2686462104320526, "learning_rate": 0.001, "loss": 3.5868, "step": 2358 }, { "epoch": 0.09979693713512142, "grad_norm": 0.888546884059906, "learning_rate": 0.001, "loss": 2.3209, "step": 2359 }, { "epoch": 0.09983924189863778, "grad_norm": 0.96397465467453, "learning_rate": 0.001, "loss": 2.3643, "step": 2360 }, { "epoch": 0.09988154666215415, "grad_norm": 0.3050881028175354, "learning_rate": 0.001, "loss": 2.7307, "step": 2361 }, { "epoch": 0.09992385142567053, "grad_norm": 0.2523465156555176, "learning_rate": 0.001, "loss": 1.8036, "step": 2362 }, { "epoch": 0.0999661561891869, "grad_norm": 0.28606629371643066, "learning_rate": 0.001, "loss": 3.3748, "step": 2363 }, { "epoch": 0.10000846095270327, "grad_norm": 0.25041037797927856, "learning_rate": 0.001, "loss": 1.5891, "step": 2364 }, { "epoch": 0.10005076571621965, "grad_norm": 0.36506375670433044, "learning_rate": 0.001, "loss": 2.7191, "step": 2365 }, { "epoch": 0.10009307047973602, "grad_norm": 0.30195456743240356, "learning_rate": 0.001, "loss": 2.418, "step": 2366 }, { "epoch": 0.10013537524325239, "grad_norm": 0.2500908374786377, "learning_rate": 0.001, "loss": 1.9814, "step": 2367 }, { "epoch": 0.10017768000676876, "grad_norm": 0.29026201367378235, "learning_rate": 0.001, "loss": 2.4507, "step": 2368 }, { "epoch": 0.10021998477028514, "grad_norm": 0.28172945976257324, "learning_rate": 0.001, "loss": 2.6413, "step": 2369 }, { "epoch": 0.10026228953380151, "grad_norm": 0.2788645625114441, "learning_rate": 0.001, "loss": 2.1827, "step": 2370 }, { "epoch": 0.10030459429731788, "grad_norm": 0.24209265410900116, "learning_rate": 0.001, "loss": 2.1193, "step": 2371 }, { "epoch": 0.10034689906083424, "grad_norm": 0.25518038868904114, "learning_rate": 0.001, "loss": 2.3543, "step": 2372 }, { "epoch": 0.10038920382435063, "grad_norm": 0.28497177362442017, "learning_rate": 0.001, "loss": 2.3745, "step": 2373 }, { "epoch": 0.100431508587867, "grad_norm": 0.49267375469207764, "learning_rate": 0.001, "loss": 1.9744, "step": 2374 }, { "epoch": 0.10047381335138336, "grad_norm": 0.26035183668136597, "learning_rate": 0.001, "loss": 1.834, "step": 2375 }, { "epoch": 0.10051611811489974, "grad_norm": 0.28036877512931824, "learning_rate": 0.001, "loss": 3.1908, "step": 2376 }, { "epoch": 0.10055842287841611, "grad_norm": 1.9085135459899902, "learning_rate": 0.001, "loss": 3.0537, "step": 2377 }, { "epoch": 0.10060072764193248, "grad_norm": 0.26107195019721985, "learning_rate": 0.001, "loss": 2.0188, "step": 2378 }, { "epoch": 0.10064303240544885, "grad_norm": 0.2460990846157074, "learning_rate": 0.001, "loss": 3.2141, "step": 2379 }, { "epoch": 0.10068533716896523, "grad_norm": 0.3403857946395874, "learning_rate": 0.001, "loss": 2.0665, "step": 2380 }, { "epoch": 0.1007276419324816, "grad_norm": 0.36075910925865173, "learning_rate": 0.001, "loss": 2.6347, "step": 2381 }, { "epoch": 0.10076994669599797, "grad_norm": 2.8513433933258057, "learning_rate": 0.001, "loss": 2.6082, "step": 2382 }, { "epoch": 0.10081225145951433, "grad_norm": 0.30714812874794006, "learning_rate": 0.001, "loss": 2.1303, "step": 2383 }, { "epoch": 0.10085455622303072, "grad_norm": 0.26115673780441284, "learning_rate": 0.001, "loss": 2.4109, "step": 2384 }, { "epoch": 0.10089686098654709, "grad_norm": 3.011416435241699, "learning_rate": 0.001, "loss": 2.6895, "step": 2385 }, { "epoch": 0.10093916575006345, "grad_norm": 0.2893314063549042, "learning_rate": 0.001, "loss": 1.6992, "step": 2386 }, { "epoch": 0.10098147051357984, "grad_norm": 0.411714106798172, "learning_rate": 0.001, "loss": 3.7138, "step": 2387 }, { "epoch": 0.1010237752770962, "grad_norm": 0.37683776021003723, "learning_rate": 0.001, "loss": 2.6581, "step": 2388 }, { "epoch": 0.10106608004061257, "grad_norm": 0.8233429789543152, "learning_rate": 0.001, "loss": 3.1353, "step": 2389 }, { "epoch": 0.10110838480412894, "grad_norm": 0.7344207167625427, "learning_rate": 0.001, "loss": 2.6045, "step": 2390 }, { "epoch": 0.10115068956764532, "grad_norm": 0.28514331579208374, "learning_rate": 0.001, "loss": 1.9666, "step": 2391 }, { "epoch": 0.10119299433116169, "grad_norm": 0.2906087040901184, "learning_rate": 0.001, "loss": 1.729, "step": 2392 }, { "epoch": 0.10123529909467806, "grad_norm": 0.4669470191001892, "learning_rate": 0.001, "loss": 1.5726, "step": 2393 }, { "epoch": 0.10127760385819443, "grad_norm": 0.44565969705581665, "learning_rate": 0.001, "loss": 2.1407, "step": 2394 }, { "epoch": 0.10131990862171081, "grad_norm": 0.2725061774253845, "learning_rate": 0.001, "loss": 2.1667, "step": 2395 }, { "epoch": 0.10136221338522718, "grad_norm": 0.49515077471733093, "learning_rate": 0.001, "loss": 2.4108, "step": 2396 }, { "epoch": 0.10140451814874354, "grad_norm": 0.2847639322280884, "learning_rate": 0.001, "loss": 2.9909, "step": 2397 }, { "epoch": 0.10144682291225993, "grad_norm": 0.8457419872283936, "learning_rate": 0.001, "loss": 3.0259, "step": 2398 }, { "epoch": 0.1014891276757763, "grad_norm": 9.33295726776123, "learning_rate": 0.001, "loss": 3.2422, "step": 2399 }, { "epoch": 0.10153143243929266, "grad_norm": 1.8597520589828491, "learning_rate": 0.001, "loss": 2.6769, "step": 2400 }, { "epoch": 0.10157373720280903, "grad_norm": 0.26994621753692627, "learning_rate": 0.001, "loss": 2.2029, "step": 2401 }, { "epoch": 0.10161604196632541, "grad_norm": 1.4038335084915161, "learning_rate": 0.001, "loss": 3.4995, "step": 2402 }, { "epoch": 0.10165834672984178, "grad_norm": 0.6501076221466064, "learning_rate": 0.001, "loss": 2.7349, "step": 2403 }, { "epoch": 0.10170065149335815, "grad_norm": 0.21357211470603943, "learning_rate": 0.001, "loss": 1.9009, "step": 2404 }, { "epoch": 0.10174295625687452, "grad_norm": 0.272206574678421, "learning_rate": 0.001, "loss": 2.2061, "step": 2405 }, { "epoch": 0.1017852610203909, "grad_norm": 0.23111356794834137, "learning_rate": 0.001, "loss": 2.2551, "step": 2406 }, { "epoch": 0.10182756578390727, "grad_norm": 0.21537300944328308, "learning_rate": 0.001, "loss": 3.186, "step": 2407 }, { "epoch": 0.10186987054742364, "grad_norm": 0.26508206129074097, "learning_rate": 0.001, "loss": 2.1786, "step": 2408 }, { "epoch": 0.10191217531094002, "grad_norm": 3.251347303390503, "learning_rate": 0.001, "loss": 2.1077, "step": 2409 }, { "epoch": 0.10195448007445639, "grad_norm": 0.2736663818359375, "learning_rate": 0.001, "loss": 2.0577, "step": 2410 }, { "epoch": 0.10199678483797275, "grad_norm": 0.3979964852333069, "learning_rate": 0.001, "loss": 3.457, "step": 2411 }, { "epoch": 0.10203908960148912, "grad_norm": 0.2330218404531479, "learning_rate": 0.001, "loss": 2.2031, "step": 2412 }, { "epoch": 0.1020813943650055, "grad_norm": 0.24427588284015656, "learning_rate": 0.001, "loss": 1.7257, "step": 2413 }, { "epoch": 0.10212369912852187, "grad_norm": 0.7020580172538757, "learning_rate": 0.001, "loss": 3.6614, "step": 2414 }, { "epoch": 0.10216600389203824, "grad_norm": 0.35481932759284973, "learning_rate": 0.001, "loss": 2.5094, "step": 2415 }, { "epoch": 0.10220830865555462, "grad_norm": 10.207537651062012, "learning_rate": 0.001, "loss": 2.1178, "step": 2416 }, { "epoch": 0.10225061341907099, "grad_norm": 0.3748272657394409, "learning_rate": 0.001, "loss": 2.5037, "step": 2417 }, { "epoch": 0.10229291818258736, "grad_norm": 1.0302212238311768, "learning_rate": 0.001, "loss": 1.8157, "step": 2418 }, { "epoch": 0.10233522294610373, "grad_norm": 0.35969147086143494, "learning_rate": 0.001, "loss": 2.2622, "step": 2419 }, { "epoch": 0.10237752770962011, "grad_norm": 0.3533392548561096, "learning_rate": 0.001, "loss": 2.4072, "step": 2420 }, { "epoch": 0.10241983247313648, "grad_norm": 0.3274601101875305, "learning_rate": 0.001, "loss": 2.4427, "step": 2421 }, { "epoch": 0.10246213723665284, "grad_norm": 0.35946276783943176, "learning_rate": 0.001, "loss": 1.8442, "step": 2422 }, { "epoch": 0.10250444200016921, "grad_norm": 0.3757915198802948, "learning_rate": 0.001, "loss": 3.0923, "step": 2423 }, { "epoch": 0.1025467467636856, "grad_norm": 0.3471343219280243, "learning_rate": 0.001, "loss": 2.3881, "step": 2424 }, { "epoch": 0.10258905152720196, "grad_norm": 0.2516452372074127, "learning_rate": 0.001, "loss": 2.0774, "step": 2425 }, { "epoch": 0.10263135629071833, "grad_norm": 0.23940257728099823, "learning_rate": 0.001, "loss": 2.3271, "step": 2426 }, { "epoch": 0.10267366105423471, "grad_norm": 0.8426430821418762, "learning_rate": 0.001, "loss": 2.4435, "step": 2427 }, { "epoch": 0.10271596581775108, "grad_norm": 0.7691043019294739, "learning_rate": 0.001, "loss": 1.9913, "step": 2428 }, { "epoch": 0.10275827058126745, "grad_norm": 0.29252296686172485, "learning_rate": 0.001, "loss": 3.3262, "step": 2429 }, { "epoch": 0.10280057534478382, "grad_norm": 0.25585702061653137, "learning_rate": 0.001, "loss": 2.3882, "step": 2430 }, { "epoch": 0.1028428801083002, "grad_norm": 2.2944388389587402, "learning_rate": 0.001, "loss": 2.7547, "step": 2431 }, { "epoch": 0.10288518487181657, "grad_norm": 0.3069431185722351, "learning_rate": 0.001, "loss": 2.5555, "step": 2432 }, { "epoch": 0.10292748963533294, "grad_norm": 0.6198341250419617, "learning_rate": 0.001, "loss": 2.2501, "step": 2433 }, { "epoch": 0.1029697943988493, "grad_norm": 0.23452864587306976, "learning_rate": 0.001, "loss": 2.0086, "step": 2434 }, { "epoch": 0.10301209916236569, "grad_norm": 0.27118051052093506, "learning_rate": 0.001, "loss": 1.6991, "step": 2435 }, { "epoch": 0.10305440392588205, "grad_norm": 0.2895638048648834, "learning_rate": 0.001, "loss": 2.391, "step": 2436 }, { "epoch": 0.10309670868939842, "grad_norm": 0.5216059684753418, "learning_rate": 0.001, "loss": 2.0853, "step": 2437 }, { "epoch": 0.1031390134529148, "grad_norm": 0.32230275869369507, "learning_rate": 0.001, "loss": 2.3947, "step": 2438 }, { "epoch": 0.10318131821643117, "grad_norm": 0.34388068318367004, "learning_rate": 0.001, "loss": 2.6358, "step": 2439 }, { "epoch": 0.10322362297994754, "grad_norm": 0.3274713456630707, "learning_rate": 0.001, "loss": 3.2649, "step": 2440 }, { "epoch": 0.10326592774346391, "grad_norm": 0.24570752680301666, "learning_rate": 0.001, "loss": 2.3323, "step": 2441 }, { "epoch": 0.10330823250698029, "grad_norm": 0.23743736743927002, "learning_rate": 0.001, "loss": 1.6279, "step": 2442 }, { "epoch": 0.10335053727049666, "grad_norm": 0.2327599972486496, "learning_rate": 0.001, "loss": 2.0293, "step": 2443 }, { "epoch": 0.10339284203401303, "grad_norm": 0.24773749709129333, "learning_rate": 0.001, "loss": 2.1824, "step": 2444 }, { "epoch": 0.1034351467975294, "grad_norm": 0.3534669578075409, "learning_rate": 0.001, "loss": 2.7218, "step": 2445 }, { "epoch": 0.10347745156104578, "grad_norm": 0.5942555069923401, "learning_rate": 0.001, "loss": 2.3746, "step": 2446 }, { "epoch": 0.10351975632456215, "grad_norm": 0.26471078395843506, "learning_rate": 0.001, "loss": 2.2947, "step": 2447 }, { "epoch": 0.10356206108807851, "grad_norm": 0.3464234173297882, "learning_rate": 0.001, "loss": 2.1127, "step": 2448 }, { "epoch": 0.1036043658515949, "grad_norm": 1.5699352025985718, "learning_rate": 0.001, "loss": 2.5707, "step": 2449 }, { "epoch": 0.10364667061511126, "grad_norm": 1.2185924053192139, "learning_rate": 0.001, "loss": 2.956, "step": 2450 }, { "epoch": 0.10368897537862763, "grad_norm": 0.2412140667438507, "learning_rate": 0.001, "loss": 2.1361, "step": 2451 }, { "epoch": 0.103731280142144, "grad_norm": 1.7203179597854614, "learning_rate": 0.001, "loss": 2.7507, "step": 2452 }, { "epoch": 0.10377358490566038, "grad_norm": 0.40216708183288574, "learning_rate": 0.001, "loss": 2.3239, "step": 2453 }, { "epoch": 0.10381588966917675, "grad_norm": 0.41361749172210693, "learning_rate": 0.001, "loss": 2.4953, "step": 2454 }, { "epoch": 0.10385819443269312, "grad_norm": 2.6895222663879395, "learning_rate": 0.001, "loss": 2.6164, "step": 2455 }, { "epoch": 0.10390049919620949, "grad_norm": 0.6734895706176758, "learning_rate": 0.001, "loss": 2.5122, "step": 2456 }, { "epoch": 0.10394280395972587, "grad_norm": 3.203244686126709, "learning_rate": 0.001, "loss": 2.6159, "step": 2457 }, { "epoch": 0.10398510872324224, "grad_norm": 7.857353687286377, "learning_rate": 0.001, "loss": 2.2798, "step": 2458 }, { "epoch": 0.1040274134867586, "grad_norm": 0.4832097589969635, "learning_rate": 0.001, "loss": 2.6077, "step": 2459 }, { "epoch": 0.10406971825027499, "grad_norm": 1.2383735179901123, "learning_rate": 0.001, "loss": 2.2514, "step": 2460 }, { "epoch": 0.10411202301379135, "grad_norm": 0.27097123861312866, "learning_rate": 0.001, "loss": 1.8764, "step": 2461 }, { "epoch": 0.10415432777730772, "grad_norm": 0.2842322587966919, "learning_rate": 0.001, "loss": 2.4073, "step": 2462 }, { "epoch": 0.10419663254082409, "grad_norm": 0.31297391653060913, "learning_rate": 0.001, "loss": 1.8048, "step": 2463 }, { "epoch": 0.10423893730434047, "grad_norm": 0.4402758479118347, "learning_rate": 0.001, "loss": 3.4671, "step": 2464 }, { "epoch": 0.10428124206785684, "grad_norm": 0.28556615114212036, "learning_rate": 0.001, "loss": 2.2693, "step": 2465 }, { "epoch": 0.10432354683137321, "grad_norm": 0.2614104151725769, "learning_rate": 0.001, "loss": 1.8248, "step": 2466 }, { "epoch": 0.10436585159488958, "grad_norm": 0.24164602160453796, "learning_rate": 0.001, "loss": 2.1898, "step": 2467 }, { "epoch": 0.10440815635840596, "grad_norm": 0.3009679913520813, "learning_rate": 0.001, "loss": 3.0112, "step": 2468 }, { "epoch": 0.10445046112192233, "grad_norm": 0.3488333821296692, "learning_rate": 0.001, "loss": 2.0925, "step": 2469 }, { "epoch": 0.1044927658854387, "grad_norm": 0.6833105087280273, "learning_rate": 0.001, "loss": 2.22, "step": 2470 }, { "epoch": 0.10453507064895508, "grad_norm": 0.28377044200897217, "learning_rate": 0.001, "loss": 2.2993, "step": 2471 }, { "epoch": 0.10457737541247145, "grad_norm": 0.3093280792236328, "learning_rate": 0.001, "loss": 2.701, "step": 2472 }, { "epoch": 0.10461968017598781, "grad_norm": 0.31424522399902344, "learning_rate": 0.001, "loss": 2.6909, "step": 2473 }, { "epoch": 0.10466198493950418, "grad_norm": 0.23883268237113953, "learning_rate": 0.001, "loss": 3.2359, "step": 2474 }, { "epoch": 0.10470428970302056, "grad_norm": 0.3218192458152771, "learning_rate": 0.001, "loss": 2.6222, "step": 2475 }, { "epoch": 0.10474659446653693, "grad_norm": 0.3537428677082062, "learning_rate": 0.001, "loss": 2.6754, "step": 2476 }, { "epoch": 0.1047888992300533, "grad_norm": 0.2585582137107849, "learning_rate": 0.001, "loss": 2.7918, "step": 2477 }, { "epoch": 0.10483120399356968, "grad_norm": 0.28491732478141785, "learning_rate": 0.001, "loss": 1.814, "step": 2478 }, { "epoch": 0.10487350875708605, "grad_norm": 0.31250932812690735, "learning_rate": 0.001, "loss": 2.1781, "step": 2479 }, { "epoch": 0.10491581352060242, "grad_norm": 0.6643452644348145, "learning_rate": 0.001, "loss": 4.0611, "step": 2480 }, { "epoch": 0.10495811828411879, "grad_norm": 0.2877008318901062, "learning_rate": 0.001, "loss": 2.3672, "step": 2481 }, { "epoch": 0.10500042304763517, "grad_norm": 0.23607558012008667, "learning_rate": 0.001, "loss": 1.5298, "step": 2482 }, { "epoch": 0.10504272781115154, "grad_norm": 3.870473623275757, "learning_rate": 0.001, "loss": 2.4808, "step": 2483 }, { "epoch": 0.1050850325746679, "grad_norm": 0.29349610209465027, "learning_rate": 0.001, "loss": 1.9366, "step": 2484 }, { "epoch": 0.10512733733818427, "grad_norm": 0.23734307289123535, "learning_rate": 0.001, "loss": 2.6115, "step": 2485 }, { "epoch": 0.10516964210170066, "grad_norm": 0.8626511693000793, "learning_rate": 0.001, "loss": 2.4259, "step": 2486 }, { "epoch": 0.10521194686521702, "grad_norm": 5.853229999542236, "learning_rate": 0.001, "loss": 2.406, "step": 2487 }, { "epoch": 0.10525425162873339, "grad_norm": 0.23822012543678284, "learning_rate": 0.001, "loss": 3.1322, "step": 2488 }, { "epoch": 0.10529655639224977, "grad_norm": 0.2993183732032776, "learning_rate": 0.001, "loss": 3.2551, "step": 2489 }, { "epoch": 0.10533886115576614, "grad_norm": 1.8715354204177856, "learning_rate": 0.001, "loss": 2.2385, "step": 2490 }, { "epoch": 0.10538116591928251, "grad_norm": 0.3598434329032898, "learning_rate": 0.001, "loss": 2.4187, "step": 2491 }, { "epoch": 0.10542347068279888, "grad_norm": 0.5928519368171692, "learning_rate": 0.001, "loss": 1.9091, "step": 2492 }, { "epoch": 0.10546577544631526, "grad_norm": 1.1429487466812134, "learning_rate": 0.001, "loss": 2.5143, "step": 2493 }, { "epoch": 0.10550808020983163, "grad_norm": 2.955080270767212, "learning_rate": 0.001, "loss": 2.2716, "step": 2494 }, { "epoch": 0.105550384973348, "grad_norm": 0.3205156922340393, "learning_rate": 0.001, "loss": 3.5673, "step": 2495 }, { "epoch": 0.10559268973686436, "grad_norm": 0.31520524621009827, "learning_rate": 0.001, "loss": 2.6269, "step": 2496 }, { "epoch": 0.10563499450038075, "grad_norm": 0.2760033905506134, "learning_rate": 0.001, "loss": 2.1043, "step": 2497 }, { "epoch": 0.10567729926389711, "grad_norm": 0.23244822025299072, "learning_rate": 0.001, "loss": 2.583, "step": 2498 }, { "epoch": 0.10571960402741348, "grad_norm": 0.24715182185173035, "learning_rate": 0.001, "loss": 2.147, "step": 2499 }, { "epoch": 0.10576190879092986, "grad_norm": 0.3001532256603241, "learning_rate": 0.001, "loss": 2.2312, "step": 2500 }, { "epoch": 0.10580421355444623, "grad_norm": 0.2582187056541443, "learning_rate": 0.001, "loss": 2.7161, "step": 2501 }, { "epoch": 0.1058465183179626, "grad_norm": 0.6607329249382019, "learning_rate": 0.001, "loss": 2.7597, "step": 2502 }, { "epoch": 0.10588882308147897, "grad_norm": 0.2999674677848816, "learning_rate": 0.001, "loss": 2.7306, "step": 2503 }, { "epoch": 0.10593112784499535, "grad_norm": 0.2406844049692154, "learning_rate": 0.001, "loss": 1.7858, "step": 2504 }, { "epoch": 0.10597343260851172, "grad_norm": 0.23513177037239075, "learning_rate": 0.001, "loss": 2.072, "step": 2505 }, { "epoch": 0.10601573737202809, "grad_norm": 0.2586327791213989, "learning_rate": 0.001, "loss": 2.1561, "step": 2506 }, { "epoch": 0.10605804213554446, "grad_norm": 0.2618561089038849, "learning_rate": 0.001, "loss": 1.7579, "step": 2507 }, { "epoch": 0.10610034689906084, "grad_norm": 0.34452149271965027, "learning_rate": 0.001, "loss": 2.1415, "step": 2508 }, { "epoch": 0.1061426516625772, "grad_norm": 0.3686632513999939, "learning_rate": 0.001, "loss": 2.3883, "step": 2509 }, { "epoch": 0.10618495642609357, "grad_norm": 0.2636253535747528, "learning_rate": 0.001, "loss": 2.2841, "step": 2510 }, { "epoch": 0.10622726118960996, "grad_norm": 0.2659060060977936, "learning_rate": 0.001, "loss": 2.5267, "step": 2511 }, { "epoch": 0.10626956595312632, "grad_norm": 0.37761518359184265, "learning_rate": 0.001, "loss": 2.7947, "step": 2512 }, { "epoch": 0.10631187071664269, "grad_norm": 0.8648855090141296, "learning_rate": 0.001, "loss": 2.7321, "step": 2513 }, { "epoch": 0.10635417548015906, "grad_norm": 0.26311472058296204, "learning_rate": 0.001, "loss": 1.881, "step": 2514 }, { "epoch": 0.10639648024367544, "grad_norm": 0.344380259513855, "learning_rate": 0.001, "loss": 2.3485, "step": 2515 }, { "epoch": 0.10643878500719181, "grad_norm": 0.35995468497276306, "learning_rate": 0.001, "loss": 2.1559, "step": 2516 }, { "epoch": 0.10648108977070818, "grad_norm": 0.32800522446632385, "learning_rate": 0.001, "loss": 2.0882, "step": 2517 }, { "epoch": 0.10652339453422455, "grad_norm": 0.3281853497028351, "learning_rate": 0.001, "loss": 2.4298, "step": 2518 }, { "epoch": 0.10656569929774093, "grad_norm": 0.25847840309143066, "learning_rate": 0.001, "loss": 2.1247, "step": 2519 }, { "epoch": 0.1066080040612573, "grad_norm": 0.6328796148300171, "learning_rate": 0.001, "loss": 2.7698, "step": 2520 }, { "epoch": 0.10665030882477367, "grad_norm": 1.8759524822235107, "learning_rate": 0.001, "loss": 2.4454, "step": 2521 }, { "epoch": 0.10669261358829005, "grad_norm": 0.2672816812992096, "learning_rate": 0.001, "loss": 2.3352, "step": 2522 }, { "epoch": 0.10673491835180642, "grad_norm": 0.22392208874225616, "learning_rate": 0.001, "loss": 1.6423, "step": 2523 }, { "epoch": 0.10677722311532278, "grad_norm": 0.43734171986579895, "learning_rate": 0.001, "loss": 2.8971, "step": 2524 }, { "epoch": 0.10681952787883915, "grad_norm": 0.28167760372161865, "learning_rate": 0.001, "loss": 1.9086, "step": 2525 }, { "epoch": 0.10686183264235553, "grad_norm": 0.2657049000263214, "learning_rate": 0.001, "loss": 3.812, "step": 2526 }, { "epoch": 0.1069041374058719, "grad_norm": 0.26134321093559265, "learning_rate": 0.001, "loss": 2.2141, "step": 2527 }, { "epoch": 0.10694644216938827, "grad_norm": 0.271685928106308, "learning_rate": 0.001, "loss": 2.1261, "step": 2528 }, { "epoch": 0.10698874693290465, "grad_norm": 0.7208402156829834, "learning_rate": 0.001, "loss": 2.3952, "step": 2529 }, { "epoch": 0.10703105169642102, "grad_norm": 0.5197535157203674, "learning_rate": 0.001, "loss": 2.6504, "step": 2530 }, { "epoch": 0.10707335645993739, "grad_norm": 0.27384886145591736, "learning_rate": 0.001, "loss": 3.3872, "step": 2531 }, { "epoch": 0.10711566122345376, "grad_norm": 4.649167060852051, "learning_rate": 0.001, "loss": 2.3127, "step": 2532 }, { "epoch": 0.10715796598697014, "grad_norm": 0.5148127675056458, "learning_rate": 0.001, "loss": 3.0018, "step": 2533 }, { "epoch": 0.1072002707504865, "grad_norm": 1.9632915258407593, "learning_rate": 0.001, "loss": 3.2377, "step": 2534 }, { "epoch": 0.10724257551400287, "grad_norm": 1.258927583694458, "learning_rate": 0.001, "loss": 1.5741, "step": 2535 }, { "epoch": 0.10728488027751924, "grad_norm": 0.38562342524528503, "learning_rate": 0.001, "loss": 2.2438, "step": 2536 }, { "epoch": 0.10732718504103562, "grad_norm": 0.4237939715385437, "learning_rate": 0.001, "loss": 2.8422, "step": 2537 }, { "epoch": 0.10736948980455199, "grad_norm": 12.856072425842285, "learning_rate": 0.001, "loss": 1.9867, "step": 2538 }, { "epoch": 0.10741179456806836, "grad_norm": 0.3507222533226013, "learning_rate": 0.001, "loss": 2.4188, "step": 2539 }, { "epoch": 0.10745409933158474, "grad_norm": 0.29266253113746643, "learning_rate": 0.001, "loss": 2.3492, "step": 2540 }, { "epoch": 0.10749640409510111, "grad_norm": 0.31082549691200256, "learning_rate": 0.001, "loss": 2.4325, "step": 2541 }, { "epoch": 0.10753870885861748, "grad_norm": 0.41879016160964966, "learning_rate": 0.001, "loss": 3.5761, "step": 2542 }, { "epoch": 0.10758101362213385, "grad_norm": 0.2282257229089737, "learning_rate": 0.001, "loss": 2.143, "step": 2543 }, { "epoch": 0.10762331838565023, "grad_norm": 3.6079633235931396, "learning_rate": 0.001, "loss": 2.3143, "step": 2544 }, { "epoch": 0.1076656231491666, "grad_norm": 0.24503585696220398, "learning_rate": 0.001, "loss": 2.5591, "step": 2545 }, { "epoch": 0.10770792791268297, "grad_norm": 0.49733033776283264, "learning_rate": 0.001, "loss": 2.3954, "step": 2546 }, { "epoch": 0.10775023267619933, "grad_norm": 1.210880994796753, "learning_rate": 0.001, "loss": 2.9697, "step": 2547 }, { "epoch": 0.10779253743971572, "grad_norm": 0.6089571714401245, "learning_rate": 0.001, "loss": 2.2833, "step": 2548 }, { "epoch": 0.10783484220323208, "grad_norm": 0.2385062873363495, "learning_rate": 0.001, "loss": 3.0483, "step": 2549 }, { "epoch": 0.10787714696674845, "grad_norm": 0.2744576334953308, "learning_rate": 0.001, "loss": 1.9552, "step": 2550 }, { "epoch": 0.10791945173026483, "grad_norm": 0.32292699813842773, "learning_rate": 0.001, "loss": 2.3587, "step": 2551 }, { "epoch": 0.1079617564937812, "grad_norm": 0.4315103590488434, "learning_rate": 0.001, "loss": 3.1126, "step": 2552 }, { "epoch": 0.10800406125729757, "grad_norm": 0.38236674666404724, "learning_rate": 0.001, "loss": 2.787, "step": 2553 }, { "epoch": 0.10804636602081394, "grad_norm": 0.25783267617225647, "learning_rate": 0.001, "loss": 1.9616, "step": 2554 }, { "epoch": 0.10808867078433032, "grad_norm": 0.6072126030921936, "learning_rate": 0.001, "loss": 2.2998, "step": 2555 }, { "epoch": 0.10813097554784669, "grad_norm": 1.3105621337890625, "learning_rate": 0.001, "loss": 2.144, "step": 2556 }, { "epoch": 0.10817328031136306, "grad_norm": 0.3103470504283905, "learning_rate": 0.001, "loss": 2.4171, "step": 2557 }, { "epoch": 0.10821558507487943, "grad_norm": 2.017469644546509, "learning_rate": 0.001, "loss": 1.9626, "step": 2558 }, { "epoch": 0.10825788983839581, "grad_norm": 0.6327214241027832, "learning_rate": 0.001, "loss": 3.576, "step": 2559 }, { "epoch": 0.10830019460191218, "grad_norm": 0.3792698383331299, "learning_rate": 0.001, "loss": 2.7368, "step": 2560 }, { "epoch": 0.10834249936542854, "grad_norm": 0.3013581931591034, "learning_rate": 0.001, "loss": 2.3676, "step": 2561 }, { "epoch": 0.10838480412894493, "grad_norm": 0.2713608145713806, "learning_rate": 0.001, "loss": 2.6759, "step": 2562 }, { "epoch": 0.1084271088924613, "grad_norm": 0.32632705569267273, "learning_rate": 0.001, "loss": 3.0297, "step": 2563 }, { "epoch": 0.10846941365597766, "grad_norm": 0.48675236105918884, "learning_rate": 0.001, "loss": 2.3548, "step": 2564 }, { "epoch": 0.10851171841949403, "grad_norm": 0.3283641040325165, "learning_rate": 0.001, "loss": 3.2577, "step": 2565 }, { "epoch": 0.10855402318301041, "grad_norm": 0.3809754550457001, "learning_rate": 0.001, "loss": 1.8483, "step": 2566 }, { "epoch": 0.10859632794652678, "grad_norm": 0.5347718596458435, "learning_rate": 0.001, "loss": 2.3094, "step": 2567 }, { "epoch": 0.10863863271004315, "grad_norm": 0.28041812777519226, "learning_rate": 0.001, "loss": 2.4324, "step": 2568 }, { "epoch": 0.10868093747355952, "grad_norm": 0.33430221676826477, "learning_rate": 0.001, "loss": 2.1342, "step": 2569 }, { "epoch": 0.1087232422370759, "grad_norm": 0.23147587478160858, "learning_rate": 0.001, "loss": 2.2938, "step": 2570 }, { "epoch": 0.10876554700059227, "grad_norm": 0.25908035039901733, "learning_rate": 0.001, "loss": 3.265, "step": 2571 }, { "epoch": 0.10880785176410863, "grad_norm": 0.3302173316478729, "learning_rate": 0.001, "loss": 3.0818, "step": 2572 }, { "epoch": 0.10885015652762502, "grad_norm": 0.3298220634460449, "learning_rate": 0.001, "loss": 2.6535, "step": 2573 }, { "epoch": 0.10889246129114138, "grad_norm": 0.9503449201583862, "learning_rate": 0.001, "loss": 1.846, "step": 2574 }, { "epoch": 0.10893476605465775, "grad_norm": 0.4278430938720703, "learning_rate": 0.001, "loss": 2.8229, "step": 2575 }, { "epoch": 0.10897707081817412, "grad_norm": 0.35355350375175476, "learning_rate": 0.001, "loss": 2.7936, "step": 2576 }, { "epoch": 0.1090193755816905, "grad_norm": 2.9743497371673584, "learning_rate": 0.001, "loss": 1.9395, "step": 2577 }, { "epoch": 0.10906168034520687, "grad_norm": 1.5361433029174805, "learning_rate": 0.001, "loss": 2.2915, "step": 2578 }, { "epoch": 0.10910398510872324, "grad_norm": 0.35586532950401306, "learning_rate": 0.001, "loss": 2.2418, "step": 2579 }, { "epoch": 0.10914628987223961, "grad_norm": 0.7446362972259521, "learning_rate": 0.001, "loss": 2.3212, "step": 2580 }, { "epoch": 0.10918859463575599, "grad_norm": 0.3713662922382355, "learning_rate": 0.001, "loss": 2.0863, "step": 2581 }, { "epoch": 0.10923089939927236, "grad_norm": 0.4044400155544281, "learning_rate": 0.001, "loss": 2.1731, "step": 2582 }, { "epoch": 0.10927320416278873, "grad_norm": 2.1356008052825928, "learning_rate": 0.001, "loss": 2.3904, "step": 2583 }, { "epoch": 0.10931550892630511, "grad_norm": 0.9785255193710327, "learning_rate": 0.001, "loss": 1.529, "step": 2584 }, { "epoch": 0.10935781368982148, "grad_norm": 1.0335931777954102, "learning_rate": 0.001, "loss": 3.1518, "step": 2585 }, { "epoch": 0.10940011845333784, "grad_norm": 0.8856411576271057, "learning_rate": 0.001, "loss": 2.0349, "step": 2586 }, { "epoch": 0.10944242321685421, "grad_norm": 0.8047211766242981, "learning_rate": 0.001, "loss": 2.1286, "step": 2587 }, { "epoch": 0.1094847279803706, "grad_norm": 0.5640533566474915, "learning_rate": 0.001, "loss": 2.8253, "step": 2588 }, { "epoch": 0.10952703274388696, "grad_norm": 0.3256753087043762, "learning_rate": 0.001, "loss": 2.0331, "step": 2589 }, { "epoch": 0.10956933750740333, "grad_norm": 1.4646278619766235, "learning_rate": 0.001, "loss": 2.928, "step": 2590 }, { "epoch": 0.10961164227091971, "grad_norm": 5.041200637817383, "learning_rate": 0.001, "loss": 2.3683, "step": 2591 }, { "epoch": 0.10965394703443608, "grad_norm": 0.8993614912033081, "learning_rate": 0.001, "loss": 2.4207, "step": 2592 }, { "epoch": 0.10969625179795245, "grad_norm": 0.47238776087760925, "learning_rate": 0.001, "loss": 2.7745, "step": 2593 }, { "epoch": 0.10973855656146882, "grad_norm": 0.5236493945121765, "learning_rate": 0.001, "loss": 2.6803, "step": 2594 }, { "epoch": 0.1097808613249852, "grad_norm": 0.49597102403640747, "learning_rate": 0.001, "loss": 2.4384, "step": 2595 }, { "epoch": 0.10982316608850157, "grad_norm": 0.7567354440689087, "learning_rate": 0.001, "loss": 2.0341, "step": 2596 }, { "epoch": 0.10986547085201794, "grad_norm": 43.87346649169922, "learning_rate": 0.001, "loss": 2.3568, "step": 2597 }, { "epoch": 0.1099077756155343, "grad_norm": 0.926131546497345, "learning_rate": 0.001, "loss": 3.707, "step": 2598 }, { "epoch": 0.10995008037905069, "grad_norm": 0.2967435419559479, "learning_rate": 0.001, "loss": 2.0035, "step": 2599 }, { "epoch": 0.10999238514256705, "grad_norm": 0.4026656150817871, "learning_rate": 0.001, "loss": 2.9419, "step": 2600 }, { "epoch": 0.11003468990608342, "grad_norm": 1.9945727586746216, "learning_rate": 0.001, "loss": 2.478, "step": 2601 }, { "epoch": 0.1100769946695998, "grad_norm": 8.51455307006836, "learning_rate": 0.001, "loss": 2.4987, "step": 2602 }, { "epoch": 0.11011929943311617, "grad_norm": 7.822841167449951, "learning_rate": 0.001, "loss": 2.9194, "step": 2603 }, { "epoch": 0.11016160419663254, "grad_norm": 0.6975441575050354, "learning_rate": 0.001, "loss": 4.2243, "step": 2604 }, { "epoch": 0.11020390896014891, "grad_norm": 9.386311531066895, "learning_rate": 0.001, "loss": 2.6628, "step": 2605 }, { "epoch": 0.11024621372366529, "grad_norm": 0.5597333908081055, "learning_rate": 0.001, "loss": 3.464, "step": 2606 }, { "epoch": 0.11028851848718166, "grad_norm": 3.522331714630127, "learning_rate": 0.001, "loss": 2.7945, "step": 2607 }, { "epoch": 0.11033082325069803, "grad_norm": 0.4466964602470398, "learning_rate": 0.001, "loss": 3.2322, "step": 2608 }, { "epoch": 0.1103731280142144, "grad_norm": 12.962498664855957, "learning_rate": 0.001, "loss": 2.4416, "step": 2609 }, { "epoch": 0.11041543277773078, "grad_norm": 0.35099491477012634, "learning_rate": 0.001, "loss": 3.0072, "step": 2610 }, { "epoch": 0.11045773754124714, "grad_norm": 0.5855743885040283, "learning_rate": 0.001, "loss": 3.3016, "step": 2611 }, { "epoch": 0.11050004230476351, "grad_norm": 1.2006386518478394, "learning_rate": 0.001, "loss": 4.1105, "step": 2612 }, { "epoch": 0.1105423470682799, "grad_norm": 0.3867046535015106, "learning_rate": 0.001, "loss": 2.4014, "step": 2613 }, { "epoch": 0.11058465183179626, "grad_norm": 0.6155429482460022, "learning_rate": 0.001, "loss": 3.2606, "step": 2614 }, { "epoch": 0.11062695659531263, "grad_norm": 1.17522394657135, "learning_rate": 0.001, "loss": 2.7431, "step": 2615 }, { "epoch": 0.110669261358829, "grad_norm": 1.3711291551589966, "learning_rate": 0.001, "loss": 2.6056, "step": 2616 }, { "epoch": 0.11071156612234538, "grad_norm": 0.33851131796836853, "learning_rate": 0.001, "loss": 2.9142, "step": 2617 }, { "epoch": 0.11075387088586175, "grad_norm": 0.32981690764427185, "learning_rate": 0.001, "loss": 2.4273, "step": 2618 }, { "epoch": 0.11079617564937812, "grad_norm": 0.3835381269454956, "learning_rate": 0.001, "loss": 3.0215, "step": 2619 }, { "epoch": 0.11083848041289449, "grad_norm": 0.2677971422672272, "learning_rate": 0.001, "loss": 2.0736, "step": 2620 }, { "epoch": 0.11088078517641087, "grad_norm": 0.33958640694618225, "learning_rate": 0.001, "loss": 3.4959, "step": 2621 }, { "epoch": 0.11092308993992724, "grad_norm": 0.281795859336853, "learning_rate": 0.001, "loss": 2.2221, "step": 2622 }, { "epoch": 0.1109653947034436, "grad_norm": 0.3385681211948395, "learning_rate": 0.001, "loss": 1.5377, "step": 2623 }, { "epoch": 0.11100769946695999, "grad_norm": 0.466964453458786, "learning_rate": 0.001, "loss": 2.3652, "step": 2624 }, { "epoch": 0.11105000423047635, "grad_norm": 0.25589802861213684, "learning_rate": 0.001, "loss": 1.903, "step": 2625 }, { "epoch": 0.11109230899399272, "grad_norm": 0.3010057508945465, "learning_rate": 0.001, "loss": 3.1568, "step": 2626 }, { "epoch": 0.11113461375750909, "grad_norm": 6.012112140655518, "learning_rate": 0.001, "loss": 2.411, "step": 2627 }, { "epoch": 0.11117691852102547, "grad_norm": 0.4896494448184967, "learning_rate": 0.001, "loss": 3.1489, "step": 2628 }, { "epoch": 0.11121922328454184, "grad_norm": 0.33814510703086853, "learning_rate": 0.001, "loss": 2.1617, "step": 2629 }, { "epoch": 0.11126152804805821, "grad_norm": 0.23706887662410736, "learning_rate": 0.001, "loss": 2.3078, "step": 2630 }, { "epoch": 0.11130383281157458, "grad_norm": 0.280916690826416, "learning_rate": 0.001, "loss": 2.4966, "step": 2631 }, { "epoch": 0.11134613757509096, "grad_norm": 1.4228007793426514, "learning_rate": 0.001, "loss": 1.9013, "step": 2632 }, { "epoch": 0.11138844233860733, "grad_norm": 0.3774404525756836, "learning_rate": 0.001, "loss": 2.2246, "step": 2633 }, { "epoch": 0.1114307471021237, "grad_norm": 13.576204299926758, "learning_rate": 0.001, "loss": 2.8407, "step": 2634 }, { "epoch": 0.11147305186564008, "grad_norm": 1.4729946851730347, "learning_rate": 0.001, "loss": 2.5873, "step": 2635 }, { "epoch": 0.11151535662915645, "grad_norm": 0.2995510697364807, "learning_rate": 0.001, "loss": 2.0534, "step": 2636 }, { "epoch": 0.11155766139267281, "grad_norm": 0.389316201210022, "learning_rate": 0.001, "loss": 3.0107, "step": 2637 }, { "epoch": 0.11159996615618918, "grad_norm": 0.375186562538147, "learning_rate": 0.001, "loss": 3.2694, "step": 2638 }, { "epoch": 0.11164227091970556, "grad_norm": 0.24365709722042084, "learning_rate": 0.001, "loss": 2.6694, "step": 2639 }, { "epoch": 0.11168457568322193, "grad_norm": 0.35557329654693604, "learning_rate": 0.001, "loss": 2.1832, "step": 2640 }, { "epoch": 0.1117268804467383, "grad_norm": 0.2812917232513428, "learning_rate": 0.001, "loss": 2.6873, "step": 2641 }, { "epoch": 0.11176918521025467, "grad_norm": 0.3376398980617523, "learning_rate": 0.001, "loss": 2.1201, "step": 2642 }, { "epoch": 0.11181148997377105, "grad_norm": 1.3907296657562256, "learning_rate": 0.001, "loss": 2.5576, "step": 2643 }, { "epoch": 0.11185379473728742, "grad_norm": 3.7346065044403076, "learning_rate": 0.001, "loss": 2.812, "step": 2644 }, { "epoch": 0.11189609950080379, "grad_norm": 15.240631103515625, "learning_rate": 0.001, "loss": 2.7597, "step": 2645 }, { "epoch": 0.11193840426432017, "grad_norm": 0.24908733367919922, "learning_rate": 0.001, "loss": 2.6942, "step": 2646 }, { "epoch": 0.11198070902783654, "grad_norm": 0.43838199973106384, "learning_rate": 0.001, "loss": 2.216, "step": 2647 }, { "epoch": 0.1120230137913529, "grad_norm": 1.0071830749511719, "learning_rate": 0.001, "loss": 2.0025, "step": 2648 }, { "epoch": 0.11206531855486927, "grad_norm": 1.4726154804229736, "learning_rate": 0.001, "loss": 2.3125, "step": 2649 }, { "epoch": 0.11210762331838565, "grad_norm": 0.4552990198135376, "learning_rate": 0.001, "loss": 2.402, "step": 2650 }, { "epoch": 0.11214992808190202, "grad_norm": 0.4046100378036499, "learning_rate": 0.001, "loss": 2.8129, "step": 2651 }, { "epoch": 0.11219223284541839, "grad_norm": 0.3966389000415802, "learning_rate": 0.001, "loss": 3.0564, "step": 2652 }, { "epoch": 0.11223453760893477, "grad_norm": 0.25914904475212097, "learning_rate": 0.001, "loss": 2.0611, "step": 2653 }, { "epoch": 0.11227684237245114, "grad_norm": 0.2761140763759613, "learning_rate": 0.001, "loss": 2.1811, "step": 2654 }, { "epoch": 0.11231914713596751, "grad_norm": 0.3019154965877533, "learning_rate": 0.001, "loss": 2.5075, "step": 2655 }, { "epoch": 0.11236145189948388, "grad_norm": 0.22981767356395721, "learning_rate": 0.001, "loss": 1.7464, "step": 2656 }, { "epoch": 0.11240375666300026, "grad_norm": 0.32616347074508667, "learning_rate": 0.001, "loss": 3.1906, "step": 2657 }, { "epoch": 0.11244606142651663, "grad_norm": 0.2503935992717743, "learning_rate": 0.001, "loss": 2.4619, "step": 2658 }, { "epoch": 0.112488366190033, "grad_norm": 0.27525201439857483, "learning_rate": 0.001, "loss": 2.0142, "step": 2659 }, { "epoch": 0.11253067095354936, "grad_norm": 0.23904815316200256, "learning_rate": 0.001, "loss": 1.9905, "step": 2660 }, { "epoch": 0.11257297571706575, "grad_norm": 1.022966980934143, "learning_rate": 0.001, "loss": 2.2188, "step": 2661 }, { "epoch": 0.11261528048058211, "grad_norm": 0.39735960960388184, "learning_rate": 0.001, "loss": 2.3742, "step": 2662 }, { "epoch": 0.11265758524409848, "grad_norm": 0.22529076039791107, "learning_rate": 0.001, "loss": 2.3343, "step": 2663 }, { "epoch": 0.11269989000761486, "grad_norm": 0.6422929167747498, "learning_rate": 0.001, "loss": 2.8202, "step": 2664 }, { "epoch": 0.11274219477113123, "grad_norm": 0.32373046875, "learning_rate": 0.001, "loss": 3.1001, "step": 2665 }, { "epoch": 0.1127844995346476, "grad_norm": 0.4991035759449005, "learning_rate": 0.001, "loss": 1.8736, "step": 2666 }, { "epoch": 0.11282680429816397, "grad_norm": 0.2527635097503662, "learning_rate": 0.001, "loss": 2.5017, "step": 2667 }, { "epoch": 0.11286910906168035, "grad_norm": 7.938605785369873, "learning_rate": 0.001, "loss": 1.5331, "step": 2668 }, { "epoch": 0.11291141382519672, "grad_norm": 0.4624551832675934, "learning_rate": 0.001, "loss": 3.5508, "step": 2669 }, { "epoch": 0.11295371858871309, "grad_norm": 0.28487929701805115, "learning_rate": 0.001, "loss": 2.2229, "step": 2670 }, { "epoch": 0.11299602335222945, "grad_norm": 1.6842297315597534, "learning_rate": 0.001, "loss": 2.3192, "step": 2671 }, { "epoch": 0.11303832811574584, "grad_norm": 1.5760769844055176, "learning_rate": 0.001, "loss": 2.3325, "step": 2672 }, { "epoch": 0.1130806328792622, "grad_norm": 1.3597266674041748, "learning_rate": 0.001, "loss": 2.2061, "step": 2673 }, { "epoch": 0.11312293764277857, "grad_norm": 0.3656711280345917, "learning_rate": 0.001, "loss": 3.6342, "step": 2674 }, { "epoch": 0.11316524240629496, "grad_norm": 0.4814014136791229, "learning_rate": 0.001, "loss": 2.4345, "step": 2675 }, { "epoch": 0.11320754716981132, "grad_norm": 0.24970856308937073, "learning_rate": 0.001, "loss": 1.9132, "step": 2676 }, { "epoch": 0.11324985193332769, "grad_norm": 0.31391432881355286, "learning_rate": 0.001, "loss": 2.6646, "step": 2677 }, { "epoch": 0.11329215669684406, "grad_norm": 0.4391164481639862, "learning_rate": 0.001, "loss": 2.9287, "step": 2678 }, { "epoch": 0.11333446146036044, "grad_norm": 0.3026899993419647, "learning_rate": 0.001, "loss": 2.882, "step": 2679 }, { "epoch": 0.11337676622387681, "grad_norm": 0.755450963973999, "learning_rate": 0.001, "loss": 3.3115, "step": 2680 }, { "epoch": 0.11341907098739318, "grad_norm": 0.3200805187225342, "learning_rate": 0.001, "loss": 2.3836, "step": 2681 }, { "epoch": 0.11346137575090955, "grad_norm": 0.25980228185653687, "learning_rate": 0.001, "loss": 1.8265, "step": 2682 }, { "epoch": 0.11350368051442593, "grad_norm": 0.4624451696872711, "learning_rate": 0.001, "loss": 2.7016, "step": 2683 }, { "epoch": 0.1135459852779423, "grad_norm": 0.7080914378166199, "learning_rate": 0.001, "loss": 2.4309, "step": 2684 }, { "epoch": 0.11358829004145866, "grad_norm": 0.45167163014411926, "learning_rate": 0.001, "loss": 3.1995, "step": 2685 }, { "epoch": 0.11363059480497505, "grad_norm": 0.21926790475845337, "learning_rate": 0.001, "loss": 2.0448, "step": 2686 }, { "epoch": 0.11367289956849141, "grad_norm": 1.0838067531585693, "learning_rate": 0.001, "loss": 1.8552, "step": 2687 }, { "epoch": 0.11371520433200778, "grad_norm": 1.3477392196655273, "learning_rate": 0.001, "loss": 2.7008, "step": 2688 }, { "epoch": 0.11375750909552415, "grad_norm": 0.41002964973449707, "learning_rate": 0.001, "loss": 3.4506, "step": 2689 }, { "epoch": 0.11379981385904053, "grad_norm": 0.2277841717004776, "learning_rate": 0.001, "loss": 2.1868, "step": 2690 }, { "epoch": 0.1138421186225569, "grad_norm": 0.6149588227272034, "learning_rate": 0.001, "loss": 2.6013, "step": 2691 }, { "epoch": 0.11388442338607327, "grad_norm": 0.36279934644699097, "learning_rate": 0.001, "loss": 2.4312, "step": 2692 }, { "epoch": 0.11392672814958964, "grad_norm": 0.47520431876182556, "learning_rate": 0.001, "loss": 3.0366, "step": 2693 }, { "epoch": 0.11396903291310602, "grad_norm": 0.31242018938064575, "learning_rate": 0.001, "loss": 2.2829, "step": 2694 }, { "epoch": 0.11401133767662239, "grad_norm": 1.7739008665084839, "learning_rate": 0.001, "loss": 1.895, "step": 2695 }, { "epoch": 0.11405364244013876, "grad_norm": 0.5066441297531128, "learning_rate": 0.001, "loss": 2.6342, "step": 2696 }, { "epoch": 0.11409594720365514, "grad_norm": 0.24368147552013397, "learning_rate": 0.001, "loss": 2.7584, "step": 2697 }, { "epoch": 0.1141382519671715, "grad_norm": 0.44066333770751953, "learning_rate": 0.001, "loss": 2.3854, "step": 2698 }, { "epoch": 0.11418055673068787, "grad_norm": 0.26902955770492554, "learning_rate": 0.001, "loss": 2.7149, "step": 2699 }, { "epoch": 0.11422286149420424, "grad_norm": 4.966346263885498, "learning_rate": 0.001, "loss": 2.2739, "step": 2700 }, { "epoch": 0.11426516625772062, "grad_norm": 0.82923823595047, "learning_rate": 0.001, "loss": 2.4333, "step": 2701 }, { "epoch": 0.11430747102123699, "grad_norm": 0.37718573212623596, "learning_rate": 0.001, "loss": 2.846, "step": 2702 }, { "epoch": 0.11434977578475336, "grad_norm": 1.1402643918991089, "learning_rate": 0.001, "loss": 2.2025, "step": 2703 }, { "epoch": 0.11439208054826973, "grad_norm": 0.5933586359024048, "learning_rate": 0.001, "loss": 4.4132, "step": 2704 }, { "epoch": 0.11443438531178611, "grad_norm": 0.9966050386428833, "learning_rate": 0.001, "loss": 3.1715, "step": 2705 }, { "epoch": 0.11447669007530248, "grad_norm": 0.3818608820438385, "learning_rate": 0.001, "loss": 2.9614, "step": 2706 }, { "epoch": 0.11451899483881885, "grad_norm": 0.355037122964859, "learning_rate": 0.001, "loss": 2.7848, "step": 2707 }, { "epoch": 0.11456129960233523, "grad_norm": 0.4167128801345825, "learning_rate": 0.001, "loss": 2.1713, "step": 2708 }, { "epoch": 0.1146036043658516, "grad_norm": 0.3600790202617645, "learning_rate": 0.001, "loss": 2.0852, "step": 2709 }, { "epoch": 0.11464590912936796, "grad_norm": 0.7185443639755249, "learning_rate": 0.001, "loss": 2.1573, "step": 2710 }, { "epoch": 0.11468821389288433, "grad_norm": 0.5277695655822754, "learning_rate": 0.001, "loss": 2.6667, "step": 2711 }, { "epoch": 0.11473051865640072, "grad_norm": 2.362328052520752, "learning_rate": 0.001, "loss": 2.655, "step": 2712 }, { "epoch": 0.11477282341991708, "grad_norm": 1.576801061630249, "learning_rate": 0.001, "loss": 3.0455, "step": 2713 }, { "epoch": 0.11481512818343345, "grad_norm": 0.5034794807434082, "learning_rate": 0.001, "loss": 1.9612, "step": 2714 }, { "epoch": 0.11485743294694983, "grad_norm": 0.5581480860710144, "learning_rate": 0.001, "loss": 2.1294, "step": 2715 }, { "epoch": 0.1148997377104662, "grad_norm": 1.2885347604751587, "learning_rate": 0.001, "loss": 3.0082, "step": 2716 }, { "epoch": 0.11494204247398257, "grad_norm": 1.621717929840088, "learning_rate": 0.001, "loss": 2.4047, "step": 2717 }, { "epoch": 0.11498434723749894, "grad_norm": 22.007076263427734, "learning_rate": 0.001, "loss": 3.3206, "step": 2718 }, { "epoch": 0.11502665200101532, "grad_norm": 3.476715087890625, "learning_rate": 0.001, "loss": 2.8062, "step": 2719 }, { "epoch": 0.11506895676453169, "grad_norm": 0.24378205835819244, "learning_rate": 0.001, "loss": 1.6405, "step": 2720 }, { "epoch": 0.11511126152804806, "grad_norm": 3.1240763664245605, "learning_rate": 0.001, "loss": 2.9476, "step": 2721 }, { "epoch": 0.11515356629156442, "grad_norm": 0.27668654918670654, "learning_rate": 0.001, "loss": 1.6878, "step": 2722 }, { "epoch": 0.1151958710550808, "grad_norm": 33.68696975708008, "learning_rate": 0.001, "loss": 2.2438, "step": 2723 }, { "epoch": 0.11523817581859717, "grad_norm": 1.328370213508606, "learning_rate": 0.001, "loss": 2.6715, "step": 2724 }, { "epoch": 0.11528048058211354, "grad_norm": 22.9268741607666, "learning_rate": 0.001, "loss": 3.4431, "step": 2725 }, { "epoch": 0.11532278534562992, "grad_norm": 0.6958709955215454, "learning_rate": 0.001, "loss": 2.9032, "step": 2726 }, { "epoch": 0.11536509010914629, "grad_norm": 0.41907453536987305, "learning_rate": 0.001, "loss": 2.925, "step": 2727 }, { "epoch": 0.11540739487266266, "grad_norm": 0.2991430461406708, "learning_rate": 0.001, "loss": 2.5263, "step": 2728 }, { "epoch": 0.11544969963617903, "grad_norm": 0.7529126405715942, "learning_rate": 0.001, "loss": 2.8932, "step": 2729 }, { "epoch": 0.11549200439969541, "grad_norm": 0.8318426609039307, "learning_rate": 0.001, "loss": 2.7539, "step": 2730 }, { "epoch": 0.11553430916321178, "grad_norm": 1.7126951217651367, "learning_rate": 0.001, "loss": 2.2141, "step": 2731 }, { "epoch": 0.11557661392672815, "grad_norm": 0.28930333256721497, "learning_rate": 0.001, "loss": 2.4101, "step": 2732 }, { "epoch": 0.11561891869024452, "grad_norm": 0.3181939423084259, "learning_rate": 0.001, "loss": 2.2034, "step": 2733 }, { "epoch": 0.1156612234537609, "grad_norm": 0.34087949991226196, "learning_rate": 0.001, "loss": 2.6263, "step": 2734 }, { "epoch": 0.11570352821727727, "grad_norm": 1.4377021789550781, "learning_rate": 0.001, "loss": 2.2172, "step": 2735 }, { "epoch": 0.11574583298079363, "grad_norm": 0.3622700572013855, "learning_rate": 0.001, "loss": 2.5439, "step": 2736 }, { "epoch": 0.11578813774431002, "grad_norm": 0.27332210540771484, "learning_rate": 0.001, "loss": 2.9426, "step": 2737 }, { "epoch": 0.11583044250782638, "grad_norm": 32.977970123291016, "learning_rate": 0.001, "loss": 2.6618, "step": 2738 }, { "epoch": 0.11587274727134275, "grad_norm": 1.1440379619598389, "learning_rate": 0.001, "loss": 2.1755, "step": 2739 }, { "epoch": 0.11591505203485912, "grad_norm": 0.3366926610469818, "learning_rate": 0.001, "loss": 2.1376, "step": 2740 }, { "epoch": 0.1159573567983755, "grad_norm": 0.3079652786254883, "learning_rate": 0.001, "loss": 2.873, "step": 2741 }, { "epoch": 0.11599966156189187, "grad_norm": 0.3998541533946991, "learning_rate": 0.001, "loss": 2.7169, "step": 2742 }, { "epoch": 0.11604196632540824, "grad_norm": 0.33957117795944214, "learning_rate": 0.001, "loss": 2.496, "step": 2743 }, { "epoch": 0.1160842710889246, "grad_norm": 0.9444336891174316, "learning_rate": 0.001, "loss": 2.6322, "step": 2744 }, { "epoch": 0.11612657585244099, "grad_norm": 0.8139225840568542, "learning_rate": 0.001, "loss": 2.0574, "step": 2745 }, { "epoch": 0.11616888061595736, "grad_norm": 0.2738553583621979, "learning_rate": 0.001, "loss": 3.0332, "step": 2746 }, { "epoch": 0.11621118537947372, "grad_norm": 0.539919912815094, "learning_rate": 0.001, "loss": 2.1122, "step": 2747 }, { "epoch": 0.1162534901429901, "grad_norm": 0.29261156916618347, "learning_rate": 0.001, "loss": 2.6415, "step": 2748 }, { "epoch": 0.11629579490650647, "grad_norm": 0.36019593477249146, "learning_rate": 0.001, "loss": 2.0997, "step": 2749 }, { "epoch": 0.11633809967002284, "grad_norm": 1.0619240999221802, "learning_rate": 0.001, "loss": 2.7637, "step": 2750 }, { "epoch": 0.11638040443353921, "grad_norm": 0.32508841156959534, "learning_rate": 0.001, "loss": 3.1838, "step": 2751 }, { "epoch": 0.1164227091970556, "grad_norm": 0.37493109703063965, "learning_rate": 0.001, "loss": 2.8842, "step": 2752 }, { "epoch": 0.11646501396057196, "grad_norm": 0.2882426381111145, "learning_rate": 0.001, "loss": 2.1448, "step": 2753 }, { "epoch": 0.11650731872408833, "grad_norm": 0.30877891182899475, "learning_rate": 0.001, "loss": 2.4269, "step": 2754 }, { "epoch": 0.1165496234876047, "grad_norm": 0.2244696319103241, "learning_rate": 0.001, "loss": 1.6318, "step": 2755 }, { "epoch": 0.11659192825112108, "grad_norm": 0.2662592828273773, "learning_rate": 0.001, "loss": 2.2368, "step": 2756 }, { "epoch": 0.11663423301463745, "grad_norm": 0.7968965172767639, "learning_rate": 0.001, "loss": 2.4511, "step": 2757 }, { "epoch": 0.11667653777815382, "grad_norm": 0.3993340730667114, "learning_rate": 0.001, "loss": 2.2677, "step": 2758 }, { "epoch": 0.1167188425416702, "grad_norm": 7.428627014160156, "learning_rate": 0.001, "loss": 2.8915, "step": 2759 }, { "epoch": 0.11676114730518657, "grad_norm": 0.35218873620033264, "learning_rate": 0.001, "loss": 3.2628, "step": 2760 }, { "epoch": 0.11680345206870293, "grad_norm": 0.33024418354034424, "learning_rate": 0.001, "loss": 2.3512, "step": 2761 }, { "epoch": 0.1168457568322193, "grad_norm": 2.4879989624023438, "learning_rate": 0.001, "loss": 2.9775, "step": 2762 }, { "epoch": 0.11688806159573568, "grad_norm": 0.2612648904323578, "learning_rate": 0.001, "loss": 2.8071, "step": 2763 }, { "epoch": 0.11693036635925205, "grad_norm": 0.2603396475315094, "learning_rate": 0.001, "loss": 2.2835, "step": 2764 }, { "epoch": 0.11697267112276842, "grad_norm": 1.3054839372634888, "learning_rate": 0.001, "loss": 2.3514, "step": 2765 }, { "epoch": 0.11701497588628479, "grad_norm": 0.46820470690727234, "learning_rate": 0.001, "loss": 2.3354, "step": 2766 }, { "epoch": 0.11705728064980117, "grad_norm": 0.2680201530456543, "learning_rate": 0.001, "loss": 2.6435, "step": 2767 }, { "epoch": 0.11709958541331754, "grad_norm": 0.2581261694431305, "learning_rate": 0.001, "loss": 2.5269, "step": 2768 }, { "epoch": 0.11714189017683391, "grad_norm": 0.2201681286096573, "learning_rate": 0.001, "loss": 1.9714, "step": 2769 }, { "epoch": 0.11718419494035029, "grad_norm": 0.31604522466659546, "learning_rate": 0.001, "loss": 2.4044, "step": 2770 }, { "epoch": 0.11722649970386666, "grad_norm": 0.23219691216945648, "learning_rate": 0.001, "loss": 2.4686, "step": 2771 }, { "epoch": 0.11726880446738303, "grad_norm": 0.24609123170375824, "learning_rate": 0.001, "loss": 1.7137, "step": 2772 }, { "epoch": 0.1173111092308994, "grad_norm": 0.3166695535182953, "learning_rate": 0.001, "loss": 2.44, "step": 2773 }, { "epoch": 0.11735341399441578, "grad_norm": 0.6801291108131409, "learning_rate": 0.001, "loss": 2.8316, "step": 2774 }, { "epoch": 0.11739571875793214, "grad_norm": 0.25939130783081055, "learning_rate": 0.001, "loss": 2.155, "step": 2775 }, { "epoch": 0.11743802352144851, "grad_norm": 0.2404932826757431, "learning_rate": 0.001, "loss": 1.9066, "step": 2776 }, { "epoch": 0.1174803282849649, "grad_norm": 0.32322371006011963, "learning_rate": 0.001, "loss": 3.1624, "step": 2777 }, { "epoch": 0.11752263304848126, "grad_norm": 0.29162564873695374, "learning_rate": 0.001, "loss": 2.1677, "step": 2778 }, { "epoch": 0.11756493781199763, "grad_norm": 0.4201529324054718, "learning_rate": 0.001, "loss": 3.65, "step": 2779 }, { "epoch": 0.117607242575514, "grad_norm": 0.3757007420063019, "learning_rate": 0.001, "loss": 2.7946, "step": 2780 }, { "epoch": 0.11764954733903038, "grad_norm": 0.22242939472198486, "learning_rate": 0.001, "loss": 2.1342, "step": 2781 }, { "epoch": 0.11769185210254675, "grad_norm": 0.7142042517662048, "learning_rate": 0.001, "loss": 2.9385, "step": 2782 }, { "epoch": 0.11773415686606312, "grad_norm": 0.4159119129180908, "learning_rate": 0.001, "loss": 2.2336, "step": 2783 }, { "epoch": 0.11777646162957948, "grad_norm": 38.654056549072266, "learning_rate": 0.001, "loss": 3.0095, "step": 2784 }, { "epoch": 0.11781876639309587, "grad_norm": 0.2876143157482147, "learning_rate": 0.001, "loss": 2.1954, "step": 2785 }, { "epoch": 0.11786107115661223, "grad_norm": 2.3631954193115234, "learning_rate": 0.001, "loss": 3.1529, "step": 2786 }, { "epoch": 0.1179033759201286, "grad_norm": 0.2558928430080414, "learning_rate": 0.001, "loss": 2.0726, "step": 2787 }, { "epoch": 0.11794568068364499, "grad_norm": 2.708045721054077, "learning_rate": 0.001, "loss": 2.3912, "step": 2788 }, { "epoch": 0.11798798544716135, "grad_norm": 0.44729793071746826, "learning_rate": 0.001, "loss": 1.9636, "step": 2789 }, { "epoch": 0.11803029021067772, "grad_norm": 0.5945959091186523, "learning_rate": 0.001, "loss": 2.6161, "step": 2790 }, { "epoch": 0.11807259497419409, "grad_norm": 0.35763904452323914, "learning_rate": 0.001, "loss": 3.1271, "step": 2791 }, { "epoch": 0.11811489973771047, "grad_norm": 0.24619075655937195, "learning_rate": 0.001, "loss": 2.6635, "step": 2792 }, { "epoch": 0.11815720450122684, "grad_norm": 0.323395311832428, "learning_rate": 0.001, "loss": 3.0143, "step": 2793 }, { "epoch": 0.11819950926474321, "grad_norm": 0.2660515010356903, "learning_rate": 0.001, "loss": 1.9591, "step": 2794 }, { "epoch": 0.11824181402825958, "grad_norm": 0.5746325850486755, "learning_rate": 0.001, "loss": 2.3898, "step": 2795 }, { "epoch": 0.11828411879177596, "grad_norm": 0.2861972153186798, "learning_rate": 0.001, "loss": 2.0267, "step": 2796 }, { "epoch": 0.11832642355529233, "grad_norm": 0.2711239457130432, "learning_rate": 0.001, "loss": 2.5983, "step": 2797 }, { "epoch": 0.1183687283188087, "grad_norm": 0.22106711566448212, "learning_rate": 0.001, "loss": 1.6675, "step": 2798 }, { "epoch": 0.11841103308232508, "grad_norm": 0.2644370198249817, "learning_rate": 0.001, "loss": 2.7655, "step": 2799 }, { "epoch": 0.11845333784584144, "grad_norm": 0.24654194712638855, "learning_rate": 0.001, "loss": 3.2642, "step": 2800 }, { "epoch": 0.11849564260935781, "grad_norm": 0.7044499516487122, "learning_rate": 0.001, "loss": 2.9117, "step": 2801 }, { "epoch": 0.11853794737287418, "grad_norm": 0.3446405827999115, "learning_rate": 0.001, "loss": 2.4988, "step": 2802 }, { "epoch": 0.11858025213639056, "grad_norm": 0.27378132939338684, "learning_rate": 0.001, "loss": 2.2514, "step": 2803 }, { "epoch": 0.11862255689990693, "grad_norm": 0.20993216335773468, "learning_rate": 0.001, "loss": 2.4, "step": 2804 }, { "epoch": 0.1186648616634233, "grad_norm": 0.5672168731689453, "learning_rate": 0.001, "loss": 2.1295, "step": 2805 }, { "epoch": 0.11870716642693967, "grad_norm": 0.2943876385688782, "learning_rate": 0.001, "loss": 3.7169, "step": 2806 }, { "epoch": 0.11874947119045605, "grad_norm": 0.6475502252578735, "learning_rate": 0.001, "loss": 2.3737, "step": 2807 }, { "epoch": 0.11879177595397242, "grad_norm": 0.3072225749492645, "learning_rate": 0.001, "loss": 1.9827, "step": 2808 }, { "epoch": 0.11883408071748879, "grad_norm": 0.31037554144859314, "learning_rate": 0.001, "loss": 1.558, "step": 2809 }, { "epoch": 0.11887638548100517, "grad_norm": 0.23181147873401642, "learning_rate": 0.001, "loss": 2.0697, "step": 2810 }, { "epoch": 0.11891869024452154, "grad_norm": 10.96942138671875, "learning_rate": 0.001, "loss": 1.6794, "step": 2811 }, { "epoch": 0.1189609950080379, "grad_norm": 0.42705076932907104, "learning_rate": 0.001, "loss": 2.3502, "step": 2812 }, { "epoch": 0.11900329977155427, "grad_norm": 0.26363325119018555, "learning_rate": 0.001, "loss": 3.0325, "step": 2813 }, { "epoch": 0.11904560453507065, "grad_norm": 0.26286640763282776, "learning_rate": 0.001, "loss": 2.1994, "step": 2814 }, { "epoch": 0.11908790929858702, "grad_norm": 0.2361530065536499, "learning_rate": 0.001, "loss": 1.9359, "step": 2815 }, { "epoch": 0.11913021406210339, "grad_norm": 0.8257160186767578, "learning_rate": 0.001, "loss": 3.0286, "step": 2816 }, { "epoch": 0.11917251882561976, "grad_norm": 0.2814808487892151, "learning_rate": 0.001, "loss": 2.8173, "step": 2817 }, { "epoch": 0.11921482358913614, "grad_norm": 0.23943133652210236, "learning_rate": 0.001, "loss": 3.0642, "step": 2818 }, { "epoch": 0.11925712835265251, "grad_norm": 0.206394761800766, "learning_rate": 0.001, "loss": 1.8372, "step": 2819 }, { "epoch": 0.11929943311616888, "grad_norm": 0.29365062713623047, "learning_rate": 0.001, "loss": 2.7685, "step": 2820 }, { "epoch": 0.11934173787968526, "grad_norm": 0.2639111280441284, "learning_rate": 0.001, "loss": 4.2966, "step": 2821 }, { "epoch": 0.11938404264320163, "grad_norm": 0.24708105623722076, "learning_rate": 0.001, "loss": 2.184, "step": 2822 }, { "epoch": 0.119426347406718, "grad_norm": 0.3220023512840271, "learning_rate": 0.001, "loss": 2.0879, "step": 2823 }, { "epoch": 0.11946865217023436, "grad_norm": 0.30507785081863403, "learning_rate": 0.001, "loss": 2.2369, "step": 2824 }, { "epoch": 0.11951095693375074, "grad_norm": 0.23936758935451508, "learning_rate": 0.001, "loss": 3.1638, "step": 2825 }, { "epoch": 0.11955326169726711, "grad_norm": 0.5877100825309753, "learning_rate": 0.001, "loss": 2.9414, "step": 2826 }, { "epoch": 0.11959556646078348, "grad_norm": 0.21661652624607086, "learning_rate": 0.001, "loss": 2.3561, "step": 2827 }, { "epoch": 0.11963787122429985, "grad_norm": 0.2344318926334381, "learning_rate": 0.001, "loss": 2.9277, "step": 2828 }, { "epoch": 0.11968017598781623, "grad_norm": 0.3025057017803192, "learning_rate": 0.001, "loss": 2.3685, "step": 2829 }, { "epoch": 0.1197224807513326, "grad_norm": 0.24070441722869873, "learning_rate": 0.001, "loss": 3.1727, "step": 2830 }, { "epoch": 0.11976478551484897, "grad_norm": 0.25270789861679077, "learning_rate": 0.001, "loss": 2.9425, "step": 2831 }, { "epoch": 0.11980709027836535, "grad_norm": 0.23945005238056183, "learning_rate": 0.001, "loss": 3.2619, "step": 2832 }, { "epoch": 0.11984939504188172, "grad_norm": 0.257040798664093, "learning_rate": 0.001, "loss": 2.4056, "step": 2833 }, { "epoch": 0.11989169980539809, "grad_norm": 0.3537012040615082, "learning_rate": 0.001, "loss": 2.5206, "step": 2834 }, { "epoch": 0.11993400456891445, "grad_norm": 0.9324373006820679, "learning_rate": 0.001, "loss": 3.0553, "step": 2835 }, { "epoch": 0.11997630933243084, "grad_norm": 0.32961177825927734, "learning_rate": 0.001, "loss": 1.9009, "step": 2836 }, { "epoch": 0.1200186140959472, "grad_norm": 0.21902523934841156, "learning_rate": 0.001, "loss": 2.2892, "step": 2837 }, { "epoch": 0.12006091885946357, "grad_norm": 0.35956844687461853, "learning_rate": 0.001, "loss": 2.2198, "step": 2838 }, { "epoch": 0.12010322362297995, "grad_norm": 0.31855231523513794, "learning_rate": 0.001, "loss": 2.6221, "step": 2839 }, { "epoch": 0.12014552838649632, "grad_norm": 1.0669035911560059, "learning_rate": 0.001, "loss": 2.6779, "step": 2840 }, { "epoch": 0.12018783315001269, "grad_norm": 0.5651441812515259, "learning_rate": 0.001, "loss": 2.3371, "step": 2841 }, { "epoch": 0.12023013791352906, "grad_norm": 0.2813262939453125, "learning_rate": 0.001, "loss": 2.847, "step": 2842 }, { "epoch": 0.12027244267704544, "grad_norm": 0.21474647521972656, "learning_rate": 0.001, "loss": 2.262, "step": 2843 }, { "epoch": 0.12031474744056181, "grad_norm": 0.9822518229484558, "learning_rate": 0.001, "loss": 2.1823, "step": 2844 }, { "epoch": 0.12035705220407818, "grad_norm": 0.4417326748371124, "learning_rate": 0.001, "loss": 1.6555, "step": 2845 }, { "epoch": 0.12039935696759455, "grad_norm": 0.31064194440841675, "learning_rate": 0.001, "loss": 1.981, "step": 2846 }, { "epoch": 0.12044166173111093, "grad_norm": 1.0444011688232422, "learning_rate": 0.001, "loss": 2.0207, "step": 2847 }, { "epoch": 0.1204839664946273, "grad_norm": 0.25042444467544556, "learning_rate": 0.001, "loss": 2.2386, "step": 2848 }, { "epoch": 0.12052627125814366, "grad_norm": 0.26854464411735535, "learning_rate": 0.001, "loss": 2.7635, "step": 2849 }, { "epoch": 0.12056857602166005, "grad_norm": 0.28399658203125, "learning_rate": 0.001, "loss": 2.6783, "step": 2850 }, { "epoch": 0.12061088078517641, "grad_norm": 0.3274761736392975, "learning_rate": 0.001, "loss": 2.8354, "step": 2851 }, { "epoch": 0.12065318554869278, "grad_norm": 0.44863566756248474, "learning_rate": 0.001, "loss": 3.583, "step": 2852 }, { "epoch": 0.12069549031220915, "grad_norm": 0.2759772539138794, "learning_rate": 0.001, "loss": 2.4853, "step": 2853 }, { "epoch": 0.12073779507572553, "grad_norm": 0.3000403344631195, "learning_rate": 0.001, "loss": 2.8492, "step": 2854 }, { "epoch": 0.1207800998392419, "grad_norm": 0.2891789376735687, "learning_rate": 0.001, "loss": 2.5121, "step": 2855 }, { "epoch": 0.12082240460275827, "grad_norm": 0.26525014638900757, "learning_rate": 0.001, "loss": 3.1642, "step": 2856 }, { "epoch": 0.12086470936627464, "grad_norm": 2.695129156112671, "learning_rate": 0.001, "loss": 3.6222, "step": 2857 }, { "epoch": 0.12090701412979102, "grad_norm": 0.27255064249038696, "learning_rate": 0.001, "loss": 2.6515, "step": 2858 }, { "epoch": 0.12094931889330739, "grad_norm": 1.2651582956314087, "learning_rate": 0.001, "loss": 3.0903, "step": 2859 }, { "epoch": 0.12099162365682375, "grad_norm": 0.42988303303718567, "learning_rate": 0.001, "loss": 2.2843, "step": 2860 }, { "epoch": 0.12103392842034014, "grad_norm": 0.27339550852775574, "learning_rate": 0.001, "loss": 2.0482, "step": 2861 }, { "epoch": 0.1210762331838565, "grad_norm": 0.43721139430999756, "learning_rate": 0.001, "loss": 3.0085, "step": 2862 }, { "epoch": 0.12111853794737287, "grad_norm": 0.5759567022323608, "learning_rate": 0.001, "loss": 3.0561, "step": 2863 }, { "epoch": 0.12116084271088924, "grad_norm": 1.3344426155090332, "learning_rate": 0.001, "loss": 2.0301, "step": 2864 }, { "epoch": 0.12120314747440562, "grad_norm": 0.325006902217865, "learning_rate": 0.001, "loss": 2.0085, "step": 2865 }, { "epoch": 0.12124545223792199, "grad_norm": 0.27443817257881165, "learning_rate": 0.001, "loss": 1.91, "step": 2866 }, { "epoch": 0.12128775700143836, "grad_norm": 0.5229665637016296, "learning_rate": 0.001, "loss": 2.2958, "step": 2867 }, { "epoch": 0.12133006176495473, "grad_norm": 0.22957605123519897, "learning_rate": 0.001, "loss": 1.9397, "step": 2868 }, { "epoch": 0.12137236652847111, "grad_norm": 0.23909400403499603, "learning_rate": 0.001, "loss": 2.603, "step": 2869 }, { "epoch": 0.12141467129198748, "grad_norm": 0.9538590908050537, "learning_rate": 0.001, "loss": 1.8949, "step": 2870 }, { "epoch": 0.12145697605550385, "grad_norm": 0.25031131505966187, "learning_rate": 0.001, "loss": 1.9435, "step": 2871 }, { "epoch": 0.12149928081902023, "grad_norm": 0.30926719307899475, "learning_rate": 0.001, "loss": 2.2511, "step": 2872 }, { "epoch": 0.1215415855825366, "grad_norm": 0.2400166243314743, "learning_rate": 0.001, "loss": 2.2112, "step": 2873 }, { "epoch": 0.12158389034605296, "grad_norm": 0.5655799508094788, "learning_rate": 0.001, "loss": 2.8809, "step": 2874 }, { "epoch": 0.12162619510956933, "grad_norm": 0.495080828666687, "learning_rate": 0.001, "loss": 2.6534, "step": 2875 }, { "epoch": 0.12166849987308571, "grad_norm": 0.46104100346565247, "learning_rate": 0.001, "loss": 2.5455, "step": 2876 }, { "epoch": 0.12171080463660208, "grad_norm": 3.711001396179199, "learning_rate": 0.001, "loss": 2.2507, "step": 2877 }, { "epoch": 0.12175310940011845, "grad_norm": 0.30323144793510437, "learning_rate": 0.001, "loss": 2.8051, "step": 2878 }, { "epoch": 0.12179541416363482, "grad_norm": 0.5401155948638916, "learning_rate": 0.001, "loss": 1.596, "step": 2879 }, { "epoch": 0.1218377189271512, "grad_norm": 0.28242409229278564, "learning_rate": 0.001, "loss": 2.9103, "step": 2880 }, { "epoch": 0.12188002369066757, "grad_norm": 0.5616193413734436, "learning_rate": 0.001, "loss": 2.0185, "step": 2881 }, { "epoch": 0.12192232845418394, "grad_norm": 0.48654183745384216, "learning_rate": 0.001, "loss": 1.996, "step": 2882 }, { "epoch": 0.12196463321770032, "grad_norm": 0.2724916338920593, "learning_rate": 0.001, "loss": 2.6126, "step": 2883 }, { "epoch": 0.12200693798121669, "grad_norm": 0.3358596861362457, "learning_rate": 0.001, "loss": 2.2338, "step": 2884 }, { "epoch": 0.12204924274473306, "grad_norm": 0.28303262591362, "learning_rate": 0.001, "loss": 3.4625, "step": 2885 }, { "epoch": 0.12209154750824942, "grad_norm": 1.536812663078308, "learning_rate": 0.001, "loss": 2.7973, "step": 2886 }, { "epoch": 0.1221338522717658, "grad_norm": 0.2855709195137024, "learning_rate": 0.001, "loss": 2.583, "step": 2887 }, { "epoch": 0.12217615703528217, "grad_norm": 3.504598379135132, "learning_rate": 0.001, "loss": 2.1699, "step": 2888 }, { "epoch": 0.12221846179879854, "grad_norm": 1.3819570541381836, "learning_rate": 0.001, "loss": 3.72, "step": 2889 }, { "epoch": 0.12226076656231491, "grad_norm": 0.21111451089382172, "learning_rate": 0.001, "loss": 1.495, "step": 2890 }, { "epoch": 0.12230307132583129, "grad_norm": 0.4661988317966461, "learning_rate": 0.001, "loss": 2.697, "step": 2891 }, { "epoch": 0.12234537608934766, "grad_norm": 0.47962015867233276, "learning_rate": 0.001, "loss": 1.6233, "step": 2892 }, { "epoch": 0.12238768085286403, "grad_norm": 0.33557215332984924, "learning_rate": 0.001, "loss": 3.1164, "step": 2893 }, { "epoch": 0.12242998561638041, "grad_norm": 0.25374147295951843, "learning_rate": 0.001, "loss": 2.2069, "step": 2894 }, { "epoch": 0.12247229037989678, "grad_norm": 0.397442489862442, "learning_rate": 0.001, "loss": 2.6315, "step": 2895 }, { "epoch": 0.12251459514341315, "grad_norm": 0.2583683729171753, "learning_rate": 0.001, "loss": 2.516, "step": 2896 }, { "epoch": 0.12255689990692951, "grad_norm": 0.3760106861591339, "learning_rate": 0.001, "loss": 1.859, "step": 2897 }, { "epoch": 0.1225992046704459, "grad_norm": 0.3791460394859314, "learning_rate": 0.001, "loss": 2.9503, "step": 2898 }, { "epoch": 0.12264150943396226, "grad_norm": 0.5899031162261963, "learning_rate": 0.001, "loss": 2.4752, "step": 2899 }, { "epoch": 0.12268381419747863, "grad_norm": 0.29078209400177, "learning_rate": 0.001, "loss": 2.3481, "step": 2900 }, { "epoch": 0.12272611896099501, "grad_norm": 6.856618881225586, "learning_rate": 0.001, "loss": 3.0669, "step": 2901 }, { "epoch": 0.12276842372451138, "grad_norm": 1.2403521537780762, "learning_rate": 0.001, "loss": 1.9102, "step": 2902 }, { "epoch": 0.12281072848802775, "grad_norm": 0.6264525055885315, "learning_rate": 0.001, "loss": 1.9421, "step": 2903 }, { "epoch": 0.12285303325154412, "grad_norm": 0.9500356912612915, "learning_rate": 0.001, "loss": 2.146, "step": 2904 }, { "epoch": 0.1228953380150605, "grad_norm": 0.27822932600975037, "learning_rate": 0.001, "loss": 2.3955, "step": 2905 }, { "epoch": 0.12293764277857687, "grad_norm": 0.27553531527519226, "learning_rate": 0.001, "loss": 2.3293, "step": 2906 }, { "epoch": 0.12297994754209324, "grad_norm": 0.2871011197566986, "learning_rate": 0.001, "loss": 2.0411, "step": 2907 }, { "epoch": 0.1230222523056096, "grad_norm": 0.3595285713672638, "learning_rate": 0.001, "loss": 2.316, "step": 2908 }, { "epoch": 0.12306455706912599, "grad_norm": 0.32838475704193115, "learning_rate": 0.001, "loss": 3.3189, "step": 2909 }, { "epoch": 0.12310686183264236, "grad_norm": 0.2596978545188904, "learning_rate": 0.001, "loss": 2.4232, "step": 2910 }, { "epoch": 0.12314916659615872, "grad_norm": 0.2212909609079361, "learning_rate": 0.001, "loss": 2.0995, "step": 2911 }, { "epoch": 0.1231914713596751, "grad_norm": 0.2778913974761963, "learning_rate": 0.001, "loss": 2.1786, "step": 2912 }, { "epoch": 0.12323377612319147, "grad_norm": 0.23343005776405334, "learning_rate": 0.001, "loss": 2.1327, "step": 2913 }, { "epoch": 0.12327608088670784, "grad_norm": 0.3342663049697876, "learning_rate": 0.001, "loss": 2.605, "step": 2914 }, { "epoch": 0.12331838565022421, "grad_norm": 0.23159651458263397, "learning_rate": 0.001, "loss": 2.4356, "step": 2915 }, { "epoch": 0.12336069041374059, "grad_norm": 0.36005109548568726, "learning_rate": 0.001, "loss": 1.9138, "step": 2916 }, { "epoch": 0.12340299517725696, "grad_norm": 0.21737025678157806, "learning_rate": 0.001, "loss": 1.9291, "step": 2917 }, { "epoch": 0.12344529994077333, "grad_norm": 0.9176145195960999, "learning_rate": 0.001, "loss": 1.9523, "step": 2918 }, { "epoch": 0.1234876047042897, "grad_norm": 0.4270826280117035, "learning_rate": 0.001, "loss": 2.6282, "step": 2919 }, { "epoch": 0.12352990946780608, "grad_norm": 0.23092937469482422, "learning_rate": 0.001, "loss": 2.1475, "step": 2920 }, { "epoch": 0.12357221423132245, "grad_norm": 0.32089483737945557, "learning_rate": 0.001, "loss": 2.8787, "step": 2921 }, { "epoch": 0.12361451899483882, "grad_norm": 0.5019152164459229, "learning_rate": 0.001, "loss": 2.4339, "step": 2922 }, { "epoch": 0.1236568237583552, "grad_norm": 8.176496505737305, "learning_rate": 0.001, "loss": 2.9616, "step": 2923 }, { "epoch": 0.12369912852187157, "grad_norm": 0.42185178399086, "learning_rate": 0.001, "loss": 3.3083, "step": 2924 }, { "epoch": 0.12374143328538793, "grad_norm": 0.6926589608192444, "learning_rate": 0.001, "loss": 4.3947, "step": 2925 }, { "epoch": 0.1237837380489043, "grad_norm": 2.2707536220550537, "learning_rate": 0.001, "loss": 1.8298, "step": 2926 }, { "epoch": 0.12382604281242068, "grad_norm": 0.2851990759372711, "learning_rate": 0.001, "loss": 2.0727, "step": 2927 }, { "epoch": 0.12386834757593705, "grad_norm": 0.7518361210823059, "learning_rate": 0.001, "loss": 1.6419, "step": 2928 }, { "epoch": 0.12391065233945342, "grad_norm": 0.2845136523246765, "learning_rate": 0.001, "loss": 2.6422, "step": 2929 }, { "epoch": 0.12395295710296979, "grad_norm": 0.32894209027290344, "learning_rate": 0.001, "loss": 2.7414, "step": 2930 }, { "epoch": 0.12399526186648617, "grad_norm": 1.5604556798934937, "learning_rate": 0.001, "loss": 2.3227, "step": 2931 }, { "epoch": 0.12403756663000254, "grad_norm": 0.49824783205986023, "learning_rate": 0.001, "loss": 1.9455, "step": 2932 }, { "epoch": 0.1240798713935189, "grad_norm": 0.40257930755615234, "learning_rate": 0.001, "loss": 2.6387, "step": 2933 }, { "epoch": 0.12412217615703529, "grad_norm": 0.27476415038108826, "learning_rate": 0.001, "loss": 2.3756, "step": 2934 }, { "epoch": 0.12416448092055166, "grad_norm": 0.3869374096393585, "learning_rate": 0.001, "loss": 2.8796, "step": 2935 }, { "epoch": 0.12420678568406802, "grad_norm": 0.47996222972869873, "learning_rate": 0.001, "loss": 2.8597, "step": 2936 }, { "epoch": 0.12424909044758439, "grad_norm": 0.2530654966831207, "learning_rate": 0.001, "loss": 2.6784, "step": 2937 }, { "epoch": 0.12429139521110077, "grad_norm": 0.8970469832420349, "learning_rate": 0.001, "loss": 2.6644, "step": 2938 }, { "epoch": 0.12433369997461714, "grad_norm": 0.24477270245552063, "learning_rate": 0.001, "loss": 2.4868, "step": 2939 }, { "epoch": 0.12437600473813351, "grad_norm": 1.4595485925674438, "learning_rate": 0.001, "loss": 2.1721, "step": 2940 }, { "epoch": 0.12441830950164988, "grad_norm": 0.31102073192596436, "learning_rate": 0.001, "loss": 2.7839, "step": 2941 }, { "epoch": 0.12446061426516626, "grad_norm": 1.8330522775650024, "learning_rate": 0.001, "loss": 3.1596, "step": 2942 }, { "epoch": 0.12450291902868263, "grad_norm": 0.30926570296287537, "learning_rate": 0.001, "loss": 3.4155, "step": 2943 }, { "epoch": 0.124545223792199, "grad_norm": 0.3584842085838318, "learning_rate": 0.001, "loss": 5.0056, "step": 2944 }, { "epoch": 0.12458752855571538, "grad_norm": 0.24263957142829895, "learning_rate": 0.001, "loss": 1.7688, "step": 2945 }, { "epoch": 0.12462983331923175, "grad_norm": 0.7257891893386841, "learning_rate": 0.001, "loss": 2.9134, "step": 2946 }, { "epoch": 0.12467213808274812, "grad_norm": 0.2676026225090027, "learning_rate": 0.001, "loss": 3.1154, "step": 2947 }, { "epoch": 0.12471444284626448, "grad_norm": 1.5698000192642212, "learning_rate": 0.001, "loss": 2.7397, "step": 2948 }, { "epoch": 0.12475674760978087, "grad_norm": 0.2666419744491577, "learning_rate": 0.001, "loss": 2.6645, "step": 2949 }, { "epoch": 0.12479905237329723, "grad_norm": 0.9079283475875854, "learning_rate": 0.001, "loss": 1.6547, "step": 2950 }, { "epoch": 0.1248413571368136, "grad_norm": 0.27443641424179077, "learning_rate": 0.001, "loss": 2.2036, "step": 2951 }, { "epoch": 0.12488366190032997, "grad_norm": 0.27384769916534424, "learning_rate": 0.001, "loss": 2.2574, "step": 2952 }, { "epoch": 0.12492596666384635, "grad_norm": 0.37685632705688477, "learning_rate": 0.001, "loss": 2.1753, "step": 2953 }, { "epoch": 0.12496827142736272, "grad_norm": 0.5168205499649048, "learning_rate": 0.001, "loss": 3.1164, "step": 2954 }, { "epoch": 0.1250105761908791, "grad_norm": 0.22727070748806, "learning_rate": 0.001, "loss": 1.8189, "step": 2955 }, { "epoch": 0.12505288095439546, "grad_norm": 2.466627836227417, "learning_rate": 0.001, "loss": 1.925, "step": 2956 }, { "epoch": 0.12509518571791184, "grad_norm": 0.37915435433387756, "learning_rate": 0.001, "loss": 3.6095, "step": 2957 }, { "epoch": 0.12513749048142822, "grad_norm": 0.3377191424369812, "learning_rate": 0.001, "loss": 3.52, "step": 2958 }, { "epoch": 0.12517979524494457, "grad_norm": 0.5984206199645996, "learning_rate": 0.001, "loss": 2.0392, "step": 2959 }, { "epoch": 0.12522210000846096, "grad_norm": 0.2659249007701874, "learning_rate": 0.001, "loss": 1.8572, "step": 2960 }, { "epoch": 0.1252644047719773, "grad_norm": 0.25528255105018616, "learning_rate": 0.001, "loss": 2.2558, "step": 2961 }, { "epoch": 0.1253067095354937, "grad_norm": 0.3032079339027405, "learning_rate": 0.001, "loss": 2.3852, "step": 2962 }, { "epoch": 0.12534901429901008, "grad_norm": 0.29467710852622986, "learning_rate": 0.001, "loss": 2.2288, "step": 2963 }, { "epoch": 0.12539131906252643, "grad_norm": 0.5475602746009827, "learning_rate": 0.001, "loss": 2.0179, "step": 2964 }, { "epoch": 0.1254336238260428, "grad_norm": 0.2524794340133667, "learning_rate": 0.001, "loss": 3.1053, "step": 2965 }, { "epoch": 0.1254759285895592, "grad_norm": 0.26027101278305054, "learning_rate": 0.001, "loss": 2.4225, "step": 2966 }, { "epoch": 0.12551823335307555, "grad_norm": 0.2270876169204712, "learning_rate": 0.001, "loss": 1.8372, "step": 2967 }, { "epoch": 0.12556053811659193, "grad_norm": 1.1829967498779297, "learning_rate": 0.001, "loss": 2.1506, "step": 2968 }, { "epoch": 0.1256028428801083, "grad_norm": 0.423272967338562, "learning_rate": 0.001, "loss": 2.3921, "step": 2969 }, { "epoch": 0.12564514764362467, "grad_norm": 0.8556601405143738, "learning_rate": 0.001, "loss": 2.0293, "step": 2970 }, { "epoch": 0.12568745240714105, "grad_norm": 0.28667885065078735, "learning_rate": 0.001, "loss": 2.2469, "step": 2971 }, { "epoch": 0.1257297571706574, "grad_norm": 0.3176821768283844, "learning_rate": 0.001, "loss": 2.315, "step": 2972 }, { "epoch": 0.12577206193417378, "grad_norm": 0.3208604156970978, "learning_rate": 0.001, "loss": 3.3225, "step": 2973 }, { "epoch": 0.12581436669769017, "grad_norm": 0.3094156086444855, "learning_rate": 0.001, "loss": 1.8009, "step": 2974 }, { "epoch": 0.12585667146120652, "grad_norm": 0.2794618606567383, "learning_rate": 0.001, "loss": 2.4301, "step": 2975 }, { "epoch": 0.1258989762247229, "grad_norm": 1.7522642612457275, "learning_rate": 0.001, "loss": 2.5346, "step": 2976 }, { "epoch": 0.12594128098823928, "grad_norm": 0.7548322081565857, "learning_rate": 0.001, "loss": 2.3579, "step": 2977 }, { "epoch": 0.12598358575175564, "grad_norm": 0.31399986147880554, "learning_rate": 0.001, "loss": 2.8131, "step": 2978 }, { "epoch": 0.12602589051527202, "grad_norm": 0.5363809466362, "learning_rate": 0.001, "loss": 2.5607, "step": 2979 }, { "epoch": 0.1260681952787884, "grad_norm": 0.495072603225708, "learning_rate": 0.001, "loss": 4.0278, "step": 2980 }, { "epoch": 0.12611050004230476, "grad_norm": 0.36730262637138367, "learning_rate": 0.001, "loss": 2.1536, "step": 2981 }, { "epoch": 0.12615280480582114, "grad_norm": 0.31238043308258057, "learning_rate": 0.001, "loss": 2.0246, "step": 2982 }, { "epoch": 0.1261951095693375, "grad_norm": 0.2928439974784851, "learning_rate": 0.001, "loss": 3.6699, "step": 2983 }, { "epoch": 0.12623741433285388, "grad_norm": 0.31920191645622253, "learning_rate": 0.001, "loss": 3.32, "step": 2984 }, { "epoch": 0.12627971909637026, "grad_norm": 0.27571341395378113, "learning_rate": 0.001, "loss": 1.9494, "step": 2985 }, { "epoch": 0.1263220238598866, "grad_norm": 0.3193123936653137, "learning_rate": 0.001, "loss": 2.1162, "step": 2986 }, { "epoch": 0.126364328623403, "grad_norm": 0.24550597369670868, "learning_rate": 0.001, "loss": 3.5763, "step": 2987 }, { "epoch": 0.12640663338691938, "grad_norm": 0.2659911513328552, "learning_rate": 0.001, "loss": 2.2886, "step": 2988 }, { "epoch": 0.12644893815043573, "grad_norm": 0.3616361916065216, "learning_rate": 0.001, "loss": 2.4602, "step": 2989 }, { "epoch": 0.1264912429139521, "grad_norm": 1.0882079601287842, "learning_rate": 0.001, "loss": 1.6782, "step": 2990 }, { "epoch": 0.1265335476774685, "grad_norm": 0.1997089982032776, "learning_rate": 0.001, "loss": 2.1267, "step": 2991 }, { "epoch": 0.12657585244098485, "grad_norm": 0.2554531991481781, "learning_rate": 0.001, "loss": 2.2105, "step": 2992 }, { "epoch": 0.12661815720450123, "grad_norm": 1.5472227334976196, "learning_rate": 0.001, "loss": 2.6065, "step": 2993 }, { "epoch": 0.1266604619680176, "grad_norm": 0.4191298186779022, "learning_rate": 0.001, "loss": 3.8673, "step": 2994 }, { "epoch": 0.12670276673153397, "grad_norm": 0.3299647569656372, "learning_rate": 0.001, "loss": 2.6935, "step": 2995 }, { "epoch": 0.12674507149505035, "grad_norm": 0.26964640617370605, "learning_rate": 0.001, "loss": 2.0866, "step": 2996 }, { "epoch": 0.1267873762585667, "grad_norm": 0.22724339365959167, "learning_rate": 0.001, "loss": 1.6615, "step": 2997 }, { "epoch": 0.12682968102208309, "grad_norm": 0.28157809376716614, "learning_rate": 0.001, "loss": 2.4699, "step": 2998 }, { "epoch": 0.12687198578559947, "grad_norm": 0.3091438412666321, "learning_rate": 0.001, "loss": 2.359, "step": 2999 }, { "epoch": 0.12691429054911582, "grad_norm": 0.2963161766529083, "learning_rate": 0.001, "loss": 2.3396, "step": 3000 }, { "epoch": 0.1269565953126322, "grad_norm": 0.3389158546924591, "learning_rate": 0.001, "loss": 3.0763, "step": 3001 }, { "epoch": 0.12699890007614859, "grad_norm": 1.0668145418167114, "learning_rate": 0.001, "loss": 2.6003, "step": 3002 }, { "epoch": 0.12704120483966494, "grad_norm": 0.28413814306259155, "learning_rate": 0.001, "loss": 2.9304, "step": 3003 }, { "epoch": 0.12708350960318132, "grad_norm": 0.5639888048171997, "learning_rate": 0.001, "loss": 2.1774, "step": 3004 }, { "epoch": 0.1271258143666977, "grad_norm": 0.24791397154331207, "learning_rate": 0.001, "loss": 2.9347, "step": 3005 }, { "epoch": 0.12716811913021406, "grad_norm": 0.23885676264762878, "learning_rate": 0.001, "loss": 2.189, "step": 3006 }, { "epoch": 0.12721042389373044, "grad_norm": 0.2213631123304367, "learning_rate": 0.001, "loss": 2.3451, "step": 3007 }, { "epoch": 0.1272527286572468, "grad_norm": 0.29912659525871277, "learning_rate": 0.001, "loss": 2.8164, "step": 3008 }, { "epoch": 0.12729503342076318, "grad_norm": 0.7422712445259094, "learning_rate": 0.001, "loss": 2.4031, "step": 3009 }, { "epoch": 0.12733733818427956, "grad_norm": 0.2729654014110565, "learning_rate": 0.001, "loss": 2.3894, "step": 3010 }, { "epoch": 0.1273796429477959, "grad_norm": 0.2171071618795395, "learning_rate": 0.001, "loss": 2.0108, "step": 3011 }, { "epoch": 0.1274219477113123, "grad_norm": 0.2428562194108963, "learning_rate": 0.001, "loss": 2.9788, "step": 3012 }, { "epoch": 0.12746425247482868, "grad_norm": 0.19264620542526245, "learning_rate": 0.001, "loss": 1.949, "step": 3013 }, { "epoch": 0.12750655723834503, "grad_norm": 0.2231709510087967, "learning_rate": 0.001, "loss": 2.2655, "step": 3014 }, { "epoch": 0.1275488620018614, "grad_norm": 0.6086738705635071, "learning_rate": 0.001, "loss": 3.2293, "step": 3015 }, { "epoch": 0.1275911667653778, "grad_norm": 0.20442450046539307, "learning_rate": 0.001, "loss": 1.6902, "step": 3016 }, { "epoch": 0.12763347152889415, "grad_norm": 0.2568240463733673, "learning_rate": 0.001, "loss": 2.3126, "step": 3017 }, { "epoch": 0.12767577629241053, "grad_norm": 0.6941277384757996, "learning_rate": 0.001, "loss": 2.5573, "step": 3018 }, { "epoch": 0.12771808105592689, "grad_norm": 0.2820114493370056, "learning_rate": 0.001, "loss": 3.45, "step": 3019 }, { "epoch": 0.12776038581944327, "grad_norm": 0.27247634530067444, "learning_rate": 0.001, "loss": 3.0103, "step": 3020 }, { "epoch": 0.12780269058295965, "grad_norm": 0.30561110377311707, "learning_rate": 0.001, "loss": 2.9789, "step": 3021 }, { "epoch": 0.127844995346476, "grad_norm": 0.7641719579696655, "learning_rate": 0.001, "loss": 1.9984, "step": 3022 }, { "epoch": 0.12788730010999239, "grad_norm": 1.2558095455169678, "learning_rate": 0.001, "loss": 2.3797, "step": 3023 }, { "epoch": 0.12792960487350877, "grad_norm": 0.24033454060554504, "learning_rate": 0.001, "loss": 2.5231, "step": 3024 }, { "epoch": 0.12797190963702512, "grad_norm": 0.9906525611877441, "learning_rate": 0.001, "loss": 3.2656, "step": 3025 }, { "epoch": 0.1280142144005415, "grad_norm": 0.35158273577690125, "learning_rate": 0.001, "loss": 2.5469, "step": 3026 }, { "epoch": 0.12805651916405789, "grad_norm": 0.313698947429657, "learning_rate": 0.001, "loss": 2.2092, "step": 3027 }, { "epoch": 0.12809882392757424, "grad_norm": 0.2757653594017029, "learning_rate": 0.001, "loss": 1.7885, "step": 3028 }, { "epoch": 0.12814112869109062, "grad_norm": 0.23092950880527496, "learning_rate": 0.001, "loss": 1.9807, "step": 3029 }, { "epoch": 0.12818343345460698, "grad_norm": 0.7450085282325745, "learning_rate": 0.001, "loss": 1.9991, "step": 3030 }, { "epoch": 0.12822573821812336, "grad_norm": 0.362114816904068, "learning_rate": 0.001, "loss": 4.5653, "step": 3031 }, { "epoch": 0.12826804298163974, "grad_norm": 0.23789288103580475, "learning_rate": 0.001, "loss": 2.1967, "step": 3032 }, { "epoch": 0.1283103477451561, "grad_norm": 0.27310964465141296, "learning_rate": 0.001, "loss": 3.2499, "step": 3033 }, { "epoch": 0.12835265250867248, "grad_norm": 0.3834547996520996, "learning_rate": 0.001, "loss": 2.3692, "step": 3034 }, { "epoch": 0.12839495727218886, "grad_norm": 0.28623712062835693, "learning_rate": 0.001, "loss": 2.875, "step": 3035 }, { "epoch": 0.1284372620357052, "grad_norm": 0.35939669609069824, "learning_rate": 0.001, "loss": 2.4295, "step": 3036 }, { "epoch": 0.1284795667992216, "grad_norm": 0.403884619474411, "learning_rate": 0.001, "loss": 2.8118, "step": 3037 }, { "epoch": 0.12852187156273798, "grad_norm": 0.7596374154090881, "learning_rate": 0.001, "loss": 2.5978, "step": 3038 }, { "epoch": 0.12856417632625433, "grad_norm": 0.24331355094909668, "learning_rate": 0.001, "loss": 2.3842, "step": 3039 }, { "epoch": 0.1286064810897707, "grad_norm": 0.23005782067775726, "learning_rate": 0.001, "loss": 2.1265, "step": 3040 }, { "epoch": 0.12864878585328707, "grad_norm": 0.23082035779953003, "learning_rate": 0.001, "loss": 2.273, "step": 3041 }, { "epoch": 0.12869109061680345, "grad_norm": 0.26438039541244507, "learning_rate": 0.001, "loss": 2.3408, "step": 3042 }, { "epoch": 0.12873339538031983, "grad_norm": 0.22107988595962524, "learning_rate": 0.001, "loss": 1.9021, "step": 3043 }, { "epoch": 0.12877570014383619, "grad_norm": 0.21788084506988525, "learning_rate": 0.001, "loss": 3.0715, "step": 3044 }, { "epoch": 0.12881800490735257, "grad_norm": 0.23471501469612122, "learning_rate": 0.001, "loss": 2.2976, "step": 3045 }, { "epoch": 0.12886030967086895, "grad_norm": 0.28260231018066406, "learning_rate": 0.001, "loss": 1.8561, "step": 3046 }, { "epoch": 0.1289026144343853, "grad_norm": 0.26574090123176575, "learning_rate": 0.001, "loss": 2.0225, "step": 3047 }, { "epoch": 0.1289449191979017, "grad_norm": 0.34297651052474976, "learning_rate": 0.001, "loss": 3.0373, "step": 3048 }, { "epoch": 0.12898722396141807, "grad_norm": 0.586717426776886, "learning_rate": 0.001, "loss": 2.1005, "step": 3049 }, { "epoch": 0.12902952872493442, "grad_norm": 0.21093139052391052, "learning_rate": 0.001, "loss": 2.3463, "step": 3050 }, { "epoch": 0.1290718334884508, "grad_norm": 0.29111140966415405, "learning_rate": 0.001, "loss": 2.1633, "step": 3051 }, { "epoch": 0.12911413825196716, "grad_norm": 0.30666249990463257, "learning_rate": 0.001, "loss": 3.0201, "step": 3052 }, { "epoch": 0.12915644301548354, "grad_norm": 0.24410761892795563, "learning_rate": 0.001, "loss": 2.8614, "step": 3053 }, { "epoch": 0.12919874777899992, "grad_norm": 0.2161339968442917, "learning_rate": 0.001, "loss": 1.7318, "step": 3054 }, { "epoch": 0.12924105254251628, "grad_norm": 0.19016364216804504, "learning_rate": 0.001, "loss": 2.0828, "step": 3055 }, { "epoch": 0.12928335730603266, "grad_norm": 0.23141834139823914, "learning_rate": 0.001, "loss": 2.8, "step": 3056 }, { "epoch": 0.12932566206954904, "grad_norm": 1.5664108991622925, "learning_rate": 0.001, "loss": 2.1793, "step": 3057 }, { "epoch": 0.1293679668330654, "grad_norm": 2.544483184814453, "learning_rate": 0.001, "loss": 2.9984, "step": 3058 }, { "epoch": 0.12941027159658178, "grad_norm": 0.30240291357040405, "learning_rate": 0.001, "loss": 2.197, "step": 3059 }, { "epoch": 0.12945257636009816, "grad_norm": 0.24638395011425018, "learning_rate": 0.001, "loss": 2.2356, "step": 3060 }, { "epoch": 0.1294948811236145, "grad_norm": 0.9346217513084412, "learning_rate": 0.001, "loss": 2.1051, "step": 3061 }, { "epoch": 0.1295371858871309, "grad_norm": 0.256610244512558, "learning_rate": 0.001, "loss": 2.3009, "step": 3062 }, { "epoch": 0.12957949065064725, "grad_norm": 0.40396857261657715, "learning_rate": 0.001, "loss": 2.5668, "step": 3063 }, { "epoch": 0.12962179541416363, "grad_norm": 10.963554382324219, "learning_rate": 0.001, "loss": 2.1443, "step": 3064 }, { "epoch": 0.12966410017768001, "grad_norm": 2.6234822273254395, "learning_rate": 0.001, "loss": 2.6211, "step": 3065 }, { "epoch": 0.12970640494119637, "grad_norm": 0.3858783543109894, "learning_rate": 0.001, "loss": 1.9484, "step": 3066 }, { "epoch": 0.12974870970471275, "grad_norm": 4.675527572631836, "learning_rate": 0.001, "loss": 2.551, "step": 3067 }, { "epoch": 0.12979101446822913, "grad_norm": 0.3626082241535187, "learning_rate": 0.001, "loss": 1.7012, "step": 3068 }, { "epoch": 0.1298333192317455, "grad_norm": 0.605455756187439, "learning_rate": 0.001, "loss": 1.6341, "step": 3069 }, { "epoch": 0.12987562399526187, "grad_norm": 0.38578104972839355, "learning_rate": 0.001, "loss": 2.0653, "step": 3070 }, { "epoch": 0.12991792875877825, "grad_norm": 0.3008464574813843, "learning_rate": 0.001, "loss": 2.3254, "step": 3071 }, { "epoch": 0.1299602335222946, "grad_norm": 0.31806275248527527, "learning_rate": 0.001, "loss": 2.4444, "step": 3072 }, { "epoch": 0.130002538285811, "grad_norm": 0.3251250386238098, "learning_rate": 0.001, "loss": 2.173, "step": 3073 }, { "epoch": 0.13004484304932734, "grad_norm": 0.3980516791343689, "learning_rate": 0.001, "loss": 2.8316, "step": 3074 }, { "epoch": 0.13008714781284372, "grad_norm": 0.3091077506542206, "learning_rate": 0.001, "loss": 2.9065, "step": 3075 }, { "epoch": 0.1301294525763601, "grad_norm": 0.6228027939796448, "learning_rate": 0.001, "loss": 2.3924, "step": 3076 }, { "epoch": 0.13017175733987646, "grad_norm": 0.2532864212989807, "learning_rate": 0.001, "loss": 2.3496, "step": 3077 }, { "epoch": 0.13021406210339284, "grad_norm": 0.32938069105148315, "learning_rate": 0.001, "loss": 2.9943, "step": 3078 }, { "epoch": 0.13025636686690922, "grad_norm": 0.7084699273109436, "learning_rate": 0.001, "loss": 2.3694, "step": 3079 }, { "epoch": 0.13029867163042558, "grad_norm": 0.2795187830924988, "learning_rate": 0.001, "loss": 2.4931, "step": 3080 }, { "epoch": 0.13034097639394196, "grad_norm": 2.5224292278289795, "learning_rate": 0.001, "loss": 2.2264, "step": 3081 }, { "epoch": 0.13038328115745834, "grad_norm": 0.25982218980789185, "learning_rate": 0.001, "loss": 2.2179, "step": 3082 }, { "epoch": 0.1304255859209747, "grad_norm": 0.35819700360298157, "learning_rate": 0.001, "loss": 2.5449, "step": 3083 }, { "epoch": 0.13046789068449108, "grad_norm": 0.7076107263565063, "learning_rate": 0.001, "loss": 2.2326, "step": 3084 }, { "epoch": 0.13051019544800743, "grad_norm": 0.4517766237258911, "learning_rate": 0.001, "loss": 3.3274, "step": 3085 }, { "epoch": 0.13055250021152381, "grad_norm": 0.3324654698371887, "learning_rate": 0.001, "loss": 2.1643, "step": 3086 }, { "epoch": 0.1305948049750402, "grad_norm": 0.2703245282173157, "learning_rate": 0.001, "loss": 2.2756, "step": 3087 }, { "epoch": 0.13063710973855655, "grad_norm": 0.24688389897346497, "learning_rate": 0.001, "loss": 1.4894, "step": 3088 }, { "epoch": 0.13067941450207293, "grad_norm": 0.2690267860889435, "learning_rate": 0.001, "loss": 2.0199, "step": 3089 }, { "epoch": 0.13072171926558931, "grad_norm": 0.2187577337026596, "learning_rate": 0.001, "loss": 1.8486, "step": 3090 }, { "epoch": 0.13076402402910567, "grad_norm": 0.3903275728225708, "learning_rate": 0.001, "loss": 1.9344, "step": 3091 }, { "epoch": 0.13080632879262205, "grad_norm": 1.0155338048934937, "learning_rate": 0.001, "loss": 2.6314, "step": 3092 }, { "epoch": 0.13084863355613843, "grad_norm": 2.3899335861206055, "learning_rate": 0.001, "loss": 2.6188, "step": 3093 }, { "epoch": 0.1308909383196548, "grad_norm": 0.25962334871292114, "learning_rate": 0.001, "loss": 2.4554, "step": 3094 }, { "epoch": 0.13093324308317117, "grad_norm": 0.4966904819011688, "learning_rate": 0.001, "loss": 2.3855, "step": 3095 }, { "epoch": 0.13097554784668752, "grad_norm": 0.26274529099464417, "learning_rate": 0.001, "loss": 2.2674, "step": 3096 }, { "epoch": 0.1310178526102039, "grad_norm": 0.7305838465690613, "learning_rate": 0.001, "loss": 3.0309, "step": 3097 }, { "epoch": 0.1310601573737203, "grad_norm": 0.2599664330482483, "learning_rate": 0.001, "loss": 3.2799, "step": 3098 }, { "epoch": 0.13110246213723664, "grad_norm": 0.3204602301120758, "learning_rate": 0.001, "loss": 1.795, "step": 3099 }, { "epoch": 0.13114476690075302, "grad_norm": 0.4395667612552643, "learning_rate": 0.001, "loss": 2.2309, "step": 3100 }, { "epoch": 0.1311870716642694, "grad_norm": 0.5608950257301331, "learning_rate": 0.001, "loss": 3.5101, "step": 3101 }, { "epoch": 0.13122937642778576, "grad_norm": 0.28046101331710815, "learning_rate": 0.001, "loss": 2.8559, "step": 3102 }, { "epoch": 0.13127168119130214, "grad_norm": 0.7474309206008911, "learning_rate": 0.001, "loss": 2.1403, "step": 3103 }, { "epoch": 0.13131398595481852, "grad_norm": 0.3261275291442871, "learning_rate": 0.001, "loss": 2.4168, "step": 3104 }, { "epoch": 0.13135629071833488, "grad_norm": 0.29234766960144043, "learning_rate": 0.001, "loss": 1.9736, "step": 3105 }, { "epoch": 0.13139859548185126, "grad_norm": 0.32281428575515747, "learning_rate": 0.001, "loss": 2.9004, "step": 3106 }, { "epoch": 0.13144090024536761, "grad_norm": 0.3510703146457672, "learning_rate": 0.001, "loss": 3.3073, "step": 3107 }, { "epoch": 0.131483205008884, "grad_norm": 0.25277993083000183, "learning_rate": 0.001, "loss": 1.9087, "step": 3108 }, { "epoch": 0.13152550977240038, "grad_norm": 0.5728982090950012, "learning_rate": 0.001, "loss": 2.3717, "step": 3109 }, { "epoch": 0.13156781453591673, "grad_norm": 0.2837635278701782, "learning_rate": 0.001, "loss": 3.2256, "step": 3110 }, { "epoch": 0.13161011929943311, "grad_norm": 0.9751304984092712, "learning_rate": 0.001, "loss": 1.9881, "step": 3111 }, { "epoch": 0.1316524240629495, "grad_norm": 0.26259645819664, "learning_rate": 0.001, "loss": 2.9728, "step": 3112 }, { "epoch": 0.13169472882646585, "grad_norm": 0.28536665439605713, "learning_rate": 0.001, "loss": 2.4389, "step": 3113 }, { "epoch": 0.13173703358998223, "grad_norm": 0.8838117718696594, "learning_rate": 0.001, "loss": 2.7865, "step": 3114 }, { "epoch": 0.13177933835349862, "grad_norm": 0.351610392332077, "learning_rate": 0.001, "loss": 4.3358, "step": 3115 }, { "epoch": 0.13182164311701497, "grad_norm": 0.6937295198440552, "learning_rate": 0.001, "loss": 3.2516, "step": 3116 }, { "epoch": 0.13186394788053135, "grad_norm": 0.24456751346588135, "learning_rate": 0.001, "loss": 3.5256, "step": 3117 }, { "epoch": 0.13190625264404773, "grad_norm": 1.5360091924667358, "learning_rate": 0.001, "loss": 2.8469, "step": 3118 }, { "epoch": 0.1319485574075641, "grad_norm": 0.3377660810947418, "learning_rate": 0.001, "loss": 2.5228, "step": 3119 }, { "epoch": 0.13199086217108047, "grad_norm": 0.3961987793445587, "learning_rate": 0.001, "loss": 2.7447, "step": 3120 }, { "epoch": 0.13203316693459682, "grad_norm": 0.32120051980018616, "learning_rate": 0.001, "loss": 2.3102, "step": 3121 }, { "epoch": 0.1320754716981132, "grad_norm": 0.3845050036907196, "learning_rate": 0.001, "loss": 3.7081, "step": 3122 }, { "epoch": 0.1321177764616296, "grad_norm": 0.2517256736755371, "learning_rate": 0.001, "loss": 2.6708, "step": 3123 }, { "epoch": 0.13216008122514594, "grad_norm": 0.25273680686950684, "learning_rate": 0.001, "loss": 2.748, "step": 3124 }, { "epoch": 0.13220238598866232, "grad_norm": 0.3618360161781311, "learning_rate": 0.001, "loss": 3.418, "step": 3125 }, { "epoch": 0.1322446907521787, "grad_norm": 0.4369431436061859, "learning_rate": 0.001, "loss": 3.6098, "step": 3126 }, { "epoch": 0.13228699551569506, "grad_norm": 0.28894707560539246, "learning_rate": 0.001, "loss": 1.7968, "step": 3127 }, { "epoch": 0.13232930027921144, "grad_norm": 0.29500776529312134, "learning_rate": 0.001, "loss": 2.6331, "step": 3128 }, { "epoch": 0.13237160504272782, "grad_norm": 0.8470263481140137, "learning_rate": 0.001, "loss": 2.9434, "step": 3129 }, { "epoch": 0.13241390980624418, "grad_norm": 0.20783580839633942, "learning_rate": 0.001, "loss": 1.635, "step": 3130 }, { "epoch": 0.13245621456976056, "grad_norm": 0.4847624897956848, "learning_rate": 0.001, "loss": 2.6969, "step": 3131 }, { "epoch": 0.13249851933327692, "grad_norm": 0.2904376983642578, "learning_rate": 0.001, "loss": 3.1772, "step": 3132 }, { "epoch": 0.1325408240967933, "grad_norm": 0.29514384269714355, "learning_rate": 0.001, "loss": 2.2651, "step": 3133 }, { "epoch": 0.13258312886030968, "grad_norm": 0.3028735816478729, "learning_rate": 0.001, "loss": 2.2932, "step": 3134 }, { "epoch": 0.13262543362382603, "grad_norm": 0.2190832495689392, "learning_rate": 0.001, "loss": 2.291, "step": 3135 }, { "epoch": 0.13266773838734242, "grad_norm": 0.4608692526817322, "learning_rate": 0.001, "loss": 2.9313, "step": 3136 }, { "epoch": 0.1327100431508588, "grad_norm": 0.5700821280479431, "learning_rate": 0.001, "loss": 2.6597, "step": 3137 }, { "epoch": 0.13275234791437515, "grad_norm": 0.41855689883232117, "learning_rate": 0.001, "loss": 2.2829, "step": 3138 }, { "epoch": 0.13279465267789153, "grad_norm": 0.2460172474384308, "learning_rate": 0.001, "loss": 2.0043, "step": 3139 }, { "epoch": 0.13283695744140792, "grad_norm": 0.26085442304611206, "learning_rate": 0.001, "loss": 2.8132, "step": 3140 }, { "epoch": 0.13287926220492427, "grad_norm": 0.27267521619796753, "learning_rate": 0.001, "loss": 1.9265, "step": 3141 }, { "epoch": 0.13292156696844065, "grad_norm": 0.4825087785720825, "learning_rate": 0.001, "loss": 2.5328, "step": 3142 }, { "epoch": 0.132963871731957, "grad_norm": 0.7955046892166138, "learning_rate": 0.001, "loss": 2.4661, "step": 3143 }, { "epoch": 0.1330061764954734, "grad_norm": 0.22219106554985046, "learning_rate": 0.001, "loss": 2.8113, "step": 3144 }, { "epoch": 0.13304848125898977, "grad_norm": 0.20542827248573303, "learning_rate": 0.001, "loss": 1.5933, "step": 3145 }, { "epoch": 0.13309078602250612, "grad_norm": 1.0127766132354736, "learning_rate": 0.001, "loss": 2.4128, "step": 3146 }, { "epoch": 0.1331330907860225, "grad_norm": 0.7383486032485962, "learning_rate": 0.001, "loss": 2.2777, "step": 3147 }, { "epoch": 0.1331753955495389, "grad_norm": 0.714616596698761, "learning_rate": 0.001, "loss": 2.5478, "step": 3148 }, { "epoch": 0.13321770031305524, "grad_norm": 0.4222099781036377, "learning_rate": 0.001, "loss": 2.4327, "step": 3149 }, { "epoch": 0.13326000507657162, "grad_norm": 0.3047798275947571, "learning_rate": 0.001, "loss": 1.8279, "step": 3150 }, { "epoch": 0.133302309840088, "grad_norm": 32.82894515991211, "learning_rate": 0.001, "loss": 2.1241, "step": 3151 }, { "epoch": 0.13334461460360436, "grad_norm": 4.569951057434082, "learning_rate": 0.001, "loss": 2.4177, "step": 3152 }, { "epoch": 0.13338691936712074, "grad_norm": 0.2539486587047577, "learning_rate": 0.001, "loss": 2.6896, "step": 3153 }, { "epoch": 0.1334292241306371, "grad_norm": 0.3113529086112976, "learning_rate": 0.001, "loss": 2.3834, "step": 3154 }, { "epoch": 0.13347152889415348, "grad_norm": 0.5559782981872559, "learning_rate": 0.001, "loss": 3.521, "step": 3155 }, { "epoch": 0.13351383365766986, "grad_norm": 0.3125370144844055, "learning_rate": 0.001, "loss": 2.5982, "step": 3156 }, { "epoch": 0.13355613842118622, "grad_norm": 0.2650197744369507, "learning_rate": 0.001, "loss": 2.1565, "step": 3157 }, { "epoch": 0.1335984431847026, "grad_norm": 0.3328724503517151, "learning_rate": 0.001, "loss": 2.0653, "step": 3158 }, { "epoch": 0.13364074794821898, "grad_norm": 0.2736479341983795, "learning_rate": 0.001, "loss": 3.6324, "step": 3159 }, { "epoch": 0.13368305271173533, "grad_norm": 0.3234842121601105, "learning_rate": 0.001, "loss": 2.1484, "step": 3160 }, { "epoch": 0.13372535747525172, "grad_norm": 0.2782366871833801, "learning_rate": 0.001, "loss": 4.0075, "step": 3161 }, { "epoch": 0.1337676622387681, "grad_norm": 0.2664390504360199, "learning_rate": 0.001, "loss": 1.7258, "step": 3162 }, { "epoch": 0.13380996700228445, "grad_norm": 0.23455047607421875, "learning_rate": 0.001, "loss": 1.9677, "step": 3163 }, { "epoch": 0.13385227176580083, "grad_norm": 0.22314786911010742, "learning_rate": 0.001, "loss": 2.3173, "step": 3164 }, { "epoch": 0.1338945765293172, "grad_norm": 0.2761704921722412, "learning_rate": 0.001, "loss": 1.8873, "step": 3165 }, { "epoch": 0.13393688129283357, "grad_norm": 0.6037988066673279, "learning_rate": 0.001, "loss": 2.6251, "step": 3166 }, { "epoch": 0.13397918605634995, "grad_norm": 0.42490071058273315, "learning_rate": 0.001, "loss": 2.1205, "step": 3167 }, { "epoch": 0.1340214908198663, "grad_norm": 0.27945172786712646, "learning_rate": 0.001, "loss": 2.0086, "step": 3168 }, { "epoch": 0.1340637955833827, "grad_norm": 0.2539001703262329, "learning_rate": 0.001, "loss": 2.1604, "step": 3169 }, { "epoch": 0.13410610034689907, "grad_norm": 0.23439648747444153, "learning_rate": 0.001, "loss": 2.4407, "step": 3170 }, { "epoch": 0.13414840511041543, "grad_norm": 0.3301600515842438, "learning_rate": 0.001, "loss": 2.0621, "step": 3171 }, { "epoch": 0.1341907098739318, "grad_norm": 2.253829002380371, "learning_rate": 0.001, "loss": 2.1485, "step": 3172 }, { "epoch": 0.1342330146374482, "grad_norm": 0.34564846754074097, "learning_rate": 0.001, "loss": 2.2515, "step": 3173 }, { "epoch": 0.13427531940096454, "grad_norm": 0.269344300031662, "learning_rate": 0.001, "loss": 3.2911, "step": 3174 }, { "epoch": 0.13431762416448093, "grad_norm": 0.2677054703235626, "learning_rate": 0.001, "loss": 1.9107, "step": 3175 }, { "epoch": 0.13435992892799728, "grad_norm": 1.6993762254714966, "learning_rate": 0.001, "loss": 2.3414, "step": 3176 }, { "epoch": 0.13440223369151366, "grad_norm": 0.29692259430885315, "learning_rate": 0.001, "loss": 2.1323, "step": 3177 }, { "epoch": 0.13444453845503004, "grad_norm": 0.24042674899101257, "learning_rate": 0.001, "loss": 2.9594, "step": 3178 }, { "epoch": 0.1344868432185464, "grad_norm": 0.7225860357284546, "learning_rate": 0.001, "loss": 2.6567, "step": 3179 }, { "epoch": 0.13452914798206278, "grad_norm": 0.24381719529628754, "learning_rate": 0.001, "loss": 2.0548, "step": 3180 }, { "epoch": 0.13457145274557916, "grad_norm": 0.2055826634168625, "learning_rate": 0.001, "loss": 2.3845, "step": 3181 }, { "epoch": 0.13461375750909552, "grad_norm": 0.2573340833187103, "learning_rate": 0.001, "loss": 2.0984, "step": 3182 }, { "epoch": 0.1346560622726119, "grad_norm": 0.2962018847465515, "learning_rate": 0.001, "loss": 2.5636, "step": 3183 }, { "epoch": 0.13469836703612828, "grad_norm": 0.40545910596847534, "learning_rate": 0.001, "loss": 2.3188, "step": 3184 }, { "epoch": 0.13474067179964463, "grad_norm": 0.6234009861946106, "learning_rate": 0.001, "loss": 1.9832, "step": 3185 }, { "epoch": 0.13478297656316102, "grad_norm": 0.9001304507255554, "learning_rate": 0.001, "loss": 2.0331, "step": 3186 }, { "epoch": 0.13482528132667737, "grad_norm": 3.3280558586120605, "learning_rate": 0.001, "loss": 2.8476, "step": 3187 }, { "epoch": 0.13486758609019375, "grad_norm": 0.25616398453712463, "learning_rate": 0.001, "loss": 2.8065, "step": 3188 }, { "epoch": 0.13490989085371013, "grad_norm": 4.6577467918396, "learning_rate": 0.001, "loss": 2.244, "step": 3189 }, { "epoch": 0.1349521956172265, "grad_norm": 0.37131521105766296, "learning_rate": 0.001, "loss": 2.2248, "step": 3190 }, { "epoch": 0.13499450038074287, "grad_norm": 0.4129955768585205, "learning_rate": 0.001, "loss": 2.701, "step": 3191 }, { "epoch": 0.13503680514425925, "grad_norm": 0.24256403744220734, "learning_rate": 0.001, "loss": 2.0321, "step": 3192 }, { "epoch": 0.1350791099077756, "grad_norm": 0.4478329122066498, "learning_rate": 0.001, "loss": 2.888, "step": 3193 }, { "epoch": 0.135121414671292, "grad_norm": 0.24205787479877472, "learning_rate": 0.001, "loss": 2.9618, "step": 3194 }, { "epoch": 0.13516371943480837, "grad_norm": 0.2591802179813385, "learning_rate": 0.001, "loss": 2.3314, "step": 3195 }, { "epoch": 0.13520602419832473, "grad_norm": 0.26308709383010864, "learning_rate": 0.001, "loss": 1.5799, "step": 3196 }, { "epoch": 0.1352483289618411, "grad_norm": 2.3362512588500977, "learning_rate": 0.001, "loss": 2.72, "step": 3197 }, { "epoch": 0.13529063372535746, "grad_norm": 0.25793129205703735, "learning_rate": 0.001, "loss": 3.2309, "step": 3198 }, { "epoch": 0.13533293848887384, "grad_norm": 0.24646157026290894, "learning_rate": 0.001, "loss": 2.5992, "step": 3199 }, { "epoch": 0.13537524325239023, "grad_norm": 1.71589195728302, "learning_rate": 0.001, "loss": 2.0522, "step": 3200 }, { "epoch": 0.13541754801590658, "grad_norm": 0.24351108074188232, "learning_rate": 0.001, "loss": 1.7597, "step": 3201 }, { "epoch": 0.13545985277942296, "grad_norm": 0.23399876058101654, "learning_rate": 0.001, "loss": 3.4004, "step": 3202 }, { "epoch": 0.13550215754293934, "grad_norm": 0.2488681972026825, "learning_rate": 0.001, "loss": 3.0523, "step": 3203 }, { "epoch": 0.1355444623064557, "grad_norm": 0.276132732629776, "learning_rate": 0.001, "loss": 3.5311, "step": 3204 }, { "epoch": 0.13558676706997208, "grad_norm": 0.2650410830974579, "learning_rate": 0.001, "loss": 2.3133, "step": 3205 }, { "epoch": 0.13562907183348846, "grad_norm": 8.522130966186523, "learning_rate": 0.001, "loss": 2.3636, "step": 3206 }, { "epoch": 0.13567137659700482, "grad_norm": 0.26246312260627747, "learning_rate": 0.001, "loss": 2.2168, "step": 3207 }, { "epoch": 0.1357136813605212, "grad_norm": 0.29150646924972534, "learning_rate": 0.001, "loss": 2.2161, "step": 3208 }, { "epoch": 0.13575598612403755, "grad_norm": 0.2513675093650818, "learning_rate": 0.001, "loss": 2.0227, "step": 3209 }, { "epoch": 0.13579829088755394, "grad_norm": 0.274081289768219, "learning_rate": 0.001, "loss": 3.2165, "step": 3210 }, { "epoch": 0.13584059565107032, "grad_norm": 0.7095363736152649, "learning_rate": 0.001, "loss": 2.1727, "step": 3211 }, { "epoch": 0.13588290041458667, "grad_norm": 0.24826420843601227, "learning_rate": 0.001, "loss": 2.9115, "step": 3212 }, { "epoch": 0.13592520517810305, "grad_norm": 0.2485196441411972, "learning_rate": 0.001, "loss": 2.705, "step": 3213 }, { "epoch": 0.13596750994161944, "grad_norm": 1.0916763544082642, "learning_rate": 0.001, "loss": 2.5535, "step": 3214 }, { "epoch": 0.1360098147051358, "grad_norm": 0.29727303981781006, "learning_rate": 0.001, "loss": 3.2918, "step": 3215 }, { "epoch": 0.13605211946865217, "grad_norm": 0.24911724030971527, "learning_rate": 0.001, "loss": 2.0456, "step": 3216 }, { "epoch": 0.13609442423216855, "grad_norm": 0.2424779087305069, "learning_rate": 0.001, "loss": 2.466, "step": 3217 }, { "epoch": 0.1361367289956849, "grad_norm": 0.23129363358020782, "learning_rate": 0.001, "loss": 2.5406, "step": 3218 }, { "epoch": 0.1361790337592013, "grad_norm": 0.2544875741004944, "learning_rate": 0.001, "loss": 2.7152, "step": 3219 }, { "epoch": 0.13622133852271764, "grad_norm": 0.28286901116371155, "learning_rate": 0.001, "loss": 2.7993, "step": 3220 }, { "epoch": 0.13626364328623403, "grad_norm": 0.3165169060230255, "learning_rate": 0.001, "loss": 1.8458, "step": 3221 }, { "epoch": 0.1363059480497504, "grad_norm": 0.2771192491054535, "learning_rate": 0.001, "loss": 2.2885, "step": 3222 }, { "epoch": 0.13634825281326676, "grad_norm": 0.2702016830444336, "learning_rate": 0.001, "loss": 2.1035, "step": 3223 }, { "epoch": 0.13639055757678314, "grad_norm": 0.20894509553909302, "learning_rate": 0.001, "loss": 2.4901, "step": 3224 }, { "epoch": 0.13643286234029953, "grad_norm": 0.4794827997684479, "learning_rate": 0.001, "loss": 2.3976, "step": 3225 }, { "epoch": 0.13647516710381588, "grad_norm": 0.3762035667896271, "learning_rate": 0.001, "loss": 2.3773, "step": 3226 }, { "epoch": 0.13651747186733226, "grad_norm": 0.9282992482185364, "learning_rate": 0.001, "loss": 2.3559, "step": 3227 }, { "epoch": 0.13655977663084864, "grad_norm": 0.24388918280601501, "learning_rate": 0.001, "loss": 2.6357, "step": 3228 }, { "epoch": 0.136602081394365, "grad_norm": 0.22450682520866394, "learning_rate": 0.001, "loss": 3.0694, "step": 3229 }, { "epoch": 0.13664438615788138, "grad_norm": 0.22949005663394928, "learning_rate": 0.001, "loss": 2.4709, "step": 3230 }, { "epoch": 0.13668669092139776, "grad_norm": 0.2965823709964752, "learning_rate": 0.001, "loss": 2.0617, "step": 3231 }, { "epoch": 0.13672899568491412, "grad_norm": 0.6691174507141113, "learning_rate": 0.001, "loss": 2.4596, "step": 3232 }, { "epoch": 0.1367713004484305, "grad_norm": 0.21386466920375824, "learning_rate": 0.001, "loss": 2.3095, "step": 3233 }, { "epoch": 0.13681360521194685, "grad_norm": 0.3088574707508087, "learning_rate": 0.001, "loss": 2.1195, "step": 3234 }, { "epoch": 0.13685590997546324, "grad_norm": 0.23426178097724915, "learning_rate": 0.001, "loss": 2.036, "step": 3235 }, { "epoch": 0.13689821473897962, "grad_norm": 0.2373049259185791, "learning_rate": 0.001, "loss": 2.3527, "step": 3236 }, { "epoch": 0.13694051950249597, "grad_norm": 0.253162682056427, "learning_rate": 0.001, "loss": 1.8557, "step": 3237 }, { "epoch": 0.13698282426601235, "grad_norm": 0.23162192106246948, "learning_rate": 0.001, "loss": 2.3313, "step": 3238 }, { "epoch": 0.13702512902952874, "grad_norm": 0.20582619309425354, "learning_rate": 0.001, "loss": 2.1614, "step": 3239 }, { "epoch": 0.1370674337930451, "grad_norm": 0.23828250169754028, "learning_rate": 0.001, "loss": 2.4971, "step": 3240 }, { "epoch": 0.13710973855656147, "grad_norm": 0.2543167471885681, "learning_rate": 0.001, "loss": 2.4651, "step": 3241 }, { "epoch": 0.13715204332007785, "grad_norm": 1.886708378791809, "learning_rate": 0.001, "loss": 2.7207, "step": 3242 }, { "epoch": 0.1371943480835942, "grad_norm": 0.2185901701450348, "learning_rate": 0.001, "loss": 1.9575, "step": 3243 }, { "epoch": 0.1372366528471106, "grad_norm": 0.25897449254989624, "learning_rate": 0.001, "loss": 3.6101, "step": 3244 }, { "epoch": 0.13727895761062694, "grad_norm": 0.22902828454971313, "learning_rate": 0.001, "loss": 2.5504, "step": 3245 }, { "epoch": 0.13732126237414333, "grad_norm": 0.9940201044082642, "learning_rate": 0.001, "loss": 2.4984, "step": 3246 }, { "epoch": 0.1373635671376597, "grad_norm": 0.5756583213806152, "learning_rate": 0.001, "loss": 2.0633, "step": 3247 }, { "epoch": 0.13740587190117606, "grad_norm": 0.24829471111297607, "learning_rate": 0.001, "loss": 2.2558, "step": 3248 }, { "epoch": 0.13744817666469245, "grad_norm": 0.2848198413848877, "learning_rate": 0.001, "loss": 2.2559, "step": 3249 }, { "epoch": 0.13749048142820883, "grad_norm": 0.2882290184497833, "learning_rate": 0.001, "loss": 2.2497, "step": 3250 }, { "epoch": 0.13753278619172518, "grad_norm": 0.3670541048049927, "learning_rate": 0.001, "loss": 2.8339, "step": 3251 }, { "epoch": 0.13757509095524156, "grad_norm": 0.2615547180175781, "learning_rate": 0.001, "loss": 2.7191, "step": 3252 }, { "epoch": 0.13761739571875795, "grad_norm": 0.2824948728084564, "learning_rate": 0.001, "loss": 2.4394, "step": 3253 }, { "epoch": 0.1376597004822743, "grad_norm": 0.2855933904647827, "learning_rate": 0.001, "loss": 1.7882, "step": 3254 }, { "epoch": 0.13770200524579068, "grad_norm": 0.22275911271572113, "learning_rate": 0.001, "loss": 2.8593, "step": 3255 }, { "epoch": 0.13774431000930704, "grad_norm": 0.29241636395454407, "learning_rate": 0.001, "loss": 2.4057, "step": 3256 }, { "epoch": 0.13778661477282342, "grad_norm": 0.2222166359424591, "learning_rate": 0.001, "loss": 2.4944, "step": 3257 }, { "epoch": 0.1378289195363398, "grad_norm": 0.23610618710517883, "learning_rate": 0.001, "loss": 2.5925, "step": 3258 }, { "epoch": 0.13787122429985615, "grad_norm": 0.25960981845855713, "learning_rate": 0.001, "loss": 2.085, "step": 3259 }, { "epoch": 0.13791352906337254, "grad_norm": 0.23140114545822144, "learning_rate": 0.001, "loss": 2.1499, "step": 3260 }, { "epoch": 0.13795583382688892, "grad_norm": 1.9852933883666992, "learning_rate": 0.001, "loss": 2.7648, "step": 3261 }, { "epoch": 0.13799813859040527, "grad_norm": 0.27138063311576843, "learning_rate": 0.001, "loss": 2.131, "step": 3262 }, { "epoch": 0.13804044335392165, "grad_norm": 0.23058192431926727, "learning_rate": 0.001, "loss": 1.905, "step": 3263 }, { "epoch": 0.13808274811743804, "grad_norm": 0.360061377286911, "learning_rate": 0.001, "loss": 1.958, "step": 3264 }, { "epoch": 0.1381250528809544, "grad_norm": 0.2711131274700165, "learning_rate": 0.001, "loss": 2.4258, "step": 3265 }, { "epoch": 0.13816735764447077, "grad_norm": 0.30782273411750793, "learning_rate": 0.001, "loss": 3.0677, "step": 3266 }, { "epoch": 0.13820966240798713, "grad_norm": 0.2656923234462738, "learning_rate": 0.001, "loss": 1.8376, "step": 3267 }, { "epoch": 0.1382519671715035, "grad_norm": 0.24168632924556732, "learning_rate": 0.001, "loss": 1.9629, "step": 3268 }, { "epoch": 0.1382942719350199, "grad_norm": 0.34883561730384827, "learning_rate": 0.001, "loss": 3.1204, "step": 3269 }, { "epoch": 0.13833657669853625, "grad_norm": 0.25927019119262695, "learning_rate": 0.001, "loss": 2.9234, "step": 3270 }, { "epoch": 0.13837888146205263, "grad_norm": 0.38157638907432556, "learning_rate": 0.001, "loss": 2.7691, "step": 3271 }, { "epoch": 0.138421186225569, "grad_norm": 0.4185699224472046, "learning_rate": 0.001, "loss": 2.306, "step": 3272 }, { "epoch": 0.13846349098908536, "grad_norm": 0.22628718614578247, "learning_rate": 0.001, "loss": 2.0184, "step": 3273 }, { "epoch": 0.13850579575260175, "grad_norm": 0.25233402848243713, "learning_rate": 0.001, "loss": 2.845, "step": 3274 }, { "epoch": 0.13854810051611813, "grad_norm": 0.26197144389152527, "learning_rate": 0.001, "loss": 1.7947, "step": 3275 }, { "epoch": 0.13859040527963448, "grad_norm": 0.38328817486763, "learning_rate": 0.001, "loss": 2.4887, "step": 3276 }, { "epoch": 0.13863271004315086, "grad_norm": 0.2760562300682068, "learning_rate": 0.001, "loss": 2.0466, "step": 3277 }, { "epoch": 0.13867501480666722, "grad_norm": 0.2207472026348114, "learning_rate": 0.001, "loss": 2.2844, "step": 3278 }, { "epoch": 0.1387173195701836, "grad_norm": 0.24221129715442657, "learning_rate": 0.001, "loss": 2.7634, "step": 3279 }, { "epoch": 0.13875962433369998, "grad_norm": 0.23639145493507385, "learning_rate": 0.001, "loss": 2.3057, "step": 3280 }, { "epoch": 0.13880192909721634, "grad_norm": 0.6643745303153992, "learning_rate": 0.001, "loss": 2.1484, "step": 3281 }, { "epoch": 0.13884423386073272, "grad_norm": 0.29646036028862, "learning_rate": 0.001, "loss": 1.7328, "step": 3282 }, { "epoch": 0.1388865386242491, "grad_norm": 0.7454001307487488, "learning_rate": 0.001, "loss": 2.0942, "step": 3283 }, { "epoch": 0.13892884338776545, "grad_norm": 1.1968051195144653, "learning_rate": 0.001, "loss": 2.9441, "step": 3284 }, { "epoch": 0.13897114815128184, "grad_norm": 0.23698095977306366, "learning_rate": 0.001, "loss": 2.6655, "step": 3285 }, { "epoch": 0.13901345291479822, "grad_norm": 0.6837499141693115, "learning_rate": 0.001, "loss": 2.2513, "step": 3286 }, { "epoch": 0.13905575767831457, "grad_norm": 0.48883894085884094, "learning_rate": 0.001, "loss": 2.6241, "step": 3287 }, { "epoch": 0.13909806244183096, "grad_norm": 0.5206930637359619, "learning_rate": 0.001, "loss": 2.2263, "step": 3288 }, { "epoch": 0.1391403672053473, "grad_norm": 0.40440839529037476, "learning_rate": 0.001, "loss": 3.109, "step": 3289 }, { "epoch": 0.1391826719688637, "grad_norm": 1.5446665287017822, "learning_rate": 0.001, "loss": 2.3798, "step": 3290 }, { "epoch": 0.13922497673238007, "grad_norm": 0.6700052618980408, "learning_rate": 0.001, "loss": 1.8315, "step": 3291 }, { "epoch": 0.13926728149589643, "grad_norm": 0.5063325762748718, "learning_rate": 0.001, "loss": 2.6604, "step": 3292 }, { "epoch": 0.1393095862594128, "grad_norm": 0.38244378566741943, "learning_rate": 0.001, "loss": 2.4225, "step": 3293 }, { "epoch": 0.1393518910229292, "grad_norm": 0.3094440996646881, "learning_rate": 0.001, "loss": 2.6557, "step": 3294 }, { "epoch": 0.13939419578644555, "grad_norm": 0.2953416109085083, "learning_rate": 0.001, "loss": 2.1801, "step": 3295 }, { "epoch": 0.13943650054996193, "grad_norm": 0.2831237018108368, "learning_rate": 0.001, "loss": 2.5021, "step": 3296 }, { "epoch": 0.1394788053134783, "grad_norm": 14.991360664367676, "learning_rate": 0.001, "loss": 3.954, "step": 3297 }, { "epoch": 0.13952111007699466, "grad_norm": 0.26766881346702576, "learning_rate": 0.001, "loss": 2.3572, "step": 3298 }, { "epoch": 0.13956341484051105, "grad_norm": 0.4463373124599457, "learning_rate": 0.001, "loss": 2.2735, "step": 3299 }, { "epoch": 0.1396057196040274, "grad_norm": 0.30204683542251587, "learning_rate": 0.001, "loss": 2.7257, "step": 3300 }, { "epoch": 0.13964802436754378, "grad_norm": 0.34700825810432434, "learning_rate": 0.001, "loss": 3.2625, "step": 3301 }, { "epoch": 0.13969032913106016, "grad_norm": 0.348042756319046, "learning_rate": 0.001, "loss": 2.5979, "step": 3302 }, { "epoch": 0.13973263389457652, "grad_norm": 0.38788822293281555, "learning_rate": 0.001, "loss": 2.5037, "step": 3303 }, { "epoch": 0.1397749386580929, "grad_norm": 0.24049419164657593, "learning_rate": 0.001, "loss": 2.2218, "step": 3304 }, { "epoch": 0.13981724342160928, "grad_norm": 0.2679419219493866, "learning_rate": 0.001, "loss": 2.8018, "step": 3305 }, { "epoch": 0.13985954818512564, "grad_norm": 0.2681131064891815, "learning_rate": 0.001, "loss": 3.2434, "step": 3306 }, { "epoch": 0.13990185294864202, "grad_norm": 0.25279781222343445, "learning_rate": 0.001, "loss": 2.6153, "step": 3307 }, { "epoch": 0.1399441577121584, "grad_norm": 0.23080165684223175, "learning_rate": 0.001, "loss": 2.5528, "step": 3308 }, { "epoch": 0.13998646247567476, "grad_norm": 0.21056526899337769, "learning_rate": 0.001, "loss": 2.2255, "step": 3309 }, { "epoch": 0.14002876723919114, "grad_norm": 1.0948318243026733, "learning_rate": 0.001, "loss": 1.8427, "step": 3310 }, { "epoch": 0.1400710720027075, "grad_norm": 0.27404388785362244, "learning_rate": 0.001, "loss": 1.971, "step": 3311 }, { "epoch": 0.14011337676622387, "grad_norm": 0.23093107342720032, "learning_rate": 0.001, "loss": 2.145, "step": 3312 }, { "epoch": 0.14015568152974026, "grad_norm": 0.8817729949951172, "learning_rate": 0.001, "loss": 2.4664, "step": 3313 }, { "epoch": 0.1401979862932566, "grad_norm": 0.3034093677997589, "learning_rate": 0.001, "loss": 2.2661, "step": 3314 }, { "epoch": 0.140240291056773, "grad_norm": 0.2352837473154068, "learning_rate": 0.001, "loss": 1.6646, "step": 3315 }, { "epoch": 0.14028259582028937, "grad_norm": 0.2697775661945343, "learning_rate": 0.001, "loss": 2.322, "step": 3316 }, { "epoch": 0.14032490058380573, "grad_norm": 0.889599621295929, "learning_rate": 0.001, "loss": 2.4923, "step": 3317 }, { "epoch": 0.1403672053473221, "grad_norm": 0.3509933352470398, "learning_rate": 0.001, "loss": 2.4785, "step": 3318 }, { "epoch": 0.1404095101108385, "grad_norm": 0.2816075086593628, "learning_rate": 0.001, "loss": 2.1828, "step": 3319 }, { "epoch": 0.14045181487435485, "grad_norm": 0.4183788299560547, "learning_rate": 0.001, "loss": 2.2104, "step": 3320 }, { "epoch": 0.14049411963787123, "grad_norm": 31.530492782592773, "learning_rate": 0.001, "loss": 2.0174, "step": 3321 }, { "epoch": 0.14053642440138758, "grad_norm": 0.3534470796585083, "learning_rate": 0.001, "loss": 2.234, "step": 3322 }, { "epoch": 0.14057872916490396, "grad_norm": 0.3242388367652893, "learning_rate": 0.001, "loss": 2.3006, "step": 3323 }, { "epoch": 0.14062103392842035, "grad_norm": 1.5292270183563232, "learning_rate": 0.001, "loss": 2.3741, "step": 3324 }, { "epoch": 0.1406633386919367, "grad_norm": 0.5664166212081909, "learning_rate": 0.001, "loss": 2.2073, "step": 3325 }, { "epoch": 0.14070564345545308, "grad_norm": 0.4291297495365143, "learning_rate": 0.001, "loss": 2.9416, "step": 3326 }, { "epoch": 0.14074794821896947, "grad_norm": 0.3085836172103882, "learning_rate": 0.001, "loss": 3.2857, "step": 3327 }, { "epoch": 0.14079025298248582, "grad_norm": 0.3337254524230957, "learning_rate": 0.001, "loss": 2.8056, "step": 3328 }, { "epoch": 0.1408325577460022, "grad_norm": 0.267048180103302, "learning_rate": 0.001, "loss": 2.1272, "step": 3329 }, { "epoch": 0.14087486250951858, "grad_norm": 0.8167191743850708, "learning_rate": 0.001, "loss": 2.2273, "step": 3330 }, { "epoch": 0.14091716727303494, "grad_norm": 0.426624596118927, "learning_rate": 0.001, "loss": 3.7321, "step": 3331 }, { "epoch": 0.14095947203655132, "grad_norm": 0.2855152189731598, "learning_rate": 0.001, "loss": 3.1353, "step": 3332 }, { "epoch": 0.14100177680006767, "grad_norm": 0.647718071937561, "learning_rate": 0.001, "loss": 2.2319, "step": 3333 }, { "epoch": 0.14104408156358406, "grad_norm": 0.7934921383857727, "learning_rate": 0.001, "loss": 2.3643, "step": 3334 }, { "epoch": 0.14108638632710044, "grad_norm": 0.22644270956516266, "learning_rate": 0.001, "loss": 2.802, "step": 3335 }, { "epoch": 0.1411286910906168, "grad_norm": 0.5594651699066162, "learning_rate": 0.001, "loss": 2.4036, "step": 3336 }, { "epoch": 0.14117099585413317, "grad_norm": 0.34176865220069885, "learning_rate": 0.001, "loss": 3.0433, "step": 3337 }, { "epoch": 0.14121330061764956, "grad_norm": 0.9747275114059448, "learning_rate": 0.001, "loss": 2.6015, "step": 3338 }, { "epoch": 0.1412556053811659, "grad_norm": 0.4771696925163269, "learning_rate": 0.001, "loss": 2.0621, "step": 3339 }, { "epoch": 0.1412979101446823, "grad_norm": 0.2995487451553345, "learning_rate": 0.001, "loss": 2.6303, "step": 3340 }, { "epoch": 0.14134021490819867, "grad_norm": 0.3293488919734955, "learning_rate": 0.001, "loss": 3.3868, "step": 3341 }, { "epoch": 0.14138251967171503, "grad_norm": 0.2893940508365631, "learning_rate": 0.001, "loss": 3.0442, "step": 3342 }, { "epoch": 0.1414248244352314, "grad_norm": 0.31001016497612, "learning_rate": 0.001, "loss": 2.3384, "step": 3343 }, { "epoch": 0.14146712919874777, "grad_norm": 0.31601953506469727, "learning_rate": 0.001, "loss": 2.9162, "step": 3344 }, { "epoch": 0.14150943396226415, "grad_norm": 0.27529218792915344, "learning_rate": 0.001, "loss": 2.4267, "step": 3345 }, { "epoch": 0.14155173872578053, "grad_norm": 0.6147814393043518, "learning_rate": 0.001, "loss": 1.7324, "step": 3346 }, { "epoch": 0.14159404348929688, "grad_norm": 0.25061115622520447, "learning_rate": 0.001, "loss": 2.4223, "step": 3347 }, { "epoch": 0.14163634825281327, "grad_norm": 7.874605178833008, "learning_rate": 0.001, "loss": 2.3057, "step": 3348 }, { "epoch": 0.14167865301632965, "grad_norm": 0.23804105818271637, "learning_rate": 0.001, "loss": 1.9634, "step": 3349 }, { "epoch": 0.141720957779846, "grad_norm": 1.9372200965881348, "learning_rate": 0.001, "loss": 2.3494, "step": 3350 }, { "epoch": 0.14176326254336238, "grad_norm": 0.3472435772418976, "learning_rate": 0.001, "loss": 2.9078, "step": 3351 }, { "epoch": 0.14180556730687877, "grad_norm": 0.35763394832611084, "learning_rate": 0.001, "loss": 3.6208, "step": 3352 }, { "epoch": 0.14184787207039512, "grad_norm": 0.3070499300956726, "learning_rate": 0.001, "loss": 1.5918, "step": 3353 }, { "epoch": 0.1418901768339115, "grad_norm": 0.4274042248725891, "learning_rate": 0.001, "loss": 2.3483, "step": 3354 }, { "epoch": 0.14193248159742788, "grad_norm": 0.2639428675174713, "learning_rate": 0.001, "loss": 2.1166, "step": 3355 }, { "epoch": 0.14197478636094424, "grad_norm": 0.23096594214439392, "learning_rate": 0.001, "loss": 1.9444, "step": 3356 }, { "epoch": 0.14201709112446062, "grad_norm": 0.2586401402950287, "learning_rate": 0.001, "loss": 2.3109, "step": 3357 }, { "epoch": 0.14205939588797697, "grad_norm": 0.26827335357666016, "learning_rate": 0.001, "loss": 2.2147, "step": 3358 }, { "epoch": 0.14210170065149336, "grad_norm": 0.26217904686927795, "learning_rate": 0.001, "loss": 2.1678, "step": 3359 }, { "epoch": 0.14214400541500974, "grad_norm": 0.45599186420440674, "learning_rate": 0.001, "loss": 2.3742, "step": 3360 }, { "epoch": 0.1421863101785261, "grad_norm": 0.24741707742214203, "learning_rate": 0.001, "loss": 1.727, "step": 3361 }, { "epoch": 0.14222861494204248, "grad_norm": 0.300525963306427, "learning_rate": 0.001, "loss": 2.6092, "step": 3362 }, { "epoch": 0.14227091970555886, "grad_norm": 0.27571022510528564, "learning_rate": 0.001, "loss": 2.3276, "step": 3363 }, { "epoch": 0.1423132244690752, "grad_norm": 0.42578956484794617, "learning_rate": 0.001, "loss": 1.5469, "step": 3364 }, { "epoch": 0.1423555292325916, "grad_norm": 0.2815234661102295, "learning_rate": 0.001, "loss": 3.2236, "step": 3365 }, { "epoch": 0.14239783399610798, "grad_norm": 3.376448392868042, "learning_rate": 0.001, "loss": 2.2013, "step": 3366 }, { "epoch": 0.14244013875962433, "grad_norm": 1.9560842514038086, "learning_rate": 0.001, "loss": 2.0186, "step": 3367 }, { "epoch": 0.1424824435231407, "grad_norm": 0.23897530138492584, "learning_rate": 0.001, "loss": 2.2577, "step": 3368 }, { "epoch": 0.14252474828665707, "grad_norm": 0.25748342275619507, "learning_rate": 0.001, "loss": 2.3233, "step": 3369 }, { "epoch": 0.14256705305017345, "grad_norm": 0.2940260171890259, "learning_rate": 0.001, "loss": 1.8611, "step": 3370 }, { "epoch": 0.14260935781368983, "grad_norm": 0.5368818640708923, "learning_rate": 0.001, "loss": 5.5408, "step": 3371 }, { "epoch": 0.14265166257720618, "grad_norm": 0.2916557788848877, "learning_rate": 0.001, "loss": 2.0839, "step": 3372 }, { "epoch": 0.14269396734072257, "grad_norm": 0.21450082957744598, "learning_rate": 0.001, "loss": 3.1867, "step": 3373 }, { "epoch": 0.14273627210423895, "grad_norm": 0.43757933378219604, "learning_rate": 0.001, "loss": 3.901, "step": 3374 }, { "epoch": 0.1427785768677553, "grad_norm": 0.2997872829437256, "learning_rate": 0.001, "loss": 2.2917, "step": 3375 }, { "epoch": 0.14282088163127168, "grad_norm": 0.26807701587677, "learning_rate": 0.001, "loss": 1.9812, "step": 3376 }, { "epoch": 0.14286318639478807, "grad_norm": 0.7264923453330994, "learning_rate": 0.001, "loss": 1.7461, "step": 3377 }, { "epoch": 0.14290549115830442, "grad_norm": 0.23869849741458893, "learning_rate": 0.001, "loss": 2.7253, "step": 3378 }, { "epoch": 0.1429477959218208, "grad_norm": 2.684951066970825, "learning_rate": 0.001, "loss": 1.8465, "step": 3379 }, { "epoch": 0.14299010068533716, "grad_norm": 0.33340945839881897, "learning_rate": 0.001, "loss": 2.7263, "step": 3380 }, { "epoch": 0.14303240544885354, "grad_norm": 0.8364661335945129, "learning_rate": 0.001, "loss": 3.4191, "step": 3381 }, { "epoch": 0.14307471021236992, "grad_norm": 0.2745528221130371, "learning_rate": 0.001, "loss": 2.9753, "step": 3382 }, { "epoch": 0.14311701497588628, "grad_norm": 0.5543707609176636, "learning_rate": 0.001, "loss": 2.9777, "step": 3383 }, { "epoch": 0.14315931973940266, "grad_norm": 0.31856632232666016, "learning_rate": 0.001, "loss": 3.4294, "step": 3384 }, { "epoch": 0.14320162450291904, "grad_norm": 0.2563943862915039, "learning_rate": 0.001, "loss": 2.3104, "step": 3385 }, { "epoch": 0.1432439292664354, "grad_norm": 1.2910809516906738, "learning_rate": 0.001, "loss": 2.1959, "step": 3386 }, { "epoch": 0.14328623402995178, "grad_norm": 0.261447548866272, "learning_rate": 0.001, "loss": 1.9312, "step": 3387 }, { "epoch": 0.14332853879346816, "grad_norm": 1.013630747795105, "learning_rate": 0.001, "loss": 1.6115, "step": 3388 }, { "epoch": 0.1433708435569845, "grad_norm": 2.35341477394104, "learning_rate": 0.001, "loss": 2.19, "step": 3389 }, { "epoch": 0.1434131483205009, "grad_norm": 0.24885472655296326, "learning_rate": 0.001, "loss": 2.1222, "step": 3390 }, { "epoch": 0.14345545308401725, "grad_norm": 0.45557206869125366, "learning_rate": 0.001, "loss": 2.2925, "step": 3391 }, { "epoch": 0.14349775784753363, "grad_norm": 0.7096180319786072, "learning_rate": 0.001, "loss": 1.7682, "step": 3392 }, { "epoch": 0.14354006261105, "grad_norm": 0.6223247051239014, "learning_rate": 0.001, "loss": 2.476, "step": 3393 }, { "epoch": 0.14358236737456637, "grad_norm": 0.3025702238082886, "learning_rate": 0.001, "loss": 2.1303, "step": 3394 }, { "epoch": 0.14362467213808275, "grad_norm": 0.6526556015014648, "learning_rate": 0.001, "loss": 2.4046, "step": 3395 }, { "epoch": 0.14366697690159913, "grad_norm": 0.40125930309295654, "learning_rate": 0.001, "loss": 3.7139, "step": 3396 }, { "epoch": 0.14370928166511548, "grad_norm": 0.26466187834739685, "learning_rate": 0.001, "loss": 2.4826, "step": 3397 }, { "epoch": 0.14375158642863187, "grad_norm": 0.26691725850105286, "learning_rate": 0.001, "loss": 1.7453, "step": 3398 }, { "epoch": 0.14379389119214825, "grad_norm": 0.2618395686149597, "learning_rate": 0.001, "loss": 2.1459, "step": 3399 }, { "epoch": 0.1438361959556646, "grad_norm": 0.7489890456199646, "learning_rate": 0.001, "loss": 2.8917, "step": 3400 }, { "epoch": 0.14387850071918099, "grad_norm": 0.24983298778533936, "learning_rate": 0.001, "loss": 1.8924, "step": 3401 }, { "epoch": 0.14392080548269734, "grad_norm": 0.210697740316391, "learning_rate": 0.001, "loss": 2.1125, "step": 3402 }, { "epoch": 0.14396311024621372, "grad_norm": 0.25680264830589294, "learning_rate": 0.001, "loss": 2.1817, "step": 3403 }, { "epoch": 0.1440054150097301, "grad_norm": 0.2333562821149826, "learning_rate": 0.001, "loss": 2.1268, "step": 3404 }, { "epoch": 0.14404771977324646, "grad_norm": 0.26086026430130005, "learning_rate": 0.001, "loss": 2.647, "step": 3405 }, { "epoch": 0.14409002453676284, "grad_norm": 0.24561172723770142, "learning_rate": 0.001, "loss": 1.7931, "step": 3406 }, { "epoch": 0.14413232930027922, "grad_norm": 0.2474185675382614, "learning_rate": 0.001, "loss": 2.4043, "step": 3407 }, { "epoch": 0.14417463406379558, "grad_norm": 0.2294658124446869, "learning_rate": 0.001, "loss": 2.243, "step": 3408 }, { "epoch": 0.14421693882731196, "grad_norm": 0.2385028749704361, "learning_rate": 0.001, "loss": 2.5765, "step": 3409 }, { "epoch": 0.14425924359082834, "grad_norm": 0.2432500123977661, "learning_rate": 0.001, "loss": 1.8027, "step": 3410 }, { "epoch": 0.1443015483543447, "grad_norm": 0.2768508791923523, "learning_rate": 0.001, "loss": 2.4532, "step": 3411 }, { "epoch": 0.14434385311786108, "grad_norm": 0.892453134059906, "learning_rate": 0.001, "loss": 2.4542, "step": 3412 }, { "epoch": 0.14438615788137743, "grad_norm": 0.2457326054573059, "learning_rate": 0.001, "loss": 2.1699, "step": 3413 }, { "epoch": 0.1444284626448938, "grad_norm": 0.26426583528518677, "learning_rate": 0.001, "loss": 2.1309, "step": 3414 }, { "epoch": 0.1444707674084102, "grad_norm": 0.2871556282043457, "learning_rate": 0.001, "loss": 3.3215, "step": 3415 }, { "epoch": 0.14451307217192655, "grad_norm": 0.2520632743835449, "learning_rate": 0.001, "loss": 1.9709, "step": 3416 }, { "epoch": 0.14455537693544293, "grad_norm": 0.38204339146614075, "learning_rate": 0.001, "loss": 3.6021, "step": 3417 }, { "epoch": 0.1445976816989593, "grad_norm": 0.26071059703826904, "learning_rate": 0.001, "loss": 2.2053, "step": 3418 }, { "epoch": 0.14463998646247567, "grad_norm": 0.2733023762702942, "learning_rate": 0.001, "loss": 3.47, "step": 3419 }, { "epoch": 0.14468229122599205, "grad_norm": 1.002029538154602, "learning_rate": 0.001, "loss": 3.2734, "step": 3420 }, { "epoch": 0.14472459598950843, "grad_norm": 0.3164158761501312, "learning_rate": 0.001, "loss": 2.763, "step": 3421 }, { "epoch": 0.14476690075302479, "grad_norm": 0.2513934075832367, "learning_rate": 0.001, "loss": 2.0143, "step": 3422 }, { "epoch": 0.14480920551654117, "grad_norm": 0.24494628608226776, "learning_rate": 0.001, "loss": 2.298, "step": 3423 }, { "epoch": 0.14485151028005752, "grad_norm": 0.27729180455207825, "learning_rate": 0.001, "loss": 2.6749, "step": 3424 }, { "epoch": 0.1448938150435739, "grad_norm": 0.26897814869880676, "learning_rate": 0.001, "loss": 3.574, "step": 3425 }, { "epoch": 0.14493611980709029, "grad_norm": 0.22974863648414612, "learning_rate": 0.001, "loss": 3.4897, "step": 3426 }, { "epoch": 0.14497842457060664, "grad_norm": 0.22176092863082886, "learning_rate": 0.001, "loss": 1.9926, "step": 3427 }, { "epoch": 0.14502072933412302, "grad_norm": 0.38495469093322754, "learning_rate": 0.001, "loss": 2.5689, "step": 3428 }, { "epoch": 0.1450630340976394, "grad_norm": 0.22981159389019012, "learning_rate": 0.001, "loss": 2.2184, "step": 3429 }, { "epoch": 0.14510533886115576, "grad_norm": 0.2774735987186432, "learning_rate": 0.001, "loss": 3.0189, "step": 3430 }, { "epoch": 0.14514764362467214, "grad_norm": 0.2698344886302948, "learning_rate": 0.001, "loss": 2.4688, "step": 3431 }, { "epoch": 0.14518994838818852, "grad_norm": 0.6816653609275818, "learning_rate": 0.001, "loss": 2.0268, "step": 3432 }, { "epoch": 0.14523225315170488, "grad_norm": 0.29013943672180176, "learning_rate": 0.001, "loss": 3.2067, "step": 3433 }, { "epoch": 0.14527455791522126, "grad_norm": 0.22570838034152985, "learning_rate": 0.001, "loss": 3.0135, "step": 3434 }, { "epoch": 0.1453168626787376, "grad_norm": 0.21075336635112762, "learning_rate": 0.001, "loss": 2.242, "step": 3435 }, { "epoch": 0.145359167442254, "grad_norm": 0.2940874993801117, "learning_rate": 0.001, "loss": 3.109, "step": 3436 }, { "epoch": 0.14540147220577038, "grad_norm": 0.21364165842533112, "learning_rate": 0.001, "loss": 1.3964, "step": 3437 }, { "epoch": 0.14544377696928673, "grad_norm": 0.2253018319606781, "learning_rate": 0.001, "loss": 2.2411, "step": 3438 }, { "epoch": 0.1454860817328031, "grad_norm": 0.2131890505552292, "learning_rate": 0.001, "loss": 1.7277, "step": 3439 }, { "epoch": 0.1455283864963195, "grad_norm": 0.2189098596572876, "learning_rate": 0.001, "loss": 1.7839, "step": 3440 }, { "epoch": 0.14557069125983585, "grad_norm": 2.004612922668457, "learning_rate": 0.001, "loss": 1.8655, "step": 3441 }, { "epoch": 0.14561299602335223, "grad_norm": 0.2156573235988617, "learning_rate": 0.001, "loss": 2.3993, "step": 3442 }, { "epoch": 0.1456553007868686, "grad_norm": 0.30679431557655334, "learning_rate": 0.001, "loss": 2.738, "step": 3443 }, { "epoch": 0.14569760555038497, "grad_norm": 0.2574104070663452, "learning_rate": 0.001, "loss": 2.4518, "step": 3444 }, { "epoch": 0.14573991031390135, "grad_norm": 0.2621307075023651, "learning_rate": 0.001, "loss": 2.3622, "step": 3445 }, { "epoch": 0.1457822150774177, "grad_norm": 0.30141088366508484, "learning_rate": 0.001, "loss": 2.494, "step": 3446 }, { "epoch": 0.14582451984093409, "grad_norm": 0.19590161740779877, "learning_rate": 0.001, "loss": 1.6406, "step": 3447 }, { "epoch": 0.14586682460445047, "grad_norm": 0.21325886249542236, "learning_rate": 0.001, "loss": 1.6452, "step": 3448 }, { "epoch": 0.14590912936796682, "grad_norm": 0.22410674393177032, "learning_rate": 0.001, "loss": 2.4961, "step": 3449 }, { "epoch": 0.1459514341314832, "grad_norm": 0.2333114743232727, "learning_rate": 0.001, "loss": 1.9583, "step": 3450 }, { "epoch": 0.1459937388949996, "grad_norm": 0.43659529089927673, "learning_rate": 0.001, "loss": 2.4177, "step": 3451 }, { "epoch": 0.14603604365851594, "grad_norm": 0.23108899593353271, "learning_rate": 0.001, "loss": 1.9033, "step": 3452 }, { "epoch": 0.14607834842203232, "grad_norm": 0.2105552852153778, "learning_rate": 0.001, "loss": 1.7711, "step": 3453 }, { "epoch": 0.1461206531855487, "grad_norm": 0.22220350801944733, "learning_rate": 0.001, "loss": 3.0378, "step": 3454 }, { "epoch": 0.14616295794906506, "grad_norm": 0.2945927679538727, "learning_rate": 0.001, "loss": 2.0337, "step": 3455 }, { "epoch": 0.14620526271258144, "grad_norm": 0.26347851753234863, "learning_rate": 0.001, "loss": 2.1827, "step": 3456 }, { "epoch": 0.1462475674760978, "grad_norm": 1.3712072372436523, "learning_rate": 0.001, "loss": 2.5637, "step": 3457 }, { "epoch": 0.14628987223961418, "grad_norm": 0.33388158679008484, "learning_rate": 0.001, "loss": 1.8558, "step": 3458 }, { "epoch": 0.14633217700313056, "grad_norm": 0.23212599754333496, "learning_rate": 0.001, "loss": 2.7854, "step": 3459 }, { "epoch": 0.1463744817666469, "grad_norm": 0.2477710247039795, "learning_rate": 0.001, "loss": 2.2062, "step": 3460 }, { "epoch": 0.1464167865301633, "grad_norm": 0.5189692974090576, "learning_rate": 0.001, "loss": 2.5085, "step": 3461 }, { "epoch": 0.14645909129367968, "grad_norm": 0.39620745182037354, "learning_rate": 0.001, "loss": 2.913, "step": 3462 }, { "epoch": 0.14650139605719603, "grad_norm": 0.23096932470798492, "learning_rate": 0.001, "loss": 2.7137, "step": 3463 }, { "epoch": 0.1465437008207124, "grad_norm": 0.2657645642757416, "learning_rate": 0.001, "loss": 2.3191, "step": 3464 }, { "epoch": 0.1465860055842288, "grad_norm": 0.2581949532032013, "learning_rate": 0.001, "loss": 2.4491, "step": 3465 }, { "epoch": 0.14662831034774515, "grad_norm": 0.21660561859607697, "learning_rate": 0.001, "loss": 2.4903, "step": 3466 }, { "epoch": 0.14667061511126153, "grad_norm": 0.2140873819589615, "learning_rate": 0.001, "loss": 2.8028, "step": 3467 }, { "epoch": 0.1467129198747779, "grad_norm": 0.23401807248592377, "learning_rate": 0.001, "loss": 1.9589, "step": 3468 }, { "epoch": 0.14675522463829427, "grad_norm": 0.43440452218055725, "learning_rate": 0.001, "loss": 2.3189, "step": 3469 }, { "epoch": 0.14679752940181065, "grad_norm": 0.2246183156967163, "learning_rate": 0.001, "loss": 2.8871, "step": 3470 }, { "epoch": 0.146839834165327, "grad_norm": 0.25875502824783325, "learning_rate": 0.001, "loss": 2.123, "step": 3471 }, { "epoch": 0.1468821389288434, "grad_norm": 0.2841735780239105, "learning_rate": 0.001, "loss": 2.4137, "step": 3472 }, { "epoch": 0.14692444369235977, "grad_norm": 0.3783358037471771, "learning_rate": 0.001, "loss": 2.2655, "step": 3473 }, { "epoch": 0.14696674845587612, "grad_norm": 0.37711983919143677, "learning_rate": 0.001, "loss": 2.7385, "step": 3474 }, { "epoch": 0.1470090532193925, "grad_norm": 0.2421020269393921, "learning_rate": 0.001, "loss": 3.2295, "step": 3475 }, { "epoch": 0.1470513579829089, "grad_norm": 0.3066774308681488, "learning_rate": 0.001, "loss": 2.2816, "step": 3476 }, { "epoch": 0.14709366274642524, "grad_norm": 0.29278188943862915, "learning_rate": 0.001, "loss": 2.3431, "step": 3477 }, { "epoch": 0.14713596750994162, "grad_norm": 0.259764701128006, "learning_rate": 0.001, "loss": 2.7806, "step": 3478 }, { "epoch": 0.147178272273458, "grad_norm": 0.2844379246234894, "learning_rate": 0.001, "loss": 2.9955, "step": 3479 }, { "epoch": 0.14722057703697436, "grad_norm": 0.46466735005378723, "learning_rate": 0.001, "loss": 2.212, "step": 3480 }, { "epoch": 0.14726288180049074, "grad_norm": 0.2156391441822052, "learning_rate": 0.001, "loss": 1.6904, "step": 3481 }, { "epoch": 0.1473051865640071, "grad_norm": 0.3095730245113373, "learning_rate": 0.001, "loss": 2.1532, "step": 3482 }, { "epoch": 0.14734749132752348, "grad_norm": 0.23583956062793732, "learning_rate": 0.001, "loss": 1.8892, "step": 3483 }, { "epoch": 0.14738979609103986, "grad_norm": 0.19967339932918549, "learning_rate": 0.001, "loss": 2.1184, "step": 3484 }, { "epoch": 0.14743210085455621, "grad_norm": 0.22221580147743225, "learning_rate": 0.001, "loss": 2.1233, "step": 3485 }, { "epoch": 0.1474744056180726, "grad_norm": 0.21620295941829681, "learning_rate": 0.001, "loss": 1.8619, "step": 3486 }, { "epoch": 0.14751671038158898, "grad_norm": 0.2938464283943176, "learning_rate": 0.001, "loss": 2.0775, "step": 3487 }, { "epoch": 0.14755901514510533, "grad_norm": 0.4123593270778656, "learning_rate": 0.001, "loss": 2.9428, "step": 3488 }, { "epoch": 0.14760131990862171, "grad_norm": 0.3647308647632599, "learning_rate": 0.001, "loss": 2.6945, "step": 3489 }, { "epoch": 0.1476436246721381, "grad_norm": 0.9250668287277222, "learning_rate": 0.001, "loss": 2.283, "step": 3490 }, { "epoch": 0.14768592943565445, "grad_norm": 0.19745373725891113, "learning_rate": 0.001, "loss": 1.9975, "step": 3491 }, { "epoch": 0.14772823419917083, "grad_norm": 0.30542847514152527, "learning_rate": 0.001, "loss": 2.3923, "step": 3492 }, { "epoch": 0.1477705389626872, "grad_norm": 0.23577959835529327, "learning_rate": 0.001, "loss": 1.8087, "step": 3493 }, { "epoch": 0.14781284372620357, "grad_norm": 1.1520733833312988, "learning_rate": 0.001, "loss": 2.0672, "step": 3494 }, { "epoch": 0.14785514848971995, "grad_norm": 0.4492546617984772, "learning_rate": 0.001, "loss": 2.4425, "step": 3495 }, { "epoch": 0.1478974532532363, "grad_norm": 0.2272578924894333, "learning_rate": 0.001, "loss": 2.0611, "step": 3496 }, { "epoch": 0.1479397580167527, "grad_norm": 1.9856147766113281, "learning_rate": 0.001, "loss": 2.5383, "step": 3497 }, { "epoch": 0.14798206278026907, "grad_norm": 0.32524406909942627, "learning_rate": 0.001, "loss": 2.6931, "step": 3498 }, { "epoch": 0.14802436754378542, "grad_norm": 0.3067854046821594, "learning_rate": 0.001, "loss": 2.6123, "step": 3499 }, { "epoch": 0.1480666723073018, "grad_norm": 0.2817644476890564, "learning_rate": 0.001, "loss": 2.0649, "step": 3500 }, { "epoch": 0.1481089770708182, "grad_norm": 1.7400952577590942, "learning_rate": 0.001, "loss": 2.2053, "step": 3501 }, { "epoch": 0.14815128183433454, "grad_norm": 0.2977108061313629, "learning_rate": 0.001, "loss": 2.1427, "step": 3502 }, { "epoch": 0.14819358659785092, "grad_norm": 0.3165165185928345, "learning_rate": 0.001, "loss": 3.425, "step": 3503 }, { "epoch": 0.14823589136136728, "grad_norm": 0.25336670875549316, "learning_rate": 0.001, "loss": 1.8021, "step": 3504 }, { "epoch": 0.14827819612488366, "grad_norm": 1.0275665521621704, "learning_rate": 0.001, "loss": 2.5291, "step": 3505 }, { "epoch": 0.14832050088840004, "grad_norm": 0.291814923286438, "learning_rate": 0.001, "loss": 1.9457, "step": 3506 }, { "epoch": 0.1483628056519164, "grad_norm": 0.25686556100845337, "learning_rate": 0.001, "loss": 2.059, "step": 3507 }, { "epoch": 0.14840511041543278, "grad_norm": 0.2979572117328644, "learning_rate": 0.001, "loss": 1.9924, "step": 3508 }, { "epoch": 0.14844741517894916, "grad_norm": 0.2901727259159088, "learning_rate": 0.001, "loss": 2.0276, "step": 3509 }, { "epoch": 0.14848971994246551, "grad_norm": 0.34327811002731323, "learning_rate": 0.001, "loss": 2.7836, "step": 3510 }, { "epoch": 0.1485320247059819, "grad_norm": 0.38972118496894836, "learning_rate": 0.001, "loss": 3.0989, "step": 3511 }, { "epoch": 0.14857432946949828, "grad_norm": 11.712198257446289, "learning_rate": 0.001, "loss": 3.3529, "step": 3512 }, { "epoch": 0.14861663423301463, "grad_norm": 5.345020771026611, "learning_rate": 0.001, "loss": 3.9772, "step": 3513 }, { "epoch": 0.14865893899653101, "grad_norm": 0.3382362425327301, "learning_rate": 0.001, "loss": 2.0698, "step": 3514 }, { "epoch": 0.14870124376004737, "grad_norm": 0.24800531566143036, "learning_rate": 0.001, "loss": 2.5415, "step": 3515 }, { "epoch": 0.14874354852356375, "grad_norm": 0.2699640393257141, "learning_rate": 0.001, "loss": 3.1028, "step": 3516 }, { "epoch": 0.14878585328708013, "grad_norm": 0.273624062538147, "learning_rate": 0.001, "loss": 2.3766, "step": 3517 }, { "epoch": 0.1488281580505965, "grad_norm": 0.2503005266189575, "learning_rate": 0.001, "loss": 2.6137, "step": 3518 }, { "epoch": 0.14887046281411287, "grad_norm": 0.8663057684898376, "learning_rate": 0.001, "loss": 1.9792, "step": 3519 }, { "epoch": 0.14891276757762925, "grad_norm": 0.25076824426651, "learning_rate": 0.001, "loss": 2.2382, "step": 3520 }, { "epoch": 0.1489550723411456, "grad_norm": 0.34838005900382996, "learning_rate": 0.001, "loss": 2.0448, "step": 3521 }, { "epoch": 0.148997377104662, "grad_norm": 0.2928178012371063, "learning_rate": 0.001, "loss": 3.3827, "step": 3522 }, { "epoch": 0.14903968186817837, "grad_norm": 1.0668600797653198, "learning_rate": 0.001, "loss": 2.3127, "step": 3523 }, { "epoch": 0.14908198663169472, "grad_norm": 0.32312437891960144, "learning_rate": 0.001, "loss": 2.3694, "step": 3524 }, { "epoch": 0.1491242913952111, "grad_norm": 19.220008850097656, "learning_rate": 0.001, "loss": 1.897, "step": 3525 }, { "epoch": 0.14916659615872746, "grad_norm": 0.2430810034275055, "learning_rate": 0.001, "loss": 2.16, "step": 3526 }, { "epoch": 0.14920890092224384, "grad_norm": 0.3711296319961548, "learning_rate": 0.001, "loss": 2.4911, "step": 3527 }, { "epoch": 0.14925120568576022, "grad_norm": 0.4155080020427704, "learning_rate": 0.001, "loss": 3.8939, "step": 3528 }, { "epoch": 0.14929351044927658, "grad_norm": 0.3918618857860565, "learning_rate": 0.001, "loss": 2.4925, "step": 3529 }, { "epoch": 0.14933581521279296, "grad_norm": 0.3465852439403534, "learning_rate": 0.001, "loss": 2.4602, "step": 3530 }, { "epoch": 0.14937811997630934, "grad_norm": 0.2546325623989105, "learning_rate": 0.001, "loss": 3.3572, "step": 3531 }, { "epoch": 0.1494204247398257, "grad_norm": 0.24824251234531403, "learning_rate": 0.001, "loss": 2.3866, "step": 3532 }, { "epoch": 0.14946272950334208, "grad_norm": 0.783733606338501, "learning_rate": 0.001, "loss": 2.2685, "step": 3533 }, { "epoch": 0.14950503426685846, "grad_norm": 0.27685683965682983, "learning_rate": 0.001, "loss": 2.6839, "step": 3534 }, { "epoch": 0.14954733903037482, "grad_norm": 0.9122802019119263, "learning_rate": 0.001, "loss": 2.1993, "step": 3535 }, { "epoch": 0.1495896437938912, "grad_norm": 0.20734846591949463, "learning_rate": 0.001, "loss": 2.7781, "step": 3536 }, { "epoch": 0.14963194855740755, "grad_norm": 0.303323894739151, "learning_rate": 0.001, "loss": 3.147, "step": 3537 }, { "epoch": 0.14967425332092393, "grad_norm": 3.6282622814178467, "learning_rate": 0.001, "loss": 3.1942, "step": 3538 }, { "epoch": 0.14971655808444032, "grad_norm": 0.5021495223045349, "learning_rate": 0.001, "loss": 2.3829, "step": 3539 }, { "epoch": 0.14975886284795667, "grad_norm": 0.3432963788509369, "learning_rate": 0.001, "loss": 2.3058, "step": 3540 }, { "epoch": 0.14980116761147305, "grad_norm": 0.46659693121910095, "learning_rate": 0.001, "loss": 2.5437, "step": 3541 }, { "epoch": 0.14984347237498943, "grad_norm": 0.2588510513305664, "learning_rate": 0.001, "loss": 1.9825, "step": 3542 }, { "epoch": 0.1498857771385058, "grad_norm": 0.8514876961708069, "learning_rate": 0.001, "loss": 2.1087, "step": 3543 }, { "epoch": 0.14992808190202217, "grad_norm": 1.0069010257720947, "learning_rate": 0.001, "loss": 1.9367, "step": 3544 }, { "epoch": 0.14997038666553855, "grad_norm": 0.3433607220649719, "learning_rate": 0.001, "loss": 1.9445, "step": 3545 }, { "epoch": 0.1500126914290549, "grad_norm": 0.9383977055549622, "learning_rate": 0.001, "loss": 3.1781, "step": 3546 }, { "epoch": 0.1500549961925713, "grad_norm": 0.6198011040687561, "learning_rate": 0.001, "loss": 2.755, "step": 3547 }, { "epoch": 0.15009730095608764, "grad_norm": 0.7712514400482178, "learning_rate": 0.001, "loss": 2.3735, "step": 3548 }, { "epoch": 0.15013960571960402, "grad_norm": 0.8419329524040222, "learning_rate": 0.001, "loss": 1.8685, "step": 3549 }, { "epoch": 0.1501819104831204, "grad_norm": 0.38785937428474426, "learning_rate": 0.001, "loss": 2.7159, "step": 3550 }, { "epoch": 0.15022421524663676, "grad_norm": 0.25905489921569824, "learning_rate": 0.001, "loss": 1.916, "step": 3551 }, { "epoch": 0.15026652001015314, "grad_norm": 0.28637632727622986, "learning_rate": 0.001, "loss": 2.152, "step": 3552 }, { "epoch": 0.15030882477366952, "grad_norm": 0.4733045697212219, "learning_rate": 0.001, "loss": 2.2419, "step": 3553 }, { "epoch": 0.15035112953718588, "grad_norm": 0.3638868033885956, "learning_rate": 0.001, "loss": 2.0374, "step": 3554 }, { "epoch": 0.15039343430070226, "grad_norm": 0.4152970612049103, "learning_rate": 0.001, "loss": 2.3609, "step": 3555 }, { "epoch": 0.15043573906421864, "grad_norm": 1.5988473892211914, "learning_rate": 0.001, "loss": 1.8426, "step": 3556 }, { "epoch": 0.150478043827735, "grad_norm": 0.24985834956169128, "learning_rate": 0.001, "loss": 2.2193, "step": 3557 }, { "epoch": 0.15052034859125138, "grad_norm": 7.093243598937988, "learning_rate": 0.001, "loss": 1.9872, "step": 3558 }, { "epoch": 0.15056265335476773, "grad_norm": 0.5130526423454285, "learning_rate": 0.001, "loss": 3.1589, "step": 3559 }, { "epoch": 0.15060495811828412, "grad_norm": 0.9955002665519714, "learning_rate": 0.001, "loss": 2.2292, "step": 3560 }, { "epoch": 0.1506472628818005, "grad_norm": 0.4683253765106201, "learning_rate": 0.001, "loss": 1.9829, "step": 3561 }, { "epoch": 0.15068956764531685, "grad_norm": 0.24029046297073364, "learning_rate": 0.001, "loss": 2.5349, "step": 3562 }, { "epoch": 0.15073187240883323, "grad_norm": 0.24601727724075317, "learning_rate": 0.001, "loss": 2.589, "step": 3563 }, { "epoch": 0.15077417717234962, "grad_norm": 0.2925271987915039, "learning_rate": 0.001, "loss": 2.6538, "step": 3564 }, { "epoch": 0.15081648193586597, "grad_norm": 0.6500786542892456, "learning_rate": 0.001, "loss": 2.3506, "step": 3565 }, { "epoch": 0.15085878669938235, "grad_norm": 0.3403443396091461, "learning_rate": 0.001, "loss": 3.4838, "step": 3566 }, { "epoch": 0.15090109146289873, "grad_norm": 0.2680628299713135, "learning_rate": 0.001, "loss": 3.188, "step": 3567 }, { "epoch": 0.1509433962264151, "grad_norm": 0.25424718856811523, "learning_rate": 0.001, "loss": 2.5081, "step": 3568 }, { "epoch": 0.15098570098993147, "grad_norm": 0.24566307663917542, "learning_rate": 0.001, "loss": 2.1124, "step": 3569 }, { "epoch": 0.15102800575344782, "grad_norm": 2.2985541820526123, "learning_rate": 0.001, "loss": 2.8511, "step": 3570 }, { "epoch": 0.1510703105169642, "grad_norm": 0.2778152823448181, "learning_rate": 0.001, "loss": 2.8095, "step": 3571 }, { "epoch": 0.1511126152804806, "grad_norm": 1.3614428043365479, "learning_rate": 0.001, "loss": 2.2643, "step": 3572 }, { "epoch": 0.15115492004399694, "grad_norm": 4.856557846069336, "learning_rate": 0.001, "loss": 2.3387, "step": 3573 }, { "epoch": 0.15119722480751333, "grad_norm": 0.2886848449707031, "learning_rate": 0.001, "loss": 3.1371, "step": 3574 }, { "epoch": 0.1512395295710297, "grad_norm": 0.2378971427679062, "learning_rate": 0.001, "loss": 2.4346, "step": 3575 }, { "epoch": 0.15128183433454606, "grad_norm": 0.28027504682540894, "learning_rate": 0.001, "loss": 2.2992, "step": 3576 }, { "epoch": 0.15132413909806244, "grad_norm": 0.9018852114677429, "learning_rate": 0.001, "loss": 1.6448, "step": 3577 }, { "epoch": 0.15136644386157883, "grad_norm": 0.28057655692100525, "learning_rate": 0.001, "loss": 1.584, "step": 3578 }, { "epoch": 0.15140874862509518, "grad_norm": 0.6362797021865845, "learning_rate": 0.001, "loss": 3.2339, "step": 3579 }, { "epoch": 0.15145105338861156, "grad_norm": 2.48857045173645, "learning_rate": 0.001, "loss": 2.0265, "step": 3580 }, { "epoch": 0.15149335815212792, "grad_norm": 0.341279536485672, "learning_rate": 0.001, "loss": 2.7349, "step": 3581 }, { "epoch": 0.1515356629156443, "grad_norm": 0.39645954966545105, "learning_rate": 0.001, "loss": 2.335, "step": 3582 }, { "epoch": 0.15157796767916068, "grad_norm": 2.092052698135376, "learning_rate": 0.001, "loss": 1.5818, "step": 3583 }, { "epoch": 0.15162027244267703, "grad_norm": 5.003619194030762, "learning_rate": 0.001, "loss": 2.547, "step": 3584 }, { "epoch": 0.15166257720619342, "grad_norm": 0.3023916482925415, "learning_rate": 0.001, "loss": 1.8312, "step": 3585 }, { "epoch": 0.1517048819697098, "grad_norm": 0.5600648522377014, "learning_rate": 0.001, "loss": 2.258, "step": 3586 }, { "epoch": 0.15174718673322615, "grad_norm": 0.38749417662620544, "learning_rate": 0.001, "loss": 2.2861, "step": 3587 }, { "epoch": 0.15178949149674253, "grad_norm": 1.0356155633926392, "learning_rate": 0.001, "loss": 3.6475, "step": 3588 }, { "epoch": 0.15183179626025892, "grad_norm": 0.27420303225517273, "learning_rate": 0.001, "loss": 2.0222, "step": 3589 }, { "epoch": 0.15187410102377527, "grad_norm": 1.7488365173339844, "learning_rate": 0.001, "loss": 2.0034, "step": 3590 }, { "epoch": 0.15191640578729165, "grad_norm": 0.3303074538707733, "learning_rate": 0.001, "loss": 2.4819, "step": 3591 }, { "epoch": 0.151958710550808, "grad_norm": 0.8918514847755432, "learning_rate": 0.001, "loss": 2.3028, "step": 3592 }, { "epoch": 0.1520010153143244, "grad_norm": 0.40100759267807007, "learning_rate": 0.001, "loss": 2.3375, "step": 3593 }, { "epoch": 0.15204332007784077, "grad_norm": 0.4550624489784241, "learning_rate": 0.001, "loss": 4.096, "step": 3594 }, { "epoch": 0.15208562484135713, "grad_norm": 0.7799673080444336, "learning_rate": 0.001, "loss": 2.6198, "step": 3595 }, { "epoch": 0.1521279296048735, "grad_norm": 0.34559065103530884, "learning_rate": 0.001, "loss": 2.9821, "step": 3596 }, { "epoch": 0.1521702343683899, "grad_norm": 0.3571106493473053, "learning_rate": 0.001, "loss": 2.503, "step": 3597 }, { "epoch": 0.15221253913190624, "grad_norm": 0.3727911710739136, "learning_rate": 0.001, "loss": 2.5119, "step": 3598 }, { "epoch": 0.15225484389542263, "grad_norm": 4.5436015129089355, "learning_rate": 0.001, "loss": 3.7948, "step": 3599 }, { "epoch": 0.152297148658939, "grad_norm": 0.3875895142555237, "learning_rate": 0.001, "loss": 2.6795, "step": 3600 }, { "epoch": 0.15233945342245536, "grad_norm": 0.3254016935825348, "learning_rate": 0.001, "loss": 2.9257, "step": 3601 }, { "epoch": 0.15238175818597174, "grad_norm": 0.27709123492240906, "learning_rate": 0.001, "loss": 2.9524, "step": 3602 }, { "epoch": 0.15242406294948813, "grad_norm": 0.236286923289299, "learning_rate": 0.001, "loss": 2.0965, "step": 3603 }, { "epoch": 0.15246636771300448, "grad_norm": 0.29797059297561646, "learning_rate": 0.001, "loss": 2.974, "step": 3604 }, { "epoch": 0.15250867247652086, "grad_norm": 0.9622611999511719, "learning_rate": 0.001, "loss": 1.8431, "step": 3605 }, { "epoch": 0.15255097724003722, "grad_norm": 0.32603511214256287, "learning_rate": 0.001, "loss": 3.1559, "step": 3606 }, { "epoch": 0.1525932820035536, "grad_norm": 0.965201199054718, "learning_rate": 0.001, "loss": 3.2636, "step": 3607 }, { "epoch": 0.15263558676706998, "grad_norm": 0.28725409507751465, "learning_rate": 0.001, "loss": 1.9986, "step": 3608 }, { "epoch": 0.15267789153058633, "grad_norm": 0.28865811228752136, "learning_rate": 0.001, "loss": 3.1487, "step": 3609 }, { "epoch": 0.15272019629410272, "grad_norm": 0.22873492538928986, "learning_rate": 0.001, "loss": 1.9311, "step": 3610 }, { "epoch": 0.1527625010576191, "grad_norm": 0.4197172522544861, "learning_rate": 0.001, "loss": 2.2263, "step": 3611 }, { "epoch": 0.15280480582113545, "grad_norm": 0.3066697120666504, "learning_rate": 0.001, "loss": 2.3186, "step": 3612 }, { "epoch": 0.15284711058465184, "grad_norm": 0.23598486185073853, "learning_rate": 0.001, "loss": 1.9821, "step": 3613 }, { "epoch": 0.15288941534816822, "grad_norm": 0.21602579951286316, "learning_rate": 0.001, "loss": 1.9219, "step": 3614 }, { "epoch": 0.15293172011168457, "grad_norm": 0.28833669424057007, "learning_rate": 0.001, "loss": 2.1563, "step": 3615 }, { "epoch": 0.15297402487520095, "grad_norm": 0.6677043437957764, "learning_rate": 0.001, "loss": 2.4993, "step": 3616 }, { "epoch": 0.1530163296387173, "grad_norm": 0.29518651962280273, "learning_rate": 0.001, "loss": 3.194, "step": 3617 }, { "epoch": 0.1530586344022337, "grad_norm": 4.215554237365723, "learning_rate": 0.001, "loss": 2.5791, "step": 3618 }, { "epoch": 0.15310093916575007, "grad_norm": 0.27064064145088196, "learning_rate": 0.001, "loss": 3.0186, "step": 3619 }, { "epoch": 0.15314324392926643, "grad_norm": 0.22626028954982758, "learning_rate": 0.001, "loss": 1.7413, "step": 3620 }, { "epoch": 0.1531855486927828, "grad_norm": 0.5641341805458069, "learning_rate": 0.001, "loss": 2.2514, "step": 3621 }, { "epoch": 0.1532278534562992, "grad_norm": 0.6790673136711121, "learning_rate": 0.001, "loss": 1.9258, "step": 3622 }, { "epoch": 0.15327015821981554, "grad_norm": 1.0468883514404297, "learning_rate": 0.001, "loss": 2.5064, "step": 3623 }, { "epoch": 0.15331246298333193, "grad_norm": 0.710141658782959, "learning_rate": 0.001, "loss": 2.2824, "step": 3624 }, { "epoch": 0.1533547677468483, "grad_norm": 0.46319958567619324, "learning_rate": 0.001, "loss": 2.4836, "step": 3625 }, { "epoch": 0.15339707251036466, "grad_norm": 0.24517352879047394, "learning_rate": 0.001, "loss": 2.353, "step": 3626 }, { "epoch": 0.15343937727388104, "grad_norm": 0.25180742144584656, "learning_rate": 0.001, "loss": 2.1456, "step": 3627 }, { "epoch": 0.1534816820373974, "grad_norm": 0.8973338603973389, "learning_rate": 0.001, "loss": 2.0089, "step": 3628 }, { "epoch": 0.15352398680091378, "grad_norm": 0.2948894500732422, "learning_rate": 0.001, "loss": 1.8217, "step": 3629 }, { "epoch": 0.15356629156443016, "grad_norm": 0.4694260358810425, "learning_rate": 0.001, "loss": 1.9615, "step": 3630 }, { "epoch": 0.15360859632794652, "grad_norm": 0.2402891218662262, "learning_rate": 0.001, "loss": 2.4652, "step": 3631 }, { "epoch": 0.1536509010914629, "grad_norm": 2.8009684085845947, "learning_rate": 0.001, "loss": 2.6741, "step": 3632 }, { "epoch": 0.15369320585497928, "grad_norm": 0.3112724721431732, "learning_rate": 0.001, "loss": 2.6238, "step": 3633 }, { "epoch": 0.15373551061849564, "grad_norm": 0.29018670320510864, "learning_rate": 0.001, "loss": 2.4207, "step": 3634 }, { "epoch": 0.15377781538201202, "grad_norm": 0.8226271867752075, "learning_rate": 0.001, "loss": 2.0828, "step": 3635 }, { "epoch": 0.1538201201455284, "grad_norm": 0.26996588706970215, "learning_rate": 0.001, "loss": 2.2658, "step": 3636 }, { "epoch": 0.15386242490904475, "grad_norm": 0.33914855122566223, "learning_rate": 0.001, "loss": 2.4518, "step": 3637 }, { "epoch": 0.15390472967256114, "grad_norm": 0.37279003858566284, "learning_rate": 0.001, "loss": 2.0381, "step": 3638 }, { "epoch": 0.1539470344360775, "grad_norm": 0.4848242700099945, "learning_rate": 0.001, "loss": 2.8453, "step": 3639 }, { "epoch": 0.15398933919959387, "grad_norm": 0.3430381119251251, "learning_rate": 0.001, "loss": 1.9578, "step": 3640 }, { "epoch": 0.15403164396311025, "grad_norm": 0.2790931165218353, "learning_rate": 0.001, "loss": 1.5198, "step": 3641 }, { "epoch": 0.1540739487266266, "grad_norm": 0.23120807111263275, "learning_rate": 0.001, "loss": 2.044, "step": 3642 }, { "epoch": 0.154116253490143, "grad_norm": 0.26103150844573975, "learning_rate": 0.001, "loss": 2.3744, "step": 3643 }, { "epoch": 0.15415855825365937, "grad_norm": 0.21912577748298645, "learning_rate": 0.001, "loss": 1.8251, "step": 3644 }, { "epoch": 0.15420086301717573, "grad_norm": 0.268587201833725, "learning_rate": 0.001, "loss": 4.6103, "step": 3645 }, { "epoch": 0.1542431677806921, "grad_norm": 0.24096794426441193, "learning_rate": 0.001, "loss": 1.8308, "step": 3646 }, { "epoch": 0.1542854725442085, "grad_norm": 0.5062342882156372, "learning_rate": 0.001, "loss": 1.7605, "step": 3647 }, { "epoch": 0.15432777730772484, "grad_norm": 1.528459072113037, "learning_rate": 0.001, "loss": 3.088, "step": 3648 }, { "epoch": 0.15437008207124123, "grad_norm": 0.27189287543296814, "learning_rate": 0.001, "loss": 1.9692, "step": 3649 }, { "epoch": 0.15441238683475758, "grad_norm": 0.22782421112060547, "learning_rate": 0.001, "loss": 1.9446, "step": 3650 }, { "epoch": 0.15445469159827396, "grad_norm": 0.24183671176433563, "learning_rate": 0.001, "loss": 2.9768, "step": 3651 }, { "epoch": 0.15449699636179035, "grad_norm": 0.37327948212623596, "learning_rate": 0.001, "loss": 2.5958, "step": 3652 }, { "epoch": 0.1545393011253067, "grad_norm": 0.2614731788635254, "learning_rate": 0.001, "loss": 2.8753, "step": 3653 }, { "epoch": 0.15458160588882308, "grad_norm": 0.30423954129219055, "learning_rate": 0.001, "loss": 2.9914, "step": 3654 }, { "epoch": 0.15462391065233946, "grad_norm": 0.2651650607585907, "learning_rate": 0.001, "loss": 2.2149, "step": 3655 }, { "epoch": 0.15466621541585582, "grad_norm": 0.29150527715682983, "learning_rate": 0.001, "loss": 3.1139, "step": 3656 }, { "epoch": 0.1547085201793722, "grad_norm": 0.26949405670166016, "learning_rate": 0.001, "loss": 3.0504, "step": 3657 }, { "epoch": 0.15475082494288858, "grad_norm": 0.277124285697937, "learning_rate": 0.001, "loss": 2.7155, "step": 3658 }, { "epoch": 0.15479312970640494, "grad_norm": 0.24489636719226837, "learning_rate": 0.001, "loss": 2.4702, "step": 3659 }, { "epoch": 0.15483543446992132, "grad_norm": 0.18855103850364685, "learning_rate": 0.001, "loss": 2.2161, "step": 3660 }, { "epoch": 0.15487773923343767, "grad_norm": 0.311081200838089, "learning_rate": 0.001, "loss": 2.6019, "step": 3661 }, { "epoch": 0.15492004399695405, "grad_norm": 0.22517842054367065, "learning_rate": 0.001, "loss": 2.035, "step": 3662 }, { "epoch": 0.15496234876047044, "grad_norm": 0.20022818446159363, "learning_rate": 0.001, "loss": 3.0709, "step": 3663 }, { "epoch": 0.1550046535239868, "grad_norm": 0.3839741349220276, "learning_rate": 0.001, "loss": 1.8192, "step": 3664 }, { "epoch": 0.15504695828750317, "grad_norm": 0.3007325828075409, "learning_rate": 0.001, "loss": 3.0195, "step": 3665 }, { "epoch": 0.15508926305101955, "grad_norm": 0.2808815538883209, "learning_rate": 0.001, "loss": 2.1549, "step": 3666 }, { "epoch": 0.1551315678145359, "grad_norm": 0.23041900992393494, "learning_rate": 0.001, "loss": 1.8212, "step": 3667 }, { "epoch": 0.1551738725780523, "grad_norm": 0.2662278711795807, "learning_rate": 0.001, "loss": 2.8651, "step": 3668 }, { "epoch": 0.15521617734156867, "grad_norm": 1.5377527475357056, "learning_rate": 0.001, "loss": 2.4422, "step": 3669 }, { "epoch": 0.15525848210508503, "grad_norm": 0.22316411137580872, "learning_rate": 0.001, "loss": 2.1897, "step": 3670 }, { "epoch": 0.1553007868686014, "grad_norm": 0.26653704047203064, "learning_rate": 0.001, "loss": 2.4008, "step": 3671 }, { "epoch": 0.15534309163211776, "grad_norm": 0.2589184045791626, "learning_rate": 0.001, "loss": 2.8367, "step": 3672 }, { "epoch": 0.15538539639563415, "grad_norm": 0.26263269782066345, "learning_rate": 0.001, "loss": 1.9108, "step": 3673 }, { "epoch": 0.15542770115915053, "grad_norm": 0.3323366343975067, "learning_rate": 0.001, "loss": 2.1237, "step": 3674 }, { "epoch": 0.15547000592266688, "grad_norm": 0.22809675335884094, "learning_rate": 0.001, "loss": 1.8055, "step": 3675 }, { "epoch": 0.15551231068618326, "grad_norm": 0.2392113357782364, "learning_rate": 0.001, "loss": 3.3006, "step": 3676 }, { "epoch": 0.15555461544969965, "grad_norm": 0.24501052498817444, "learning_rate": 0.001, "loss": 2.4065, "step": 3677 }, { "epoch": 0.155596920213216, "grad_norm": 1.7419697046279907, "learning_rate": 0.001, "loss": 2.0626, "step": 3678 }, { "epoch": 0.15563922497673238, "grad_norm": 0.2538470923900604, "learning_rate": 0.001, "loss": 2.548, "step": 3679 }, { "epoch": 0.15568152974024876, "grad_norm": 0.27344977855682373, "learning_rate": 0.001, "loss": 1.8455, "step": 3680 }, { "epoch": 0.15572383450376512, "grad_norm": 5.969301700592041, "learning_rate": 0.001, "loss": 1.987, "step": 3681 }, { "epoch": 0.1557661392672815, "grad_norm": 0.21255825459957123, "learning_rate": 0.001, "loss": 2.1096, "step": 3682 }, { "epoch": 0.15580844403079785, "grad_norm": 0.25273945927619934, "learning_rate": 0.001, "loss": 2.4582, "step": 3683 }, { "epoch": 0.15585074879431424, "grad_norm": 0.2562407851219177, "learning_rate": 0.001, "loss": 1.9308, "step": 3684 }, { "epoch": 0.15589305355783062, "grad_norm": 0.596889853477478, "learning_rate": 0.001, "loss": 2.817, "step": 3685 }, { "epoch": 0.15593535832134697, "grad_norm": 0.55907142162323, "learning_rate": 0.001, "loss": 2.8084, "step": 3686 }, { "epoch": 0.15597766308486335, "grad_norm": 2.2937467098236084, "learning_rate": 0.001, "loss": 2.1206, "step": 3687 }, { "epoch": 0.15601996784837974, "grad_norm": 0.2151869237422943, "learning_rate": 0.001, "loss": 2.1484, "step": 3688 }, { "epoch": 0.1560622726118961, "grad_norm": 0.2432985007762909, "learning_rate": 0.001, "loss": 2.1668, "step": 3689 }, { "epoch": 0.15610457737541247, "grad_norm": 0.26198938488960266, "learning_rate": 0.001, "loss": 2.4545, "step": 3690 }, { "epoch": 0.15614688213892886, "grad_norm": 0.2777709364891052, "learning_rate": 0.001, "loss": 2.664, "step": 3691 }, { "epoch": 0.1561891869024452, "grad_norm": 0.28329089283943176, "learning_rate": 0.001, "loss": 2.5311, "step": 3692 }, { "epoch": 0.1562314916659616, "grad_norm": 0.3065381944179535, "learning_rate": 0.001, "loss": 2.1369, "step": 3693 }, { "epoch": 0.15627379642947795, "grad_norm": 0.28511857986450195, "learning_rate": 0.001, "loss": 2.6129, "step": 3694 }, { "epoch": 0.15631610119299433, "grad_norm": 0.24408607184886932, "learning_rate": 0.001, "loss": 2.0128, "step": 3695 }, { "epoch": 0.1563584059565107, "grad_norm": 0.8473067879676819, "learning_rate": 0.001, "loss": 2.2145, "step": 3696 }, { "epoch": 0.15640071072002706, "grad_norm": 0.42082521319389343, "learning_rate": 0.001, "loss": 2.1128, "step": 3697 }, { "epoch": 0.15644301548354345, "grad_norm": 0.42995426058769226, "learning_rate": 0.001, "loss": 2.3262, "step": 3698 }, { "epoch": 0.15648532024705983, "grad_norm": 0.3102007210254669, "learning_rate": 0.001, "loss": 2.7998, "step": 3699 }, { "epoch": 0.15652762501057618, "grad_norm": 0.3263281285762787, "learning_rate": 0.001, "loss": 2.2744, "step": 3700 }, { "epoch": 0.15656992977409256, "grad_norm": 2.9346940517425537, "learning_rate": 0.001, "loss": 2.1367, "step": 3701 }, { "epoch": 0.15661223453760895, "grad_norm": 0.298091858625412, "learning_rate": 0.001, "loss": 3.0486, "step": 3702 }, { "epoch": 0.1566545393011253, "grad_norm": 0.5562085509300232, "learning_rate": 0.001, "loss": 2.0666, "step": 3703 }, { "epoch": 0.15669684406464168, "grad_norm": 0.26188915967941284, "learning_rate": 0.001, "loss": 1.9705, "step": 3704 }, { "epoch": 0.15673914882815804, "grad_norm": 0.3531795144081116, "learning_rate": 0.001, "loss": 2.6941, "step": 3705 }, { "epoch": 0.15678145359167442, "grad_norm": 0.32536250352859497, "learning_rate": 0.001, "loss": 2.1217, "step": 3706 }, { "epoch": 0.1568237583551908, "grad_norm": 0.2858486771583557, "learning_rate": 0.001, "loss": 2.2795, "step": 3707 }, { "epoch": 0.15686606311870716, "grad_norm": 0.2625608742237091, "learning_rate": 0.001, "loss": 3.3786, "step": 3708 }, { "epoch": 0.15690836788222354, "grad_norm": 0.2354310154914856, "learning_rate": 0.001, "loss": 2.0803, "step": 3709 }, { "epoch": 0.15695067264573992, "grad_norm": 0.25832563638687134, "learning_rate": 0.001, "loss": 3.1712, "step": 3710 }, { "epoch": 0.15699297740925627, "grad_norm": 2.9774281978607178, "learning_rate": 0.001, "loss": 2.6699, "step": 3711 }, { "epoch": 0.15703528217277266, "grad_norm": 0.945013165473938, "learning_rate": 0.001, "loss": 3.3384, "step": 3712 }, { "epoch": 0.15707758693628904, "grad_norm": 0.7904821634292603, "learning_rate": 0.001, "loss": 2.3788, "step": 3713 }, { "epoch": 0.1571198916998054, "grad_norm": 0.2616831958293915, "learning_rate": 0.001, "loss": 2.8228, "step": 3714 }, { "epoch": 0.15716219646332177, "grad_norm": 0.5680962204933167, "learning_rate": 0.001, "loss": 2.8964, "step": 3715 }, { "epoch": 0.15720450122683813, "grad_norm": 1.2796063423156738, "learning_rate": 0.001, "loss": 2.6141, "step": 3716 }, { "epoch": 0.1572468059903545, "grad_norm": 0.34902986884117126, "learning_rate": 0.001, "loss": 3.4248, "step": 3717 }, { "epoch": 0.1572891107538709, "grad_norm": 0.30092769861221313, "learning_rate": 0.001, "loss": 2.7998, "step": 3718 }, { "epoch": 0.15733141551738725, "grad_norm": 0.2545045018196106, "learning_rate": 0.001, "loss": 2.4546, "step": 3719 }, { "epoch": 0.15737372028090363, "grad_norm": 0.30985644459724426, "learning_rate": 0.001, "loss": 2.1622, "step": 3720 }, { "epoch": 0.15741602504442, "grad_norm": 0.21358482539653778, "learning_rate": 0.001, "loss": 1.9671, "step": 3721 }, { "epoch": 0.15745832980793636, "grad_norm": 0.2403787076473236, "learning_rate": 0.001, "loss": 1.8596, "step": 3722 }, { "epoch": 0.15750063457145275, "grad_norm": 0.25962314009666443, "learning_rate": 0.001, "loss": 2.595, "step": 3723 }, { "epoch": 0.15754293933496913, "grad_norm": 0.9243412613868713, "learning_rate": 0.001, "loss": 2.2004, "step": 3724 }, { "epoch": 0.15758524409848548, "grad_norm": 0.2106955498456955, "learning_rate": 0.001, "loss": 3.3038, "step": 3725 }, { "epoch": 0.15762754886200186, "grad_norm": 0.26728370785713196, "learning_rate": 0.001, "loss": 2.3843, "step": 3726 }, { "epoch": 0.15766985362551825, "grad_norm": 0.26427310705184937, "learning_rate": 0.001, "loss": 2.1108, "step": 3727 }, { "epoch": 0.1577121583890346, "grad_norm": 0.28308454155921936, "learning_rate": 0.001, "loss": 2.4893, "step": 3728 }, { "epoch": 0.15775446315255098, "grad_norm": 0.266253262758255, "learning_rate": 0.001, "loss": 2.521, "step": 3729 }, { "epoch": 0.15779676791606734, "grad_norm": 0.4611959159374237, "learning_rate": 0.001, "loss": 2.2577, "step": 3730 }, { "epoch": 0.15783907267958372, "grad_norm": 0.3216572701931, "learning_rate": 0.001, "loss": 3.0579, "step": 3731 }, { "epoch": 0.1578813774431001, "grad_norm": 0.2995474934577942, "learning_rate": 0.001, "loss": 2.8257, "step": 3732 }, { "epoch": 0.15792368220661646, "grad_norm": 0.704836368560791, "learning_rate": 0.001, "loss": 2.7877, "step": 3733 }, { "epoch": 0.15796598697013284, "grad_norm": 0.4456735849380493, "learning_rate": 0.001, "loss": 2.1902, "step": 3734 }, { "epoch": 0.15800829173364922, "grad_norm": 0.21803617477416992, "learning_rate": 0.001, "loss": 2.4335, "step": 3735 }, { "epoch": 0.15805059649716557, "grad_norm": 0.24227195978164673, "learning_rate": 0.001, "loss": 1.9287, "step": 3736 }, { "epoch": 0.15809290126068196, "grad_norm": 0.2500062882900238, "learning_rate": 0.001, "loss": 2.7866, "step": 3737 }, { "epoch": 0.15813520602419834, "grad_norm": 0.24755047261714935, "learning_rate": 0.001, "loss": 2.8725, "step": 3738 }, { "epoch": 0.1581775107877147, "grad_norm": 0.26385197043418884, "learning_rate": 0.001, "loss": 2.4956, "step": 3739 }, { "epoch": 0.15821981555123107, "grad_norm": 0.3932577967643738, "learning_rate": 0.001, "loss": 2.5877, "step": 3740 }, { "epoch": 0.15826212031474743, "grad_norm": 0.21887458860874176, "learning_rate": 0.001, "loss": 2.3305, "step": 3741 }, { "epoch": 0.1583044250782638, "grad_norm": 0.2982005774974823, "learning_rate": 0.001, "loss": 3.3444, "step": 3742 }, { "epoch": 0.1583467298417802, "grad_norm": 0.4082737863063812, "learning_rate": 0.001, "loss": 3.0144, "step": 3743 }, { "epoch": 0.15838903460529655, "grad_norm": 0.4862198829650879, "learning_rate": 0.001, "loss": 2.7298, "step": 3744 }, { "epoch": 0.15843133936881293, "grad_norm": 0.2732762098312378, "learning_rate": 0.001, "loss": 2.9059, "step": 3745 }, { "epoch": 0.1584736441323293, "grad_norm": 0.2768647372722626, "learning_rate": 0.001, "loss": 2.005, "step": 3746 }, { "epoch": 0.15851594889584567, "grad_norm": 0.2584654688835144, "learning_rate": 0.001, "loss": 2.2505, "step": 3747 }, { "epoch": 0.15855825365936205, "grad_norm": 0.2902711033821106, "learning_rate": 0.001, "loss": 2.4176, "step": 3748 }, { "epoch": 0.15860055842287843, "grad_norm": 0.2150590866804123, "learning_rate": 0.001, "loss": 1.8724, "step": 3749 }, { "epoch": 0.15864286318639478, "grad_norm": 1.3695828914642334, "learning_rate": 0.001, "loss": 1.9712, "step": 3750 }, { "epoch": 0.15868516794991117, "grad_norm": 0.3454398810863495, "learning_rate": 0.001, "loss": 2.6138, "step": 3751 }, { "epoch": 0.15872747271342752, "grad_norm": 0.7297479510307312, "learning_rate": 0.001, "loss": 3.5123, "step": 3752 }, { "epoch": 0.1587697774769439, "grad_norm": 0.43115225434303284, "learning_rate": 0.001, "loss": 1.886, "step": 3753 }, { "epoch": 0.15881208224046028, "grad_norm": 0.2626439929008484, "learning_rate": 0.001, "loss": 1.6985, "step": 3754 }, { "epoch": 0.15885438700397664, "grad_norm": 1.067133903503418, "learning_rate": 0.001, "loss": 2.0347, "step": 3755 }, { "epoch": 0.15889669176749302, "grad_norm": 0.28213247656822205, "learning_rate": 0.001, "loss": 2.6482, "step": 3756 }, { "epoch": 0.1589389965310094, "grad_norm": 0.3096356689929962, "learning_rate": 0.001, "loss": 2.4303, "step": 3757 }, { "epoch": 0.15898130129452576, "grad_norm": 1.1172544956207275, "learning_rate": 0.001, "loss": 1.4872, "step": 3758 }, { "epoch": 0.15902360605804214, "grad_norm": 0.5289722681045532, "learning_rate": 0.001, "loss": 2.2845, "step": 3759 }, { "epoch": 0.15906591082155852, "grad_norm": 0.41261282563209534, "learning_rate": 0.001, "loss": 2.7228, "step": 3760 }, { "epoch": 0.15910821558507487, "grad_norm": 0.2809741199016571, "learning_rate": 0.001, "loss": 2.4155, "step": 3761 }, { "epoch": 0.15915052034859126, "grad_norm": 0.6320580840110779, "learning_rate": 0.001, "loss": 1.6108, "step": 3762 }, { "epoch": 0.1591928251121076, "grad_norm": 0.2594701051712036, "learning_rate": 0.001, "loss": 1.7907, "step": 3763 }, { "epoch": 0.159235129875624, "grad_norm": 0.266510933637619, "learning_rate": 0.001, "loss": 1.9833, "step": 3764 }, { "epoch": 0.15927743463914038, "grad_norm": 0.24247369170188904, "learning_rate": 0.001, "loss": 2.0623, "step": 3765 }, { "epoch": 0.15931973940265673, "grad_norm": 0.23818761110305786, "learning_rate": 0.001, "loss": 2.2446, "step": 3766 }, { "epoch": 0.1593620441661731, "grad_norm": 0.27693408727645874, "learning_rate": 0.001, "loss": 2.7074, "step": 3767 }, { "epoch": 0.1594043489296895, "grad_norm": 0.6779216527938843, "learning_rate": 0.001, "loss": 2.1112, "step": 3768 }, { "epoch": 0.15944665369320585, "grad_norm": 0.5291603207588196, "learning_rate": 0.001, "loss": 2.0643, "step": 3769 }, { "epoch": 0.15948895845672223, "grad_norm": 0.2355789989233017, "learning_rate": 0.001, "loss": 2.9922, "step": 3770 }, { "epoch": 0.1595312632202386, "grad_norm": 0.334757536649704, "learning_rate": 0.001, "loss": 3.6081, "step": 3771 }, { "epoch": 0.15957356798375497, "grad_norm": 0.2403586208820343, "learning_rate": 0.001, "loss": 1.8171, "step": 3772 }, { "epoch": 0.15961587274727135, "grad_norm": 0.697832465171814, "learning_rate": 0.001, "loss": 2.3378, "step": 3773 }, { "epoch": 0.1596581775107877, "grad_norm": 0.7586553692817688, "learning_rate": 0.001, "loss": 2.3106, "step": 3774 }, { "epoch": 0.15970048227430408, "grad_norm": 0.365491658449173, "learning_rate": 0.001, "loss": 3.1585, "step": 3775 }, { "epoch": 0.15974278703782047, "grad_norm": 1.0467041730880737, "learning_rate": 0.001, "loss": 3.8686, "step": 3776 }, { "epoch": 0.15978509180133682, "grad_norm": 0.23686069250106812, "learning_rate": 0.001, "loss": 3.3134, "step": 3777 }, { "epoch": 0.1598273965648532, "grad_norm": 0.2806885838508606, "learning_rate": 0.001, "loss": 2.4206, "step": 3778 }, { "epoch": 0.15986970132836958, "grad_norm": 0.25082680583000183, "learning_rate": 0.001, "loss": 2.3114, "step": 3779 }, { "epoch": 0.15991200609188594, "grad_norm": 0.22670866549015045, "learning_rate": 0.001, "loss": 2.852, "step": 3780 }, { "epoch": 0.15995431085540232, "grad_norm": 0.2668560743331909, "learning_rate": 0.001, "loss": 2.3196, "step": 3781 }, { "epoch": 0.1599966156189187, "grad_norm": 1.557098150253296, "learning_rate": 0.001, "loss": 1.6908, "step": 3782 }, { "epoch": 0.16003892038243506, "grad_norm": 0.22779476642608643, "learning_rate": 0.001, "loss": 3.2161, "step": 3783 }, { "epoch": 0.16008122514595144, "grad_norm": 0.777952253818512, "learning_rate": 0.001, "loss": 1.5536, "step": 3784 }, { "epoch": 0.1601235299094678, "grad_norm": 1.5574365854263306, "learning_rate": 0.001, "loss": 2.5072, "step": 3785 }, { "epoch": 0.16016583467298418, "grad_norm": 0.299005925655365, "learning_rate": 0.001, "loss": 3.0707, "step": 3786 }, { "epoch": 0.16020813943650056, "grad_norm": 0.3228248953819275, "learning_rate": 0.001, "loss": 2.3951, "step": 3787 }, { "epoch": 0.1602504442000169, "grad_norm": 1.0663105249404907, "learning_rate": 0.001, "loss": 2.2717, "step": 3788 }, { "epoch": 0.1602927489635333, "grad_norm": 3.4064865112304688, "learning_rate": 0.001, "loss": 2.4347, "step": 3789 }, { "epoch": 0.16033505372704968, "grad_norm": 0.3657093048095703, "learning_rate": 0.001, "loss": 2.6096, "step": 3790 }, { "epoch": 0.16037735849056603, "grad_norm": 0.3097366690635681, "learning_rate": 0.001, "loss": 2.0905, "step": 3791 }, { "epoch": 0.1604196632540824, "grad_norm": 0.35716721415519714, "learning_rate": 0.001, "loss": 2.5812, "step": 3792 }, { "epoch": 0.1604619680175988, "grad_norm": 0.32534101605415344, "learning_rate": 0.001, "loss": 2.9844, "step": 3793 }, { "epoch": 0.16050427278111515, "grad_norm": 0.36417415738105774, "learning_rate": 0.001, "loss": 2.5366, "step": 3794 }, { "epoch": 0.16054657754463153, "grad_norm": 0.26519954204559326, "learning_rate": 0.001, "loss": 2.7738, "step": 3795 }, { "epoch": 0.16058888230814788, "grad_norm": 0.473812073469162, "learning_rate": 0.001, "loss": 2.2917, "step": 3796 }, { "epoch": 0.16063118707166427, "grad_norm": 0.3712599277496338, "learning_rate": 0.001, "loss": 2.2126, "step": 3797 }, { "epoch": 0.16067349183518065, "grad_norm": 0.30291634798049927, "learning_rate": 0.001, "loss": 2.1419, "step": 3798 }, { "epoch": 0.160715796598697, "grad_norm": 0.30859270691871643, "learning_rate": 0.001, "loss": 1.9372, "step": 3799 }, { "epoch": 0.16075810136221338, "grad_norm": 0.24209967255592346, "learning_rate": 0.001, "loss": 1.9439, "step": 3800 }, { "epoch": 0.16080040612572977, "grad_norm": 0.30465030670166016, "learning_rate": 0.001, "loss": 3.1223, "step": 3801 }, { "epoch": 0.16084271088924612, "grad_norm": 0.29678988456726074, "learning_rate": 0.001, "loss": 2.2062, "step": 3802 }, { "epoch": 0.1608850156527625, "grad_norm": 0.21192920207977295, "learning_rate": 0.001, "loss": 2.1279, "step": 3803 }, { "epoch": 0.16092732041627889, "grad_norm": 2.0357847213745117, "learning_rate": 0.001, "loss": 3.1239, "step": 3804 }, { "epoch": 0.16096962517979524, "grad_norm": 0.31351718306541443, "learning_rate": 0.001, "loss": 2.176, "step": 3805 }, { "epoch": 0.16101192994331162, "grad_norm": 0.3054656684398651, "learning_rate": 0.001, "loss": 2.5238, "step": 3806 }, { "epoch": 0.16105423470682798, "grad_norm": 0.5278902053833008, "learning_rate": 0.001, "loss": 1.6492, "step": 3807 }, { "epoch": 0.16109653947034436, "grad_norm": 0.300001323223114, "learning_rate": 0.001, "loss": 2.562, "step": 3808 }, { "epoch": 0.16113884423386074, "grad_norm": 0.34436389803886414, "learning_rate": 0.001, "loss": 3.3102, "step": 3809 }, { "epoch": 0.1611811489973771, "grad_norm": 0.33348026871681213, "learning_rate": 0.001, "loss": 3.1311, "step": 3810 }, { "epoch": 0.16122345376089348, "grad_norm": 0.24410240352153778, "learning_rate": 0.001, "loss": 3.0828, "step": 3811 }, { "epoch": 0.16126575852440986, "grad_norm": 0.21113960444927216, "learning_rate": 0.001, "loss": 1.9945, "step": 3812 }, { "epoch": 0.1613080632879262, "grad_norm": 0.2360842376947403, "learning_rate": 0.001, "loss": 1.9214, "step": 3813 }, { "epoch": 0.1613503680514426, "grad_norm": 0.3132474720478058, "learning_rate": 0.001, "loss": 3.3039, "step": 3814 }, { "epoch": 0.16139267281495898, "grad_norm": 0.23220689594745636, "learning_rate": 0.001, "loss": 2.3121, "step": 3815 }, { "epoch": 0.16143497757847533, "grad_norm": 0.3871288001537323, "learning_rate": 0.001, "loss": 3.1045, "step": 3816 }, { "epoch": 0.1614772823419917, "grad_norm": 0.2573937773704529, "learning_rate": 0.001, "loss": 2.2547, "step": 3817 }, { "epoch": 0.16151958710550807, "grad_norm": 0.23230381309986115, "learning_rate": 0.001, "loss": 1.9308, "step": 3818 }, { "epoch": 0.16156189186902445, "grad_norm": 0.2752211093902588, "learning_rate": 0.001, "loss": 3.6371, "step": 3819 }, { "epoch": 0.16160419663254083, "grad_norm": 0.2206379473209381, "learning_rate": 0.001, "loss": 3.6659, "step": 3820 }, { "epoch": 0.16164650139605719, "grad_norm": 0.5794355869293213, "learning_rate": 0.001, "loss": 2.3093, "step": 3821 }, { "epoch": 0.16168880615957357, "grad_norm": 0.23683209717273712, "learning_rate": 0.001, "loss": 2.5634, "step": 3822 }, { "epoch": 0.16173111092308995, "grad_norm": 0.2306908518075943, "learning_rate": 0.001, "loss": 1.9803, "step": 3823 }, { "epoch": 0.1617734156866063, "grad_norm": 0.22796887159347534, "learning_rate": 0.001, "loss": 2.3285, "step": 3824 }, { "epoch": 0.16181572045012269, "grad_norm": 0.27235713601112366, "learning_rate": 0.001, "loss": 2.6457, "step": 3825 }, { "epoch": 0.16185802521363907, "grad_norm": 1.1337499618530273, "learning_rate": 0.001, "loss": 2.7345, "step": 3826 }, { "epoch": 0.16190032997715542, "grad_norm": 0.22080039978027344, "learning_rate": 0.001, "loss": 2.0913, "step": 3827 }, { "epoch": 0.1619426347406718, "grad_norm": 0.24313177168369293, "learning_rate": 0.001, "loss": 2.4784, "step": 3828 }, { "epoch": 0.16198493950418816, "grad_norm": 0.28508949279785156, "learning_rate": 0.001, "loss": 2.8932, "step": 3829 }, { "epoch": 0.16202724426770454, "grad_norm": 0.37626540660858154, "learning_rate": 0.001, "loss": 2.596, "step": 3830 }, { "epoch": 0.16206954903122092, "grad_norm": 0.22318224608898163, "learning_rate": 0.001, "loss": 2.2057, "step": 3831 }, { "epoch": 0.16211185379473728, "grad_norm": 0.2062063217163086, "learning_rate": 0.001, "loss": 1.5958, "step": 3832 }, { "epoch": 0.16215415855825366, "grad_norm": 0.2109755128622055, "learning_rate": 0.001, "loss": 2.1923, "step": 3833 }, { "epoch": 0.16219646332177004, "grad_norm": 0.21446409821510315, "learning_rate": 0.001, "loss": 2.1621, "step": 3834 }, { "epoch": 0.1622387680852864, "grad_norm": 0.21408231556415558, "learning_rate": 0.001, "loss": 2.683, "step": 3835 }, { "epoch": 0.16228107284880278, "grad_norm": 0.2532133460044861, "learning_rate": 0.001, "loss": 2.2158, "step": 3836 }, { "epoch": 0.16232337761231916, "grad_norm": 0.27173325419425964, "learning_rate": 0.001, "loss": 2.2614, "step": 3837 }, { "epoch": 0.1623656823758355, "grad_norm": 0.2316475659608841, "learning_rate": 0.001, "loss": 2.1505, "step": 3838 }, { "epoch": 0.1624079871393519, "grad_norm": 0.23597820103168488, "learning_rate": 0.001, "loss": 2.2652, "step": 3839 }, { "epoch": 0.16245029190286828, "grad_norm": 0.23943273723125458, "learning_rate": 0.001, "loss": 2.0347, "step": 3840 }, { "epoch": 0.16249259666638463, "grad_norm": 0.8113381266593933, "learning_rate": 0.001, "loss": 2.0472, "step": 3841 }, { "epoch": 0.162534901429901, "grad_norm": 2.3139660358428955, "learning_rate": 0.001, "loss": 1.8397, "step": 3842 }, { "epoch": 0.16257720619341737, "grad_norm": 0.42972567677497864, "learning_rate": 0.001, "loss": 2.0672, "step": 3843 }, { "epoch": 0.16261951095693375, "grad_norm": 0.2802884578704834, "learning_rate": 0.001, "loss": 1.9931, "step": 3844 }, { "epoch": 0.16266181572045013, "grad_norm": 0.3165445625782013, "learning_rate": 0.001, "loss": 2.3039, "step": 3845 }, { "epoch": 0.16270412048396649, "grad_norm": 0.5963391661643982, "learning_rate": 0.001, "loss": 1.952, "step": 3846 }, { "epoch": 0.16274642524748287, "grad_norm": 0.25218188762664795, "learning_rate": 0.001, "loss": 2.3469, "step": 3847 }, { "epoch": 0.16278873001099925, "grad_norm": 0.31111615896224976, "learning_rate": 0.001, "loss": 2.0224, "step": 3848 }, { "epoch": 0.1628310347745156, "grad_norm": 0.3019029200077057, "learning_rate": 0.001, "loss": 2.4928, "step": 3849 }, { "epoch": 0.16287333953803199, "grad_norm": 0.22773532569408417, "learning_rate": 0.001, "loss": 2.9932, "step": 3850 }, { "epoch": 0.16291564430154837, "grad_norm": 0.2483637034893036, "learning_rate": 0.001, "loss": 2.8444, "step": 3851 }, { "epoch": 0.16295794906506472, "grad_norm": 0.2056424915790558, "learning_rate": 0.001, "loss": 1.9696, "step": 3852 }, { "epoch": 0.1630002538285811, "grad_norm": 0.22549600899219513, "learning_rate": 0.001, "loss": 1.7296, "step": 3853 }, { "epoch": 0.16304255859209746, "grad_norm": 0.2797006368637085, "learning_rate": 0.001, "loss": 2.0571, "step": 3854 }, { "epoch": 0.16308486335561384, "grad_norm": 0.2678796947002411, "learning_rate": 0.001, "loss": 1.9062, "step": 3855 }, { "epoch": 0.16312716811913022, "grad_norm": 0.24409295618534088, "learning_rate": 0.001, "loss": 2.4397, "step": 3856 }, { "epoch": 0.16316947288264658, "grad_norm": 0.25241202116012573, "learning_rate": 0.001, "loss": 2.1752, "step": 3857 }, { "epoch": 0.16321177764616296, "grad_norm": 0.23605115711688995, "learning_rate": 0.001, "loss": 2.9556, "step": 3858 }, { "epoch": 0.16325408240967934, "grad_norm": 0.26487863063812256, "learning_rate": 0.001, "loss": 1.9874, "step": 3859 }, { "epoch": 0.1632963871731957, "grad_norm": 0.24517613649368286, "learning_rate": 0.001, "loss": 2.321, "step": 3860 }, { "epoch": 0.16333869193671208, "grad_norm": 0.20442011952400208, "learning_rate": 0.001, "loss": 1.8018, "step": 3861 }, { "epoch": 0.16338099670022846, "grad_norm": 0.24029582738876343, "learning_rate": 0.001, "loss": 2.722, "step": 3862 }, { "epoch": 0.1634233014637448, "grad_norm": 0.2389087826013565, "learning_rate": 0.001, "loss": 1.9895, "step": 3863 }, { "epoch": 0.1634656062272612, "grad_norm": 0.2552791237831116, "learning_rate": 0.001, "loss": 2.4092, "step": 3864 }, { "epoch": 0.16350791099077755, "grad_norm": 0.20444203913211823, "learning_rate": 0.001, "loss": 1.8636, "step": 3865 }, { "epoch": 0.16355021575429393, "grad_norm": 1.1959320306777954, "learning_rate": 0.001, "loss": 2.6259, "step": 3866 }, { "epoch": 0.1635925205178103, "grad_norm": 0.20237359404563904, "learning_rate": 0.001, "loss": 1.7948, "step": 3867 }, { "epoch": 0.16363482528132667, "grad_norm": 0.21189026534557343, "learning_rate": 0.001, "loss": 2.6693, "step": 3868 }, { "epoch": 0.16367713004484305, "grad_norm": 0.25462326407432556, "learning_rate": 0.001, "loss": 1.9829, "step": 3869 }, { "epoch": 0.16371943480835943, "grad_norm": 0.24201983213424683, "learning_rate": 0.001, "loss": 2.1617, "step": 3870 }, { "epoch": 0.1637617395718758, "grad_norm": 0.22964642941951752, "learning_rate": 0.001, "loss": 2.9446, "step": 3871 }, { "epoch": 0.16380404433539217, "grad_norm": 0.2601517140865326, "learning_rate": 0.001, "loss": 2.3314, "step": 3872 }, { "epoch": 0.16384634909890855, "grad_norm": 0.19910211861133575, "learning_rate": 0.001, "loss": 2.236, "step": 3873 }, { "epoch": 0.1638886538624249, "grad_norm": 0.2370014637708664, "learning_rate": 0.001, "loss": 2.767, "step": 3874 }, { "epoch": 0.1639309586259413, "grad_norm": 0.273014098405838, "learning_rate": 0.001, "loss": 2.8735, "step": 3875 }, { "epoch": 0.16397326338945764, "grad_norm": 0.2136770635843277, "learning_rate": 0.001, "loss": 2.2062, "step": 3876 }, { "epoch": 0.16401556815297402, "grad_norm": 0.196532741189003, "learning_rate": 0.001, "loss": 2.1683, "step": 3877 }, { "epoch": 0.1640578729164904, "grad_norm": 1.7746950387954712, "learning_rate": 0.001, "loss": 2.3646, "step": 3878 }, { "epoch": 0.16410017768000676, "grad_norm": 0.29451486468315125, "learning_rate": 0.001, "loss": 2.2666, "step": 3879 }, { "epoch": 0.16414248244352314, "grad_norm": 0.6255790591239929, "learning_rate": 0.001, "loss": 2.7286, "step": 3880 }, { "epoch": 0.16418478720703952, "grad_norm": 0.49284324049949646, "learning_rate": 0.001, "loss": 2.0154, "step": 3881 }, { "epoch": 0.16422709197055588, "grad_norm": 0.27464428544044495, "learning_rate": 0.001, "loss": 2.347, "step": 3882 }, { "epoch": 0.16426939673407226, "grad_norm": 0.2803431451320648, "learning_rate": 0.001, "loss": 2.6582, "step": 3883 }, { "epoch": 0.16431170149758864, "grad_norm": 0.2514314353466034, "learning_rate": 0.001, "loss": 2.6842, "step": 3884 }, { "epoch": 0.164354006261105, "grad_norm": 0.30920740962028503, "learning_rate": 0.001, "loss": 2.5913, "step": 3885 }, { "epoch": 0.16439631102462138, "grad_norm": 0.6570300459861755, "learning_rate": 0.001, "loss": 2.4439, "step": 3886 }, { "epoch": 0.16443861578813773, "grad_norm": 0.24634666740894318, "learning_rate": 0.001, "loss": 2.1094, "step": 3887 }, { "epoch": 0.16448092055165411, "grad_norm": 0.2316695600748062, "learning_rate": 0.001, "loss": 2.3292, "step": 3888 }, { "epoch": 0.1645232253151705, "grad_norm": 0.6251176595687866, "learning_rate": 0.001, "loss": 2.9064, "step": 3889 }, { "epoch": 0.16456553007868685, "grad_norm": 0.28041884303092957, "learning_rate": 0.001, "loss": 2.4278, "step": 3890 }, { "epoch": 0.16460783484220323, "grad_norm": 0.6248710751533508, "learning_rate": 0.001, "loss": 3.7971, "step": 3891 }, { "epoch": 0.16465013960571961, "grad_norm": 0.24960578978061676, "learning_rate": 0.001, "loss": 3.1562, "step": 3892 }, { "epoch": 0.16469244436923597, "grad_norm": 0.24922244250774384, "learning_rate": 0.001, "loss": 3.1838, "step": 3893 }, { "epoch": 0.16473474913275235, "grad_norm": 0.21565461158752441, "learning_rate": 0.001, "loss": 2.5373, "step": 3894 }, { "epoch": 0.16477705389626873, "grad_norm": 0.30223679542541504, "learning_rate": 0.001, "loss": 1.9657, "step": 3895 }, { "epoch": 0.1648193586597851, "grad_norm": 0.23021796345710754, "learning_rate": 0.001, "loss": 2.0286, "step": 3896 }, { "epoch": 0.16486166342330147, "grad_norm": 0.22594904899597168, "learning_rate": 0.001, "loss": 2.0249, "step": 3897 }, { "epoch": 0.16490396818681782, "grad_norm": 0.20173117518424988, "learning_rate": 0.001, "loss": 2.8041, "step": 3898 }, { "epoch": 0.1649462729503342, "grad_norm": 0.2069264054298401, "learning_rate": 0.001, "loss": 1.9167, "step": 3899 }, { "epoch": 0.1649885777138506, "grad_norm": 0.4964730739593506, "learning_rate": 0.001, "loss": 2.2618, "step": 3900 }, { "epoch": 0.16503088247736694, "grad_norm": 0.2436501681804657, "learning_rate": 0.001, "loss": 1.9212, "step": 3901 }, { "epoch": 0.16507318724088332, "grad_norm": 0.27425655722618103, "learning_rate": 0.001, "loss": 2.2874, "step": 3902 }, { "epoch": 0.1651154920043997, "grad_norm": 0.18915478885173798, "learning_rate": 0.001, "loss": 1.7265, "step": 3903 }, { "epoch": 0.16515779676791606, "grad_norm": 0.34025347232818604, "learning_rate": 0.001, "loss": 2.0228, "step": 3904 }, { "epoch": 0.16520010153143244, "grad_norm": 0.3264094889163971, "learning_rate": 0.001, "loss": 2.3233, "step": 3905 }, { "epoch": 0.16524240629494882, "grad_norm": 0.2108362913131714, "learning_rate": 0.001, "loss": 2.6092, "step": 3906 }, { "epoch": 0.16528471105846518, "grad_norm": 0.27986034750938416, "learning_rate": 0.001, "loss": 1.6611, "step": 3907 }, { "epoch": 0.16532701582198156, "grad_norm": 0.22576811909675598, "learning_rate": 0.001, "loss": 1.8204, "step": 3908 }, { "epoch": 0.16536932058549791, "grad_norm": 0.5198656916618347, "learning_rate": 0.001, "loss": 2.44, "step": 3909 }, { "epoch": 0.1654116253490143, "grad_norm": 0.24308837950229645, "learning_rate": 0.001, "loss": 2.2157, "step": 3910 }, { "epoch": 0.16545393011253068, "grad_norm": 0.2985725402832031, "learning_rate": 0.001, "loss": 2.6967, "step": 3911 }, { "epoch": 0.16549623487604703, "grad_norm": 0.2176986038684845, "learning_rate": 0.001, "loss": 1.9572, "step": 3912 }, { "epoch": 0.16553853963956341, "grad_norm": 0.49889811873435974, "learning_rate": 0.001, "loss": 2.2016, "step": 3913 }, { "epoch": 0.1655808444030798, "grad_norm": 0.23154956102371216, "learning_rate": 0.001, "loss": 2.0423, "step": 3914 }, { "epoch": 0.16562314916659615, "grad_norm": 0.2753046751022339, "learning_rate": 0.001, "loss": 1.9889, "step": 3915 }, { "epoch": 0.16566545393011253, "grad_norm": 0.19035251438617706, "learning_rate": 0.001, "loss": 2.0711, "step": 3916 }, { "epoch": 0.16570775869362891, "grad_norm": 0.23184069991111755, "learning_rate": 0.001, "loss": 1.9129, "step": 3917 }, { "epoch": 0.16575006345714527, "grad_norm": 0.20346291363239288, "learning_rate": 0.001, "loss": 2.411, "step": 3918 }, { "epoch": 0.16579236822066165, "grad_norm": 0.24841812252998352, "learning_rate": 0.001, "loss": 1.8754, "step": 3919 }, { "epoch": 0.165834672984178, "grad_norm": 0.3259766697883606, "learning_rate": 0.001, "loss": 2.4933, "step": 3920 }, { "epoch": 0.1658769777476944, "grad_norm": 0.27737030386924744, "learning_rate": 0.001, "loss": 2.1435, "step": 3921 }, { "epoch": 0.16591928251121077, "grad_norm": 1.7625619173049927, "learning_rate": 0.001, "loss": 2.05, "step": 3922 }, { "epoch": 0.16596158727472712, "grad_norm": 0.24628551304340363, "learning_rate": 0.001, "loss": 2.3493, "step": 3923 }, { "epoch": 0.1660038920382435, "grad_norm": 0.29456469416618347, "learning_rate": 0.001, "loss": 2.2105, "step": 3924 }, { "epoch": 0.1660461968017599, "grad_norm": 0.20997002720832825, "learning_rate": 0.001, "loss": 1.8444, "step": 3925 }, { "epoch": 0.16608850156527624, "grad_norm": 2.3074915409088135, "learning_rate": 0.001, "loss": 2.2948, "step": 3926 }, { "epoch": 0.16613080632879262, "grad_norm": 0.3753257691860199, "learning_rate": 0.001, "loss": 3.4608, "step": 3927 }, { "epoch": 0.166173111092309, "grad_norm": 0.32698577642440796, "learning_rate": 0.001, "loss": 2.5853, "step": 3928 }, { "epoch": 0.16621541585582536, "grad_norm": 0.22866463661193848, "learning_rate": 0.001, "loss": 2.7564, "step": 3929 }, { "epoch": 0.16625772061934174, "grad_norm": 0.30471017956733704, "learning_rate": 0.001, "loss": 3.0827, "step": 3930 }, { "epoch": 0.1663000253828581, "grad_norm": 0.2387181669473648, "learning_rate": 0.001, "loss": 2.8088, "step": 3931 }, { "epoch": 0.16634233014637448, "grad_norm": 0.2087489813566208, "learning_rate": 0.001, "loss": 1.9849, "step": 3932 }, { "epoch": 0.16638463490989086, "grad_norm": 0.22151561081409454, "learning_rate": 0.001, "loss": 2.8208, "step": 3933 }, { "epoch": 0.16642693967340721, "grad_norm": 0.21167586743831635, "learning_rate": 0.001, "loss": 2.9183, "step": 3934 }, { "epoch": 0.1664692444369236, "grad_norm": 0.2525525391101837, "learning_rate": 0.001, "loss": 3.3236, "step": 3935 }, { "epoch": 0.16651154920043998, "grad_norm": 0.26379460096359253, "learning_rate": 0.001, "loss": 2.4491, "step": 3936 }, { "epoch": 0.16655385396395633, "grad_norm": 0.2644009590148926, "learning_rate": 0.001, "loss": 2.4049, "step": 3937 }, { "epoch": 0.16659615872747272, "grad_norm": 9.060093879699707, "learning_rate": 0.001, "loss": 2.6395, "step": 3938 }, { "epoch": 0.1666384634909891, "grad_norm": 0.22047647833824158, "learning_rate": 0.001, "loss": 1.9525, "step": 3939 }, { "epoch": 0.16668076825450545, "grad_norm": 0.5126127004623413, "learning_rate": 0.001, "loss": 1.6991, "step": 3940 }, { "epoch": 0.16672307301802183, "grad_norm": 0.25247883796691895, "learning_rate": 0.001, "loss": 2.1926, "step": 3941 }, { "epoch": 0.1667653777815382, "grad_norm": 0.25880908966064453, "learning_rate": 0.001, "loss": 2.1134, "step": 3942 }, { "epoch": 0.16680768254505457, "grad_norm": 0.23852358758449554, "learning_rate": 0.001, "loss": 2.3002, "step": 3943 }, { "epoch": 0.16684998730857095, "grad_norm": 0.20607604086399078, "learning_rate": 0.001, "loss": 1.8629, "step": 3944 }, { "epoch": 0.1668922920720873, "grad_norm": 0.279757559299469, "learning_rate": 0.001, "loss": 3.2208, "step": 3945 }, { "epoch": 0.1669345968356037, "grad_norm": 0.2418821156024933, "learning_rate": 0.001, "loss": 2.4583, "step": 3946 }, { "epoch": 0.16697690159912007, "grad_norm": 0.19842597842216492, "learning_rate": 0.001, "loss": 2.4909, "step": 3947 }, { "epoch": 0.16701920636263642, "grad_norm": 0.6748121976852417, "learning_rate": 0.001, "loss": 2.0367, "step": 3948 }, { "epoch": 0.1670615111261528, "grad_norm": 0.3548758625984192, "learning_rate": 0.001, "loss": 2.2625, "step": 3949 }, { "epoch": 0.1671038158896692, "grad_norm": 0.23522132635116577, "learning_rate": 0.001, "loss": 1.9258, "step": 3950 }, { "epoch": 0.16714612065318554, "grad_norm": 1.6471339464187622, "learning_rate": 0.001, "loss": 1.9447, "step": 3951 }, { "epoch": 0.16718842541670192, "grad_norm": 0.9741820693016052, "learning_rate": 0.001, "loss": 2.5284, "step": 3952 }, { "epoch": 0.16723073018021828, "grad_norm": 0.2638908624649048, "learning_rate": 0.001, "loss": 3.4217, "step": 3953 }, { "epoch": 0.16727303494373466, "grad_norm": 0.2375737726688385, "learning_rate": 0.001, "loss": 2.1506, "step": 3954 }, { "epoch": 0.16731533970725104, "grad_norm": 0.24627238512039185, "learning_rate": 0.001, "loss": 2.8165, "step": 3955 }, { "epoch": 0.1673576444707674, "grad_norm": 0.5549525618553162, "learning_rate": 0.001, "loss": 2.7948, "step": 3956 }, { "epoch": 0.16739994923428378, "grad_norm": 9.49412727355957, "learning_rate": 0.001, "loss": 2.6595, "step": 3957 }, { "epoch": 0.16744225399780016, "grad_norm": 0.2718313932418823, "learning_rate": 0.001, "loss": 1.5429, "step": 3958 }, { "epoch": 0.16748455876131652, "grad_norm": 0.2949593663215637, "learning_rate": 0.001, "loss": 2.1059, "step": 3959 }, { "epoch": 0.1675268635248329, "grad_norm": 0.4094192385673523, "learning_rate": 0.001, "loss": 2.1578, "step": 3960 }, { "epoch": 0.16756916828834928, "grad_norm": 0.23077575862407684, "learning_rate": 0.001, "loss": 1.7893, "step": 3961 }, { "epoch": 0.16761147305186563, "grad_norm": 3.148542881011963, "learning_rate": 0.001, "loss": 2.0162, "step": 3962 }, { "epoch": 0.16765377781538202, "grad_norm": 0.2932436764240265, "learning_rate": 0.001, "loss": 3.187, "step": 3963 }, { "epoch": 0.1676960825788984, "grad_norm": 4.55126953125, "learning_rate": 0.001, "loss": 4.0645, "step": 3964 }, { "epoch": 0.16773838734241475, "grad_norm": 0.31805071234703064, "learning_rate": 0.001, "loss": 1.9577, "step": 3965 }, { "epoch": 0.16778069210593113, "grad_norm": 0.6054160594940186, "learning_rate": 0.001, "loss": 2.4387, "step": 3966 }, { "epoch": 0.1678229968694475, "grad_norm": 0.34546828269958496, "learning_rate": 0.001, "loss": 1.7153, "step": 3967 }, { "epoch": 0.16786530163296387, "grad_norm": 0.29277992248535156, "learning_rate": 0.001, "loss": 2.0135, "step": 3968 }, { "epoch": 0.16790760639648025, "grad_norm": 0.25595077872276306, "learning_rate": 0.001, "loss": 3.5568, "step": 3969 }, { "epoch": 0.1679499111599966, "grad_norm": 0.38943490386009216, "learning_rate": 0.001, "loss": 2.8329, "step": 3970 }, { "epoch": 0.167992215923513, "grad_norm": 0.23987014591693878, "learning_rate": 0.001, "loss": 2.4131, "step": 3971 }, { "epoch": 0.16803452068702937, "grad_norm": 0.3031393587589264, "learning_rate": 0.001, "loss": 2.5168, "step": 3972 }, { "epoch": 0.16807682545054572, "grad_norm": 0.27220430970191956, "learning_rate": 0.001, "loss": 3.0367, "step": 3973 }, { "epoch": 0.1681191302140621, "grad_norm": 0.3574322462081909, "learning_rate": 0.001, "loss": 1.6484, "step": 3974 }, { "epoch": 0.1681614349775785, "grad_norm": 0.6500173211097717, "learning_rate": 0.001, "loss": 2.2295, "step": 3975 }, { "epoch": 0.16820373974109484, "grad_norm": 0.3387952744960785, "learning_rate": 0.001, "loss": 2.0314, "step": 3976 }, { "epoch": 0.16824604450461123, "grad_norm": 0.21707701683044434, "learning_rate": 0.001, "loss": 2.2662, "step": 3977 }, { "epoch": 0.16828834926812758, "grad_norm": 0.2807854115962982, "learning_rate": 0.001, "loss": 3.0303, "step": 3978 }, { "epoch": 0.16833065403164396, "grad_norm": 0.19229240715503693, "learning_rate": 0.001, "loss": 2.87, "step": 3979 }, { "epoch": 0.16837295879516034, "grad_norm": 0.24821369349956512, "learning_rate": 0.001, "loss": 3.3929, "step": 3980 }, { "epoch": 0.1684152635586767, "grad_norm": 0.2526116967201233, "learning_rate": 0.001, "loss": 2.3234, "step": 3981 }, { "epoch": 0.16845756832219308, "grad_norm": 0.3005029857158661, "learning_rate": 0.001, "loss": 1.7588, "step": 3982 }, { "epoch": 0.16849987308570946, "grad_norm": 2.6109414100646973, "learning_rate": 0.001, "loss": 2.1013, "step": 3983 }, { "epoch": 0.16854217784922582, "grad_norm": 0.605918824672699, "learning_rate": 0.001, "loss": 3.1133, "step": 3984 }, { "epoch": 0.1685844826127422, "grad_norm": 0.21746625006198883, "learning_rate": 0.001, "loss": 2.9431, "step": 3985 }, { "epoch": 0.16862678737625858, "grad_norm": 0.260303795337677, "learning_rate": 0.001, "loss": 2.0652, "step": 3986 }, { "epoch": 0.16866909213977493, "grad_norm": 0.2302677035331726, "learning_rate": 0.001, "loss": 3.3315, "step": 3987 }, { "epoch": 0.16871139690329132, "grad_norm": 0.34791648387908936, "learning_rate": 0.001, "loss": 2.44, "step": 3988 }, { "epoch": 0.16875370166680767, "grad_norm": 1.0129145383834839, "learning_rate": 0.001, "loss": 1.8525, "step": 3989 }, { "epoch": 0.16879600643032405, "grad_norm": 0.2503008246421814, "learning_rate": 0.001, "loss": 1.666, "step": 3990 }, { "epoch": 0.16883831119384043, "grad_norm": 0.2816570997238159, "learning_rate": 0.001, "loss": 2.4679, "step": 3991 }, { "epoch": 0.1688806159573568, "grad_norm": 0.24317888915538788, "learning_rate": 0.001, "loss": 2.6195, "step": 3992 }, { "epoch": 0.16892292072087317, "grad_norm": 0.3003460466861725, "learning_rate": 0.001, "loss": 2.6692, "step": 3993 }, { "epoch": 0.16896522548438955, "grad_norm": 0.2870309054851532, "learning_rate": 0.001, "loss": 2.37, "step": 3994 }, { "epoch": 0.1690075302479059, "grad_norm": 0.35769209265708923, "learning_rate": 0.001, "loss": 2.6095, "step": 3995 }, { "epoch": 0.1690498350114223, "grad_norm": 0.647505521774292, "learning_rate": 0.001, "loss": 2.4609, "step": 3996 }, { "epoch": 0.16909213977493867, "grad_norm": 0.2910658121109009, "learning_rate": 0.001, "loss": 2.1234, "step": 3997 }, { "epoch": 0.16913444453845503, "grad_norm": 0.4279431402683258, "learning_rate": 0.001, "loss": 2.5119, "step": 3998 }, { "epoch": 0.1691767493019714, "grad_norm": 0.7346844673156738, "learning_rate": 0.001, "loss": 2.1146, "step": 3999 }, { "epoch": 0.16921905406548776, "grad_norm": 0.24350988864898682, "learning_rate": 0.001, "loss": 2.28, "step": 4000 }, { "epoch": 0.16926135882900414, "grad_norm": 0.2387346774339676, "learning_rate": 0.001, "loss": 2.8811, "step": 4001 }, { "epoch": 0.16930366359252053, "grad_norm": 0.7333387732505798, "learning_rate": 0.001, "loss": 2.0075, "step": 4002 }, { "epoch": 0.16934596835603688, "grad_norm": 0.289885938167572, "learning_rate": 0.001, "loss": 1.9186, "step": 4003 }, { "epoch": 0.16938827311955326, "grad_norm": 0.3716363310813904, "learning_rate": 0.001, "loss": 3.237, "step": 4004 }, { "epoch": 0.16943057788306964, "grad_norm": 0.29184696078300476, "learning_rate": 0.001, "loss": 1.9219, "step": 4005 }, { "epoch": 0.169472882646586, "grad_norm": 2.09316349029541, "learning_rate": 0.001, "loss": 2.8952, "step": 4006 }, { "epoch": 0.16951518741010238, "grad_norm": 0.30977028608322144, "learning_rate": 0.001, "loss": 1.6384, "step": 4007 }, { "epoch": 0.16955749217361876, "grad_norm": 0.2913471460342407, "learning_rate": 0.001, "loss": 2.3069, "step": 4008 }, { "epoch": 0.16959979693713512, "grad_norm": 0.2933041751384735, "learning_rate": 0.001, "loss": 2.115, "step": 4009 }, { "epoch": 0.1696421017006515, "grad_norm": 0.29783979058265686, "learning_rate": 0.001, "loss": 3.0471, "step": 4010 }, { "epoch": 0.16968440646416785, "grad_norm": 0.46605968475341797, "learning_rate": 0.001, "loss": 2.4636, "step": 4011 }, { "epoch": 0.16972671122768423, "grad_norm": 0.346618115901947, "learning_rate": 0.001, "loss": 2.6208, "step": 4012 }, { "epoch": 0.16976901599120062, "grad_norm": 0.279699444770813, "learning_rate": 0.001, "loss": 2.4847, "step": 4013 }, { "epoch": 0.16981132075471697, "grad_norm": 0.5628467798233032, "learning_rate": 0.001, "loss": 2.4294, "step": 4014 }, { "epoch": 0.16985362551823335, "grad_norm": 0.30765336751937866, "learning_rate": 0.001, "loss": 2.5887, "step": 4015 }, { "epoch": 0.16989593028174974, "grad_norm": 0.43252626061439514, "learning_rate": 0.001, "loss": 2.6351, "step": 4016 }, { "epoch": 0.1699382350452661, "grad_norm": 0.7735976576805115, "learning_rate": 0.001, "loss": 2.4618, "step": 4017 }, { "epoch": 0.16998053980878247, "grad_norm": 0.4775107204914093, "learning_rate": 0.001, "loss": 2.93, "step": 4018 }, { "epoch": 0.17002284457229885, "grad_norm": 0.2542901933193207, "learning_rate": 0.001, "loss": 1.9901, "step": 4019 }, { "epoch": 0.1700651493358152, "grad_norm": 0.7986733317375183, "learning_rate": 0.001, "loss": 1.8875, "step": 4020 }, { "epoch": 0.1701074540993316, "grad_norm": 0.24276584386825562, "learning_rate": 0.001, "loss": 2.8431, "step": 4021 }, { "epoch": 0.17014975886284794, "grad_norm": 0.29002276062965393, "learning_rate": 0.001, "loss": 2.1358, "step": 4022 }, { "epoch": 0.17019206362636433, "grad_norm": 0.7131394743919373, "learning_rate": 0.001, "loss": 2.1005, "step": 4023 }, { "epoch": 0.1702343683898807, "grad_norm": 0.22828415036201477, "learning_rate": 0.001, "loss": 1.7578, "step": 4024 }, { "epoch": 0.17027667315339706, "grad_norm": 0.2842770218849182, "learning_rate": 0.001, "loss": 2.5149, "step": 4025 }, { "epoch": 0.17031897791691344, "grad_norm": 0.24497929215431213, "learning_rate": 0.001, "loss": 2.7276, "step": 4026 }, { "epoch": 0.17036128268042983, "grad_norm": 0.2938736379146576, "learning_rate": 0.001, "loss": 3.0422, "step": 4027 }, { "epoch": 0.17040358744394618, "grad_norm": 0.2794887125492096, "learning_rate": 0.001, "loss": 1.9868, "step": 4028 }, { "epoch": 0.17044589220746256, "grad_norm": 0.23071792721748352, "learning_rate": 0.001, "loss": 3.0668, "step": 4029 }, { "epoch": 0.17048819697097894, "grad_norm": 0.2662751376628876, "learning_rate": 0.001, "loss": 2.5487, "step": 4030 }, { "epoch": 0.1705305017344953, "grad_norm": 0.21133390069007874, "learning_rate": 0.001, "loss": 2.2983, "step": 4031 }, { "epoch": 0.17057280649801168, "grad_norm": 0.22290173172950745, "learning_rate": 0.001, "loss": 2.4885, "step": 4032 }, { "epoch": 0.17061511126152804, "grad_norm": 0.29108884930610657, "learning_rate": 0.001, "loss": 1.848, "step": 4033 }, { "epoch": 0.17065741602504442, "grad_norm": 0.28918585181236267, "learning_rate": 0.001, "loss": 2.7061, "step": 4034 }, { "epoch": 0.1706997207885608, "grad_norm": 1.4359923601150513, "learning_rate": 0.001, "loss": 2.7202, "step": 4035 }, { "epoch": 0.17074202555207715, "grad_norm": 0.6065989136695862, "learning_rate": 0.001, "loss": 2.978, "step": 4036 }, { "epoch": 0.17078433031559354, "grad_norm": 1.2570323944091797, "learning_rate": 0.001, "loss": 2.1911, "step": 4037 }, { "epoch": 0.17082663507910992, "grad_norm": 0.2336377501487732, "learning_rate": 0.001, "loss": 2.2424, "step": 4038 }, { "epoch": 0.17086893984262627, "grad_norm": 3.624565601348877, "learning_rate": 0.001, "loss": 1.9867, "step": 4039 }, { "epoch": 0.17091124460614265, "grad_norm": 0.21247118711471558, "learning_rate": 0.001, "loss": 2.1356, "step": 4040 }, { "epoch": 0.17095354936965904, "grad_norm": 0.7525285482406616, "learning_rate": 0.001, "loss": 2.7077, "step": 4041 }, { "epoch": 0.1709958541331754, "grad_norm": 0.3459639549255371, "learning_rate": 0.001, "loss": 2.9617, "step": 4042 }, { "epoch": 0.17103815889669177, "grad_norm": 0.28101351857185364, "learning_rate": 0.001, "loss": 2.4219, "step": 4043 }, { "epoch": 0.17108046366020813, "grad_norm": 0.262946218252182, "learning_rate": 0.001, "loss": 2.1022, "step": 4044 }, { "epoch": 0.1711227684237245, "grad_norm": 134.08250427246094, "learning_rate": 0.001, "loss": 2.2692, "step": 4045 }, { "epoch": 0.1711650731872409, "grad_norm": 0.24002955853939056, "learning_rate": 0.001, "loss": 2.385, "step": 4046 }, { "epoch": 0.17120737795075724, "grad_norm": 0.29694628715515137, "learning_rate": 0.001, "loss": 2.6118, "step": 4047 }, { "epoch": 0.17124968271427363, "grad_norm": 12.005644798278809, "learning_rate": 0.001, "loss": 2.6029, "step": 4048 }, { "epoch": 0.17129198747779, "grad_norm": 0.2015325427055359, "learning_rate": 0.001, "loss": 1.6005, "step": 4049 }, { "epoch": 0.17133429224130636, "grad_norm": 0.18817874789237976, "learning_rate": 0.001, "loss": 2.2318, "step": 4050 }, { "epoch": 0.17137659700482274, "grad_norm": 0.22923152148723602, "learning_rate": 0.001, "loss": 1.8762, "step": 4051 }, { "epoch": 0.17141890176833913, "grad_norm": 0.2749441862106323, "learning_rate": 0.001, "loss": 2.4774, "step": 4052 }, { "epoch": 0.17146120653185548, "grad_norm": 0.23520220816135406, "learning_rate": 0.001, "loss": 1.9863, "step": 4053 }, { "epoch": 0.17150351129537186, "grad_norm": 0.21112807095050812, "learning_rate": 0.001, "loss": 1.7663, "step": 4054 }, { "epoch": 0.17154581605888822, "grad_norm": 0.23605698347091675, "learning_rate": 0.001, "loss": 2.1954, "step": 4055 }, { "epoch": 0.1715881208224046, "grad_norm": 0.19001781940460205, "learning_rate": 0.001, "loss": 2.1111, "step": 4056 }, { "epoch": 0.17163042558592098, "grad_norm": 0.34026920795440674, "learning_rate": 0.001, "loss": 2.5831, "step": 4057 }, { "epoch": 0.17167273034943734, "grad_norm": 3.0105605125427246, "learning_rate": 0.001, "loss": 3.2851, "step": 4058 }, { "epoch": 0.17171503511295372, "grad_norm": 0.2173168957233429, "learning_rate": 0.001, "loss": 1.7838, "step": 4059 }, { "epoch": 0.1717573398764701, "grad_norm": 0.23681773245334625, "learning_rate": 0.001, "loss": 2.5279, "step": 4060 }, { "epoch": 0.17179964463998645, "grad_norm": 0.4054577648639679, "learning_rate": 0.001, "loss": 1.787, "step": 4061 }, { "epoch": 0.17184194940350284, "grad_norm": 0.6441399455070496, "learning_rate": 0.001, "loss": 2.2311, "step": 4062 }, { "epoch": 0.17188425416701922, "grad_norm": 1.3562344312667847, "learning_rate": 0.001, "loss": 2.7153, "step": 4063 }, { "epoch": 0.17192655893053557, "grad_norm": 0.26166918873786926, "learning_rate": 0.001, "loss": 3.0784, "step": 4064 }, { "epoch": 0.17196886369405195, "grad_norm": 0.22632955014705658, "learning_rate": 0.001, "loss": 1.6698, "step": 4065 }, { "epoch": 0.1720111684575683, "grad_norm": 0.7687202095985413, "learning_rate": 0.001, "loss": 2.1554, "step": 4066 }, { "epoch": 0.1720534732210847, "grad_norm": 0.6964684128761292, "learning_rate": 0.001, "loss": 2.7002, "step": 4067 }, { "epoch": 0.17209577798460107, "grad_norm": 0.4354396164417267, "learning_rate": 0.001, "loss": 3.5678, "step": 4068 }, { "epoch": 0.17213808274811743, "grad_norm": 0.2912690341472626, "learning_rate": 0.001, "loss": 2.0832, "step": 4069 }, { "epoch": 0.1721803875116338, "grad_norm": 1.5569998025894165, "learning_rate": 0.001, "loss": 2.1359, "step": 4070 }, { "epoch": 0.1722226922751502, "grad_norm": 0.893632709980011, "learning_rate": 0.001, "loss": 3.0861, "step": 4071 }, { "epoch": 0.17226499703866655, "grad_norm": 0.7286151051521301, "learning_rate": 0.001, "loss": 2.3538, "step": 4072 }, { "epoch": 0.17230730180218293, "grad_norm": 0.29030993580818176, "learning_rate": 0.001, "loss": 2.4437, "step": 4073 }, { "epoch": 0.1723496065656993, "grad_norm": 0.28612467646598816, "learning_rate": 0.001, "loss": 2.9171, "step": 4074 }, { "epoch": 0.17239191132921566, "grad_norm": 3.892707109451294, "learning_rate": 0.001, "loss": 2.1065, "step": 4075 }, { "epoch": 0.17243421609273205, "grad_norm": 0.24053117632865906, "learning_rate": 0.001, "loss": 2.6832, "step": 4076 }, { "epoch": 0.1724765208562484, "grad_norm": 0.2183643877506256, "learning_rate": 0.001, "loss": 3.0531, "step": 4077 }, { "epoch": 0.17251882561976478, "grad_norm": 1.0080100297927856, "learning_rate": 0.001, "loss": 2.9435, "step": 4078 }, { "epoch": 0.17256113038328116, "grad_norm": 0.3189499080181122, "learning_rate": 0.001, "loss": 2.0975, "step": 4079 }, { "epoch": 0.17260343514679752, "grad_norm": 0.2897562086582184, "learning_rate": 0.001, "loss": 2.5301, "step": 4080 }, { "epoch": 0.1726457399103139, "grad_norm": 0.2769043743610382, "learning_rate": 0.001, "loss": 1.8059, "step": 4081 }, { "epoch": 0.17268804467383028, "grad_norm": 0.8298352360725403, "learning_rate": 0.001, "loss": 2.0551, "step": 4082 }, { "epoch": 0.17273034943734664, "grad_norm": 0.26442840695381165, "learning_rate": 0.001, "loss": 1.9421, "step": 4083 }, { "epoch": 0.17277265420086302, "grad_norm": 0.27219903469085693, "learning_rate": 0.001, "loss": 2.6075, "step": 4084 }, { "epoch": 0.1728149589643794, "grad_norm": 0.21034714579582214, "learning_rate": 0.001, "loss": 2.0325, "step": 4085 }, { "epoch": 0.17285726372789575, "grad_norm": 3.3212363719940186, "learning_rate": 0.001, "loss": 1.9906, "step": 4086 }, { "epoch": 0.17289956849141214, "grad_norm": 0.9745553135871887, "learning_rate": 0.001, "loss": 2.8193, "step": 4087 }, { "epoch": 0.17294187325492852, "grad_norm": 0.2681751847267151, "learning_rate": 0.001, "loss": 2.3555, "step": 4088 }, { "epoch": 0.17298417801844487, "grad_norm": 0.31693801283836365, "learning_rate": 0.001, "loss": 2.0349, "step": 4089 }, { "epoch": 0.17302648278196125, "grad_norm": 0.32253557443618774, "learning_rate": 0.001, "loss": 1.9938, "step": 4090 }, { "epoch": 0.1730687875454776, "grad_norm": 0.25818389654159546, "learning_rate": 0.001, "loss": 2.6496, "step": 4091 }, { "epoch": 0.173111092308994, "grad_norm": 0.24890708923339844, "learning_rate": 0.001, "loss": 1.7513, "step": 4092 }, { "epoch": 0.17315339707251037, "grad_norm": 0.22379307448863983, "learning_rate": 0.001, "loss": 1.7251, "step": 4093 }, { "epoch": 0.17319570183602673, "grad_norm": 0.24759642779827118, "learning_rate": 0.001, "loss": 3.0331, "step": 4094 }, { "epoch": 0.1732380065995431, "grad_norm": 0.3050745129585266, "learning_rate": 0.001, "loss": 3.0863, "step": 4095 }, { "epoch": 0.1732803113630595, "grad_norm": 0.29657745361328125, "learning_rate": 0.001, "loss": 3.5831, "step": 4096 }, { "epoch": 0.17332261612657585, "grad_norm": 0.254731684923172, "learning_rate": 0.001, "loss": 1.9884, "step": 4097 }, { "epoch": 0.17336492089009223, "grad_norm": 0.24810083210468292, "learning_rate": 0.001, "loss": 1.6098, "step": 4098 }, { "epoch": 0.1734072256536086, "grad_norm": 0.7149272561073303, "learning_rate": 0.001, "loss": 1.6882, "step": 4099 }, { "epoch": 0.17344953041712496, "grad_norm": 0.34439778327941895, "learning_rate": 0.001, "loss": 3.468, "step": 4100 }, { "epoch": 0.17349183518064135, "grad_norm": 0.27477285265922546, "learning_rate": 0.001, "loss": 3.7453, "step": 4101 }, { "epoch": 0.1735341399441577, "grad_norm": 0.2810986042022705, "learning_rate": 0.001, "loss": 2.4957, "step": 4102 }, { "epoch": 0.17357644470767408, "grad_norm": 0.24247132241725922, "learning_rate": 0.001, "loss": 1.6966, "step": 4103 }, { "epoch": 0.17361874947119046, "grad_norm": 0.1760856807231903, "learning_rate": 0.001, "loss": 2.591, "step": 4104 }, { "epoch": 0.17366105423470682, "grad_norm": 0.25415733456611633, "learning_rate": 0.001, "loss": 2.1011, "step": 4105 }, { "epoch": 0.1737033589982232, "grad_norm": 2.5164103507995605, "learning_rate": 0.001, "loss": 2.035, "step": 4106 }, { "epoch": 0.17374566376173958, "grad_norm": 0.5804241895675659, "learning_rate": 0.001, "loss": 2.2759, "step": 4107 }, { "epoch": 0.17378796852525594, "grad_norm": 0.24741019308567047, "learning_rate": 0.001, "loss": 2.2455, "step": 4108 }, { "epoch": 0.17383027328877232, "grad_norm": 0.230664923787117, "learning_rate": 0.001, "loss": 1.8416, "step": 4109 }, { "epoch": 0.1738725780522887, "grad_norm": 0.3088621497154236, "learning_rate": 0.001, "loss": 2.0536, "step": 4110 }, { "epoch": 0.17391488281580506, "grad_norm": 0.21473775804042816, "learning_rate": 0.001, "loss": 1.4643, "step": 4111 }, { "epoch": 0.17395718757932144, "grad_norm": 0.23546308279037476, "learning_rate": 0.001, "loss": 2.4978, "step": 4112 }, { "epoch": 0.1739994923428378, "grad_norm": 0.24598172307014465, "learning_rate": 0.001, "loss": 3.8651, "step": 4113 }, { "epoch": 0.17404179710635417, "grad_norm": 0.24466127157211304, "learning_rate": 0.001, "loss": 3.8869, "step": 4114 }, { "epoch": 0.17408410186987056, "grad_norm": 0.21379978954792023, "learning_rate": 0.001, "loss": 3.1837, "step": 4115 }, { "epoch": 0.1741264066333869, "grad_norm": 3.490262031555176, "learning_rate": 0.001, "loss": 1.6441, "step": 4116 }, { "epoch": 0.1741687113969033, "grad_norm": 0.30612412095069885, "learning_rate": 0.001, "loss": 2.0432, "step": 4117 }, { "epoch": 0.17421101616041967, "grad_norm": 0.32725802063941956, "learning_rate": 0.001, "loss": 2.1969, "step": 4118 }, { "epoch": 0.17425332092393603, "grad_norm": 0.34883803129196167, "learning_rate": 0.001, "loss": 2.9586, "step": 4119 }, { "epoch": 0.1742956256874524, "grad_norm": 0.2720988690853119, "learning_rate": 0.001, "loss": 2.6285, "step": 4120 }, { "epoch": 0.1743379304509688, "grad_norm": 0.47883912920951843, "learning_rate": 0.001, "loss": 2.1746, "step": 4121 }, { "epoch": 0.17438023521448515, "grad_norm": 0.3157065212726593, "learning_rate": 0.001, "loss": 2.5907, "step": 4122 }, { "epoch": 0.17442253997800153, "grad_norm": 0.22530221939086914, "learning_rate": 0.001, "loss": 1.8267, "step": 4123 }, { "epoch": 0.17446484474151788, "grad_norm": 0.6682106852531433, "learning_rate": 0.001, "loss": 1.8648, "step": 4124 }, { "epoch": 0.17450714950503426, "grad_norm": 0.24948126077651978, "learning_rate": 0.001, "loss": 3.0028, "step": 4125 }, { "epoch": 0.17454945426855065, "grad_norm": 0.23554354906082153, "learning_rate": 0.001, "loss": 2.8481, "step": 4126 }, { "epoch": 0.174591759032067, "grad_norm": 0.2798170745372772, "learning_rate": 0.001, "loss": 2.5993, "step": 4127 }, { "epoch": 0.17463406379558338, "grad_norm": 0.1892385184764862, "learning_rate": 0.001, "loss": 2.373, "step": 4128 }, { "epoch": 0.17467636855909977, "grad_norm": 4.151578426361084, "learning_rate": 0.001, "loss": 2.2752, "step": 4129 }, { "epoch": 0.17471867332261612, "grad_norm": 0.2719863951206207, "learning_rate": 0.001, "loss": 2.4036, "step": 4130 }, { "epoch": 0.1747609780861325, "grad_norm": 0.27087652683258057, "learning_rate": 0.001, "loss": 2.1397, "step": 4131 }, { "epoch": 0.17480328284964888, "grad_norm": 0.27612924575805664, "learning_rate": 0.001, "loss": 3.2754, "step": 4132 }, { "epoch": 0.17484558761316524, "grad_norm": 0.32560476660728455, "learning_rate": 0.001, "loss": 2.3888, "step": 4133 }, { "epoch": 0.17488789237668162, "grad_norm": 0.5404102802276611, "learning_rate": 0.001, "loss": 2.7755, "step": 4134 }, { "epoch": 0.17493019714019797, "grad_norm": 0.28054457902908325, "learning_rate": 0.001, "loss": 2.4661, "step": 4135 }, { "epoch": 0.17497250190371436, "grad_norm": 7.133134365081787, "learning_rate": 0.001, "loss": 2.3996, "step": 4136 }, { "epoch": 0.17501480666723074, "grad_norm": 0.4292699098587036, "learning_rate": 0.001, "loss": 2.2355, "step": 4137 }, { "epoch": 0.1750571114307471, "grad_norm": 0.2766590416431427, "learning_rate": 0.001, "loss": 2.556, "step": 4138 }, { "epoch": 0.17509941619426347, "grad_norm": 0.26670682430267334, "learning_rate": 0.001, "loss": 2.2956, "step": 4139 }, { "epoch": 0.17514172095777986, "grad_norm": 0.2878996729850769, "learning_rate": 0.001, "loss": 2.3641, "step": 4140 }, { "epoch": 0.1751840257212962, "grad_norm": 0.4456641376018524, "learning_rate": 0.001, "loss": 2.4915, "step": 4141 }, { "epoch": 0.1752263304848126, "grad_norm": 0.28615471720695496, "learning_rate": 0.001, "loss": 2.2347, "step": 4142 }, { "epoch": 0.17526863524832897, "grad_norm": 0.28259655833244324, "learning_rate": 0.001, "loss": 2.1569, "step": 4143 }, { "epoch": 0.17531094001184533, "grad_norm": 0.22979553043842316, "learning_rate": 0.001, "loss": 2.1944, "step": 4144 }, { "epoch": 0.1753532447753617, "grad_norm": 0.6153604984283447, "learning_rate": 0.001, "loss": 2.1908, "step": 4145 }, { "epoch": 0.17539554953887806, "grad_norm": 1.9100154638290405, "learning_rate": 0.001, "loss": 2.154, "step": 4146 }, { "epoch": 0.17543785430239445, "grad_norm": 0.23111866414546967, "learning_rate": 0.001, "loss": 1.4795, "step": 4147 }, { "epoch": 0.17548015906591083, "grad_norm": 0.21934235095977783, "learning_rate": 0.001, "loss": 2.0412, "step": 4148 }, { "epoch": 0.17552246382942718, "grad_norm": 0.31571948528289795, "learning_rate": 0.001, "loss": 1.7655, "step": 4149 }, { "epoch": 0.17556476859294357, "grad_norm": 0.35099247097969055, "learning_rate": 0.001, "loss": 2.8888, "step": 4150 }, { "epoch": 0.17560707335645995, "grad_norm": 0.25058233737945557, "learning_rate": 0.001, "loss": 1.6825, "step": 4151 }, { "epoch": 0.1756493781199763, "grad_norm": 0.35002222657203674, "learning_rate": 0.001, "loss": 2.3041, "step": 4152 }, { "epoch": 0.17569168288349268, "grad_norm": 0.2953129708766937, "learning_rate": 0.001, "loss": 2.1666, "step": 4153 }, { "epoch": 0.17573398764700907, "grad_norm": 0.2621290981769562, "learning_rate": 0.001, "loss": 3.3843, "step": 4154 }, { "epoch": 0.17577629241052542, "grad_norm": 0.2586938738822937, "learning_rate": 0.001, "loss": 1.8781, "step": 4155 }, { "epoch": 0.1758185971740418, "grad_norm": 0.29512864351272583, "learning_rate": 0.001, "loss": 2.2121, "step": 4156 }, { "epoch": 0.17586090193755816, "grad_norm": 0.2333596795797348, "learning_rate": 0.001, "loss": 2.6819, "step": 4157 }, { "epoch": 0.17590320670107454, "grad_norm": 0.2719203531742096, "learning_rate": 0.001, "loss": 2.4844, "step": 4158 }, { "epoch": 0.17594551146459092, "grad_norm": 0.9604827761650085, "learning_rate": 0.001, "loss": 2.4097, "step": 4159 }, { "epoch": 0.17598781622810727, "grad_norm": 0.29836225509643555, "learning_rate": 0.001, "loss": 1.9671, "step": 4160 }, { "epoch": 0.17603012099162366, "grad_norm": 0.2874457538127899, "learning_rate": 0.001, "loss": 2.8141, "step": 4161 }, { "epoch": 0.17607242575514004, "grad_norm": 0.3310123085975647, "learning_rate": 0.001, "loss": 2.1315, "step": 4162 }, { "epoch": 0.1761147305186564, "grad_norm": 0.26700732111930847, "learning_rate": 0.001, "loss": 2.7584, "step": 4163 }, { "epoch": 0.17615703528217277, "grad_norm": 0.2958766520023346, "learning_rate": 0.001, "loss": 2.1226, "step": 4164 }, { "epoch": 0.17619934004568916, "grad_norm": 0.30483001470565796, "learning_rate": 0.001, "loss": 2.3202, "step": 4165 }, { "epoch": 0.1762416448092055, "grad_norm": 0.792739987373352, "learning_rate": 0.001, "loss": 2.3611, "step": 4166 }, { "epoch": 0.1762839495727219, "grad_norm": 0.39795559644699097, "learning_rate": 0.001, "loss": 3.8292, "step": 4167 }, { "epoch": 0.17632625433623825, "grad_norm": 0.5967978239059448, "learning_rate": 0.001, "loss": 2.3902, "step": 4168 }, { "epoch": 0.17636855909975463, "grad_norm": 0.25690481066703796, "learning_rate": 0.001, "loss": 3.1277, "step": 4169 }, { "epoch": 0.176410863863271, "grad_norm": 0.3368457555770874, "learning_rate": 0.001, "loss": 2.4135, "step": 4170 }, { "epoch": 0.17645316862678737, "grad_norm": 0.5564723014831543, "learning_rate": 0.001, "loss": 2.2856, "step": 4171 }, { "epoch": 0.17649547339030375, "grad_norm": 0.28822070360183716, "learning_rate": 0.001, "loss": 2.0568, "step": 4172 }, { "epoch": 0.17653777815382013, "grad_norm": 0.2540615200996399, "learning_rate": 0.001, "loss": 2.7371, "step": 4173 }, { "epoch": 0.17658008291733648, "grad_norm": 0.30011865496635437, "learning_rate": 0.001, "loss": 2.0551, "step": 4174 }, { "epoch": 0.17662238768085287, "grad_norm": 0.28831255435943604, "learning_rate": 0.001, "loss": 2.3959, "step": 4175 }, { "epoch": 0.17666469244436925, "grad_norm": 0.2988402843475342, "learning_rate": 0.001, "loss": 1.8196, "step": 4176 }, { "epoch": 0.1767069972078856, "grad_norm": 1.3660717010498047, "learning_rate": 0.001, "loss": 1.853, "step": 4177 }, { "epoch": 0.17674930197140198, "grad_norm": 0.25615715980529785, "learning_rate": 0.001, "loss": 2.7053, "step": 4178 }, { "epoch": 0.17679160673491834, "grad_norm": 0.25990021228790283, "learning_rate": 0.001, "loss": 3.0701, "step": 4179 }, { "epoch": 0.17683391149843472, "grad_norm": 0.24227409064769745, "learning_rate": 0.001, "loss": 2.085, "step": 4180 }, { "epoch": 0.1768762162619511, "grad_norm": 0.2668752670288086, "learning_rate": 0.001, "loss": 2.9824, "step": 4181 }, { "epoch": 0.17691852102546746, "grad_norm": 0.2560098469257355, "learning_rate": 0.001, "loss": 1.9853, "step": 4182 }, { "epoch": 0.17696082578898384, "grad_norm": 26.13444709777832, "learning_rate": 0.001, "loss": 1.7282, "step": 4183 }, { "epoch": 0.17700313055250022, "grad_norm": 0.28741776943206787, "learning_rate": 0.001, "loss": 1.9061, "step": 4184 }, { "epoch": 0.17704543531601658, "grad_norm": 0.2741623520851135, "learning_rate": 0.001, "loss": 2.4942, "step": 4185 }, { "epoch": 0.17708774007953296, "grad_norm": 0.24903661012649536, "learning_rate": 0.001, "loss": 3.1056, "step": 4186 }, { "epoch": 0.17713004484304934, "grad_norm": 0.21220599114894867, "learning_rate": 0.001, "loss": 1.4461, "step": 4187 }, { "epoch": 0.1771723496065657, "grad_norm": 0.3349112570285797, "learning_rate": 0.001, "loss": 2.0917, "step": 4188 }, { "epoch": 0.17721465437008208, "grad_norm": 0.7024485468864441, "learning_rate": 0.001, "loss": 2.4401, "step": 4189 }, { "epoch": 0.17725695913359843, "grad_norm": 0.2594164311885834, "learning_rate": 0.001, "loss": 2.3743, "step": 4190 }, { "epoch": 0.1772992638971148, "grad_norm": 0.26141050457954407, "learning_rate": 0.001, "loss": 3.0745, "step": 4191 }, { "epoch": 0.1773415686606312, "grad_norm": 0.25073671340942383, "learning_rate": 0.001, "loss": 2.8072, "step": 4192 }, { "epoch": 0.17738387342414755, "grad_norm": 0.25238800048828125, "learning_rate": 0.001, "loss": 2.5714, "step": 4193 }, { "epoch": 0.17742617818766393, "grad_norm": 2.378894805908203, "learning_rate": 0.001, "loss": 3.8264, "step": 4194 }, { "epoch": 0.1774684829511803, "grad_norm": 0.2004442662000656, "learning_rate": 0.001, "loss": 3.0773, "step": 4195 }, { "epoch": 0.17751078771469667, "grad_norm": 0.48582643270492554, "learning_rate": 0.001, "loss": 2.3928, "step": 4196 }, { "epoch": 0.17755309247821305, "grad_norm": 0.3151031732559204, "learning_rate": 0.001, "loss": 2.8011, "step": 4197 }, { "epoch": 0.17759539724172943, "grad_norm": 0.257080078125, "learning_rate": 0.001, "loss": 2.5786, "step": 4198 }, { "epoch": 0.17763770200524578, "grad_norm": 0.42748337984085083, "learning_rate": 0.001, "loss": 2.5647, "step": 4199 }, { "epoch": 0.17768000676876217, "grad_norm": 0.3146316409111023, "learning_rate": 0.001, "loss": 2.9859, "step": 4200 }, { "epoch": 0.17772231153227852, "grad_norm": 0.2928094267845154, "learning_rate": 0.001, "loss": 2.3672, "step": 4201 }, { "epoch": 0.1777646162957949, "grad_norm": 0.31037336587905884, "learning_rate": 0.001, "loss": 2.9569, "step": 4202 }, { "epoch": 0.17780692105931128, "grad_norm": 0.2981893718242645, "learning_rate": 0.001, "loss": 2.2674, "step": 4203 }, { "epoch": 0.17784922582282764, "grad_norm": 0.26700565218925476, "learning_rate": 0.001, "loss": 1.9258, "step": 4204 }, { "epoch": 0.17789153058634402, "grad_norm": 0.3305959701538086, "learning_rate": 0.001, "loss": 1.9121, "step": 4205 }, { "epoch": 0.1779338353498604, "grad_norm": 0.323095440864563, "learning_rate": 0.001, "loss": 2.6363, "step": 4206 }, { "epoch": 0.17797614011337676, "grad_norm": 0.5207691788673401, "learning_rate": 0.001, "loss": 2.3411, "step": 4207 }, { "epoch": 0.17801844487689314, "grad_norm": 0.5256071090698242, "learning_rate": 0.001, "loss": 2.0633, "step": 4208 }, { "epoch": 0.17806074964040952, "grad_norm": 0.20449815690517426, "learning_rate": 0.001, "loss": 1.5104, "step": 4209 }, { "epoch": 0.17810305440392588, "grad_norm": 0.3280058801174164, "learning_rate": 0.001, "loss": 2.0113, "step": 4210 }, { "epoch": 0.17814535916744226, "grad_norm": 0.20661139488220215, "learning_rate": 0.001, "loss": 1.8537, "step": 4211 }, { "epoch": 0.17818766393095864, "grad_norm": 0.22364257276058197, "learning_rate": 0.001, "loss": 2.2209, "step": 4212 }, { "epoch": 0.178229968694475, "grad_norm": 0.3003136217594147, "learning_rate": 0.001, "loss": 2.3106, "step": 4213 }, { "epoch": 0.17827227345799138, "grad_norm": 0.39940145611763, "learning_rate": 0.001, "loss": 2.6153, "step": 4214 }, { "epoch": 0.17831457822150773, "grad_norm": 0.2537018954753876, "learning_rate": 0.001, "loss": 2.6277, "step": 4215 }, { "epoch": 0.1783568829850241, "grad_norm": 0.25565820932388306, "learning_rate": 0.001, "loss": 2.7391, "step": 4216 }, { "epoch": 0.1783991877485405, "grad_norm": 0.3583895266056061, "learning_rate": 0.001, "loss": 2.151, "step": 4217 }, { "epoch": 0.17844149251205685, "grad_norm": 0.21725155413150787, "learning_rate": 0.001, "loss": 2.0606, "step": 4218 }, { "epoch": 0.17848379727557323, "grad_norm": 0.21374504268169403, "learning_rate": 0.001, "loss": 2.0371, "step": 4219 }, { "epoch": 0.1785261020390896, "grad_norm": 0.21078385412693024, "learning_rate": 0.001, "loss": 1.8766, "step": 4220 }, { "epoch": 0.17856840680260597, "grad_norm": 0.2687901258468628, "learning_rate": 0.001, "loss": 2.4474, "step": 4221 }, { "epoch": 0.17861071156612235, "grad_norm": 0.3007321357727051, "learning_rate": 0.001, "loss": 2.4974, "step": 4222 }, { "epoch": 0.17865301632963873, "grad_norm": 0.35999658703804016, "learning_rate": 0.001, "loss": 2.2235, "step": 4223 }, { "epoch": 0.17869532109315509, "grad_norm": 0.5664570331573486, "learning_rate": 0.001, "loss": 2.6284, "step": 4224 }, { "epoch": 0.17873762585667147, "grad_norm": 0.28979048132896423, "learning_rate": 0.001, "loss": 2.0292, "step": 4225 }, { "epoch": 0.17877993062018782, "grad_norm": 0.3224925398826599, "learning_rate": 0.001, "loss": 1.9622, "step": 4226 }, { "epoch": 0.1788222353837042, "grad_norm": 4.19497537612915, "learning_rate": 0.001, "loss": 2.2633, "step": 4227 }, { "epoch": 0.17886454014722059, "grad_norm": 0.252350389957428, "learning_rate": 0.001, "loss": 3.7678, "step": 4228 }, { "epoch": 0.17890684491073694, "grad_norm": 0.26426586508750916, "learning_rate": 0.001, "loss": 2.3954, "step": 4229 }, { "epoch": 0.17894914967425332, "grad_norm": 0.24827632308006287, "learning_rate": 0.001, "loss": 1.9709, "step": 4230 }, { "epoch": 0.1789914544377697, "grad_norm": 4.474344253540039, "learning_rate": 0.001, "loss": 2.9707, "step": 4231 }, { "epoch": 0.17903375920128606, "grad_norm": 0.4154488444328308, "learning_rate": 0.001, "loss": 2.6213, "step": 4232 }, { "epoch": 0.17907606396480244, "grad_norm": 0.2699248492717743, "learning_rate": 0.001, "loss": 2.1127, "step": 4233 }, { "epoch": 0.17911836872831882, "grad_norm": 0.2923453152179718, "learning_rate": 0.001, "loss": 1.9264, "step": 4234 }, { "epoch": 0.17916067349183518, "grad_norm": 0.31689560413360596, "learning_rate": 0.001, "loss": 2.5336, "step": 4235 }, { "epoch": 0.17920297825535156, "grad_norm": 0.2745906412601471, "learning_rate": 0.001, "loss": 1.8758, "step": 4236 }, { "epoch": 0.1792452830188679, "grad_norm": 1.625520944595337, "learning_rate": 0.001, "loss": 4.2199, "step": 4237 }, { "epoch": 0.1792875877823843, "grad_norm": 0.32071831822395325, "learning_rate": 0.001, "loss": 2.993, "step": 4238 }, { "epoch": 0.17932989254590068, "grad_norm": 0.2907838225364685, "learning_rate": 0.001, "loss": 2.3436, "step": 4239 }, { "epoch": 0.17937219730941703, "grad_norm": 0.25917503237724304, "learning_rate": 0.001, "loss": 1.7643, "step": 4240 }, { "epoch": 0.1794145020729334, "grad_norm": 0.35071495175361633, "learning_rate": 0.001, "loss": 2.6049, "step": 4241 }, { "epoch": 0.1794568068364498, "grad_norm": 0.402885377407074, "learning_rate": 0.001, "loss": 3.151, "step": 4242 }, { "epoch": 0.17949911159996615, "grad_norm": 0.2622643709182739, "learning_rate": 0.001, "loss": 1.8995, "step": 4243 }, { "epoch": 0.17954141636348253, "grad_norm": 0.2709498107433319, "learning_rate": 0.001, "loss": 2.5256, "step": 4244 }, { "epoch": 0.1795837211269989, "grad_norm": 0.27462783455848694, "learning_rate": 0.001, "loss": 2.714, "step": 4245 }, { "epoch": 0.17962602589051527, "grad_norm": 0.20611165463924408, "learning_rate": 0.001, "loss": 1.6344, "step": 4246 }, { "epoch": 0.17966833065403165, "grad_norm": 0.32255131006240845, "learning_rate": 0.001, "loss": 2.2145, "step": 4247 }, { "epoch": 0.179710635417548, "grad_norm": 0.21528469026088715, "learning_rate": 0.001, "loss": 1.8803, "step": 4248 }, { "epoch": 0.17975294018106439, "grad_norm": 0.21335670351982117, "learning_rate": 0.001, "loss": 2.8012, "step": 4249 }, { "epoch": 0.17979524494458077, "grad_norm": 1.517720341682434, "learning_rate": 0.001, "loss": 2.1899, "step": 4250 }, { "epoch": 0.17983754970809712, "grad_norm": 0.762283205986023, "learning_rate": 0.001, "loss": 3.0798, "step": 4251 }, { "epoch": 0.1798798544716135, "grad_norm": 0.23315021395683289, "learning_rate": 0.001, "loss": 2.9799, "step": 4252 }, { "epoch": 0.17992215923512989, "grad_norm": 1.0861098766326904, "learning_rate": 0.001, "loss": 3.0077, "step": 4253 }, { "epoch": 0.17996446399864624, "grad_norm": 0.4819357991218567, "learning_rate": 0.001, "loss": 3.5156, "step": 4254 }, { "epoch": 0.18000676876216262, "grad_norm": 0.5548194646835327, "learning_rate": 0.001, "loss": 2.9003, "step": 4255 }, { "epoch": 0.180049073525679, "grad_norm": 8.51793384552002, "learning_rate": 0.001, "loss": 2.8978, "step": 4256 }, { "epoch": 0.18009137828919536, "grad_norm": 0.37549543380737305, "learning_rate": 0.001, "loss": 2.1621, "step": 4257 }, { "epoch": 0.18013368305271174, "grad_norm": 1.0575660467147827, "learning_rate": 0.001, "loss": 2.7155, "step": 4258 }, { "epoch": 0.1801759878162281, "grad_norm": 0.26176881790161133, "learning_rate": 0.001, "loss": 1.717, "step": 4259 }, { "epoch": 0.18021829257974448, "grad_norm": 0.418062299489975, "learning_rate": 0.001, "loss": 3.2709, "step": 4260 }, { "epoch": 0.18026059734326086, "grad_norm": 0.3230735957622528, "learning_rate": 0.001, "loss": 2.752, "step": 4261 }, { "epoch": 0.1803029021067772, "grad_norm": 0.3032633662223816, "learning_rate": 0.001, "loss": 1.8807, "step": 4262 }, { "epoch": 0.1803452068702936, "grad_norm": 0.37252533435821533, "learning_rate": 0.001, "loss": 2.2327, "step": 4263 }, { "epoch": 0.18038751163380998, "grad_norm": 0.8877015709877014, "learning_rate": 0.001, "loss": 2.6157, "step": 4264 }, { "epoch": 0.18042981639732633, "grad_norm": 0.6750360131263733, "learning_rate": 0.001, "loss": 2.3641, "step": 4265 }, { "epoch": 0.1804721211608427, "grad_norm": 0.626649796962738, "learning_rate": 0.001, "loss": 3.1486, "step": 4266 }, { "epoch": 0.1805144259243591, "grad_norm": 0.3944655656814575, "learning_rate": 0.001, "loss": 2.4488, "step": 4267 }, { "epoch": 0.18055673068787545, "grad_norm": 0.28062745928764343, "learning_rate": 0.001, "loss": 1.9655, "step": 4268 }, { "epoch": 0.18059903545139183, "grad_norm": 2.311063051223755, "learning_rate": 0.001, "loss": 2.6447, "step": 4269 }, { "epoch": 0.18064134021490819, "grad_norm": 0.30582699179649353, "learning_rate": 0.001, "loss": 2.7865, "step": 4270 }, { "epoch": 0.18068364497842457, "grad_norm": 0.6388780474662781, "learning_rate": 0.001, "loss": 3.422, "step": 4271 }, { "epoch": 0.18072594974194095, "grad_norm": 0.24511829018592834, "learning_rate": 0.001, "loss": 1.925, "step": 4272 }, { "epoch": 0.1807682545054573, "grad_norm": 10.821248054504395, "learning_rate": 0.001, "loss": 2.6423, "step": 4273 }, { "epoch": 0.1808105592689737, "grad_norm": 0.9741492867469788, "learning_rate": 0.001, "loss": 2.614, "step": 4274 }, { "epoch": 0.18085286403249007, "grad_norm": 0.4937375485897064, "learning_rate": 0.001, "loss": 2.9155, "step": 4275 }, { "epoch": 0.18089516879600642, "grad_norm": 0.32273703813552856, "learning_rate": 0.001, "loss": 3.4425, "step": 4276 }, { "epoch": 0.1809374735595228, "grad_norm": 0.3622698187828064, "learning_rate": 0.001, "loss": 2.5883, "step": 4277 }, { "epoch": 0.1809797783230392, "grad_norm": 0.2949691116809845, "learning_rate": 0.001, "loss": 3.5752, "step": 4278 }, { "epoch": 0.18102208308655554, "grad_norm": 0.2598804533481598, "learning_rate": 0.001, "loss": 1.8536, "step": 4279 }, { "epoch": 0.18106438785007192, "grad_norm": 1.1611905097961426, "learning_rate": 0.001, "loss": 3.0881, "step": 4280 }, { "epoch": 0.18110669261358828, "grad_norm": 1.2100458145141602, "learning_rate": 0.001, "loss": 2.2188, "step": 4281 }, { "epoch": 0.18114899737710466, "grad_norm": 0.5973420739173889, "learning_rate": 0.001, "loss": 2.6307, "step": 4282 }, { "epoch": 0.18119130214062104, "grad_norm": 0.29785507917404175, "learning_rate": 0.001, "loss": 2.6614, "step": 4283 }, { "epoch": 0.1812336069041374, "grad_norm": 0.2885024845600128, "learning_rate": 0.001, "loss": 2.4603, "step": 4284 }, { "epoch": 0.18127591166765378, "grad_norm": 5.69395112991333, "learning_rate": 0.001, "loss": 2.2926, "step": 4285 }, { "epoch": 0.18131821643117016, "grad_norm": 0.9888959527015686, "learning_rate": 0.001, "loss": 3.3646, "step": 4286 }, { "epoch": 0.1813605211946865, "grad_norm": 1.5482476949691772, "learning_rate": 0.001, "loss": 1.983, "step": 4287 }, { "epoch": 0.1814028259582029, "grad_norm": 1.1126073598861694, "learning_rate": 0.001, "loss": 2.3665, "step": 4288 }, { "epoch": 0.18144513072171928, "grad_norm": 3.9008285999298096, "learning_rate": 0.001, "loss": 2.3922, "step": 4289 }, { "epoch": 0.18148743548523563, "grad_norm": 0.30987706780433655, "learning_rate": 0.001, "loss": 2.7724, "step": 4290 }, { "epoch": 0.18152974024875201, "grad_norm": 0.5834580659866333, "learning_rate": 0.001, "loss": 3.4511, "step": 4291 }, { "epoch": 0.18157204501226837, "grad_norm": 0.37197399139404297, "learning_rate": 0.001, "loss": 2.7004, "step": 4292 }, { "epoch": 0.18161434977578475, "grad_norm": 0.2617497742176056, "learning_rate": 0.001, "loss": 2.2616, "step": 4293 }, { "epoch": 0.18165665453930113, "grad_norm": 0.25308120250701904, "learning_rate": 0.001, "loss": 2.5587, "step": 4294 }, { "epoch": 0.1816989593028175, "grad_norm": 0.22817359864711761, "learning_rate": 0.001, "loss": 2.3229, "step": 4295 }, { "epoch": 0.18174126406633387, "grad_norm": 0.27048158645629883, "learning_rate": 0.001, "loss": 2.0208, "step": 4296 }, { "epoch": 0.18178356882985025, "grad_norm": 0.7957993149757385, "learning_rate": 0.001, "loss": 2.4789, "step": 4297 }, { "epoch": 0.1818258735933666, "grad_norm": 0.2634366452693939, "learning_rate": 0.001, "loss": 2.9582, "step": 4298 }, { "epoch": 0.181868178356883, "grad_norm": 0.2717626094818115, "learning_rate": 0.001, "loss": 2.3598, "step": 4299 }, { "epoch": 0.18191048312039937, "grad_norm": 1.2515925168991089, "learning_rate": 0.001, "loss": 2.4262, "step": 4300 }, { "epoch": 0.18195278788391572, "grad_norm": 0.307389497756958, "learning_rate": 0.001, "loss": 2.2833, "step": 4301 }, { "epoch": 0.1819950926474321, "grad_norm": 0.6592715978622437, "learning_rate": 0.001, "loss": 2.8568, "step": 4302 }, { "epoch": 0.18203739741094846, "grad_norm": 0.24115906655788422, "learning_rate": 0.001, "loss": 2.0772, "step": 4303 }, { "epoch": 0.18207970217446484, "grad_norm": 0.3468441963195801, "learning_rate": 0.001, "loss": 3.1513, "step": 4304 }, { "epoch": 0.18212200693798122, "grad_norm": 0.3268192708492279, "learning_rate": 0.001, "loss": 2.2592, "step": 4305 }, { "epoch": 0.18216431170149758, "grad_norm": 0.4834609925746918, "learning_rate": 0.001, "loss": 1.9173, "step": 4306 }, { "epoch": 0.18220661646501396, "grad_norm": 0.29343941807746887, "learning_rate": 0.001, "loss": 4.07, "step": 4307 }, { "epoch": 0.18224892122853034, "grad_norm": 0.5635595321655273, "learning_rate": 0.001, "loss": 3.5071, "step": 4308 }, { "epoch": 0.1822912259920467, "grad_norm": 6.358903884887695, "learning_rate": 0.001, "loss": 2.6644, "step": 4309 }, { "epoch": 0.18233353075556308, "grad_norm": 1.2210367918014526, "learning_rate": 0.001, "loss": 3.2344, "step": 4310 }, { "epoch": 0.18237583551907946, "grad_norm": 0.3168500065803528, "learning_rate": 0.001, "loss": 1.8108, "step": 4311 }, { "epoch": 0.18241814028259581, "grad_norm": 0.2486400455236435, "learning_rate": 0.001, "loss": 2.1005, "step": 4312 }, { "epoch": 0.1824604450461122, "grad_norm": 0.2820669114589691, "learning_rate": 0.001, "loss": 2.4824, "step": 4313 }, { "epoch": 0.18250274980962855, "grad_norm": 0.23513592779636383, "learning_rate": 0.001, "loss": 2.2421, "step": 4314 }, { "epoch": 0.18254505457314493, "grad_norm": 0.33237895369529724, "learning_rate": 0.001, "loss": 2.6718, "step": 4315 }, { "epoch": 0.18258735933666131, "grad_norm": 1.0040096044540405, "learning_rate": 0.001, "loss": 2.4138, "step": 4316 }, { "epoch": 0.18262966410017767, "grad_norm": 2.4072158336639404, "learning_rate": 0.001, "loss": 2.3787, "step": 4317 }, { "epoch": 0.18267196886369405, "grad_norm": 0.2276010513305664, "learning_rate": 0.001, "loss": 2.5066, "step": 4318 }, { "epoch": 0.18271427362721043, "grad_norm": 0.2673986256122589, "learning_rate": 0.001, "loss": 2.4379, "step": 4319 }, { "epoch": 0.1827565783907268, "grad_norm": 0.3459979295730591, "learning_rate": 0.001, "loss": 2.8818, "step": 4320 }, { "epoch": 0.18279888315424317, "grad_norm": 0.30909305810928345, "learning_rate": 0.001, "loss": 2.359, "step": 4321 }, { "epoch": 0.18284118791775955, "grad_norm": 0.29360231757164, "learning_rate": 0.001, "loss": 2.3982, "step": 4322 }, { "epoch": 0.1828834926812759, "grad_norm": 0.3940381407737732, "learning_rate": 0.001, "loss": 2.7807, "step": 4323 }, { "epoch": 0.1829257974447923, "grad_norm": 0.7100034356117249, "learning_rate": 0.001, "loss": 3.6592, "step": 4324 }, { "epoch": 0.18296810220830864, "grad_norm": 4.326420307159424, "learning_rate": 0.001, "loss": 2.7129, "step": 4325 }, { "epoch": 0.18301040697182502, "grad_norm": 0.2793787717819214, "learning_rate": 0.001, "loss": 3.1424, "step": 4326 }, { "epoch": 0.1830527117353414, "grad_norm": 0.9387155175209045, "learning_rate": 0.001, "loss": 3.1617, "step": 4327 }, { "epoch": 0.18309501649885776, "grad_norm": 0.3149893581867218, "learning_rate": 0.001, "loss": 2.914, "step": 4328 }, { "epoch": 0.18313732126237414, "grad_norm": 1.0925941467285156, "learning_rate": 0.001, "loss": 2.902, "step": 4329 }, { "epoch": 0.18317962602589052, "grad_norm": 0.3763814866542816, "learning_rate": 0.001, "loss": 3.5513, "step": 4330 }, { "epoch": 0.18322193078940688, "grad_norm": 0.2654663622379303, "learning_rate": 0.001, "loss": 3.1993, "step": 4331 }, { "epoch": 0.18326423555292326, "grad_norm": 0.34242555499076843, "learning_rate": 0.001, "loss": 2.4377, "step": 4332 }, { "epoch": 0.18330654031643964, "grad_norm": 0.37130022048950195, "learning_rate": 0.001, "loss": 3.27, "step": 4333 }, { "epoch": 0.183348845079956, "grad_norm": 0.4086019694805145, "learning_rate": 0.001, "loss": 2.3096, "step": 4334 }, { "epoch": 0.18339114984347238, "grad_norm": 0.2771133482456207, "learning_rate": 0.001, "loss": 3.351, "step": 4335 }, { "epoch": 0.18343345460698876, "grad_norm": 0.3115023970603943, "learning_rate": 0.001, "loss": 1.9335, "step": 4336 }, { "epoch": 0.18347575937050511, "grad_norm": 0.513750433921814, "learning_rate": 0.001, "loss": 3.1363, "step": 4337 }, { "epoch": 0.1835180641340215, "grad_norm": 0.3518810570240021, "learning_rate": 0.001, "loss": 3.5947, "step": 4338 }, { "epoch": 0.18356036889753785, "grad_norm": 0.2949376106262207, "learning_rate": 0.001, "loss": 2.2923, "step": 4339 }, { "epoch": 0.18360267366105423, "grad_norm": 0.2582836449146271, "learning_rate": 0.001, "loss": 3.0664, "step": 4340 }, { "epoch": 0.18364497842457062, "grad_norm": 0.19488857686519623, "learning_rate": 0.001, "loss": 2.7243, "step": 4341 }, { "epoch": 0.18368728318808697, "grad_norm": 0.26442408561706543, "learning_rate": 0.001, "loss": 3.4023, "step": 4342 }, { "epoch": 0.18372958795160335, "grad_norm": 0.22754669189453125, "learning_rate": 0.001, "loss": 2.4041, "step": 4343 }, { "epoch": 0.18377189271511973, "grad_norm": 0.22837084531784058, "learning_rate": 0.001, "loss": 3.132, "step": 4344 }, { "epoch": 0.1838141974786361, "grad_norm": 6.180835723876953, "learning_rate": 0.001, "loss": 1.6405, "step": 4345 }, { "epoch": 0.18385650224215247, "grad_norm": 0.21038007736206055, "learning_rate": 0.001, "loss": 2.0711, "step": 4346 }, { "epoch": 0.18389880700566885, "grad_norm": 0.2657643258571625, "learning_rate": 0.001, "loss": 2.2425, "step": 4347 }, { "epoch": 0.1839411117691852, "grad_norm": 1.5151898860931396, "learning_rate": 0.001, "loss": 2.9717, "step": 4348 }, { "epoch": 0.1839834165327016, "grad_norm": 0.2588333189487457, "learning_rate": 0.001, "loss": 2.7555, "step": 4349 }, { "epoch": 0.18402572129621794, "grad_norm": 0.28635910153388977, "learning_rate": 0.001, "loss": 2.7952, "step": 4350 }, { "epoch": 0.18406802605973432, "grad_norm": 0.32209694385528564, "learning_rate": 0.001, "loss": 2.6064, "step": 4351 }, { "epoch": 0.1841103308232507, "grad_norm": 2.678985357284546, "learning_rate": 0.001, "loss": 2.9109, "step": 4352 }, { "epoch": 0.18415263558676706, "grad_norm": 0.25267353653907776, "learning_rate": 0.001, "loss": 2.4621, "step": 4353 }, { "epoch": 0.18419494035028344, "grad_norm": 0.24416348338127136, "learning_rate": 0.001, "loss": 1.9122, "step": 4354 }, { "epoch": 0.18423724511379982, "grad_norm": 0.24533572793006897, "learning_rate": 0.001, "loss": 2.3288, "step": 4355 }, { "epoch": 0.18427954987731618, "grad_norm": 0.2123999446630478, "learning_rate": 0.001, "loss": 2.8654, "step": 4356 }, { "epoch": 0.18432185464083256, "grad_norm": 0.5792009234428406, "learning_rate": 0.001, "loss": 2.8559, "step": 4357 }, { "epoch": 0.18436415940434894, "grad_norm": 0.30343350768089294, "learning_rate": 0.001, "loss": 2.3648, "step": 4358 }, { "epoch": 0.1844064641678653, "grad_norm": 0.23615819215774536, "learning_rate": 0.001, "loss": 1.7143, "step": 4359 }, { "epoch": 0.18444876893138168, "grad_norm": 0.2771995961666107, "learning_rate": 0.001, "loss": 2.4038, "step": 4360 }, { "epoch": 0.18449107369489803, "grad_norm": 0.7265622615814209, "learning_rate": 0.001, "loss": 2.8253, "step": 4361 }, { "epoch": 0.18453337845841442, "grad_norm": 0.1909170150756836, "learning_rate": 0.001, "loss": 2.1125, "step": 4362 }, { "epoch": 0.1845756832219308, "grad_norm": 0.2617343068122864, "learning_rate": 0.001, "loss": 2.8421, "step": 4363 }, { "epoch": 0.18461798798544715, "grad_norm": 0.6342441439628601, "learning_rate": 0.001, "loss": 2.0168, "step": 4364 }, { "epoch": 0.18466029274896353, "grad_norm": 0.3092588484287262, "learning_rate": 0.001, "loss": 2.8462, "step": 4365 }, { "epoch": 0.18470259751247992, "grad_norm": 0.27178463339805603, "learning_rate": 0.001, "loss": 2.1236, "step": 4366 }, { "epoch": 0.18474490227599627, "grad_norm": 10.186301231384277, "learning_rate": 0.001, "loss": 2.949, "step": 4367 }, { "epoch": 0.18478720703951265, "grad_norm": 0.23955731093883514, "learning_rate": 0.001, "loss": 2.6389, "step": 4368 }, { "epoch": 0.18482951180302903, "grad_norm": 1.5058904886245728, "learning_rate": 0.001, "loss": 2.7837, "step": 4369 }, { "epoch": 0.1848718165665454, "grad_norm": 2.1568853855133057, "learning_rate": 0.001, "loss": 3.1977, "step": 4370 }, { "epoch": 0.18491412133006177, "grad_norm": 0.23633380234241486, "learning_rate": 0.001, "loss": 1.8045, "step": 4371 }, { "epoch": 0.18495642609357812, "grad_norm": 0.31348666548728943, "learning_rate": 0.001, "loss": 2.6933, "step": 4372 }, { "epoch": 0.1849987308570945, "grad_norm": 0.3345177471637726, "learning_rate": 0.001, "loss": 2.535, "step": 4373 }, { "epoch": 0.1850410356206109, "grad_norm": 0.26245883107185364, "learning_rate": 0.001, "loss": 1.6552, "step": 4374 }, { "epoch": 0.18508334038412724, "grad_norm": 0.5210462808609009, "learning_rate": 0.001, "loss": 2.2235, "step": 4375 }, { "epoch": 0.18512564514764362, "grad_norm": 1.8073543310165405, "learning_rate": 0.001, "loss": 2.5607, "step": 4376 }, { "epoch": 0.18516794991116, "grad_norm": 1.0985215902328491, "learning_rate": 0.001, "loss": 2.2265, "step": 4377 }, { "epoch": 0.18521025467467636, "grad_norm": 0.4432186484336853, "learning_rate": 0.001, "loss": 3.1894, "step": 4378 }, { "epoch": 0.18525255943819274, "grad_norm": 10.145984649658203, "learning_rate": 0.001, "loss": 2.348, "step": 4379 }, { "epoch": 0.18529486420170913, "grad_norm": 0.7287303805351257, "learning_rate": 0.001, "loss": 3.4078, "step": 4380 }, { "epoch": 0.18533716896522548, "grad_norm": 0.4048196077346802, "learning_rate": 0.001, "loss": 2.3973, "step": 4381 }, { "epoch": 0.18537947372874186, "grad_norm": 0.3356500566005707, "learning_rate": 0.001, "loss": 1.8402, "step": 4382 }, { "epoch": 0.18542177849225822, "grad_norm": 0.3641221225261688, "learning_rate": 0.001, "loss": 2.937, "step": 4383 }, { "epoch": 0.1854640832557746, "grad_norm": 0.3666855990886688, "learning_rate": 0.001, "loss": 2.2261, "step": 4384 }, { "epoch": 0.18550638801929098, "grad_norm": 0.34403887391090393, "learning_rate": 0.001, "loss": 2.4207, "step": 4385 }, { "epoch": 0.18554869278280733, "grad_norm": 0.3421815037727356, "learning_rate": 0.001, "loss": 2.913, "step": 4386 }, { "epoch": 0.18559099754632372, "grad_norm": 0.22695082426071167, "learning_rate": 0.001, "loss": 1.8004, "step": 4387 }, { "epoch": 0.1856333023098401, "grad_norm": 0.24995842576026917, "learning_rate": 0.001, "loss": 1.5226, "step": 4388 }, { "epoch": 0.18567560707335645, "grad_norm": 0.23015795648097992, "learning_rate": 0.001, "loss": 2.1822, "step": 4389 }, { "epoch": 0.18571791183687283, "grad_norm": 0.26571375131607056, "learning_rate": 0.001, "loss": 2.0279, "step": 4390 }, { "epoch": 0.18576021660038922, "grad_norm": 0.22029922902584076, "learning_rate": 0.001, "loss": 2.4631, "step": 4391 }, { "epoch": 0.18580252136390557, "grad_norm": 0.24884076416492462, "learning_rate": 0.001, "loss": 2.1439, "step": 4392 }, { "epoch": 0.18584482612742195, "grad_norm": 0.27514705061912537, "learning_rate": 0.001, "loss": 2.0805, "step": 4393 }, { "epoch": 0.1858871308909383, "grad_norm": 0.23706066608428955, "learning_rate": 0.001, "loss": 2.3177, "step": 4394 }, { "epoch": 0.1859294356544547, "grad_norm": 0.21739627420902252, "learning_rate": 0.001, "loss": 1.9557, "step": 4395 }, { "epoch": 0.18597174041797107, "grad_norm": 0.3604746460914612, "learning_rate": 0.001, "loss": 1.6202, "step": 4396 }, { "epoch": 0.18601404518148743, "grad_norm": 0.25236776471138, "learning_rate": 0.001, "loss": 2.6437, "step": 4397 }, { "epoch": 0.1860563499450038, "grad_norm": 0.23242443799972534, "learning_rate": 0.001, "loss": 3.186, "step": 4398 }, { "epoch": 0.1860986547085202, "grad_norm": 0.3794444799423218, "learning_rate": 0.001, "loss": 2.4195, "step": 4399 }, { "epoch": 0.18614095947203654, "grad_norm": 0.23218633234500885, "learning_rate": 0.001, "loss": 2.6508, "step": 4400 }, { "epoch": 0.18618326423555293, "grad_norm": 0.20233222842216492, "learning_rate": 0.001, "loss": 1.5472, "step": 4401 }, { "epoch": 0.1862255689990693, "grad_norm": 0.6316521167755127, "learning_rate": 0.001, "loss": 3.3565, "step": 4402 }, { "epoch": 0.18626787376258566, "grad_norm": 0.40583282709121704, "learning_rate": 0.001, "loss": 2.4887, "step": 4403 }, { "epoch": 0.18631017852610204, "grad_norm": 0.35350820422172546, "learning_rate": 0.001, "loss": 2.5153, "step": 4404 }, { "epoch": 0.1863524832896184, "grad_norm": 0.2667691707611084, "learning_rate": 0.001, "loss": 3.2235, "step": 4405 }, { "epoch": 0.18639478805313478, "grad_norm": 0.7131944894790649, "learning_rate": 0.001, "loss": 3.317, "step": 4406 }, { "epoch": 0.18643709281665116, "grad_norm": 0.2557836174964905, "learning_rate": 0.001, "loss": 2.8428, "step": 4407 }, { "epoch": 0.18647939758016752, "grad_norm": 0.18979831039905548, "learning_rate": 0.001, "loss": 2.1681, "step": 4408 }, { "epoch": 0.1865217023436839, "grad_norm": 0.8334029316902161, "learning_rate": 0.001, "loss": 1.9816, "step": 4409 }, { "epoch": 0.18656400710720028, "grad_norm": 0.2034035176038742, "learning_rate": 0.001, "loss": 1.8831, "step": 4410 }, { "epoch": 0.18660631187071663, "grad_norm": 0.25661614537239075, "learning_rate": 0.001, "loss": 2.1606, "step": 4411 }, { "epoch": 0.18664861663423302, "grad_norm": 0.21977266669273376, "learning_rate": 0.001, "loss": 2.2903, "step": 4412 }, { "epoch": 0.1866909213977494, "grad_norm": 0.5622522234916687, "learning_rate": 0.001, "loss": 3.0276, "step": 4413 }, { "epoch": 0.18673322616126575, "grad_norm": 0.2287818193435669, "learning_rate": 0.001, "loss": 3.0729, "step": 4414 }, { "epoch": 0.18677553092478213, "grad_norm": 0.2905156910419464, "learning_rate": 0.001, "loss": 3.1699, "step": 4415 }, { "epoch": 0.1868178356882985, "grad_norm": 0.23308047652244568, "learning_rate": 0.001, "loss": 2.4299, "step": 4416 }, { "epoch": 0.18686014045181487, "grad_norm": 0.23100613057613373, "learning_rate": 0.001, "loss": 2.6107, "step": 4417 }, { "epoch": 0.18690244521533125, "grad_norm": 0.2881180942058563, "learning_rate": 0.001, "loss": 2.3265, "step": 4418 }, { "epoch": 0.1869447499788476, "grad_norm": 0.26562678813934326, "learning_rate": 0.001, "loss": 2.6836, "step": 4419 }, { "epoch": 0.186987054742364, "grad_norm": 0.21036438643932343, "learning_rate": 0.001, "loss": 1.9917, "step": 4420 }, { "epoch": 0.18702935950588037, "grad_norm": 0.27544263005256653, "learning_rate": 0.001, "loss": 3.4425, "step": 4421 }, { "epoch": 0.18707166426939673, "grad_norm": 0.2592555284500122, "learning_rate": 0.001, "loss": 2.09, "step": 4422 }, { "epoch": 0.1871139690329131, "grad_norm": 0.5881548523902893, "learning_rate": 0.001, "loss": 2.1932, "step": 4423 }, { "epoch": 0.1871562737964295, "grad_norm": 0.20673885941505432, "learning_rate": 0.001, "loss": 2.2809, "step": 4424 }, { "epoch": 0.18719857855994584, "grad_norm": 0.20291772484779358, "learning_rate": 0.001, "loss": 2.0593, "step": 4425 }, { "epoch": 0.18724088332346223, "grad_norm": 0.270967960357666, "learning_rate": 0.001, "loss": 1.6903, "step": 4426 }, { "epoch": 0.18728318808697858, "grad_norm": 0.35140615701675415, "learning_rate": 0.001, "loss": 2.364, "step": 4427 }, { "epoch": 0.18732549285049496, "grad_norm": 0.20926691591739655, "learning_rate": 0.001, "loss": 1.9464, "step": 4428 }, { "epoch": 0.18736779761401134, "grad_norm": 0.28219670057296753, "learning_rate": 0.001, "loss": 3.2835, "step": 4429 }, { "epoch": 0.1874101023775277, "grad_norm": 3.9769115447998047, "learning_rate": 0.001, "loss": 2.0957, "step": 4430 }, { "epoch": 0.18745240714104408, "grad_norm": 0.24822956323623657, "learning_rate": 0.001, "loss": 2.3957, "step": 4431 }, { "epoch": 0.18749471190456046, "grad_norm": 0.22063516080379486, "learning_rate": 0.001, "loss": 2.1515, "step": 4432 }, { "epoch": 0.18753701666807682, "grad_norm": 0.25612160563468933, "learning_rate": 0.001, "loss": 2.687, "step": 4433 }, { "epoch": 0.1875793214315932, "grad_norm": 0.2492561936378479, "learning_rate": 0.001, "loss": 2.4043, "step": 4434 }, { "epoch": 0.18762162619510958, "grad_norm": 0.28341639041900635, "learning_rate": 0.001, "loss": 2.0796, "step": 4435 }, { "epoch": 0.18766393095862594, "grad_norm": 0.2945077121257782, "learning_rate": 0.001, "loss": 2.1302, "step": 4436 }, { "epoch": 0.18770623572214232, "grad_norm": 0.3373602330684662, "learning_rate": 0.001, "loss": 2.0283, "step": 4437 }, { "epoch": 0.18774854048565867, "grad_norm": 0.24458883702754974, "learning_rate": 0.001, "loss": 2.6162, "step": 4438 }, { "epoch": 0.18779084524917505, "grad_norm": 0.21525456011295319, "learning_rate": 0.001, "loss": 2.0196, "step": 4439 }, { "epoch": 0.18783315001269144, "grad_norm": 0.22924627363681793, "learning_rate": 0.001, "loss": 2.1364, "step": 4440 }, { "epoch": 0.1878754547762078, "grad_norm": 0.22823546826839447, "learning_rate": 0.001, "loss": 3.0261, "step": 4441 }, { "epoch": 0.18791775953972417, "grad_norm": 0.37336304783821106, "learning_rate": 0.001, "loss": 2.0595, "step": 4442 }, { "epoch": 0.18796006430324055, "grad_norm": 2.6956934928894043, "learning_rate": 0.001, "loss": 1.9668, "step": 4443 }, { "epoch": 0.1880023690667569, "grad_norm": 0.7836053371429443, "learning_rate": 0.001, "loss": 2.3485, "step": 4444 }, { "epoch": 0.1880446738302733, "grad_norm": 1.6367077827453613, "learning_rate": 0.001, "loss": 3.084, "step": 4445 }, { "epoch": 0.18808697859378967, "grad_norm": 0.26571542024612427, "learning_rate": 0.001, "loss": 2.4098, "step": 4446 }, { "epoch": 0.18812928335730603, "grad_norm": 0.2825041711330414, "learning_rate": 0.001, "loss": 2.2918, "step": 4447 }, { "epoch": 0.1881715881208224, "grad_norm": 0.246236190199852, "learning_rate": 0.001, "loss": 2.4611, "step": 4448 }, { "epoch": 0.1882138928843388, "grad_norm": 0.5740474462509155, "learning_rate": 0.001, "loss": 2.9142, "step": 4449 }, { "epoch": 0.18825619764785514, "grad_norm": 0.37695226073265076, "learning_rate": 0.001, "loss": 1.6922, "step": 4450 }, { "epoch": 0.18829850241137153, "grad_norm": 1.8497018814086914, "learning_rate": 0.001, "loss": 2.2615, "step": 4451 }, { "epoch": 0.18834080717488788, "grad_norm": 0.2767746150493622, "learning_rate": 0.001, "loss": 2.1198, "step": 4452 }, { "epoch": 0.18838311193840426, "grad_norm": 1.5770310163497925, "learning_rate": 0.001, "loss": 2.4928, "step": 4453 }, { "epoch": 0.18842541670192064, "grad_norm": 0.23543912172317505, "learning_rate": 0.001, "loss": 1.8614, "step": 4454 }, { "epoch": 0.188467721465437, "grad_norm": 0.26487410068511963, "learning_rate": 0.001, "loss": 2.2644, "step": 4455 }, { "epoch": 0.18851002622895338, "grad_norm": 0.26625239849090576, "learning_rate": 0.001, "loss": 2.4482, "step": 4456 }, { "epoch": 0.18855233099246976, "grad_norm": 0.21155591309070587, "learning_rate": 0.001, "loss": 1.929, "step": 4457 }, { "epoch": 0.18859463575598612, "grad_norm": 0.25420722365379333, "learning_rate": 0.001, "loss": 2.1863, "step": 4458 }, { "epoch": 0.1886369405195025, "grad_norm": 0.27316761016845703, "learning_rate": 0.001, "loss": 1.9442, "step": 4459 }, { "epoch": 0.18867924528301888, "grad_norm": 3.036153793334961, "learning_rate": 0.001, "loss": 3.9133, "step": 4460 }, { "epoch": 0.18872155004653524, "grad_norm": 0.22309522330760956, "learning_rate": 0.001, "loss": 2.348, "step": 4461 }, { "epoch": 0.18876385481005162, "grad_norm": 0.21288998425006866, "learning_rate": 0.001, "loss": 1.5457, "step": 4462 }, { "epoch": 0.18880615957356797, "grad_norm": 3.6792640686035156, "learning_rate": 0.001, "loss": 2.3953, "step": 4463 }, { "epoch": 0.18884846433708435, "grad_norm": 0.3118633031845093, "learning_rate": 0.001, "loss": 3.1294, "step": 4464 }, { "epoch": 0.18889076910060074, "grad_norm": 2.057178497314453, "learning_rate": 0.001, "loss": 1.7783, "step": 4465 }, { "epoch": 0.1889330738641171, "grad_norm": 0.6279289722442627, "learning_rate": 0.001, "loss": 1.8288, "step": 4466 }, { "epoch": 0.18897537862763347, "grad_norm": 0.3405144214630127, "learning_rate": 0.001, "loss": 2.3726, "step": 4467 }, { "epoch": 0.18901768339114985, "grad_norm": 0.2901994585990906, "learning_rate": 0.001, "loss": 1.8338, "step": 4468 }, { "epoch": 0.1890599881546662, "grad_norm": 0.3155980706214905, "learning_rate": 0.001, "loss": 3.4189, "step": 4469 }, { "epoch": 0.1891022929181826, "grad_norm": 0.5617590546607971, "learning_rate": 0.001, "loss": 2.9148, "step": 4470 }, { "epoch": 0.18914459768169897, "grad_norm": 0.3109961450099945, "learning_rate": 0.001, "loss": 2.4199, "step": 4471 }, { "epoch": 0.18918690244521533, "grad_norm": 0.23848043382167816, "learning_rate": 0.001, "loss": 2.1432, "step": 4472 }, { "epoch": 0.1892292072087317, "grad_norm": 0.2614109218120575, "learning_rate": 0.001, "loss": 1.9615, "step": 4473 }, { "epoch": 0.18927151197224806, "grad_norm": 0.30078810453414917, "learning_rate": 0.001, "loss": 2.6696, "step": 4474 }, { "epoch": 0.18931381673576445, "grad_norm": 0.25650012493133545, "learning_rate": 0.001, "loss": 1.8638, "step": 4475 }, { "epoch": 0.18935612149928083, "grad_norm": 0.21869732439517975, "learning_rate": 0.001, "loss": 1.5891, "step": 4476 }, { "epoch": 0.18939842626279718, "grad_norm": 8.205790519714355, "learning_rate": 0.001, "loss": 1.5669, "step": 4477 }, { "epoch": 0.18944073102631356, "grad_norm": 0.2447681427001953, "learning_rate": 0.001, "loss": 2.9438, "step": 4478 }, { "epoch": 0.18948303578982995, "grad_norm": 1.446384072303772, "learning_rate": 0.001, "loss": 2.7589, "step": 4479 }, { "epoch": 0.1895253405533463, "grad_norm": 0.8409281373023987, "learning_rate": 0.001, "loss": 1.9919, "step": 4480 }, { "epoch": 0.18956764531686268, "grad_norm": 0.27768608927726746, "learning_rate": 0.001, "loss": 2.4846, "step": 4481 }, { "epoch": 0.18960995008037906, "grad_norm": 0.4152839779853821, "learning_rate": 0.001, "loss": 3.0854, "step": 4482 }, { "epoch": 0.18965225484389542, "grad_norm": 0.2601878046989441, "learning_rate": 0.001, "loss": 2.7528, "step": 4483 }, { "epoch": 0.1896945596074118, "grad_norm": 0.2510074973106384, "learning_rate": 0.001, "loss": 2.1918, "step": 4484 }, { "epoch": 0.18973686437092815, "grad_norm": 0.22711610794067383, "learning_rate": 0.001, "loss": 2.3735, "step": 4485 }, { "epoch": 0.18977916913444454, "grad_norm": 0.35560375452041626, "learning_rate": 0.001, "loss": 2.2848, "step": 4486 }, { "epoch": 0.18982147389796092, "grad_norm": 0.27158334851264954, "learning_rate": 0.001, "loss": 2.6733, "step": 4487 }, { "epoch": 0.18986377866147727, "grad_norm": 0.2838427424430847, "learning_rate": 0.001, "loss": 2.8558, "step": 4488 }, { "epoch": 0.18990608342499365, "grad_norm": 0.2345813810825348, "learning_rate": 0.001, "loss": 3.6895, "step": 4489 }, { "epoch": 0.18994838818851004, "grad_norm": 0.2404671013355255, "learning_rate": 0.001, "loss": 2.1784, "step": 4490 }, { "epoch": 0.1899906929520264, "grad_norm": 0.2407745122909546, "learning_rate": 0.001, "loss": 2.8477, "step": 4491 }, { "epoch": 0.19003299771554277, "grad_norm": 0.2435643970966339, "learning_rate": 0.001, "loss": 2.7741, "step": 4492 }, { "epoch": 0.19007530247905915, "grad_norm": 0.20882461965084076, "learning_rate": 0.001, "loss": 2.0404, "step": 4493 }, { "epoch": 0.1901176072425755, "grad_norm": 0.20815284550189972, "learning_rate": 0.001, "loss": 1.4838, "step": 4494 }, { "epoch": 0.1901599120060919, "grad_norm": 0.5364993810653687, "learning_rate": 0.001, "loss": 2.5901, "step": 4495 }, { "epoch": 0.19020221676960825, "grad_norm": 0.18857216835021973, "learning_rate": 0.001, "loss": 2.0745, "step": 4496 }, { "epoch": 0.19024452153312463, "grad_norm": 0.22083306312561035, "learning_rate": 0.001, "loss": 2.2613, "step": 4497 }, { "epoch": 0.190286826296641, "grad_norm": 0.21830333769321442, "learning_rate": 0.001, "loss": 2.4491, "step": 4498 }, { "epoch": 0.19032913106015736, "grad_norm": 0.918835461139679, "learning_rate": 0.001, "loss": 2.8763, "step": 4499 }, { "epoch": 0.19037143582367375, "grad_norm": 0.2459859997034073, "learning_rate": 0.001, "loss": 2.3936, "step": 4500 }, { "epoch": 0.19041374058719013, "grad_norm": 0.2378329634666443, "learning_rate": 0.001, "loss": 1.8399, "step": 4501 }, { "epoch": 0.19045604535070648, "grad_norm": 0.3564843237400055, "learning_rate": 0.001, "loss": 3.5808, "step": 4502 }, { "epoch": 0.19049835011422286, "grad_norm": 0.5608360171318054, "learning_rate": 0.001, "loss": 2.1496, "step": 4503 }, { "epoch": 0.19054065487773925, "grad_norm": 0.3090474009513855, "learning_rate": 0.001, "loss": 2.3587, "step": 4504 }, { "epoch": 0.1905829596412556, "grad_norm": 0.2552463114261627, "learning_rate": 0.001, "loss": 1.866, "step": 4505 }, { "epoch": 0.19062526440477198, "grad_norm": 0.41491034626960754, "learning_rate": 0.001, "loss": 2.1764, "step": 4506 }, { "epoch": 0.19066756916828834, "grad_norm": 0.2689819633960724, "learning_rate": 0.001, "loss": 1.6736, "step": 4507 }, { "epoch": 0.19070987393180472, "grad_norm": 0.2358677089214325, "learning_rate": 0.001, "loss": 1.9717, "step": 4508 }, { "epoch": 0.1907521786953211, "grad_norm": 0.2064763307571411, "learning_rate": 0.001, "loss": 2.6779, "step": 4509 }, { "epoch": 0.19079448345883745, "grad_norm": 0.24154812097549438, "learning_rate": 0.001, "loss": 1.7884, "step": 4510 }, { "epoch": 0.19083678822235384, "grad_norm": 0.22353674471378326, "learning_rate": 0.001, "loss": 1.9496, "step": 4511 }, { "epoch": 0.19087909298587022, "grad_norm": 0.2455575168132782, "learning_rate": 0.001, "loss": 2.3023, "step": 4512 }, { "epoch": 0.19092139774938657, "grad_norm": 0.22272691130638123, "learning_rate": 0.001, "loss": 2.6156, "step": 4513 }, { "epoch": 0.19096370251290296, "grad_norm": 0.21342520415782928, "learning_rate": 0.001, "loss": 2.2985, "step": 4514 }, { "epoch": 0.19100600727641934, "grad_norm": 0.21540196239948273, "learning_rate": 0.001, "loss": 2.5549, "step": 4515 }, { "epoch": 0.1910483120399357, "grad_norm": 0.20063146948814392, "learning_rate": 0.001, "loss": 3.4714, "step": 4516 }, { "epoch": 0.19109061680345207, "grad_norm": 0.1966932862997055, "learning_rate": 0.001, "loss": 2.1782, "step": 4517 }, { "epoch": 0.19113292156696843, "grad_norm": 1.2073802947998047, "learning_rate": 0.001, "loss": 2.668, "step": 4518 }, { "epoch": 0.1911752263304848, "grad_norm": 0.3695375323295593, "learning_rate": 0.001, "loss": 3.0143, "step": 4519 }, { "epoch": 0.1912175310940012, "grad_norm": 0.21040910482406616, "learning_rate": 0.001, "loss": 2.2652, "step": 4520 }, { "epoch": 0.19125983585751755, "grad_norm": 0.2612590491771698, "learning_rate": 0.001, "loss": 2.0682, "step": 4521 }, { "epoch": 0.19130214062103393, "grad_norm": 1.0337492227554321, "learning_rate": 0.001, "loss": 1.7501, "step": 4522 }, { "epoch": 0.1913444453845503, "grad_norm": 0.2554808259010315, "learning_rate": 0.001, "loss": 2.4155, "step": 4523 }, { "epoch": 0.19138675014806666, "grad_norm": 2.867785692214966, "learning_rate": 0.001, "loss": 1.7372, "step": 4524 }, { "epoch": 0.19142905491158305, "grad_norm": 3.803913116455078, "learning_rate": 0.001, "loss": 2.8557, "step": 4525 }, { "epoch": 0.19147135967509943, "grad_norm": 0.671362578868866, "learning_rate": 0.001, "loss": 1.9373, "step": 4526 }, { "epoch": 0.19151366443861578, "grad_norm": 0.33404502272605896, "learning_rate": 0.001, "loss": 2.3405, "step": 4527 }, { "epoch": 0.19155596920213216, "grad_norm": 7.813088893890381, "learning_rate": 0.001, "loss": 3.0833, "step": 4528 }, { "epoch": 0.19159827396564852, "grad_norm": 0.33136120438575745, "learning_rate": 0.001, "loss": 3.0437, "step": 4529 }, { "epoch": 0.1916405787291649, "grad_norm": 0.3208834230899811, "learning_rate": 0.001, "loss": 2.8585, "step": 4530 }, { "epoch": 0.19168288349268128, "grad_norm": 0.46846067905426025, "learning_rate": 0.001, "loss": 3.273, "step": 4531 }, { "epoch": 0.19172518825619764, "grad_norm": 0.4278513789176941, "learning_rate": 0.001, "loss": 2.7628, "step": 4532 }, { "epoch": 0.19176749301971402, "grad_norm": 0.3164735734462738, "learning_rate": 0.001, "loss": 3.0253, "step": 4533 }, { "epoch": 0.1918097977832304, "grad_norm": 0.22496308386325836, "learning_rate": 0.001, "loss": 2.1171, "step": 4534 }, { "epoch": 0.19185210254674676, "grad_norm": 0.2776702344417572, "learning_rate": 0.001, "loss": 2.7838, "step": 4535 }, { "epoch": 0.19189440731026314, "grad_norm": 0.6228007674217224, "learning_rate": 0.001, "loss": 2.2344, "step": 4536 }, { "epoch": 0.19193671207377952, "grad_norm": 0.48833128809928894, "learning_rate": 0.001, "loss": 2.986, "step": 4537 }, { "epoch": 0.19197901683729587, "grad_norm": 0.3756866753101349, "learning_rate": 0.001, "loss": 1.9291, "step": 4538 }, { "epoch": 0.19202132160081226, "grad_norm": 0.278314471244812, "learning_rate": 0.001, "loss": 2.1759, "step": 4539 }, { "epoch": 0.1920636263643286, "grad_norm": 1.2533029317855835, "learning_rate": 0.001, "loss": 2.107, "step": 4540 }, { "epoch": 0.192105931127845, "grad_norm": 0.2423766851425171, "learning_rate": 0.001, "loss": 1.7797, "step": 4541 }, { "epoch": 0.19214823589136137, "grad_norm": 0.3214251399040222, "learning_rate": 0.001, "loss": 2.94, "step": 4542 }, { "epoch": 0.19219054065487773, "grad_norm": 0.4931315779685974, "learning_rate": 0.001, "loss": 1.9467, "step": 4543 }, { "epoch": 0.1922328454183941, "grad_norm": 2.458348035812378, "learning_rate": 0.001, "loss": 2.5617, "step": 4544 }, { "epoch": 0.1922751501819105, "grad_norm": 1.2763197422027588, "learning_rate": 0.001, "loss": 2.5777, "step": 4545 }, { "epoch": 0.19231745494542685, "grad_norm": 0.2197532057762146, "learning_rate": 0.001, "loss": 1.8814, "step": 4546 }, { "epoch": 0.19235975970894323, "grad_norm": 0.335099458694458, "learning_rate": 0.001, "loss": 1.9736, "step": 4547 }, { "epoch": 0.1924020644724596, "grad_norm": 0.24733540415763855, "learning_rate": 0.001, "loss": 1.7578, "step": 4548 }, { "epoch": 0.19244436923597597, "grad_norm": 0.2788896858692169, "learning_rate": 0.001, "loss": 3.6972, "step": 4549 }, { "epoch": 0.19248667399949235, "grad_norm": 0.250205397605896, "learning_rate": 0.001, "loss": 2.5927, "step": 4550 }, { "epoch": 0.1925289787630087, "grad_norm": 0.9215936660766602, "learning_rate": 0.001, "loss": 1.9442, "step": 4551 }, { "epoch": 0.19257128352652508, "grad_norm": 0.2189418524503708, "learning_rate": 0.001, "loss": 2.3595, "step": 4552 }, { "epoch": 0.19261358829004147, "grad_norm": 0.2297450602054596, "learning_rate": 0.001, "loss": 2.0018, "step": 4553 }, { "epoch": 0.19265589305355782, "grad_norm": 0.8725435137748718, "learning_rate": 0.001, "loss": 2.3299, "step": 4554 }, { "epoch": 0.1926981978170742, "grad_norm": 0.2853243350982666, "learning_rate": 0.001, "loss": 3.1252, "step": 4555 }, { "epoch": 0.19274050258059058, "grad_norm": 0.26800212264060974, "learning_rate": 0.001, "loss": 2.5468, "step": 4556 }, { "epoch": 0.19278280734410694, "grad_norm": 0.4624558091163635, "learning_rate": 0.001, "loss": 2.0941, "step": 4557 }, { "epoch": 0.19282511210762332, "grad_norm": 0.2232256382703781, "learning_rate": 0.001, "loss": 2.4683, "step": 4558 }, { "epoch": 0.1928674168711397, "grad_norm": 0.2688036859035492, "learning_rate": 0.001, "loss": 2.223, "step": 4559 }, { "epoch": 0.19290972163465606, "grad_norm": 0.2234850972890854, "learning_rate": 0.001, "loss": 2.6623, "step": 4560 }, { "epoch": 0.19295202639817244, "grad_norm": 0.3354385495185852, "learning_rate": 0.001, "loss": 3.2651, "step": 4561 }, { "epoch": 0.1929943311616888, "grad_norm": 0.26778674125671387, "learning_rate": 0.001, "loss": 2.6781, "step": 4562 }, { "epoch": 0.19303663592520517, "grad_norm": 0.2896103858947754, "learning_rate": 0.001, "loss": 2.2987, "step": 4563 }, { "epoch": 0.19307894068872156, "grad_norm": 0.21023137867450714, "learning_rate": 0.001, "loss": 1.6636, "step": 4564 }, { "epoch": 0.1931212454522379, "grad_norm": 0.2824184000492096, "learning_rate": 0.001, "loss": 2.5228, "step": 4565 }, { "epoch": 0.1931635502157543, "grad_norm": 0.22193598747253418, "learning_rate": 0.001, "loss": 1.9797, "step": 4566 }, { "epoch": 0.19320585497927067, "grad_norm": 0.26808369159698486, "learning_rate": 0.001, "loss": 2.533, "step": 4567 }, { "epoch": 0.19324815974278703, "grad_norm": 0.2427438348531723, "learning_rate": 0.001, "loss": 2.6625, "step": 4568 }, { "epoch": 0.1932904645063034, "grad_norm": 0.2184421867132187, "learning_rate": 0.001, "loss": 2.5565, "step": 4569 }, { "epoch": 0.1933327692698198, "grad_norm": 0.4565901756286621, "learning_rate": 0.001, "loss": 2.0758, "step": 4570 }, { "epoch": 0.19337507403333615, "grad_norm": 0.9183976650238037, "learning_rate": 0.001, "loss": 2.1397, "step": 4571 }, { "epoch": 0.19341737879685253, "grad_norm": 0.20792505145072937, "learning_rate": 0.001, "loss": 2.7359, "step": 4572 }, { "epoch": 0.1934596835603689, "grad_norm": 0.7904244661331177, "learning_rate": 0.001, "loss": 3.3685, "step": 4573 }, { "epoch": 0.19350198832388527, "grad_norm": 0.2975200116634369, "learning_rate": 0.001, "loss": 3.3311, "step": 4574 }, { "epoch": 0.19354429308740165, "grad_norm": 4.709847450256348, "learning_rate": 0.001, "loss": 2.6445, "step": 4575 }, { "epoch": 0.193586597850918, "grad_norm": 0.23912140727043152, "learning_rate": 0.001, "loss": 2.272, "step": 4576 }, { "epoch": 0.19362890261443438, "grad_norm": 0.3137105405330658, "learning_rate": 0.001, "loss": 2.5401, "step": 4577 }, { "epoch": 0.19367120737795077, "grad_norm": 0.27049148082733154, "learning_rate": 0.001, "loss": 2.8427, "step": 4578 }, { "epoch": 0.19371351214146712, "grad_norm": 0.3511445224285126, "learning_rate": 0.001, "loss": 1.8104, "step": 4579 }, { "epoch": 0.1937558169049835, "grad_norm": 0.280909925699234, "learning_rate": 0.001, "loss": 1.8538, "step": 4580 }, { "epoch": 0.19379812166849988, "grad_norm": 0.2330571413040161, "learning_rate": 0.001, "loss": 1.9844, "step": 4581 }, { "epoch": 0.19384042643201624, "grad_norm": 0.2649915814399719, "learning_rate": 0.001, "loss": 3.0022, "step": 4582 }, { "epoch": 0.19388273119553262, "grad_norm": 0.28710460662841797, "learning_rate": 0.001, "loss": 2.5591, "step": 4583 }, { "epoch": 0.193925035959049, "grad_norm": 0.22108560800552368, "learning_rate": 0.001, "loss": 3.5141, "step": 4584 }, { "epoch": 0.19396734072256536, "grad_norm": 0.21730417013168335, "learning_rate": 0.001, "loss": 2.453, "step": 4585 }, { "epoch": 0.19400964548608174, "grad_norm": 0.4755695164203644, "learning_rate": 0.001, "loss": 2.3401, "step": 4586 }, { "epoch": 0.1940519502495981, "grad_norm": 0.2430569976568222, "learning_rate": 0.001, "loss": 2.5329, "step": 4587 }, { "epoch": 0.19409425501311448, "grad_norm": 3.246774435043335, "learning_rate": 0.001, "loss": 2.1003, "step": 4588 }, { "epoch": 0.19413655977663086, "grad_norm": 0.22115357220172882, "learning_rate": 0.001, "loss": 2.791, "step": 4589 }, { "epoch": 0.1941788645401472, "grad_norm": 0.3158797025680542, "learning_rate": 0.001, "loss": 2.5248, "step": 4590 }, { "epoch": 0.1942211693036636, "grad_norm": 0.21238943934440613, "learning_rate": 0.001, "loss": 2.0581, "step": 4591 }, { "epoch": 0.19426347406717998, "grad_norm": 0.2093350887298584, "learning_rate": 0.001, "loss": 2.4956, "step": 4592 }, { "epoch": 0.19430577883069633, "grad_norm": 0.22821201384067535, "learning_rate": 0.001, "loss": 3.2084, "step": 4593 }, { "epoch": 0.1943480835942127, "grad_norm": 0.26408034563064575, "learning_rate": 0.001, "loss": 2.2652, "step": 4594 }, { "epoch": 0.1943903883577291, "grad_norm": 0.24591988325119019, "learning_rate": 0.001, "loss": 2.1064, "step": 4595 }, { "epoch": 0.19443269312124545, "grad_norm": 5.068770408630371, "learning_rate": 0.001, "loss": 1.804, "step": 4596 }, { "epoch": 0.19447499788476183, "grad_norm": 0.22931112349033356, "learning_rate": 0.001, "loss": 1.7943, "step": 4597 }, { "epoch": 0.19451730264827818, "grad_norm": 0.5310946702957153, "learning_rate": 0.001, "loss": 2.0597, "step": 4598 }, { "epoch": 0.19455960741179457, "grad_norm": 0.30972105264663696, "learning_rate": 0.001, "loss": 3.2119, "step": 4599 }, { "epoch": 0.19460191217531095, "grad_norm": 0.2220851629972458, "learning_rate": 0.001, "loss": 2.2495, "step": 4600 }, { "epoch": 0.1946442169388273, "grad_norm": 0.871143639087677, "learning_rate": 0.001, "loss": 2.6081, "step": 4601 }, { "epoch": 0.19468652170234368, "grad_norm": 0.2440234124660492, "learning_rate": 0.001, "loss": 3.2444, "step": 4602 }, { "epoch": 0.19472882646586007, "grad_norm": 15.243217468261719, "learning_rate": 0.001, "loss": 2.0989, "step": 4603 }, { "epoch": 0.19477113122937642, "grad_norm": 0.22179925441741943, "learning_rate": 0.001, "loss": 2.6825, "step": 4604 }, { "epoch": 0.1948134359928928, "grad_norm": 0.2920258641242981, "learning_rate": 0.001, "loss": 1.8067, "step": 4605 }, { "epoch": 0.19485574075640918, "grad_norm": 0.45571228861808777, "learning_rate": 0.001, "loss": 2.4332, "step": 4606 }, { "epoch": 0.19489804551992554, "grad_norm": 0.21809923648834229, "learning_rate": 0.001, "loss": 1.9698, "step": 4607 }, { "epoch": 0.19494035028344192, "grad_norm": 0.22986778616905212, "learning_rate": 0.001, "loss": 2.3261, "step": 4608 }, { "epoch": 0.19498265504695828, "grad_norm": 0.2756772041320801, "learning_rate": 0.001, "loss": 3.1784, "step": 4609 }, { "epoch": 0.19502495981047466, "grad_norm": 0.20755811035633087, "learning_rate": 0.001, "loss": 2.1669, "step": 4610 }, { "epoch": 0.19506726457399104, "grad_norm": 0.33731144666671753, "learning_rate": 0.001, "loss": 2.3201, "step": 4611 }, { "epoch": 0.1951095693375074, "grad_norm": 0.35516980290412903, "learning_rate": 0.001, "loss": 2.119, "step": 4612 }, { "epoch": 0.19515187410102378, "grad_norm": 0.21821679174900055, "learning_rate": 0.001, "loss": 3.3628, "step": 4613 }, { "epoch": 0.19519417886454016, "grad_norm": 0.23565897345542908, "learning_rate": 0.001, "loss": 2.407, "step": 4614 }, { "epoch": 0.1952364836280565, "grad_norm": 0.19071324169635773, "learning_rate": 0.001, "loss": 2.0236, "step": 4615 }, { "epoch": 0.1952787883915729, "grad_norm": 0.21241721510887146, "learning_rate": 0.001, "loss": 1.9463, "step": 4616 }, { "epoch": 0.19532109315508928, "grad_norm": 0.21527446806430817, "learning_rate": 0.001, "loss": 2.137, "step": 4617 }, { "epoch": 0.19536339791860563, "grad_norm": 0.21654748916625977, "learning_rate": 0.001, "loss": 2.9063, "step": 4618 }, { "epoch": 0.195405702682122, "grad_norm": 0.26138582825660706, "learning_rate": 0.001, "loss": 2.4052, "step": 4619 }, { "epoch": 0.19544800744563837, "grad_norm": 0.2170308381319046, "learning_rate": 0.001, "loss": 1.9937, "step": 4620 }, { "epoch": 0.19549031220915475, "grad_norm": 1.4368832111358643, "learning_rate": 0.001, "loss": 2.1986, "step": 4621 }, { "epoch": 0.19553261697267113, "grad_norm": 0.21207021176815033, "learning_rate": 0.001, "loss": 1.8493, "step": 4622 }, { "epoch": 0.19557492173618748, "grad_norm": 0.3097752034664154, "learning_rate": 0.001, "loss": 1.6055, "step": 4623 }, { "epoch": 0.19561722649970387, "grad_norm": 0.2724337875843048, "learning_rate": 0.001, "loss": 2.3367, "step": 4624 }, { "epoch": 0.19565953126322025, "grad_norm": 0.21333764493465424, "learning_rate": 0.001, "loss": 1.6781, "step": 4625 }, { "epoch": 0.1957018360267366, "grad_norm": 0.30645281076431274, "learning_rate": 0.001, "loss": 2.288, "step": 4626 }, { "epoch": 0.19574414079025299, "grad_norm": 0.2575608193874359, "learning_rate": 0.001, "loss": 2.4528, "step": 4627 }, { "epoch": 0.19578644555376937, "grad_norm": 0.21486923098564148, "learning_rate": 0.001, "loss": 1.9437, "step": 4628 }, { "epoch": 0.19582875031728572, "grad_norm": 0.24129249155521393, "learning_rate": 0.001, "loss": 3.5545, "step": 4629 }, { "epoch": 0.1958710550808021, "grad_norm": 0.3026060461997986, "learning_rate": 0.001, "loss": 2.1121, "step": 4630 }, { "epoch": 0.19591335984431846, "grad_norm": 0.2635842561721802, "learning_rate": 0.001, "loss": 2.1426, "step": 4631 }, { "epoch": 0.19595566460783484, "grad_norm": 0.38324952125549316, "learning_rate": 0.001, "loss": 2.0874, "step": 4632 }, { "epoch": 0.19599796937135122, "grad_norm": 0.6430294513702393, "learning_rate": 0.001, "loss": 2.474, "step": 4633 }, { "epoch": 0.19604027413486758, "grad_norm": 0.2072354555130005, "learning_rate": 0.001, "loss": 3.0056, "step": 4634 }, { "epoch": 0.19608257889838396, "grad_norm": 0.22297710180282593, "learning_rate": 0.001, "loss": 2.1925, "step": 4635 }, { "epoch": 0.19612488366190034, "grad_norm": 2.166142702102661, "learning_rate": 0.001, "loss": 2.1752, "step": 4636 }, { "epoch": 0.1961671884254167, "grad_norm": 0.21612171828746796, "learning_rate": 0.001, "loss": 3.9448, "step": 4637 }, { "epoch": 0.19620949318893308, "grad_norm": 0.2798874080181122, "learning_rate": 0.001, "loss": 2.152, "step": 4638 }, { "epoch": 0.19625179795244946, "grad_norm": 0.29378223419189453, "learning_rate": 0.001, "loss": 1.913, "step": 4639 }, { "epoch": 0.1962941027159658, "grad_norm": 0.37038248777389526, "learning_rate": 0.001, "loss": 2.5038, "step": 4640 }, { "epoch": 0.1963364074794822, "grad_norm": 0.2652187645435333, "learning_rate": 0.001, "loss": 2.1267, "step": 4641 }, { "epoch": 0.19637871224299855, "grad_norm": 0.27471134066581726, "learning_rate": 0.001, "loss": 2.1653, "step": 4642 }, { "epoch": 0.19642101700651493, "grad_norm": 0.19736327230930328, "learning_rate": 0.001, "loss": 2.9415, "step": 4643 }, { "epoch": 0.1964633217700313, "grad_norm": 0.22357088327407837, "learning_rate": 0.001, "loss": 3.3843, "step": 4644 }, { "epoch": 0.19650562653354767, "grad_norm": 0.22968696057796478, "learning_rate": 0.001, "loss": 2.0118, "step": 4645 }, { "epoch": 0.19654793129706405, "grad_norm": 1.8765842914581299, "learning_rate": 0.001, "loss": 2.4737, "step": 4646 }, { "epoch": 0.19659023606058043, "grad_norm": 0.41798344254493713, "learning_rate": 0.001, "loss": 2.6721, "step": 4647 }, { "epoch": 0.19663254082409679, "grad_norm": 0.20289246737957, "learning_rate": 0.001, "loss": 3.4892, "step": 4648 }, { "epoch": 0.19667484558761317, "grad_norm": 0.2355775684118271, "learning_rate": 0.001, "loss": 1.8723, "step": 4649 }, { "epoch": 0.19671715035112955, "grad_norm": 0.2144707590341568, "learning_rate": 0.001, "loss": 2.787, "step": 4650 }, { "epoch": 0.1967594551146459, "grad_norm": 0.23321262001991272, "learning_rate": 0.001, "loss": 1.7408, "step": 4651 }, { "epoch": 0.19680175987816229, "grad_norm": 0.21460668742656708, "learning_rate": 0.001, "loss": 1.8128, "step": 4652 }, { "epoch": 0.19684406464167864, "grad_norm": 0.6912943124771118, "learning_rate": 0.001, "loss": 2.8737, "step": 4653 }, { "epoch": 0.19688636940519502, "grad_norm": 0.24109399318695068, "learning_rate": 0.001, "loss": 2.2734, "step": 4654 }, { "epoch": 0.1969286741687114, "grad_norm": 0.47939634323120117, "learning_rate": 0.001, "loss": 1.6569, "step": 4655 }, { "epoch": 0.19697097893222776, "grad_norm": 2.933166742324829, "learning_rate": 0.001, "loss": 2.6041, "step": 4656 }, { "epoch": 0.19701328369574414, "grad_norm": 0.8214706778526306, "learning_rate": 0.001, "loss": 2.518, "step": 4657 }, { "epoch": 0.19705558845926052, "grad_norm": 0.2619834542274475, "learning_rate": 0.001, "loss": 3.2975, "step": 4658 }, { "epoch": 0.19709789322277688, "grad_norm": 0.3553191125392914, "learning_rate": 0.001, "loss": 3.8384, "step": 4659 }, { "epoch": 0.19714019798629326, "grad_norm": 0.1962711215019226, "learning_rate": 0.001, "loss": 1.984, "step": 4660 }, { "epoch": 0.19718250274980964, "grad_norm": 0.2404264360666275, "learning_rate": 0.001, "loss": 2.4286, "step": 4661 }, { "epoch": 0.197224807513326, "grad_norm": 0.24695684015750885, "learning_rate": 0.001, "loss": 2.8653, "step": 4662 }, { "epoch": 0.19726711227684238, "grad_norm": 20.713294982910156, "learning_rate": 0.001, "loss": 2.2609, "step": 4663 }, { "epoch": 0.19730941704035873, "grad_norm": 0.25775420665740967, "learning_rate": 0.001, "loss": 2.2937, "step": 4664 }, { "epoch": 0.1973517218038751, "grad_norm": 0.24397379159927368, "learning_rate": 0.001, "loss": 2.0571, "step": 4665 }, { "epoch": 0.1973940265673915, "grad_norm": 0.38282090425491333, "learning_rate": 0.001, "loss": 2.2702, "step": 4666 }, { "epoch": 0.19743633133090785, "grad_norm": 0.23603591322898865, "learning_rate": 0.001, "loss": 2.01, "step": 4667 }, { "epoch": 0.19747863609442423, "grad_norm": 0.2705064117908478, "learning_rate": 0.001, "loss": 2.6685, "step": 4668 }, { "epoch": 0.1975209408579406, "grad_norm": 3.485954999923706, "learning_rate": 0.001, "loss": 3.6868, "step": 4669 }, { "epoch": 0.19756324562145697, "grad_norm": 0.3059000074863434, "learning_rate": 0.001, "loss": 2.5588, "step": 4670 }, { "epoch": 0.19760555038497335, "grad_norm": 2.187786102294922, "learning_rate": 0.001, "loss": 3.4677, "step": 4671 }, { "epoch": 0.19764785514848973, "grad_norm": 0.25730597972869873, "learning_rate": 0.001, "loss": 2.3108, "step": 4672 }, { "epoch": 0.19769015991200609, "grad_norm": 0.2296760380268097, "learning_rate": 0.001, "loss": 1.5381, "step": 4673 }, { "epoch": 0.19773246467552247, "grad_norm": 0.31700170040130615, "learning_rate": 0.001, "loss": 3.6578, "step": 4674 }, { "epoch": 0.19777476943903882, "grad_norm": 0.43023067712783813, "learning_rate": 0.001, "loss": 3.3044, "step": 4675 }, { "epoch": 0.1978170742025552, "grad_norm": 1.113132357597351, "learning_rate": 0.001, "loss": 3.9424, "step": 4676 }, { "epoch": 0.1978593789660716, "grad_norm": 0.2837026119232178, "learning_rate": 0.001, "loss": 2.3631, "step": 4677 }, { "epoch": 0.19790168372958794, "grad_norm": 0.29237067699432373, "learning_rate": 0.001, "loss": 2.443, "step": 4678 }, { "epoch": 0.19794398849310432, "grad_norm": 0.3563275635242462, "learning_rate": 0.001, "loss": 2.2434, "step": 4679 }, { "epoch": 0.1979862932566207, "grad_norm": 1.2392581701278687, "learning_rate": 0.001, "loss": 3.6908, "step": 4680 }, { "epoch": 0.19802859802013706, "grad_norm": 2.4038748741149902, "learning_rate": 0.001, "loss": 1.984, "step": 4681 }, { "epoch": 0.19807090278365344, "grad_norm": 0.44923508167266846, "learning_rate": 0.001, "loss": 2.7697, "step": 4682 }, { "epoch": 0.19811320754716982, "grad_norm": 0.3151043951511383, "learning_rate": 0.001, "loss": 2.9882, "step": 4683 }, { "epoch": 0.19815551231068618, "grad_norm": 0.46017053723335266, "learning_rate": 0.001, "loss": 1.9041, "step": 4684 }, { "epoch": 0.19819781707420256, "grad_norm": 0.3234018385410309, "learning_rate": 0.001, "loss": 1.8249, "step": 4685 }, { "epoch": 0.1982401218377189, "grad_norm": 0.3335753083229065, "learning_rate": 0.001, "loss": 2.3614, "step": 4686 }, { "epoch": 0.1982824266012353, "grad_norm": 0.382220059633255, "learning_rate": 0.001, "loss": 2.4, "step": 4687 }, { "epoch": 0.19832473136475168, "grad_norm": 0.9957152605056763, "learning_rate": 0.001, "loss": 3.0074, "step": 4688 }, { "epoch": 0.19836703612826803, "grad_norm": 0.2819012701511383, "learning_rate": 0.001, "loss": 3.5457, "step": 4689 }, { "epoch": 0.1984093408917844, "grad_norm": 2.3886940479278564, "learning_rate": 0.001, "loss": 3.4807, "step": 4690 }, { "epoch": 0.1984516456553008, "grad_norm": 0.35311177372932434, "learning_rate": 0.001, "loss": 3.1578, "step": 4691 }, { "epoch": 0.19849395041881715, "grad_norm": 1.2206075191497803, "learning_rate": 0.001, "loss": 2.128, "step": 4692 }, { "epoch": 0.19853625518233353, "grad_norm": 5.357852458953857, "learning_rate": 0.001, "loss": 2.2252, "step": 4693 }, { "epoch": 0.19857855994584991, "grad_norm": 0.4290313422679901, "learning_rate": 0.001, "loss": 2.0768, "step": 4694 }, { "epoch": 0.19862086470936627, "grad_norm": 0.22791223227977753, "learning_rate": 0.001, "loss": 1.7422, "step": 4695 }, { "epoch": 0.19866316947288265, "grad_norm": 0.24464336037635803, "learning_rate": 0.001, "loss": 2.6629, "step": 4696 }, { "epoch": 0.19870547423639903, "grad_norm": 1.3040649890899658, "learning_rate": 0.001, "loss": 2.1108, "step": 4697 }, { "epoch": 0.1987477789999154, "grad_norm": 0.5187906622886658, "learning_rate": 0.001, "loss": 2.7388, "step": 4698 }, { "epoch": 0.19879008376343177, "grad_norm": 0.2980749011039734, "learning_rate": 0.001, "loss": 3.4038, "step": 4699 }, { "epoch": 0.19883238852694812, "grad_norm": 0.3226117193698883, "learning_rate": 0.001, "loss": 2.8376, "step": 4700 }, { "epoch": 0.1988746932904645, "grad_norm": 0.2342327982187271, "learning_rate": 0.001, "loss": 1.9718, "step": 4701 }, { "epoch": 0.1989169980539809, "grad_norm": 0.23881351947784424, "learning_rate": 0.001, "loss": 3.1819, "step": 4702 }, { "epoch": 0.19895930281749724, "grad_norm": 0.1929883062839508, "learning_rate": 0.001, "loss": 2.0568, "step": 4703 }, { "epoch": 0.19900160758101362, "grad_norm": 0.25098490715026855, "learning_rate": 0.001, "loss": 2.0162, "step": 4704 }, { "epoch": 0.19904391234453, "grad_norm": 0.25516989827156067, "learning_rate": 0.001, "loss": 2.6734, "step": 4705 }, { "epoch": 0.19908621710804636, "grad_norm": 0.2264355570077896, "learning_rate": 0.001, "loss": 2.4311, "step": 4706 }, { "epoch": 0.19912852187156274, "grad_norm": 0.18645042181015015, "learning_rate": 0.001, "loss": 1.6494, "step": 4707 }, { "epoch": 0.19917082663507912, "grad_norm": 9.609095573425293, "learning_rate": 0.001, "loss": 2.3706, "step": 4708 }, { "epoch": 0.19921313139859548, "grad_norm": 0.2301723212003708, "learning_rate": 0.001, "loss": 2.3175, "step": 4709 }, { "epoch": 0.19925543616211186, "grad_norm": 0.30773094296455383, "learning_rate": 0.001, "loss": 3.0642, "step": 4710 }, { "epoch": 0.19929774092562821, "grad_norm": 0.48115605115890503, "learning_rate": 0.001, "loss": 2.8732, "step": 4711 }, { "epoch": 0.1993400456891446, "grad_norm": 0.20981243252754211, "learning_rate": 0.001, "loss": 2.5917, "step": 4712 }, { "epoch": 0.19938235045266098, "grad_norm": 0.22592686116695404, "learning_rate": 0.001, "loss": 2.8297, "step": 4713 }, { "epoch": 0.19942465521617733, "grad_norm": 0.27815452218055725, "learning_rate": 0.001, "loss": 2.3364, "step": 4714 }, { "epoch": 0.19946695997969371, "grad_norm": 0.23436151444911957, "learning_rate": 0.001, "loss": 2.8687, "step": 4715 }, { "epoch": 0.1995092647432101, "grad_norm": 1.096976637840271, "learning_rate": 0.001, "loss": 2.2385, "step": 4716 }, { "epoch": 0.19955156950672645, "grad_norm": 0.22871260344982147, "learning_rate": 0.001, "loss": 2.3228, "step": 4717 }, { "epoch": 0.19959387427024283, "grad_norm": 0.7824380397796631, "learning_rate": 0.001, "loss": 2.2027, "step": 4718 }, { "epoch": 0.19963617903375921, "grad_norm": 0.20783767104148865, "learning_rate": 0.001, "loss": 1.4938, "step": 4719 }, { "epoch": 0.19967848379727557, "grad_norm": 0.25429844856262207, "learning_rate": 0.001, "loss": 1.8029, "step": 4720 }, { "epoch": 0.19972078856079195, "grad_norm": 0.37199491262435913, "learning_rate": 0.001, "loss": 2.5307, "step": 4721 }, { "epoch": 0.1997630933243083, "grad_norm": 0.258245587348938, "learning_rate": 0.001, "loss": 2.2374, "step": 4722 }, { "epoch": 0.1998053980878247, "grad_norm": 0.2625525891780853, "learning_rate": 0.001, "loss": 3.0043, "step": 4723 }, { "epoch": 0.19984770285134107, "grad_norm": 6.895899772644043, "learning_rate": 0.001, "loss": 2.5071, "step": 4724 }, { "epoch": 0.19989000761485742, "grad_norm": 0.5165795087814331, "learning_rate": 0.001, "loss": 2.563, "step": 4725 }, { "epoch": 0.1999323123783738, "grad_norm": 1.534584879875183, "learning_rate": 0.001, "loss": 2.2309, "step": 4726 }, { "epoch": 0.1999746171418902, "grad_norm": 3.5470550060272217, "learning_rate": 0.001, "loss": 2.8135, "step": 4727 }, { "epoch": 0.20001692190540654, "grad_norm": 0.46171459555625916, "learning_rate": 0.001, "loss": 2.4945, "step": 4728 }, { "epoch": 0.20005922666892292, "grad_norm": 0.2912849485874176, "learning_rate": 0.001, "loss": 2.1541, "step": 4729 }, { "epoch": 0.2001015314324393, "grad_norm": 0.2877223491668701, "learning_rate": 0.001, "loss": 2.4918, "step": 4730 }, { "epoch": 0.20014383619595566, "grad_norm": 0.28247973322868347, "learning_rate": 0.001, "loss": 3.4501, "step": 4731 }, { "epoch": 0.20018614095947204, "grad_norm": 0.24483604729175568, "learning_rate": 0.001, "loss": 3.568, "step": 4732 }, { "epoch": 0.2002284457229884, "grad_norm": 0.2491256296634674, "learning_rate": 0.001, "loss": 2.456, "step": 4733 }, { "epoch": 0.20027075048650478, "grad_norm": 0.21148993074893951, "learning_rate": 0.001, "loss": 1.9497, "step": 4734 }, { "epoch": 0.20031305525002116, "grad_norm": 0.24428227543830872, "learning_rate": 0.001, "loss": 2.6689, "step": 4735 }, { "epoch": 0.20035536001353751, "grad_norm": 0.21488066017627716, "learning_rate": 0.001, "loss": 2.897, "step": 4736 }, { "epoch": 0.2003976647770539, "grad_norm": 0.2354801744222641, "learning_rate": 0.001, "loss": 2.4959, "step": 4737 }, { "epoch": 0.20043996954057028, "grad_norm": 0.3543696403503418, "learning_rate": 0.001, "loss": 2.3337, "step": 4738 }, { "epoch": 0.20048227430408663, "grad_norm": 0.21295201778411865, "learning_rate": 0.001, "loss": 2.2182, "step": 4739 }, { "epoch": 0.20052457906760301, "grad_norm": 0.2649983763694763, "learning_rate": 0.001, "loss": 2.0836, "step": 4740 }, { "epoch": 0.2005668838311194, "grad_norm": 0.4929693341255188, "learning_rate": 0.001, "loss": 2.1557, "step": 4741 }, { "epoch": 0.20060918859463575, "grad_norm": 0.24373185634613037, "learning_rate": 0.001, "loss": 2.2373, "step": 4742 }, { "epoch": 0.20065149335815213, "grad_norm": 0.2782137095928192, "learning_rate": 0.001, "loss": 2.0663, "step": 4743 }, { "epoch": 0.2006937981216685, "grad_norm": 0.9865705370903015, "learning_rate": 0.001, "loss": 2.4, "step": 4744 }, { "epoch": 0.20073610288518487, "grad_norm": 0.24436363577842712, "learning_rate": 0.001, "loss": 3.0016, "step": 4745 }, { "epoch": 0.20077840764870125, "grad_norm": 0.22285853326320648, "learning_rate": 0.001, "loss": 2.4025, "step": 4746 }, { "epoch": 0.2008207124122176, "grad_norm": 0.2801361083984375, "learning_rate": 0.001, "loss": 3.0465, "step": 4747 }, { "epoch": 0.200863017175734, "grad_norm": 1.4615576267242432, "learning_rate": 0.001, "loss": 1.813, "step": 4748 }, { "epoch": 0.20090532193925037, "grad_norm": 0.21313884854316711, "learning_rate": 0.001, "loss": 1.858, "step": 4749 }, { "epoch": 0.20094762670276672, "grad_norm": 0.7325378060340881, "learning_rate": 0.001, "loss": 3.6507, "step": 4750 }, { "epoch": 0.2009899314662831, "grad_norm": 102.32513427734375, "learning_rate": 0.001, "loss": 3.3369, "step": 4751 }, { "epoch": 0.2010322362297995, "grad_norm": 0.3230404555797577, "learning_rate": 0.001, "loss": 3.2636, "step": 4752 }, { "epoch": 0.20107454099331584, "grad_norm": 0.27479425072669983, "learning_rate": 0.001, "loss": 2.8126, "step": 4753 }, { "epoch": 0.20111684575683222, "grad_norm": 1.0016082525253296, "learning_rate": 0.001, "loss": 3.3368, "step": 4754 }, { "epoch": 0.20115915052034858, "grad_norm": 2.433990478515625, "learning_rate": 0.001, "loss": 2.5722, "step": 4755 }, { "epoch": 0.20120145528386496, "grad_norm": 0.29447877407073975, "learning_rate": 0.001, "loss": 2.011, "step": 4756 }, { "epoch": 0.20124376004738134, "grad_norm": 0.23931360244750977, "learning_rate": 0.001, "loss": 3.1154, "step": 4757 }, { "epoch": 0.2012860648108977, "grad_norm": 0.24119767546653748, "learning_rate": 0.001, "loss": 2.6907, "step": 4758 }, { "epoch": 0.20132836957441408, "grad_norm": 0.2806399166584015, "learning_rate": 0.001, "loss": 2.435, "step": 4759 }, { "epoch": 0.20137067433793046, "grad_norm": 0.2589947581291199, "learning_rate": 0.001, "loss": 3.0918, "step": 4760 }, { "epoch": 0.20141297910144682, "grad_norm": 0.2585409879684448, "learning_rate": 0.001, "loss": 1.6908, "step": 4761 }, { "epoch": 0.2014552838649632, "grad_norm": 0.4902653992176056, "learning_rate": 0.001, "loss": 2.5211, "step": 4762 }, { "epoch": 0.20149758862847958, "grad_norm": 0.2097211331129074, "learning_rate": 0.001, "loss": 1.7957, "step": 4763 }, { "epoch": 0.20153989339199593, "grad_norm": 0.26205360889434814, "learning_rate": 0.001, "loss": 1.7048, "step": 4764 }, { "epoch": 0.20158219815551232, "grad_norm": 1.476196527481079, "learning_rate": 0.001, "loss": 2.1979, "step": 4765 }, { "epoch": 0.20162450291902867, "grad_norm": 0.26606282591819763, "learning_rate": 0.001, "loss": 1.9978, "step": 4766 }, { "epoch": 0.20166680768254505, "grad_norm": 0.7488316297531128, "learning_rate": 0.001, "loss": 2.2828, "step": 4767 }, { "epoch": 0.20170911244606143, "grad_norm": 0.2467147260904312, "learning_rate": 0.001, "loss": 2.2932, "step": 4768 }, { "epoch": 0.2017514172095778, "grad_norm": 0.3320417106151581, "learning_rate": 0.001, "loss": 2.5512, "step": 4769 }, { "epoch": 0.20179372197309417, "grad_norm": 0.827613115310669, "learning_rate": 0.001, "loss": 2.9978, "step": 4770 }, { "epoch": 0.20183602673661055, "grad_norm": 0.2728056311607361, "learning_rate": 0.001, "loss": 2.3317, "step": 4771 }, { "epoch": 0.2018783315001269, "grad_norm": 12.807820320129395, "learning_rate": 0.001, "loss": 3.0231, "step": 4772 }, { "epoch": 0.2019206362636433, "grad_norm": 0.25953209400177, "learning_rate": 0.001, "loss": 2.5744, "step": 4773 }, { "epoch": 0.20196294102715967, "grad_norm": 0.2514730393886566, "learning_rate": 0.001, "loss": 1.816, "step": 4774 }, { "epoch": 0.20200524579067602, "grad_norm": 0.3252697288990021, "learning_rate": 0.001, "loss": 1.7334, "step": 4775 }, { "epoch": 0.2020475505541924, "grad_norm": 0.24858559668064117, "learning_rate": 0.001, "loss": 1.9274, "step": 4776 }, { "epoch": 0.20208985531770876, "grad_norm": 0.24600395560264587, "learning_rate": 0.001, "loss": 2.0192, "step": 4777 }, { "epoch": 0.20213216008122514, "grad_norm": 0.2425483614206314, "learning_rate": 0.001, "loss": 1.9709, "step": 4778 }, { "epoch": 0.20217446484474152, "grad_norm": 0.30808165669441223, "learning_rate": 0.001, "loss": 3.5698, "step": 4779 }, { "epoch": 0.20221676960825788, "grad_norm": 0.23074059188365936, "learning_rate": 0.001, "loss": 2.4182, "step": 4780 }, { "epoch": 0.20225907437177426, "grad_norm": 0.22748015820980072, "learning_rate": 0.001, "loss": 2.067, "step": 4781 }, { "epoch": 0.20230137913529064, "grad_norm": 0.23154856264591217, "learning_rate": 0.001, "loss": 1.9049, "step": 4782 }, { "epoch": 0.202343683898807, "grad_norm": 0.23665709793567657, "learning_rate": 0.001, "loss": 2.272, "step": 4783 }, { "epoch": 0.20238598866232338, "grad_norm": 0.20455607771873474, "learning_rate": 0.001, "loss": 2.1121, "step": 4784 }, { "epoch": 0.20242829342583976, "grad_norm": 0.4246728718280792, "learning_rate": 0.001, "loss": 2.6021, "step": 4785 }, { "epoch": 0.20247059818935612, "grad_norm": 0.235957533121109, "learning_rate": 0.001, "loss": 3.1363, "step": 4786 }, { "epoch": 0.2025129029528725, "grad_norm": 0.20879578590393066, "learning_rate": 0.001, "loss": 2.4947, "step": 4787 }, { "epoch": 0.20255520771638885, "grad_norm": 0.22662273049354553, "learning_rate": 0.001, "loss": 2.2168, "step": 4788 }, { "epoch": 0.20259751247990523, "grad_norm": 0.39572110772132874, "learning_rate": 0.001, "loss": 2.7348, "step": 4789 }, { "epoch": 0.20263981724342162, "grad_norm": 0.19888943433761597, "learning_rate": 0.001, "loss": 2.4301, "step": 4790 }, { "epoch": 0.20268212200693797, "grad_norm": 0.2209424376487732, "learning_rate": 0.001, "loss": 2.1574, "step": 4791 }, { "epoch": 0.20272442677045435, "grad_norm": 0.25813013315200806, "learning_rate": 0.001, "loss": 2.2174, "step": 4792 }, { "epoch": 0.20276673153397073, "grad_norm": 0.28615349531173706, "learning_rate": 0.001, "loss": 2.5578, "step": 4793 }, { "epoch": 0.2028090362974871, "grad_norm": 0.25967228412628174, "learning_rate": 0.001, "loss": 2.1363, "step": 4794 }, { "epoch": 0.20285134106100347, "grad_norm": 0.6237789988517761, "learning_rate": 0.001, "loss": 2.4102, "step": 4795 }, { "epoch": 0.20289364582451985, "grad_norm": 0.20853352546691895, "learning_rate": 0.001, "loss": 2.2166, "step": 4796 }, { "epoch": 0.2029359505880362, "grad_norm": 0.24653717875480652, "learning_rate": 0.001, "loss": 2.9338, "step": 4797 }, { "epoch": 0.2029782553515526, "grad_norm": 0.4108681082725525, "learning_rate": 0.001, "loss": 1.4424, "step": 4798 }, { "epoch": 0.20302056011506894, "grad_norm": 0.40089255571365356, "learning_rate": 0.001, "loss": 4.6068, "step": 4799 }, { "epoch": 0.20306286487858533, "grad_norm": 0.24011704325675964, "learning_rate": 0.001, "loss": 2.9857, "step": 4800 }, { "epoch": 0.2031051696421017, "grad_norm": 0.27986645698547363, "learning_rate": 0.001, "loss": 2.6674, "step": 4801 }, { "epoch": 0.20314747440561806, "grad_norm": 0.23206351697444916, "learning_rate": 0.001, "loss": 2.4333, "step": 4802 }, { "epoch": 0.20318977916913444, "grad_norm": 0.5023967623710632, "learning_rate": 0.001, "loss": 2.2802, "step": 4803 }, { "epoch": 0.20323208393265083, "grad_norm": 0.4342386722564697, "learning_rate": 0.001, "loss": 2.8626, "step": 4804 }, { "epoch": 0.20327438869616718, "grad_norm": 0.21476680040359497, "learning_rate": 0.001, "loss": 1.9667, "step": 4805 }, { "epoch": 0.20331669345968356, "grad_norm": 0.22180813550949097, "learning_rate": 0.001, "loss": 1.9456, "step": 4806 }, { "epoch": 0.20335899822319994, "grad_norm": 0.21526440978050232, "learning_rate": 0.001, "loss": 2.1147, "step": 4807 }, { "epoch": 0.2034013029867163, "grad_norm": 0.22872960567474365, "learning_rate": 0.001, "loss": 2.3991, "step": 4808 }, { "epoch": 0.20344360775023268, "grad_norm": 0.21979276835918427, "learning_rate": 0.001, "loss": 1.7426, "step": 4809 }, { "epoch": 0.20348591251374903, "grad_norm": 0.20522305369377136, "learning_rate": 0.001, "loss": 2.61, "step": 4810 }, { "epoch": 0.20352821727726542, "grad_norm": 0.24340516328811646, "learning_rate": 0.001, "loss": 3.118, "step": 4811 }, { "epoch": 0.2035705220407818, "grad_norm": 0.7604936361312866, "learning_rate": 0.001, "loss": 2.2295, "step": 4812 }, { "epoch": 0.20361282680429815, "grad_norm": 0.20879897475242615, "learning_rate": 0.001, "loss": 3.0593, "step": 4813 }, { "epoch": 0.20365513156781453, "grad_norm": 0.22496330738067627, "learning_rate": 0.001, "loss": 1.967, "step": 4814 }, { "epoch": 0.20369743633133092, "grad_norm": 0.6500147581100464, "learning_rate": 0.001, "loss": 2.5665, "step": 4815 }, { "epoch": 0.20373974109484727, "grad_norm": 0.2536025941371918, "learning_rate": 0.001, "loss": 3.4292, "step": 4816 }, { "epoch": 0.20378204585836365, "grad_norm": 0.25820499658584595, "learning_rate": 0.001, "loss": 2.0012, "step": 4817 }, { "epoch": 0.20382435062188003, "grad_norm": 0.886145830154419, "learning_rate": 0.001, "loss": 1.7369, "step": 4818 }, { "epoch": 0.2038666553853964, "grad_norm": 0.19858545064926147, "learning_rate": 0.001, "loss": 1.86, "step": 4819 }, { "epoch": 0.20390896014891277, "grad_norm": 0.6188323497772217, "learning_rate": 0.001, "loss": 1.8931, "step": 4820 }, { "epoch": 0.20395126491242915, "grad_norm": 0.2777750790119171, "learning_rate": 0.001, "loss": 3.6006, "step": 4821 }, { "epoch": 0.2039935696759455, "grad_norm": 0.26666122674942017, "learning_rate": 0.001, "loss": 2.4877, "step": 4822 }, { "epoch": 0.2040358744394619, "grad_norm": 0.2691092789173126, "learning_rate": 0.001, "loss": 2.1836, "step": 4823 }, { "epoch": 0.20407817920297824, "grad_norm": 0.24200117588043213, "learning_rate": 0.001, "loss": 2.5001, "step": 4824 }, { "epoch": 0.20412048396649463, "grad_norm": 0.2400779128074646, "learning_rate": 0.001, "loss": 1.6834, "step": 4825 }, { "epoch": 0.204162788730011, "grad_norm": 0.23831325769424438, "learning_rate": 0.001, "loss": 1.8685, "step": 4826 }, { "epoch": 0.20420509349352736, "grad_norm": 0.2239019125699997, "learning_rate": 0.001, "loss": 2.894, "step": 4827 }, { "epoch": 0.20424739825704374, "grad_norm": 0.23331467807292938, "learning_rate": 0.001, "loss": 2.893, "step": 4828 }, { "epoch": 0.20428970302056013, "grad_norm": 0.2696641981601715, "learning_rate": 0.001, "loss": 3.3553, "step": 4829 }, { "epoch": 0.20433200778407648, "grad_norm": 0.45643630623817444, "learning_rate": 0.001, "loss": 2.6336, "step": 4830 }, { "epoch": 0.20437431254759286, "grad_norm": 0.5224469900131226, "learning_rate": 0.001, "loss": 2.7963, "step": 4831 }, { "epoch": 0.20441661731110924, "grad_norm": 0.20430107414722443, "learning_rate": 0.001, "loss": 2.001, "step": 4832 }, { "epoch": 0.2044589220746256, "grad_norm": 0.47029760479927063, "learning_rate": 0.001, "loss": 1.7341, "step": 4833 }, { "epoch": 0.20450122683814198, "grad_norm": 0.20866377651691437, "learning_rate": 0.001, "loss": 2.1228, "step": 4834 }, { "epoch": 0.20454353160165833, "grad_norm": 0.23199744522571564, "learning_rate": 0.001, "loss": 1.6639, "step": 4835 }, { "epoch": 0.20458583636517472, "grad_norm": 0.23351909220218658, "learning_rate": 0.001, "loss": 2.9277, "step": 4836 }, { "epoch": 0.2046281411286911, "grad_norm": 0.2393893003463745, "learning_rate": 0.001, "loss": 2.4585, "step": 4837 }, { "epoch": 0.20467044589220745, "grad_norm": 0.19761449098587036, "learning_rate": 0.001, "loss": 1.9734, "step": 4838 }, { "epoch": 0.20471275065572384, "grad_norm": 0.45381811261177063, "learning_rate": 0.001, "loss": 2.3705, "step": 4839 }, { "epoch": 0.20475505541924022, "grad_norm": 0.25361669063568115, "learning_rate": 0.001, "loss": 2.7643, "step": 4840 }, { "epoch": 0.20479736018275657, "grad_norm": 0.23903019726276398, "learning_rate": 0.001, "loss": 2.1605, "step": 4841 }, { "epoch": 0.20483966494627295, "grad_norm": 1.0079925060272217, "learning_rate": 0.001, "loss": 2.5728, "step": 4842 }, { "epoch": 0.20488196970978934, "grad_norm": 0.22258150577545166, "learning_rate": 0.001, "loss": 2.7058, "step": 4843 }, { "epoch": 0.2049242744733057, "grad_norm": 0.22821152210235596, "learning_rate": 0.001, "loss": 1.7562, "step": 4844 }, { "epoch": 0.20496657923682207, "grad_norm": 0.29148948192596436, "learning_rate": 0.001, "loss": 2.5153, "step": 4845 }, { "epoch": 0.20500888400033843, "grad_norm": 0.37664884328842163, "learning_rate": 0.001, "loss": 2.9711, "step": 4846 }, { "epoch": 0.2050511887638548, "grad_norm": 0.2692352533340454, "learning_rate": 0.001, "loss": 2.3962, "step": 4847 }, { "epoch": 0.2050934935273712, "grad_norm": 0.2488355189561844, "learning_rate": 0.001, "loss": 2.6436, "step": 4848 }, { "epoch": 0.20513579829088754, "grad_norm": 33.88499069213867, "learning_rate": 0.001, "loss": 1.9808, "step": 4849 }, { "epoch": 0.20517810305440393, "grad_norm": 0.24988500773906708, "learning_rate": 0.001, "loss": 3.2433, "step": 4850 }, { "epoch": 0.2052204078179203, "grad_norm": 0.2517193853855133, "learning_rate": 0.001, "loss": 2.9665, "step": 4851 }, { "epoch": 0.20526271258143666, "grad_norm": 0.38323888182640076, "learning_rate": 0.001, "loss": 1.9108, "step": 4852 }, { "epoch": 0.20530501734495304, "grad_norm": 0.21491578221321106, "learning_rate": 0.001, "loss": 1.8844, "step": 4853 }, { "epoch": 0.20534732210846943, "grad_norm": 0.23121948540210724, "learning_rate": 0.001, "loss": 2.4052, "step": 4854 }, { "epoch": 0.20538962687198578, "grad_norm": 1.7590092420578003, "learning_rate": 0.001, "loss": 2.9853, "step": 4855 }, { "epoch": 0.20543193163550216, "grad_norm": 0.3250569701194763, "learning_rate": 0.001, "loss": 2.9398, "step": 4856 }, { "epoch": 0.20547423639901852, "grad_norm": 0.5734913349151611, "learning_rate": 0.001, "loss": 1.8469, "step": 4857 }, { "epoch": 0.2055165411625349, "grad_norm": 0.2421615719795227, "learning_rate": 0.001, "loss": 2.0313, "step": 4858 }, { "epoch": 0.20555884592605128, "grad_norm": 0.2648211121559143, "learning_rate": 0.001, "loss": 2.3516, "step": 4859 }, { "epoch": 0.20560115068956764, "grad_norm": 0.5653280019760132, "learning_rate": 0.001, "loss": 1.7878, "step": 4860 }, { "epoch": 0.20564345545308402, "grad_norm": 0.4042348861694336, "learning_rate": 0.001, "loss": 2.8081, "step": 4861 }, { "epoch": 0.2056857602166004, "grad_norm": 0.25248414278030396, "learning_rate": 0.001, "loss": 3.5081, "step": 4862 }, { "epoch": 0.20572806498011675, "grad_norm": 0.9863622784614563, "learning_rate": 0.001, "loss": 3.1157, "step": 4863 }, { "epoch": 0.20577036974363314, "grad_norm": 0.2207251340150833, "learning_rate": 0.001, "loss": 2.1768, "step": 4864 }, { "epoch": 0.20581267450714952, "grad_norm": 0.2811149060726166, "learning_rate": 0.001, "loss": 3.3907, "step": 4865 }, { "epoch": 0.20585497927066587, "grad_norm": 0.9956902265548706, "learning_rate": 0.001, "loss": 2.2663, "step": 4866 }, { "epoch": 0.20589728403418225, "grad_norm": 0.38394954800605774, "learning_rate": 0.001, "loss": 2.641, "step": 4867 }, { "epoch": 0.2059395887976986, "grad_norm": 0.1891484558582306, "learning_rate": 0.001, "loss": 2.0367, "step": 4868 }, { "epoch": 0.205981893561215, "grad_norm": 0.21494343876838684, "learning_rate": 0.001, "loss": 3.1822, "step": 4869 }, { "epoch": 0.20602419832473137, "grad_norm": 0.27545684576034546, "learning_rate": 0.001, "loss": 3.4021, "step": 4870 }, { "epoch": 0.20606650308824773, "grad_norm": 0.23246289789676666, "learning_rate": 0.001, "loss": 2.2404, "step": 4871 }, { "epoch": 0.2061088078517641, "grad_norm": 0.826824963092804, "learning_rate": 0.001, "loss": 3.1329, "step": 4872 }, { "epoch": 0.2061511126152805, "grad_norm": 0.38859671354293823, "learning_rate": 0.001, "loss": 2.1988, "step": 4873 }, { "epoch": 0.20619341737879684, "grad_norm": 0.22136381268501282, "learning_rate": 0.001, "loss": 2.243, "step": 4874 }, { "epoch": 0.20623572214231323, "grad_norm": 0.24336136877536774, "learning_rate": 0.001, "loss": 3.3275, "step": 4875 }, { "epoch": 0.2062780269058296, "grad_norm": 0.24462832510471344, "learning_rate": 0.001, "loss": 2.9589, "step": 4876 }, { "epoch": 0.20632033166934596, "grad_norm": 7.4848175048828125, "learning_rate": 0.001, "loss": 1.9259, "step": 4877 }, { "epoch": 0.20636263643286235, "grad_norm": 0.25238528847694397, "learning_rate": 0.001, "loss": 1.9616, "step": 4878 }, { "epoch": 0.2064049411963787, "grad_norm": 1.0370349884033203, "learning_rate": 0.001, "loss": 2.0602, "step": 4879 }, { "epoch": 0.20644724595989508, "grad_norm": 0.2667924165725708, "learning_rate": 0.001, "loss": 2.1587, "step": 4880 }, { "epoch": 0.20648955072341146, "grad_norm": 5.384062767028809, "learning_rate": 0.001, "loss": 2.1591, "step": 4881 }, { "epoch": 0.20653185548692782, "grad_norm": 0.653923511505127, "learning_rate": 0.001, "loss": 2.1794, "step": 4882 }, { "epoch": 0.2065741602504442, "grad_norm": 6.2755937576293945, "learning_rate": 0.001, "loss": 2.7158, "step": 4883 }, { "epoch": 0.20661646501396058, "grad_norm": 0.5690131187438965, "learning_rate": 0.001, "loss": 2.2172, "step": 4884 }, { "epoch": 0.20665876977747694, "grad_norm": 0.6004669070243835, "learning_rate": 0.001, "loss": 2.4962, "step": 4885 }, { "epoch": 0.20670107454099332, "grad_norm": 0.5099846124649048, "learning_rate": 0.001, "loss": 2.7323, "step": 4886 }, { "epoch": 0.2067433793045097, "grad_norm": 0.8244269490242004, "learning_rate": 0.001, "loss": 2.5037, "step": 4887 }, { "epoch": 0.20678568406802605, "grad_norm": 0.7311514616012573, "learning_rate": 0.001, "loss": 2.8543, "step": 4888 }, { "epoch": 0.20682798883154244, "grad_norm": 1.6396245956420898, "learning_rate": 0.001, "loss": 3.5585, "step": 4889 }, { "epoch": 0.2068702935950588, "grad_norm": 0.7014549374580383, "learning_rate": 0.001, "loss": 2.8487, "step": 4890 }, { "epoch": 0.20691259835857517, "grad_norm": 0.7353118062019348, "learning_rate": 0.001, "loss": 2.7439, "step": 4891 }, { "epoch": 0.20695490312209155, "grad_norm": 0.3765983283519745, "learning_rate": 0.001, "loss": 4.2012, "step": 4892 }, { "epoch": 0.2069972078856079, "grad_norm": 1.587646245956421, "learning_rate": 0.001, "loss": 2.1776, "step": 4893 }, { "epoch": 0.2070395126491243, "grad_norm": 0.45445260405540466, "learning_rate": 0.001, "loss": 2.7538, "step": 4894 }, { "epoch": 0.20708181741264067, "grad_norm": 0.5824608206748962, "learning_rate": 0.001, "loss": 2.4091, "step": 4895 }, { "epoch": 0.20712412217615703, "grad_norm": 0.29046350717544556, "learning_rate": 0.001, "loss": 2.3128, "step": 4896 }, { "epoch": 0.2071664269396734, "grad_norm": 0.2888144552707672, "learning_rate": 0.001, "loss": 2.5121, "step": 4897 }, { "epoch": 0.2072087317031898, "grad_norm": 0.4128365218639374, "learning_rate": 0.001, "loss": 2.8964, "step": 4898 }, { "epoch": 0.20725103646670615, "grad_norm": 2.1219639778137207, "learning_rate": 0.001, "loss": 2.9168, "step": 4899 }, { "epoch": 0.20729334123022253, "grad_norm": 0.910344660282135, "learning_rate": 0.001, "loss": 4.5335, "step": 4900 }, { "epoch": 0.20733564599373888, "grad_norm": 0.2882691025733948, "learning_rate": 0.001, "loss": 2.8958, "step": 4901 }, { "epoch": 0.20737795075725526, "grad_norm": 0.2687327563762665, "learning_rate": 0.001, "loss": 2.1742, "step": 4902 }, { "epoch": 0.20742025552077165, "grad_norm": 0.8853581547737122, "learning_rate": 0.001, "loss": 1.9429, "step": 4903 }, { "epoch": 0.207462560284288, "grad_norm": 1.759624719619751, "learning_rate": 0.001, "loss": 2.8563, "step": 4904 }, { "epoch": 0.20750486504780438, "grad_norm": 3.137960910797119, "learning_rate": 0.001, "loss": 3.4388, "step": 4905 }, { "epoch": 0.20754716981132076, "grad_norm": 3.9103238582611084, "learning_rate": 0.001, "loss": 2.4237, "step": 4906 }, { "epoch": 0.20758947457483712, "grad_norm": 0.40916767716407776, "learning_rate": 0.001, "loss": 2.2436, "step": 4907 }, { "epoch": 0.2076317793383535, "grad_norm": 0.30772218108177185, "learning_rate": 0.001, "loss": 2.3738, "step": 4908 }, { "epoch": 0.20767408410186988, "grad_norm": 1.7794135808944702, "learning_rate": 0.001, "loss": 3.5238, "step": 4909 }, { "epoch": 0.20771638886538624, "grad_norm": 0.23665037751197815, "learning_rate": 0.001, "loss": 1.9228, "step": 4910 }, { "epoch": 0.20775869362890262, "grad_norm": 0.2872195541858673, "learning_rate": 0.001, "loss": 2.6802, "step": 4911 }, { "epoch": 0.20780099839241897, "grad_norm": 6.244613170623779, "learning_rate": 0.001, "loss": 2.8313, "step": 4912 }, { "epoch": 0.20784330315593535, "grad_norm": 0.2514772117137909, "learning_rate": 0.001, "loss": 2.0803, "step": 4913 }, { "epoch": 0.20788560791945174, "grad_norm": 11.249589920043945, "learning_rate": 0.001, "loss": 2.0849, "step": 4914 }, { "epoch": 0.2079279126829681, "grad_norm": 0.3307616114616394, "learning_rate": 0.001, "loss": 2.9033, "step": 4915 }, { "epoch": 0.20797021744648447, "grad_norm": 0.3316502869129181, "learning_rate": 0.001, "loss": 2.3176, "step": 4916 }, { "epoch": 0.20801252221000086, "grad_norm": 0.4694264829158783, "learning_rate": 0.001, "loss": 3.0293, "step": 4917 }, { "epoch": 0.2080548269735172, "grad_norm": 0.2822750210762024, "learning_rate": 0.001, "loss": 2.4169, "step": 4918 }, { "epoch": 0.2080971317370336, "grad_norm": 0.604796826839447, "learning_rate": 0.001, "loss": 2.7267, "step": 4919 }, { "epoch": 0.20813943650054997, "grad_norm": 0.23968219757080078, "learning_rate": 0.001, "loss": 2.7295, "step": 4920 }, { "epoch": 0.20818174126406633, "grad_norm": 0.2379380464553833, "learning_rate": 0.001, "loss": 2.2191, "step": 4921 }, { "epoch": 0.2082240460275827, "grad_norm": 0.2360192835330963, "learning_rate": 0.001, "loss": 1.8814, "step": 4922 }, { "epoch": 0.20826635079109906, "grad_norm": 0.40133988857269287, "learning_rate": 0.001, "loss": 1.9194, "step": 4923 }, { "epoch": 0.20830865555461545, "grad_norm": 0.314656525850296, "learning_rate": 0.001, "loss": 2.3935, "step": 4924 }, { "epoch": 0.20835096031813183, "grad_norm": 1.1727277040481567, "learning_rate": 0.001, "loss": 2.8302, "step": 4925 }, { "epoch": 0.20839326508164818, "grad_norm": 0.22926130890846252, "learning_rate": 0.001, "loss": 2.7978, "step": 4926 }, { "epoch": 0.20843556984516456, "grad_norm": 0.4540160596370697, "learning_rate": 0.001, "loss": 1.8122, "step": 4927 }, { "epoch": 0.20847787460868095, "grad_norm": 23.994537353515625, "learning_rate": 0.001, "loss": 2.6313, "step": 4928 }, { "epoch": 0.2085201793721973, "grad_norm": 0.6194539070129395, "learning_rate": 0.001, "loss": 2.9595, "step": 4929 }, { "epoch": 0.20856248413571368, "grad_norm": 13.033817291259766, "learning_rate": 0.001, "loss": 2.1685, "step": 4930 }, { "epoch": 0.20860478889923006, "grad_norm": 0.323756605386734, "learning_rate": 0.001, "loss": 2.9983, "step": 4931 }, { "epoch": 0.20864709366274642, "grad_norm": 4.008752346038818, "learning_rate": 0.001, "loss": 3.0625, "step": 4932 }, { "epoch": 0.2086893984262628, "grad_norm": 0.28806641697883606, "learning_rate": 0.001, "loss": 2.3397, "step": 4933 }, { "epoch": 0.20873170318977916, "grad_norm": 0.49707770347595215, "learning_rate": 0.001, "loss": 2.1648, "step": 4934 }, { "epoch": 0.20877400795329554, "grad_norm": 0.29439014196395874, "learning_rate": 0.001, "loss": 2.0093, "step": 4935 }, { "epoch": 0.20881631271681192, "grad_norm": 0.7795244455337524, "learning_rate": 0.001, "loss": 2.36, "step": 4936 }, { "epoch": 0.20885861748032827, "grad_norm": 0.2714845836162567, "learning_rate": 0.001, "loss": 1.7267, "step": 4937 }, { "epoch": 0.20890092224384466, "grad_norm": 0.4351987838745117, "learning_rate": 0.001, "loss": 2.605, "step": 4938 }, { "epoch": 0.20894322700736104, "grad_norm": 0.39439719915390015, "learning_rate": 0.001, "loss": 2.6728, "step": 4939 }, { "epoch": 0.2089855317708774, "grad_norm": 0.8820605874061584, "learning_rate": 0.001, "loss": 3.4102, "step": 4940 }, { "epoch": 0.20902783653439377, "grad_norm": 0.3229409158229828, "learning_rate": 0.001, "loss": 2.9257, "step": 4941 }, { "epoch": 0.20907014129791016, "grad_norm": 0.3107205629348755, "learning_rate": 0.001, "loss": 2.3553, "step": 4942 }, { "epoch": 0.2091124460614265, "grad_norm": 0.36764320731163025, "learning_rate": 0.001, "loss": 3.036, "step": 4943 }, { "epoch": 0.2091547508249429, "grad_norm": 0.2639576494693756, "learning_rate": 0.001, "loss": 2.8147, "step": 4944 }, { "epoch": 0.20919705558845927, "grad_norm": 0.24890625476837158, "learning_rate": 0.001, "loss": 1.888, "step": 4945 }, { "epoch": 0.20923936035197563, "grad_norm": 0.28734108805656433, "learning_rate": 0.001, "loss": 2.3685, "step": 4946 }, { "epoch": 0.209281665115492, "grad_norm": 1.45154869556427, "learning_rate": 0.001, "loss": 2.8914, "step": 4947 }, { "epoch": 0.20932396987900836, "grad_norm": 0.24972866475582123, "learning_rate": 0.001, "loss": 2.1973, "step": 4948 }, { "epoch": 0.20936627464252475, "grad_norm": 0.7616045475006104, "learning_rate": 0.001, "loss": 2.4037, "step": 4949 }, { "epoch": 0.20940857940604113, "grad_norm": 0.3818029463291168, "learning_rate": 0.001, "loss": 2.3215, "step": 4950 }, { "epoch": 0.20945088416955748, "grad_norm": 32.00923156738281, "learning_rate": 0.001, "loss": 2.4591, "step": 4951 }, { "epoch": 0.20949318893307387, "grad_norm": 0.31729358434677124, "learning_rate": 0.001, "loss": 2.4413, "step": 4952 }, { "epoch": 0.20953549369659025, "grad_norm": 2.464691638946533, "learning_rate": 0.001, "loss": 2.4398, "step": 4953 }, { "epoch": 0.2095777984601066, "grad_norm": 35.59858322143555, "learning_rate": 0.001, "loss": 2.887, "step": 4954 }, { "epoch": 0.20962010322362298, "grad_norm": 0.5536134243011475, "learning_rate": 0.001, "loss": 2.9387, "step": 4955 }, { "epoch": 0.20966240798713937, "grad_norm": 1.5458406209945679, "learning_rate": 0.001, "loss": 2.1783, "step": 4956 }, { "epoch": 0.20970471275065572, "grad_norm": 0.8525711894035339, "learning_rate": 0.001, "loss": 3.0125, "step": 4957 }, { "epoch": 0.2097470175141721, "grad_norm": 3.157151460647583, "learning_rate": 0.001, "loss": 2.9392, "step": 4958 }, { "epoch": 0.20978932227768846, "grad_norm": 0.42130786180496216, "learning_rate": 0.001, "loss": 2.071, "step": 4959 }, { "epoch": 0.20983162704120484, "grad_norm": 2.541234016418457, "learning_rate": 0.001, "loss": 4.1087, "step": 4960 }, { "epoch": 0.20987393180472122, "grad_norm": 0.24158483743667603, "learning_rate": 0.001, "loss": 1.9446, "step": 4961 }, { "epoch": 0.20991623656823757, "grad_norm": 0.45275890827178955, "learning_rate": 0.001, "loss": 2.1538, "step": 4962 }, { "epoch": 0.20995854133175396, "grad_norm": 1.040623664855957, "learning_rate": 0.001, "loss": 2.0899, "step": 4963 }, { "epoch": 0.21000084609527034, "grad_norm": 0.2312449812889099, "learning_rate": 0.001, "loss": 1.9906, "step": 4964 }, { "epoch": 0.2100431508587867, "grad_norm": 0.33807173371315, "learning_rate": 0.001, "loss": 2.1848, "step": 4965 }, { "epoch": 0.21008545562230307, "grad_norm": 0.256266325712204, "learning_rate": 0.001, "loss": 2.1486, "step": 4966 }, { "epoch": 0.21012776038581946, "grad_norm": 0.4030238687992096, "learning_rate": 0.001, "loss": 1.9343, "step": 4967 }, { "epoch": 0.2101700651493358, "grad_norm": 0.33248621225357056, "learning_rate": 0.001, "loss": 2.2524, "step": 4968 }, { "epoch": 0.2102123699128522, "grad_norm": 0.3359518349170685, "learning_rate": 0.001, "loss": 2.6882, "step": 4969 }, { "epoch": 0.21025467467636855, "grad_norm": 0.3091052174568176, "learning_rate": 0.001, "loss": 3.1879, "step": 4970 }, { "epoch": 0.21029697943988493, "grad_norm": 0.21481722593307495, "learning_rate": 0.001, "loss": 1.8638, "step": 4971 }, { "epoch": 0.2103392842034013, "grad_norm": 0.21660932898521423, "learning_rate": 0.001, "loss": 2.0017, "step": 4972 }, { "epoch": 0.21038158896691767, "grad_norm": 0.4549258351325989, "learning_rate": 0.001, "loss": 3.1636, "step": 4973 }, { "epoch": 0.21042389373043405, "grad_norm": 1.2132890224456787, "learning_rate": 0.001, "loss": 3.037, "step": 4974 }, { "epoch": 0.21046619849395043, "grad_norm": 0.24998739361763, "learning_rate": 0.001, "loss": 2.3893, "step": 4975 }, { "epoch": 0.21050850325746678, "grad_norm": 0.2769694924354553, "learning_rate": 0.001, "loss": 2.0534, "step": 4976 }, { "epoch": 0.21055080802098317, "grad_norm": 8.817028999328613, "learning_rate": 0.001, "loss": 2.5478, "step": 4977 }, { "epoch": 0.21059311278449955, "grad_norm": 0.840341329574585, "learning_rate": 0.001, "loss": 2.5962, "step": 4978 }, { "epoch": 0.2106354175480159, "grad_norm": 0.33536335825920105, "learning_rate": 0.001, "loss": 1.973, "step": 4979 }, { "epoch": 0.21067772231153228, "grad_norm": 0.34550049901008606, "learning_rate": 0.001, "loss": 2.3634, "step": 4980 }, { "epoch": 0.21072002707504864, "grad_norm": 0.4002279043197632, "learning_rate": 0.001, "loss": 2.7841, "step": 4981 }, { "epoch": 0.21076233183856502, "grad_norm": 0.3583948314189911, "learning_rate": 0.001, "loss": 3.248, "step": 4982 }, { "epoch": 0.2108046366020814, "grad_norm": 0.2348349541425705, "learning_rate": 0.001, "loss": 2.4177, "step": 4983 }, { "epoch": 0.21084694136559776, "grad_norm": 0.2678260803222656, "learning_rate": 0.001, "loss": 2.775, "step": 4984 }, { "epoch": 0.21088924612911414, "grad_norm": 0.24458220601081848, "learning_rate": 0.001, "loss": 2.3764, "step": 4985 }, { "epoch": 0.21093155089263052, "grad_norm": 0.38108593225479126, "learning_rate": 0.001, "loss": 2.247, "step": 4986 }, { "epoch": 0.21097385565614687, "grad_norm": 0.21886327862739563, "learning_rate": 0.001, "loss": 2.7944, "step": 4987 }, { "epoch": 0.21101616041966326, "grad_norm": 7.095550537109375, "learning_rate": 0.001, "loss": 1.5504, "step": 4988 }, { "epoch": 0.21105846518317964, "grad_norm": 0.2182372361421585, "learning_rate": 0.001, "loss": 2.7729, "step": 4989 }, { "epoch": 0.211100769946696, "grad_norm": 0.24648889899253845, "learning_rate": 0.001, "loss": 2.2145, "step": 4990 }, { "epoch": 0.21114307471021238, "grad_norm": 0.22735580801963806, "learning_rate": 0.001, "loss": 1.9403, "step": 4991 }, { "epoch": 0.21118537947372873, "grad_norm": 2.2606966495513916, "learning_rate": 0.001, "loss": 2.5648, "step": 4992 }, { "epoch": 0.2112276842372451, "grad_norm": 1.0740227699279785, "learning_rate": 0.001, "loss": 3.2116, "step": 4993 }, { "epoch": 0.2112699890007615, "grad_norm": 1.1305190324783325, "learning_rate": 0.001, "loss": 3.5133, "step": 4994 }, { "epoch": 0.21131229376427785, "grad_norm": 0.8290959000587463, "learning_rate": 0.001, "loss": 2.4756, "step": 4995 }, { "epoch": 0.21135459852779423, "grad_norm": 0.30517685413360596, "learning_rate": 0.001, "loss": 2.2226, "step": 4996 }, { "epoch": 0.2113969032913106, "grad_norm": 0.25863179564476013, "learning_rate": 0.001, "loss": 2.1738, "step": 4997 }, { "epoch": 0.21143920805482697, "grad_norm": 0.33867284655570984, "learning_rate": 0.001, "loss": 2.6182, "step": 4998 }, { "epoch": 0.21148151281834335, "grad_norm": 0.3169691264629364, "learning_rate": 0.001, "loss": 2.8752, "step": 4999 }, { "epoch": 0.21152381758185973, "grad_norm": 0.2934655249118805, "learning_rate": 0.001, "loss": 3.4324, "step": 5000 }, { "epoch": 0.21156612234537608, "grad_norm": 1.9539580345153809, "learning_rate": 0.001, "loss": 2.0974, "step": 5001 }, { "epoch": 0.21160842710889247, "grad_norm": 0.28352606296539307, "learning_rate": 0.001, "loss": 2.3892, "step": 5002 }, { "epoch": 0.21165073187240882, "grad_norm": 0.36468642950057983, "learning_rate": 0.001, "loss": 2.67, "step": 5003 }, { "epoch": 0.2116930366359252, "grad_norm": 0.28954243659973145, "learning_rate": 0.001, "loss": 2.4779, "step": 5004 }, { "epoch": 0.21173534139944158, "grad_norm": 0.2951817810535431, "learning_rate": 0.001, "loss": 2.6193, "step": 5005 }, { "epoch": 0.21177764616295794, "grad_norm": 0.24036556482315063, "learning_rate": 0.001, "loss": 2.362, "step": 5006 }, { "epoch": 0.21181995092647432, "grad_norm": 0.23176927864551544, "learning_rate": 0.001, "loss": 2.4304, "step": 5007 }, { "epoch": 0.2118622556899907, "grad_norm": 0.2653713822364807, "learning_rate": 0.001, "loss": 2.4899, "step": 5008 }, { "epoch": 0.21190456045350706, "grad_norm": 0.22608830034732819, "learning_rate": 0.001, "loss": 1.915, "step": 5009 }, { "epoch": 0.21194686521702344, "grad_norm": 0.26712745428085327, "learning_rate": 0.001, "loss": 2.8369, "step": 5010 }, { "epoch": 0.21198916998053982, "grad_norm": 0.23458565771579742, "learning_rate": 0.001, "loss": 2.713, "step": 5011 }, { "epoch": 0.21203147474405618, "grad_norm": 0.3184855282306671, "learning_rate": 0.001, "loss": 3.2585, "step": 5012 }, { "epoch": 0.21207377950757256, "grad_norm": 0.29982033371925354, "learning_rate": 0.001, "loss": 2.6947, "step": 5013 }, { "epoch": 0.2121160842710889, "grad_norm": 0.22614946961402893, "learning_rate": 0.001, "loss": 1.8255, "step": 5014 }, { "epoch": 0.2121583890346053, "grad_norm": 0.22984737157821655, "learning_rate": 0.001, "loss": 1.9238, "step": 5015 }, { "epoch": 0.21220069379812168, "grad_norm": 0.2999686598777771, "learning_rate": 0.001, "loss": 2.102, "step": 5016 }, { "epoch": 0.21224299856163803, "grad_norm": 0.21866334974765778, "learning_rate": 0.001, "loss": 2.5355, "step": 5017 }, { "epoch": 0.2122853033251544, "grad_norm": 0.30054131150245667, "learning_rate": 0.001, "loss": 1.7776, "step": 5018 }, { "epoch": 0.2123276080886708, "grad_norm": 0.28100132942199707, "learning_rate": 0.001, "loss": 3.4669, "step": 5019 }, { "epoch": 0.21236991285218715, "grad_norm": 0.2859184443950653, "learning_rate": 0.001, "loss": 1.9491, "step": 5020 }, { "epoch": 0.21241221761570353, "grad_norm": 1.7632389068603516, "learning_rate": 0.001, "loss": 2.4757, "step": 5021 }, { "epoch": 0.2124545223792199, "grad_norm": 0.4322301149368286, "learning_rate": 0.001, "loss": 1.9597, "step": 5022 }, { "epoch": 0.21249682714273627, "grad_norm": 0.27014920115470886, "learning_rate": 0.001, "loss": 2.5075, "step": 5023 }, { "epoch": 0.21253913190625265, "grad_norm": 0.21609550714492798, "learning_rate": 0.001, "loss": 2.2271, "step": 5024 }, { "epoch": 0.212581436669769, "grad_norm": 0.2416316717863083, "learning_rate": 0.001, "loss": 2.0308, "step": 5025 }, { "epoch": 0.21262374143328538, "grad_norm": 0.6305524110794067, "learning_rate": 0.001, "loss": 2.16, "step": 5026 }, { "epoch": 0.21266604619680177, "grad_norm": 0.21432656049728394, "learning_rate": 0.001, "loss": 2.6648, "step": 5027 }, { "epoch": 0.21270835096031812, "grad_norm": 0.854989767074585, "learning_rate": 0.001, "loss": 2.3023, "step": 5028 }, { "epoch": 0.2127506557238345, "grad_norm": 0.2650429308414459, "learning_rate": 0.001, "loss": 2.1294, "step": 5029 }, { "epoch": 0.21279296048735089, "grad_norm": 0.1896660178899765, "learning_rate": 0.001, "loss": 1.7712, "step": 5030 }, { "epoch": 0.21283526525086724, "grad_norm": 0.4741089940071106, "learning_rate": 0.001, "loss": 2.5271, "step": 5031 }, { "epoch": 0.21287757001438362, "grad_norm": 0.22527028620243073, "learning_rate": 0.001, "loss": 2.7449, "step": 5032 }, { "epoch": 0.2129198747779, "grad_norm": 0.2307746261358261, "learning_rate": 0.001, "loss": 2.8616, "step": 5033 }, { "epoch": 0.21296217954141636, "grad_norm": 0.4615713655948639, "learning_rate": 0.001, "loss": 1.8831, "step": 5034 }, { "epoch": 0.21300448430493274, "grad_norm": 23.890384674072266, "learning_rate": 0.001, "loss": 1.4897, "step": 5035 }, { "epoch": 0.2130467890684491, "grad_norm": 0.497530072927475, "learning_rate": 0.001, "loss": 2.1057, "step": 5036 }, { "epoch": 0.21308909383196548, "grad_norm": 4.099823474884033, "learning_rate": 0.001, "loss": 2.2474, "step": 5037 }, { "epoch": 0.21313139859548186, "grad_norm": 0.23300927877426147, "learning_rate": 0.001, "loss": 2.6447, "step": 5038 }, { "epoch": 0.2131737033589982, "grad_norm": 1.2628371715545654, "learning_rate": 0.001, "loss": 2.3729, "step": 5039 }, { "epoch": 0.2132160081225146, "grad_norm": 3.1556689739227295, "learning_rate": 0.001, "loss": 2.0734, "step": 5040 }, { "epoch": 0.21325831288603098, "grad_norm": 0.23001788556575775, "learning_rate": 0.001, "loss": 2.0797, "step": 5041 }, { "epoch": 0.21330061764954733, "grad_norm": 0.3631579577922821, "learning_rate": 0.001, "loss": 2.1848, "step": 5042 }, { "epoch": 0.2133429224130637, "grad_norm": 0.2023169994354248, "learning_rate": 0.001, "loss": 2.0585, "step": 5043 }, { "epoch": 0.2133852271765801, "grad_norm": 0.25876331329345703, "learning_rate": 0.001, "loss": 2.2102, "step": 5044 }, { "epoch": 0.21342753194009645, "grad_norm": 0.22901210188865662, "learning_rate": 0.001, "loss": 2.1429, "step": 5045 }, { "epoch": 0.21346983670361283, "grad_norm": 0.472971647977829, "learning_rate": 0.001, "loss": 2.8811, "step": 5046 }, { "epoch": 0.21351214146712919, "grad_norm": 0.47565358877182007, "learning_rate": 0.001, "loss": 2.3727, "step": 5047 }, { "epoch": 0.21355444623064557, "grad_norm": 3.025669813156128, "learning_rate": 0.001, "loss": 1.6324, "step": 5048 }, { "epoch": 0.21359675099416195, "grad_norm": 0.7364016175270081, "learning_rate": 0.001, "loss": 2.7439, "step": 5049 }, { "epoch": 0.2136390557576783, "grad_norm": 0.25837811827659607, "learning_rate": 0.001, "loss": 2.591, "step": 5050 }, { "epoch": 0.21368136052119469, "grad_norm": 0.556201159954071, "learning_rate": 0.001, "loss": 2.0559, "step": 5051 }, { "epoch": 0.21372366528471107, "grad_norm": 0.23484918475151062, "learning_rate": 0.001, "loss": 2.0903, "step": 5052 }, { "epoch": 0.21376597004822742, "grad_norm": 0.20582491159439087, "learning_rate": 0.001, "loss": 2.5129, "step": 5053 }, { "epoch": 0.2138082748117438, "grad_norm": 0.4761744439601898, "learning_rate": 0.001, "loss": 2.3142, "step": 5054 }, { "epoch": 0.21385057957526019, "grad_norm": 0.5703874230384827, "learning_rate": 0.001, "loss": 2.5662, "step": 5055 }, { "epoch": 0.21389288433877654, "grad_norm": 0.19592377543449402, "learning_rate": 0.001, "loss": 2.497, "step": 5056 }, { "epoch": 0.21393518910229292, "grad_norm": 0.21815036237239838, "learning_rate": 0.001, "loss": 2.4716, "step": 5057 }, { "epoch": 0.2139774938658093, "grad_norm": 0.976707398891449, "learning_rate": 0.001, "loss": 2.0707, "step": 5058 }, { "epoch": 0.21401979862932566, "grad_norm": 1.2479373216629028, "learning_rate": 0.001, "loss": 1.7482, "step": 5059 }, { "epoch": 0.21406210339284204, "grad_norm": 0.31325292587280273, "learning_rate": 0.001, "loss": 2.5769, "step": 5060 }, { "epoch": 0.2141044081563584, "grad_norm": 3.2416415214538574, "learning_rate": 0.001, "loss": 2.0277, "step": 5061 }, { "epoch": 0.21414671291987478, "grad_norm": 0.21428047120571136, "learning_rate": 0.001, "loss": 1.7499, "step": 5062 }, { "epoch": 0.21418901768339116, "grad_norm": 0.7413913607597351, "learning_rate": 0.001, "loss": 3.0148, "step": 5063 }, { "epoch": 0.2142313224469075, "grad_norm": 3.3833765983581543, "learning_rate": 0.001, "loss": 2.9668, "step": 5064 }, { "epoch": 0.2142736272104239, "grad_norm": 0.934876024723053, "learning_rate": 0.001, "loss": 2.7919, "step": 5065 }, { "epoch": 0.21431593197394028, "grad_norm": 0.4856380224227905, "learning_rate": 0.001, "loss": 2.3463, "step": 5066 }, { "epoch": 0.21435823673745663, "grad_norm": 0.5465160012245178, "learning_rate": 0.001, "loss": 2.8415, "step": 5067 }, { "epoch": 0.214400541500973, "grad_norm": 0.22289830446243286, "learning_rate": 0.001, "loss": 2.0494, "step": 5068 }, { "epoch": 0.2144428462644894, "grad_norm": 2.262659788131714, "learning_rate": 0.001, "loss": 2.1616, "step": 5069 }, { "epoch": 0.21448515102800575, "grad_norm": 0.2547045052051544, "learning_rate": 0.001, "loss": 2.714, "step": 5070 }, { "epoch": 0.21452745579152213, "grad_norm": 0.2590230703353882, "learning_rate": 0.001, "loss": 2.928, "step": 5071 }, { "epoch": 0.21456976055503849, "grad_norm": 0.42084890604019165, "learning_rate": 0.001, "loss": 3.0684, "step": 5072 }, { "epoch": 0.21461206531855487, "grad_norm": 0.7488611340522766, "learning_rate": 0.001, "loss": 2.8074, "step": 5073 }, { "epoch": 0.21465437008207125, "grad_norm": 0.32366877794265747, "learning_rate": 0.001, "loss": 3.2808, "step": 5074 }, { "epoch": 0.2146966748455876, "grad_norm": 0.20374761521816254, "learning_rate": 0.001, "loss": 2.5365, "step": 5075 }, { "epoch": 0.21473897960910399, "grad_norm": 0.23549343645572662, "learning_rate": 0.001, "loss": 3.1777, "step": 5076 }, { "epoch": 0.21478128437262037, "grad_norm": 17.203941345214844, "learning_rate": 0.001, "loss": 2.5717, "step": 5077 }, { "epoch": 0.21482358913613672, "grad_norm": 0.25522732734680176, "learning_rate": 0.001, "loss": 2.9367, "step": 5078 }, { "epoch": 0.2148658938996531, "grad_norm": 0.3394809365272522, "learning_rate": 0.001, "loss": 2.3561, "step": 5079 }, { "epoch": 0.2149081986631695, "grad_norm": 0.21299435198307037, "learning_rate": 0.001, "loss": 2.7499, "step": 5080 }, { "epoch": 0.21495050342668584, "grad_norm": 0.1964573711156845, "learning_rate": 0.001, "loss": 2.3468, "step": 5081 }, { "epoch": 0.21499280819020222, "grad_norm": 0.3184814155101776, "learning_rate": 0.001, "loss": 2.4803, "step": 5082 }, { "epoch": 0.21503511295371858, "grad_norm": 0.34336036443710327, "learning_rate": 0.001, "loss": 3.3502, "step": 5083 }, { "epoch": 0.21507741771723496, "grad_norm": 0.25828707218170166, "learning_rate": 0.001, "loss": 3.5646, "step": 5084 }, { "epoch": 0.21511972248075134, "grad_norm": 0.4242289066314697, "learning_rate": 0.001, "loss": 2.8577, "step": 5085 }, { "epoch": 0.2151620272442677, "grad_norm": 0.18986240029335022, "learning_rate": 0.001, "loss": 1.9367, "step": 5086 }, { "epoch": 0.21520433200778408, "grad_norm": 0.19585952162742615, "learning_rate": 0.001, "loss": 2.1596, "step": 5087 }, { "epoch": 0.21524663677130046, "grad_norm": 0.3815001845359802, "learning_rate": 0.001, "loss": 2.9462, "step": 5088 }, { "epoch": 0.2152889415348168, "grad_norm": 0.26725849509239197, "learning_rate": 0.001, "loss": 3.1378, "step": 5089 }, { "epoch": 0.2153312462983332, "grad_norm": 0.4309972822666168, "learning_rate": 0.001, "loss": 3.3749, "step": 5090 }, { "epoch": 0.21537355106184958, "grad_norm": 0.19142082333564758, "learning_rate": 0.001, "loss": 2.2686, "step": 5091 }, { "epoch": 0.21541585582536593, "grad_norm": 0.22003363072872162, "learning_rate": 0.001, "loss": 1.996, "step": 5092 }, { "epoch": 0.2154581605888823, "grad_norm": 0.30616286396980286, "learning_rate": 0.001, "loss": 3.3171, "step": 5093 }, { "epoch": 0.21550046535239867, "grad_norm": 8.013957977294922, "learning_rate": 0.001, "loss": 2.2265, "step": 5094 }, { "epoch": 0.21554277011591505, "grad_norm": 0.3616850972175598, "learning_rate": 0.001, "loss": 3.1465, "step": 5095 }, { "epoch": 0.21558507487943143, "grad_norm": 1.7922744750976562, "learning_rate": 0.001, "loss": 2.7905, "step": 5096 }, { "epoch": 0.2156273796429478, "grad_norm": 0.18036864697933197, "learning_rate": 0.001, "loss": 1.8923, "step": 5097 }, { "epoch": 0.21566968440646417, "grad_norm": 0.5982964038848877, "learning_rate": 0.001, "loss": 2.0202, "step": 5098 }, { "epoch": 0.21571198916998055, "grad_norm": 0.37752029299736023, "learning_rate": 0.001, "loss": 1.8078, "step": 5099 }, { "epoch": 0.2157542939334969, "grad_norm": 0.28953149914741516, "learning_rate": 0.001, "loss": 2.2722, "step": 5100 }, { "epoch": 0.2157965986970133, "grad_norm": 0.275361031293869, "learning_rate": 0.001, "loss": 1.8862, "step": 5101 }, { "epoch": 0.21583890346052967, "grad_norm": 2.51975679397583, "learning_rate": 0.001, "loss": 2.4179, "step": 5102 }, { "epoch": 0.21588120822404602, "grad_norm": 3.455864191055298, "learning_rate": 0.001, "loss": 2.9148, "step": 5103 }, { "epoch": 0.2159235129875624, "grad_norm": 0.20818361639976501, "learning_rate": 0.001, "loss": 1.5796, "step": 5104 }, { "epoch": 0.21596581775107876, "grad_norm": 0.2759215831756592, "learning_rate": 0.001, "loss": 3.4658, "step": 5105 }, { "epoch": 0.21600812251459514, "grad_norm": 0.2338385432958603, "learning_rate": 0.001, "loss": 2.5726, "step": 5106 }, { "epoch": 0.21605042727811152, "grad_norm": 0.2205616980791092, "learning_rate": 0.001, "loss": 4.0847, "step": 5107 }, { "epoch": 0.21609273204162788, "grad_norm": 0.8315731287002563, "learning_rate": 0.001, "loss": 2.2934, "step": 5108 }, { "epoch": 0.21613503680514426, "grad_norm": 0.22039955854415894, "learning_rate": 0.001, "loss": 2.0346, "step": 5109 }, { "epoch": 0.21617734156866064, "grad_norm": 0.89706951379776, "learning_rate": 0.001, "loss": 2.0214, "step": 5110 }, { "epoch": 0.216219646332177, "grad_norm": 13.432646751403809, "learning_rate": 0.001, "loss": 2.9813, "step": 5111 }, { "epoch": 0.21626195109569338, "grad_norm": 0.20482416450977325, "learning_rate": 0.001, "loss": 2.2288, "step": 5112 }, { "epoch": 0.21630425585920976, "grad_norm": 0.4937507212162018, "learning_rate": 0.001, "loss": 1.7228, "step": 5113 }, { "epoch": 0.21634656062272611, "grad_norm": 0.3154240548610687, "learning_rate": 0.001, "loss": 2.3277, "step": 5114 }, { "epoch": 0.2163888653862425, "grad_norm": 0.4587913453578949, "learning_rate": 0.001, "loss": 2.2894, "step": 5115 }, { "epoch": 0.21643117014975885, "grad_norm": 0.39989644289016724, "learning_rate": 0.001, "loss": 2.5237, "step": 5116 }, { "epoch": 0.21647347491327523, "grad_norm": 0.21547210216522217, "learning_rate": 0.001, "loss": 2.0295, "step": 5117 }, { "epoch": 0.21651577967679161, "grad_norm": 2.3156797885894775, "learning_rate": 0.001, "loss": 2.5201, "step": 5118 }, { "epoch": 0.21655808444030797, "grad_norm": 0.19337671995162964, "learning_rate": 0.001, "loss": 1.8627, "step": 5119 }, { "epoch": 0.21660038920382435, "grad_norm": 0.24067647755146027, "learning_rate": 0.001, "loss": 2.2175, "step": 5120 }, { "epoch": 0.21664269396734073, "grad_norm": 10.653289794921875, "learning_rate": 0.001, "loss": 3.6184, "step": 5121 }, { "epoch": 0.2166849987308571, "grad_norm": 0.22319035232067108, "learning_rate": 0.001, "loss": 2.0583, "step": 5122 }, { "epoch": 0.21672730349437347, "grad_norm": 13.307822227478027, "learning_rate": 0.001, "loss": 1.885, "step": 5123 }, { "epoch": 0.21676960825788985, "grad_norm": 41.82732009887695, "learning_rate": 0.001, "loss": 3.325, "step": 5124 }, { "epoch": 0.2168119130214062, "grad_norm": 0.20223648846149445, "learning_rate": 0.001, "loss": 1.657, "step": 5125 }, { "epoch": 0.2168542177849226, "grad_norm": 0.708741307258606, "learning_rate": 0.001, "loss": 2.2723, "step": 5126 }, { "epoch": 0.21689652254843894, "grad_norm": 0.2860840857028961, "learning_rate": 0.001, "loss": 2.3074, "step": 5127 }, { "epoch": 0.21693882731195532, "grad_norm": 0.4499269425868988, "learning_rate": 0.001, "loss": 2.5004, "step": 5128 }, { "epoch": 0.2169811320754717, "grad_norm": 0.3753020763397217, "learning_rate": 0.001, "loss": 2.7281, "step": 5129 }, { "epoch": 0.21702343683898806, "grad_norm": 6.676267623901367, "learning_rate": 0.001, "loss": 2.5515, "step": 5130 }, { "epoch": 0.21706574160250444, "grad_norm": 2.6914873123168945, "learning_rate": 0.001, "loss": 2.5111, "step": 5131 }, { "epoch": 0.21710804636602082, "grad_norm": 0.24604924023151398, "learning_rate": 0.001, "loss": 2.1823, "step": 5132 }, { "epoch": 0.21715035112953718, "grad_norm": 0.2534256875514984, "learning_rate": 0.001, "loss": 2.6668, "step": 5133 }, { "epoch": 0.21719265589305356, "grad_norm": 0.32457131147384644, "learning_rate": 0.001, "loss": 3.2796, "step": 5134 }, { "epoch": 0.21723496065656994, "grad_norm": 0.3132564425468445, "learning_rate": 0.001, "loss": 3.2073, "step": 5135 }, { "epoch": 0.2172772654200863, "grad_norm": 0.2281041145324707, "learning_rate": 0.001, "loss": 2.6315, "step": 5136 }, { "epoch": 0.21731957018360268, "grad_norm": 0.7160950303077698, "learning_rate": 0.001, "loss": 2.3004, "step": 5137 }, { "epoch": 0.21736187494711903, "grad_norm": 0.2322395145893097, "learning_rate": 0.001, "loss": 2.7504, "step": 5138 }, { "epoch": 0.21740417971063541, "grad_norm": 0.3613683879375458, "learning_rate": 0.001, "loss": 2.5574, "step": 5139 }, { "epoch": 0.2174464844741518, "grad_norm": 0.24612249433994293, "learning_rate": 0.001, "loss": 2.8064, "step": 5140 }, { "epoch": 0.21748878923766815, "grad_norm": 0.2229098081588745, "learning_rate": 0.001, "loss": 2.07, "step": 5141 }, { "epoch": 0.21753109400118453, "grad_norm": 0.19228215515613556, "learning_rate": 0.001, "loss": 1.859, "step": 5142 }, { "epoch": 0.21757339876470091, "grad_norm": 0.3609655499458313, "learning_rate": 0.001, "loss": 3.1708, "step": 5143 }, { "epoch": 0.21761570352821727, "grad_norm": 0.3540642261505127, "learning_rate": 0.001, "loss": 2.7362, "step": 5144 }, { "epoch": 0.21765800829173365, "grad_norm": 0.37258389592170715, "learning_rate": 0.001, "loss": 2.3635, "step": 5145 }, { "epoch": 0.21770031305525003, "grad_norm": 0.2843034863471985, "learning_rate": 0.001, "loss": 2.8098, "step": 5146 }, { "epoch": 0.2177426178187664, "grad_norm": 0.2767336368560791, "learning_rate": 0.001, "loss": 2.7574, "step": 5147 }, { "epoch": 0.21778492258228277, "grad_norm": 0.21004167199134827, "learning_rate": 0.001, "loss": 2.2051, "step": 5148 }, { "epoch": 0.21782722734579912, "grad_norm": 0.23345938324928284, "learning_rate": 0.001, "loss": 3.7967, "step": 5149 }, { "epoch": 0.2178695321093155, "grad_norm": 0.44973114132881165, "learning_rate": 0.001, "loss": 3.6378, "step": 5150 }, { "epoch": 0.2179118368728319, "grad_norm": 0.26152345538139343, "learning_rate": 0.001, "loss": 2.7861, "step": 5151 }, { "epoch": 0.21795414163634824, "grad_norm": 0.3166126012802124, "learning_rate": 0.001, "loss": 2.9621, "step": 5152 }, { "epoch": 0.21799644639986462, "grad_norm": 0.2214854210615158, "learning_rate": 0.001, "loss": 2.0622, "step": 5153 }, { "epoch": 0.218038751163381, "grad_norm": 0.21160806715488434, "learning_rate": 0.001, "loss": 2.9396, "step": 5154 }, { "epoch": 0.21808105592689736, "grad_norm": 0.21767565608024597, "learning_rate": 0.001, "loss": 1.7393, "step": 5155 }, { "epoch": 0.21812336069041374, "grad_norm": 18.17046356201172, "learning_rate": 0.001, "loss": 2.2801, "step": 5156 }, { "epoch": 0.21816566545393012, "grad_norm": 0.4204169511795044, "learning_rate": 0.001, "loss": 2.1432, "step": 5157 }, { "epoch": 0.21820797021744648, "grad_norm": 0.42446818947792053, "learning_rate": 0.001, "loss": 1.7413, "step": 5158 }, { "epoch": 0.21825027498096286, "grad_norm": 0.2123391479253769, "learning_rate": 0.001, "loss": 2.2634, "step": 5159 }, { "epoch": 0.21829257974447921, "grad_norm": 0.2388673722743988, "learning_rate": 0.001, "loss": 2.2383, "step": 5160 }, { "epoch": 0.2183348845079956, "grad_norm": 0.870901882648468, "learning_rate": 0.001, "loss": 3.157, "step": 5161 }, { "epoch": 0.21837718927151198, "grad_norm": 0.3035809397697449, "learning_rate": 0.001, "loss": 3.1643, "step": 5162 }, { "epoch": 0.21841949403502833, "grad_norm": 0.40469470620155334, "learning_rate": 0.001, "loss": 2.211, "step": 5163 }, { "epoch": 0.21846179879854472, "grad_norm": 0.22423990070819855, "learning_rate": 0.001, "loss": 1.9705, "step": 5164 }, { "epoch": 0.2185041035620611, "grad_norm": 0.20097650587558746, "learning_rate": 0.001, "loss": 2.2083, "step": 5165 }, { "epoch": 0.21854640832557745, "grad_norm": 0.2772468626499176, "learning_rate": 0.001, "loss": 2.8581, "step": 5166 }, { "epoch": 0.21858871308909383, "grad_norm": 0.20195744931697845, "learning_rate": 0.001, "loss": 1.8894, "step": 5167 }, { "epoch": 0.21863101785261022, "grad_norm": 0.21510477364063263, "learning_rate": 0.001, "loss": 2.1195, "step": 5168 }, { "epoch": 0.21867332261612657, "grad_norm": 0.2677229344844818, "learning_rate": 0.001, "loss": 2.9506, "step": 5169 }, { "epoch": 0.21871562737964295, "grad_norm": 1.1444584131240845, "learning_rate": 0.001, "loss": 1.6645, "step": 5170 }, { "epoch": 0.2187579321431593, "grad_norm": 0.30999642610549927, "learning_rate": 0.001, "loss": 2.7087, "step": 5171 }, { "epoch": 0.2188002369066757, "grad_norm": 0.20951640605926514, "learning_rate": 0.001, "loss": 2.8629, "step": 5172 }, { "epoch": 0.21884254167019207, "grad_norm": 0.2823593020439148, "learning_rate": 0.001, "loss": 1.8454, "step": 5173 }, { "epoch": 0.21888484643370842, "grad_norm": 0.19759036600589752, "learning_rate": 0.001, "loss": 1.6832, "step": 5174 }, { "epoch": 0.2189271511972248, "grad_norm": 1.2790015935897827, "learning_rate": 0.001, "loss": 2.2598, "step": 5175 }, { "epoch": 0.2189694559607412, "grad_norm": 0.3004913926124573, "learning_rate": 0.001, "loss": 2.8445, "step": 5176 }, { "epoch": 0.21901176072425754, "grad_norm": 0.1984034925699234, "learning_rate": 0.001, "loss": 2.0588, "step": 5177 }, { "epoch": 0.21905406548777392, "grad_norm": 0.6654098033905029, "learning_rate": 0.001, "loss": 2.5635, "step": 5178 }, { "epoch": 0.2190963702512903, "grad_norm": 0.21716728806495667, "learning_rate": 0.001, "loss": 1.8904, "step": 5179 }, { "epoch": 0.21913867501480666, "grad_norm": 0.2529633939266205, "learning_rate": 0.001, "loss": 3.3847, "step": 5180 }, { "epoch": 0.21918097977832304, "grad_norm": 0.24113009870052338, "learning_rate": 0.001, "loss": 2.8504, "step": 5181 }, { "epoch": 0.21922328454183942, "grad_norm": 0.23012512922286987, "learning_rate": 0.001, "loss": 1.8835, "step": 5182 }, { "epoch": 0.21926558930535578, "grad_norm": 0.2334389090538025, "learning_rate": 0.001, "loss": 2.9091, "step": 5183 }, { "epoch": 0.21930789406887216, "grad_norm": 3.1486589908599854, "learning_rate": 0.001, "loss": 3.0328, "step": 5184 }, { "epoch": 0.21935019883238852, "grad_norm": 0.18506896495819092, "learning_rate": 0.001, "loss": 3.1343, "step": 5185 }, { "epoch": 0.2193925035959049, "grad_norm": 0.24055512249469757, "learning_rate": 0.001, "loss": 1.9749, "step": 5186 }, { "epoch": 0.21943480835942128, "grad_norm": 0.3492937684059143, "learning_rate": 0.001, "loss": 2.3837, "step": 5187 }, { "epoch": 0.21947711312293763, "grad_norm": 0.2223334014415741, "learning_rate": 0.001, "loss": 2.7189, "step": 5188 }, { "epoch": 0.21951941788645402, "grad_norm": 2.0929033756256104, "learning_rate": 0.001, "loss": 2.3026, "step": 5189 }, { "epoch": 0.2195617226499704, "grad_norm": 3.1999146938323975, "learning_rate": 0.001, "loss": 1.7523, "step": 5190 }, { "epoch": 0.21960402741348675, "grad_norm": 0.19789402186870575, "learning_rate": 0.001, "loss": 1.6299, "step": 5191 }, { "epoch": 0.21964633217700313, "grad_norm": 0.23743316531181335, "learning_rate": 0.001, "loss": 3.3135, "step": 5192 }, { "epoch": 0.21968863694051952, "grad_norm": 0.2639296054840088, "learning_rate": 0.001, "loss": 2.3337, "step": 5193 }, { "epoch": 0.21973094170403587, "grad_norm": 0.24590055644512177, "learning_rate": 0.001, "loss": 2.4579, "step": 5194 }, { "epoch": 0.21977324646755225, "grad_norm": 0.2431252896785736, "learning_rate": 0.001, "loss": 2.1513, "step": 5195 }, { "epoch": 0.2198155512310686, "grad_norm": 0.22125622630119324, "learning_rate": 0.001, "loss": 2.3895, "step": 5196 }, { "epoch": 0.219857855994585, "grad_norm": 0.19629360735416412, "learning_rate": 0.001, "loss": 2.045, "step": 5197 }, { "epoch": 0.21990016075810137, "grad_norm": 0.23924358189105988, "learning_rate": 0.001, "loss": 2.2106, "step": 5198 }, { "epoch": 0.21994246552161772, "grad_norm": 0.21963438391685486, "learning_rate": 0.001, "loss": 2.2913, "step": 5199 }, { "epoch": 0.2199847702851341, "grad_norm": 0.2901560366153717, "learning_rate": 0.001, "loss": 2.8481, "step": 5200 }, { "epoch": 0.2200270750486505, "grad_norm": 0.2538588345050812, "learning_rate": 0.001, "loss": 3.4287, "step": 5201 }, { "epoch": 0.22006937981216684, "grad_norm": 0.23842386901378632, "learning_rate": 0.001, "loss": 3.3795, "step": 5202 }, { "epoch": 0.22011168457568323, "grad_norm": 0.255536288022995, "learning_rate": 0.001, "loss": 3.2765, "step": 5203 }, { "epoch": 0.2201539893391996, "grad_norm": 4.658047199249268, "learning_rate": 0.001, "loss": 2.3231, "step": 5204 }, { "epoch": 0.22019629410271596, "grad_norm": 0.20021434128284454, "learning_rate": 0.001, "loss": 2.3354, "step": 5205 }, { "epoch": 0.22023859886623234, "grad_norm": 0.19822002947330475, "learning_rate": 0.001, "loss": 1.7009, "step": 5206 }, { "epoch": 0.2202809036297487, "grad_norm": 0.21478721499443054, "learning_rate": 0.001, "loss": 2.0951, "step": 5207 }, { "epoch": 0.22032320839326508, "grad_norm": 0.21483932435512543, "learning_rate": 0.001, "loss": 2.7571, "step": 5208 }, { "epoch": 0.22036551315678146, "grad_norm": 0.23699632287025452, "learning_rate": 0.001, "loss": 2.2913, "step": 5209 }, { "epoch": 0.22040781792029782, "grad_norm": 0.26198896765708923, "learning_rate": 0.001, "loss": 4.5366, "step": 5210 }, { "epoch": 0.2204501226838142, "grad_norm": 0.21379588544368744, "learning_rate": 0.001, "loss": 1.9979, "step": 5211 }, { "epoch": 0.22049242744733058, "grad_norm": 0.2645533084869385, "learning_rate": 0.001, "loss": 2.2221, "step": 5212 }, { "epoch": 0.22053473221084693, "grad_norm": 0.19984637200832367, "learning_rate": 0.001, "loss": 2.0598, "step": 5213 }, { "epoch": 0.22057703697436332, "grad_norm": 0.2022748589515686, "learning_rate": 0.001, "loss": 2.3214, "step": 5214 }, { "epoch": 0.2206193417378797, "grad_norm": 0.20694908499717712, "learning_rate": 0.001, "loss": 2.0205, "step": 5215 }, { "epoch": 0.22066164650139605, "grad_norm": 0.2556994557380676, "learning_rate": 0.001, "loss": 2.4558, "step": 5216 }, { "epoch": 0.22070395126491243, "grad_norm": 0.7074611186981201, "learning_rate": 0.001, "loss": 1.6551, "step": 5217 }, { "epoch": 0.2207462560284288, "grad_norm": 0.19164715707302094, "learning_rate": 0.001, "loss": 2.1213, "step": 5218 }, { "epoch": 0.22078856079194517, "grad_norm": 0.32563894987106323, "learning_rate": 0.001, "loss": 2.366, "step": 5219 }, { "epoch": 0.22083086555546155, "grad_norm": 0.21705080568790436, "learning_rate": 0.001, "loss": 2.4052, "step": 5220 }, { "epoch": 0.2208731703189779, "grad_norm": 0.20345237851142883, "learning_rate": 0.001, "loss": 2.2577, "step": 5221 }, { "epoch": 0.2209154750824943, "grad_norm": 0.2837914824485779, "learning_rate": 0.001, "loss": 2.1825, "step": 5222 }, { "epoch": 0.22095777984601067, "grad_norm": 1.1149832010269165, "learning_rate": 0.001, "loss": 2.357, "step": 5223 }, { "epoch": 0.22100008460952703, "grad_norm": 0.23472867906093597, "learning_rate": 0.001, "loss": 2.6525, "step": 5224 }, { "epoch": 0.2210423893730434, "grad_norm": 0.6274850964546204, "learning_rate": 0.001, "loss": 3.9738, "step": 5225 }, { "epoch": 0.2210846941365598, "grad_norm": 0.21737419068813324, "learning_rate": 0.001, "loss": 2.4552, "step": 5226 }, { "epoch": 0.22112699890007614, "grad_norm": 0.2056940793991089, "learning_rate": 0.001, "loss": 2.8104, "step": 5227 }, { "epoch": 0.22116930366359253, "grad_norm": 0.2175094336271286, "learning_rate": 0.001, "loss": 2.1255, "step": 5228 }, { "epoch": 0.22121160842710888, "grad_norm": 0.39655065536499023, "learning_rate": 0.001, "loss": 1.9519, "step": 5229 }, { "epoch": 0.22125391319062526, "grad_norm": 0.20697876811027527, "learning_rate": 0.001, "loss": 2.4179, "step": 5230 }, { "epoch": 0.22129621795414164, "grad_norm": 0.217100128531456, "learning_rate": 0.001, "loss": 2.7213, "step": 5231 }, { "epoch": 0.221338522717658, "grad_norm": 0.21442492306232452, "learning_rate": 0.001, "loss": 2.8483, "step": 5232 }, { "epoch": 0.22138082748117438, "grad_norm": 0.21602658927440643, "learning_rate": 0.001, "loss": 1.6262, "step": 5233 }, { "epoch": 0.22142313224469076, "grad_norm": 0.22940513491630554, "learning_rate": 0.001, "loss": 1.9484, "step": 5234 }, { "epoch": 0.22146543700820712, "grad_norm": 3.8185322284698486, "learning_rate": 0.001, "loss": 2.8154, "step": 5235 }, { "epoch": 0.2215077417717235, "grad_norm": 0.28936222195625305, "learning_rate": 0.001, "loss": 2.8811, "step": 5236 }, { "epoch": 0.22155004653523988, "grad_norm": 0.24605505168437958, "learning_rate": 0.001, "loss": 3.03, "step": 5237 }, { "epoch": 0.22159235129875623, "grad_norm": 0.20465493202209473, "learning_rate": 0.001, "loss": 2.3525, "step": 5238 }, { "epoch": 0.22163465606227262, "grad_norm": 0.20358186960220337, "learning_rate": 0.001, "loss": 2.1494, "step": 5239 }, { "epoch": 0.22167696082578897, "grad_norm": 0.2088003009557724, "learning_rate": 0.001, "loss": 1.9404, "step": 5240 }, { "epoch": 0.22171926558930535, "grad_norm": 0.20697228610515594, "learning_rate": 0.001, "loss": 3.8656, "step": 5241 }, { "epoch": 0.22176157035282174, "grad_norm": 0.22819316387176514, "learning_rate": 0.001, "loss": 2.1, "step": 5242 }, { "epoch": 0.2218038751163381, "grad_norm": 0.32436373829841614, "learning_rate": 0.001, "loss": 3.2291, "step": 5243 }, { "epoch": 0.22184617987985447, "grad_norm": 0.3842344880104065, "learning_rate": 0.001, "loss": 1.9864, "step": 5244 }, { "epoch": 0.22188848464337085, "grad_norm": 0.22547200322151184, "learning_rate": 0.001, "loss": 2.1806, "step": 5245 }, { "epoch": 0.2219307894068872, "grad_norm": 0.201228067278862, "learning_rate": 0.001, "loss": 2.6097, "step": 5246 }, { "epoch": 0.2219730941704036, "grad_norm": 0.21469905972480774, "learning_rate": 0.001, "loss": 1.884, "step": 5247 }, { "epoch": 0.22201539893391997, "grad_norm": 17.149986267089844, "learning_rate": 0.001, "loss": 2.0795, "step": 5248 }, { "epoch": 0.22205770369743633, "grad_norm": 2.927840232849121, "learning_rate": 0.001, "loss": 2.0508, "step": 5249 }, { "epoch": 0.2221000084609527, "grad_norm": 0.20757484436035156, "learning_rate": 0.001, "loss": 3.9396, "step": 5250 }, { "epoch": 0.22214231322446906, "grad_norm": 0.22015203535556793, "learning_rate": 0.001, "loss": 2.512, "step": 5251 }, { "epoch": 0.22218461798798544, "grad_norm": 0.22938333451747894, "learning_rate": 0.001, "loss": 2.2927, "step": 5252 }, { "epoch": 0.22222692275150183, "grad_norm": 0.24902082979679108, "learning_rate": 0.001, "loss": 2.9212, "step": 5253 }, { "epoch": 0.22226922751501818, "grad_norm": 0.23875977098941803, "learning_rate": 0.001, "loss": 2.0939, "step": 5254 }, { "epoch": 0.22231153227853456, "grad_norm": 0.21278776228427887, "learning_rate": 0.001, "loss": 1.9339, "step": 5255 }, { "epoch": 0.22235383704205094, "grad_norm": 0.2972838282585144, "learning_rate": 0.001, "loss": 3.0246, "step": 5256 }, { "epoch": 0.2223961418055673, "grad_norm": 0.20768025517463684, "learning_rate": 0.001, "loss": 1.9835, "step": 5257 }, { "epoch": 0.22243844656908368, "grad_norm": 5.894577980041504, "learning_rate": 0.001, "loss": 2.6385, "step": 5258 }, { "epoch": 0.22248075133260006, "grad_norm": 0.2140832394361496, "learning_rate": 0.001, "loss": 2.7344, "step": 5259 }, { "epoch": 0.22252305609611642, "grad_norm": 0.20773886144161224, "learning_rate": 0.001, "loss": 2.4667, "step": 5260 }, { "epoch": 0.2225653608596328, "grad_norm": 0.21243473887443542, "learning_rate": 0.001, "loss": 2.1209, "step": 5261 }, { "epoch": 0.22260766562314915, "grad_norm": 0.2120569497346878, "learning_rate": 0.001, "loss": 2.1573, "step": 5262 }, { "epoch": 0.22264997038666554, "grad_norm": 0.25889766216278076, "learning_rate": 0.001, "loss": 2.5132, "step": 5263 }, { "epoch": 0.22269227515018192, "grad_norm": 0.2106718271970749, "learning_rate": 0.001, "loss": 1.8965, "step": 5264 }, { "epoch": 0.22273457991369827, "grad_norm": 0.2909772992134094, "learning_rate": 0.001, "loss": 2.1466, "step": 5265 }, { "epoch": 0.22277688467721465, "grad_norm": 1.4828362464904785, "learning_rate": 0.001, "loss": 2.8965, "step": 5266 }, { "epoch": 0.22281918944073104, "grad_norm": 0.23213310539722443, "learning_rate": 0.001, "loss": 2.0563, "step": 5267 }, { "epoch": 0.2228614942042474, "grad_norm": 0.2343592345714569, "learning_rate": 0.001, "loss": 2.6482, "step": 5268 }, { "epoch": 0.22290379896776377, "grad_norm": 0.2614687979221344, "learning_rate": 0.001, "loss": 2.7576, "step": 5269 }, { "epoch": 0.22294610373128015, "grad_norm": 4.1902008056640625, "learning_rate": 0.001, "loss": 2.9158, "step": 5270 }, { "epoch": 0.2229884084947965, "grad_norm": 0.20279575884342194, "learning_rate": 0.001, "loss": 2.205, "step": 5271 }, { "epoch": 0.2230307132583129, "grad_norm": 0.373855322599411, "learning_rate": 0.001, "loss": 2.6883, "step": 5272 }, { "epoch": 0.22307301802182924, "grad_norm": 0.3078487813472748, "learning_rate": 0.001, "loss": 1.9063, "step": 5273 }, { "epoch": 0.22311532278534563, "grad_norm": 0.3240393102169037, "learning_rate": 0.001, "loss": 2.6232, "step": 5274 }, { "epoch": 0.223157627548862, "grad_norm": 0.3263351321220398, "learning_rate": 0.001, "loss": 2.3034, "step": 5275 }, { "epoch": 0.22319993231237836, "grad_norm": 0.7479755282402039, "learning_rate": 0.001, "loss": 1.8465, "step": 5276 }, { "epoch": 0.22324223707589474, "grad_norm": 2.0020651817321777, "learning_rate": 0.001, "loss": 1.859, "step": 5277 }, { "epoch": 0.22328454183941113, "grad_norm": 0.2206108123064041, "learning_rate": 0.001, "loss": 1.5864, "step": 5278 }, { "epoch": 0.22332684660292748, "grad_norm": 3.234041452407837, "learning_rate": 0.001, "loss": 2.1619, "step": 5279 }, { "epoch": 0.22336915136644386, "grad_norm": 0.21610672771930695, "learning_rate": 0.001, "loss": 1.948, "step": 5280 }, { "epoch": 0.22341145612996025, "grad_norm": 1.4125299453735352, "learning_rate": 0.001, "loss": 2.7824, "step": 5281 }, { "epoch": 0.2234537608934766, "grad_norm": 0.19420327246189117, "learning_rate": 0.001, "loss": 1.985, "step": 5282 }, { "epoch": 0.22349606565699298, "grad_norm": 0.26585274934768677, "learning_rate": 0.001, "loss": 2.6315, "step": 5283 }, { "epoch": 0.22353837042050934, "grad_norm": 1.4780011177062988, "learning_rate": 0.001, "loss": 2.5902, "step": 5284 }, { "epoch": 0.22358067518402572, "grad_norm": 0.29642167687416077, "learning_rate": 0.001, "loss": 1.7713, "step": 5285 }, { "epoch": 0.2236229799475421, "grad_norm": 0.192737877368927, "learning_rate": 0.001, "loss": 2.7865, "step": 5286 }, { "epoch": 0.22366528471105845, "grad_norm": 2.1191747188568115, "learning_rate": 0.001, "loss": 2.4322, "step": 5287 }, { "epoch": 0.22370758947457484, "grad_norm": 1.5505908727645874, "learning_rate": 0.001, "loss": 2.4076, "step": 5288 }, { "epoch": 0.22374989423809122, "grad_norm": 0.2791816294193268, "learning_rate": 0.001, "loss": 2.6396, "step": 5289 }, { "epoch": 0.22379219900160757, "grad_norm": 0.5878250598907471, "learning_rate": 0.001, "loss": 2.4715, "step": 5290 }, { "epoch": 0.22383450376512395, "grad_norm": 0.22260555624961853, "learning_rate": 0.001, "loss": 1.734, "step": 5291 }, { "epoch": 0.22387680852864034, "grad_norm": 0.27154138684272766, "learning_rate": 0.001, "loss": 2.9405, "step": 5292 }, { "epoch": 0.2239191132921567, "grad_norm": 0.24614472687244415, "learning_rate": 0.001, "loss": 3.2545, "step": 5293 }, { "epoch": 0.22396141805567307, "grad_norm": 0.26551756262779236, "learning_rate": 0.001, "loss": 2.62, "step": 5294 }, { "epoch": 0.22400372281918943, "grad_norm": 0.23047612607479095, "learning_rate": 0.001, "loss": 1.9018, "step": 5295 }, { "epoch": 0.2240460275827058, "grad_norm": 0.2711014151573181, "learning_rate": 0.001, "loss": 2.3669, "step": 5296 }, { "epoch": 0.2240883323462222, "grad_norm": 0.2241722196340561, "learning_rate": 0.001, "loss": 2.2924, "step": 5297 }, { "epoch": 0.22413063710973855, "grad_norm": 0.36074158549308777, "learning_rate": 0.001, "loss": 2.4914, "step": 5298 }, { "epoch": 0.22417294187325493, "grad_norm": 0.21040941774845123, "learning_rate": 0.001, "loss": 2.2778, "step": 5299 }, { "epoch": 0.2242152466367713, "grad_norm": 0.17329736053943634, "learning_rate": 0.001, "loss": 2.7799, "step": 5300 }, { "epoch": 0.22425755140028766, "grad_norm": 1.4445072412490845, "learning_rate": 0.001, "loss": 2.6139, "step": 5301 }, { "epoch": 0.22429985616380405, "grad_norm": 0.20153391361236572, "learning_rate": 0.001, "loss": 2.4595, "step": 5302 }, { "epoch": 0.22434216092732043, "grad_norm": 0.3603563606739044, "learning_rate": 0.001, "loss": 3.4425, "step": 5303 }, { "epoch": 0.22438446569083678, "grad_norm": 0.2601318955421448, "learning_rate": 0.001, "loss": 3.1128, "step": 5304 }, { "epoch": 0.22442677045435316, "grad_norm": 0.1929735243320465, "learning_rate": 0.001, "loss": 1.8997, "step": 5305 }, { "epoch": 0.22446907521786955, "grad_norm": 0.26424339413642883, "learning_rate": 0.001, "loss": 2.3221, "step": 5306 }, { "epoch": 0.2245113799813859, "grad_norm": 0.396243155002594, "learning_rate": 0.001, "loss": 2.9599, "step": 5307 }, { "epoch": 0.22455368474490228, "grad_norm": 1.7157014608383179, "learning_rate": 0.001, "loss": 2.1441, "step": 5308 }, { "epoch": 0.22459598950841864, "grad_norm": 0.33265525102615356, "learning_rate": 0.001, "loss": 1.9819, "step": 5309 }, { "epoch": 0.22463829427193502, "grad_norm": 0.6093931198120117, "learning_rate": 0.001, "loss": 2.9678, "step": 5310 }, { "epoch": 0.2246805990354514, "grad_norm": 0.2151605635881424, "learning_rate": 0.001, "loss": 2.5349, "step": 5311 }, { "epoch": 0.22472290379896775, "grad_norm": 8.028112411499023, "learning_rate": 0.001, "loss": 2.6196, "step": 5312 }, { "epoch": 0.22476520856248414, "grad_norm": 1.0721027851104736, "learning_rate": 0.001, "loss": 3.4655, "step": 5313 }, { "epoch": 0.22480751332600052, "grad_norm": 0.20032766461372375, "learning_rate": 0.001, "loss": 2.1955, "step": 5314 }, { "epoch": 0.22484981808951687, "grad_norm": 0.20679010450839996, "learning_rate": 0.001, "loss": 1.743, "step": 5315 }, { "epoch": 0.22489212285303326, "grad_norm": 0.23269832134246826, "learning_rate": 0.001, "loss": 1.9682, "step": 5316 }, { "epoch": 0.22493442761654964, "grad_norm": 0.2920213043689728, "learning_rate": 0.001, "loss": 2.6719, "step": 5317 }, { "epoch": 0.224976732380066, "grad_norm": 0.31730595231056213, "learning_rate": 0.001, "loss": 2.7025, "step": 5318 }, { "epoch": 0.22501903714358237, "grad_norm": 0.20346704125404358, "learning_rate": 0.001, "loss": 2.528, "step": 5319 }, { "epoch": 0.22506134190709873, "grad_norm": 0.9651232957839966, "learning_rate": 0.001, "loss": 2.3393, "step": 5320 }, { "epoch": 0.2251036466706151, "grad_norm": 3.8393514156341553, "learning_rate": 0.001, "loss": 1.8216, "step": 5321 }, { "epoch": 0.2251459514341315, "grad_norm": 0.1834096908569336, "learning_rate": 0.001, "loss": 2.0548, "step": 5322 }, { "epoch": 0.22518825619764785, "grad_norm": 0.4497200846672058, "learning_rate": 0.001, "loss": 2.9522, "step": 5323 }, { "epoch": 0.22523056096116423, "grad_norm": 0.21612843871116638, "learning_rate": 0.001, "loss": 2.9417, "step": 5324 }, { "epoch": 0.2252728657246806, "grad_norm": 0.23549750447273254, "learning_rate": 0.001, "loss": 1.8287, "step": 5325 }, { "epoch": 0.22531517048819696, "grad_norm": 0.25662145018577576, "learning_rate": 0.001, "loss": 3.0902, "step": 5326 }, { "epoch": 0.22535747525171335, "grad_norm": 0.24648882448673248, "learning_rate": 0.001, "loss": 2.5687, "step": 5327 }, { "epoch": 0.22539978001522973, "grad_norm": 0.23536236584186554, "learning_rate": 0.001, "loss": 2.0515, "step": 5328 }, { "epoch": 0.22544208477874608, "grad_norm": 0.25583964586257935, "learning_rate": 0.001, "loss": 2.2743, "step": 5329 }, { "epoch": 0.22548438954226246, "grad_norm": 6.257376194000244, "learning_rate": 0.001, "loss": 1.6795, "step": 5330 }, { "epoch": 0.22552669430577882, "grad_norm": 1.3603640794754028, "learning_rate": 0.001, "loss": 3.1865, "step": 5331 }, { "epoch": 0.2255689990692952, "grad_norm": 0.2052699625492096, "learning_rate": 0.001, "loss": 2.1218, "step": 5332 }, { "epoch": 0.22561130383281158, "grad_norm": 0.2767829895019531, "learning_rate": 0.001, "loss": 2.3987, "step": 5333 }, { "epoch": 0.22565360859632794, "grad_norm": 0.22107641398906708, "learning_rate": 0.001, "loss": 1.91, "step": 5334 }, { "epoch": 0.22569591335984432, "grad_norm": 0.5406429767608643, "learning_rate": 0.001, "loss": 2.1801, "step": 5335 }, { "epoch": 0.2257382181233607, "grad_norm": 0.3691091239452362, "learning_rate": 0.001, "loss": 2.8873, "step": 5336 }, { "epoch": 0.22578052288687706, "grad_norm": 0.25709986686706543, "learning_rate": 0.001, "loss": 3.6867, "step": 5337 }, { "epoch": 0.22582282765039344, "grad_norm": 1.0610352754592896, "learning_rate": 0.001, "loss": 2.0219, "step": 5338 }, { "epoch": 0.22586513241390982, "grad_norm": 0.2490183711051941, "learning_rate": 0.001, "loss": 3.2717, "step": 5339 }, { "epoch": 0.22590743717742617, "grad_norm": 0.3832576870918274, "learning_rate": 0.001, "loss": 2.6887, "step": 5340 }, { "epoch": 0.22594974194094256, "grad_norm": 0.20792420208454132, "learning_rate": 0.001, "loss": 1.4877, "step": 5341 }, { "epoch": 0.2259920467044589, "grad_norm": 0.2878759503364563, "learning_rate": 0.001, "loss": 2.5409, "step": 5342 }, { "epoch": 0.2260343514679753, "grad_norm": 0.20281533896923065, "learning_rate": 0.001, "loss": 1.8222, "step": 5343 }, { "epoch": 0.22607665623149167, "grad_norm": 0.3520990014076233, "learning_rate": 0.001, "loss": 2.7768, "step": 5344 }, { "epoch": 0.22611896099500803, "grad_norm": 0.25387975573539734, "learning_rate": 0.001, "loss": 2.5626, "step": 5345 }, { "epoch": 0.2261612657585244, "grad_norm": 0.2082633674144745, "learning_rate": 0.001, "loss": 1.8055, "step": 5346 }, { "epoch": 0.2262035705220408, "grad_norm": 0.24384428560733795, "learning_rate": 0.001, "loss": 3.0511, "step": 5347 }, { "epoch": 0.22624587528555715, "grad_norm": 0.2108755111694336, "learning_rate": 0.001, "loss": 2.4456, "step": 5348 }, { "epoch": 0.22628818004907353, "grad_norm": 0.23625120520591736, "learning_rate": 0.001, "loss": 2.0264, "step": 5349 }, { "epoch": 0.2263304848125899, "grad_norm": 0.19359584152698517, "learning_rate": 0.001, "loss": 2.1669, "step": 5350 }, { "epoch": 0.22637278957610626, "grad_norm": 0.23231805860996246, "learning_rate": 0.001, "loss": 2.3967, "step": 5351 }, { "epoch": 0.22641509433962265, "grad_norm": 0.2224705070257187, "learning_rate": 0.001, "loss": 3.0155, "step": 5352 }, { "epoch": 0.226457399103139, "grad_norm": 0.18623998761177063, "learning_rate": 0.001, "loss": 2.7007, "step": 5353 }, { "epoch": 0.22649970386665538, "grad_norm": 0.190797358751297, "learning_rate": 0.001, "loss": 1.8014, "step": 5354 }, { "epoch": 0.22654200863017177, "grad_norm": 0.2000328004360199, "learning_rate": 0.001, "loss": 2.1544, "step": 5355 }, { "epoch": 0.22658431339368812, "grad_norm": 0.26492246985435486, "learning_rate": 0.001, "loss": 2.5089, "step": 5356 }, { "epoch": 0.2266266181572045, "grad_norm": 0.22106075286865234, "learning_rate": 0.001, "loss": 2.0971, "step": 5357 }, { "epoch": 0.22666892292072088, "grad_norm": 0.20364297926425934, "learning_rate": 0.001, "loss": 2.6095, "step": 5358 }, { "epoch": 0.22671122768423724, "grad_norm": 0.30333849787712097, "learning_rate": 0.001, "loss": 1.6666, "step": 5359 }, { "epoch": 0.22675353244775362, "grad_norm": 0.20348311960697174, "learning_rate": 0.001, "loss": 1.8586, "step": 5360 }, { "epoch": 0.22679583721127, "grad_norm": 0.265525221824646, "learning_rate": 0.001, "loss": 2.3425, "step": 5361 }, { "epoch": 0.22683814197478636, "grad_norm": 0.2070625275373459, "learning_rate": 0.001, "loss": 1.855, "step": 5362 }, { "epoch": 0.22688044673830274, "grad_norm": 0.24439737200737, "learning_rate": 0.001, "loss": 2.8247, "step": 5363 }, { "epoch": 0.2269227515018191, "grad_norm": 0.18254782259464264, "learning_rate": 0.001, "loss": 2.1448, "step": 5364 }, { "epoch": 0.22696505626533547, "grad_norm": 0.2069178968667984, "learning_rate": 0.001, "loss": 2.2075, "step": 5365 }, { "epoch": 0.22700736102885186, "grad_norm": 0.2451086789369583, "learning_rate": 0.001, "loss": 2.7744, "step": 5366 }, { "epoch": 0.2270496657923682, "grad_norm": 0.2338859587907791, "learning_rate": 0.001, "loss": 2.2202, "step": 5367 }, { "epoch": 0.2270919705558846, "grad_norm": 0.17298074066638947, "learning_rate": 0.001, "loss": 2.8615, "step": 5368 }, { "epoch": 0.22713427531940097, "grad_norm": 0.20199984312057495, "learning_rate": 0.001, "loss": 2.1147, "step": 5369 }, { "epoch": 0.22717658008291733, "grad_norm": 0.18716251850128174, "learning_rate": 0.001, "loss": 2.4151, "step": 5370 }, { "epoch": 0.2272188848464337, "grad_norm": 0.22359853982925415, "learning_rate": 0.001, "loss": 2.2439, "step": 5371 }, { "epoch": 0.2272611896099501, "grad_norm": 0.1923590749502182, "learning_rate": 0.001, "loss": 1.878, "step": 5372 }, { "epoch": 0.22730349437346645, "grad_norm": 0.20053477585315704, "learning_rate": 0.001, "loss": 2.8747, "step": 5373 }, { "epoch": 0.22734579913698283, "grad_norm": 0.20686081051826477, "learning_rate": 0.001, "loss": 1.8966, "step": 5374 }, { "epoch": 0.22738810390049918, "grad_norm": 0.2037486433982849, "learning_rate": 0.001, "loss": 2.1247, "step": 5375 }, { "epoch": 0.22743040866401557, "grad_norm": 0.2048976719379425, "learning_rate": 0.001, "loss": 2.5233, "step": 5376 }, { "epoch": 0.22747271342753195, "grad_norm": 0.22234363853931427, "learning_rate": 0.001, "loss": 3.5596, "step": 5377 }, { "epoch": 0.2275150181910483, "grad_norm": 0.17984293401241302, "learning_rate": 0.001, "loss": 1.7908, "step": 5378 }, { "epoch": 0.22755732295456468, "grad_norm": 0.19727393984794617, "learning_rate": 0.001, "loss": 1.9901, "step": 5379 }, { "epoch": 0.22759962771808107, "grad_norm": 0.6455583572387695, "learning_rate": 0.001, "loss": 2.2072, "step": 5380 }, { "epoch": 0.22764193248159742, "grad_norm": 0.1867457777261734, "learning_rate": 0.001, "loss": 2.5531, "step": 5381 }, { "epoch": 0.2276842372451138, "grad_norm": 0.23731602728366852, "learning_rate": 0.001, "loss": 2.3504, "step": 5382 }, { "epoch": 0.22772654200863018, "grad_norm": 0.1865188479423523, "learning_rate": 0.001, "loss": 2.1391, "step": 5383 }, { "epoch": 0.22776884677214654, "grad_norm": 0.26208534836769104, "learning_rate": 0.001, "loss": 2.0536, "step": 5384 }, { "epoch": 0.22781115153566292, "grad_norm": 0.16341978311538696, "learning_rate": 0.001, "loss": 2.8151, "step": 5385 }, { "epoch": 0.22785345629917927, "grad_norm": 0.2644388675689697, "learning_rate": 0.001, "loss": 2.5974, "step": 5386 }, { "epoch": 0.22789576106269566, "grad_norm": 0.18066078424453735, "learning_rate": 0.001, "loss": 1.6, "step": 5387 }, { "epoch": 0.22793806582621204, "grad_norm": 0.34033629298210144, "learning_rate": 0.001, "loss": 1.4947, "step": 5388 }, { "epoch": 0.2279803705897284, "grad_norm": 0.19720013439655304, "learning_rate": 0.001, "loss": 1.9336, "step": 5389 }, { "epoch": 0.22802267535324477, "grad_norm": 0.20138509571552277, "learning_rate": 0.001, "loss": 2.4402, "step": 5390 }, { "epoch": 0.22806498011676116, "grad_norm": 0.163251593708992, "learning_rate": 0.001, "loss": 1.7993, "step": 5391 }, { "epoch": 0.2281072848802775, "grad_norm": 0.20354165136814117, "learning_rate": 0.001, "loss": 2.5252, "step": 5392 }, { "epoch": 0.2281495896437939, "grad_norm": 6.216144561767578, "learning_rate": 0.001, "loss": 2.3808, "step": 5393 }, { "epoch": 0.22819189440731028, "grad_norm": 0.21125631034374237, "learning_rate": 0.001, "loss": 2.29, "step": 5394 }, { "epoch": 0.22823419917082663, "grad_norm": 0.19609850645065308, "learning_rate": 0.001, "loss": 1.9172, "step": 5395 }, { "epoch": 0.228276503934343, "grad_norm": 0.22020597755908966, "learning_rate": 0.001, "loss": 3.1209, "step": 5396 }, { "epoch": 0.22831880869785937, "grad_norm": 0.21470773220062256, "learning_rate": 0.001, "loss": 2.4758, "step": 5397 }, { "epoch": 0.22836111346137575, "grad_norm": 0.24168118834495544, "learning_rate": 0.001, "loss": 2.8874, "step": 5398 }, { "epoch": 0.22840341822489213, "grad_norm": 0.2095698118209839, "learning_rate": 0.001, "loss": 2.1628, "step": 5399 }, { "epoch": 0.22844572298840848, "grad_norm": 0.2221812754869461, "learning_rate": 0.001, "loss": 2.1466, "step": 5400 }, { "epoch": 0.22848802775192487, "grad_norm": 0.17952899634838104, "learning_rate": 0.001, "loss": 2.3154, "step": 5401 }, { "epoch": 0.22853033251544125, "grad_norm": 0.1853719800710678, "learning_rate": 0.001, "loss": 1.7711, "step": 5402 }, { "epoch": 0.2285726372789576, "grad_norm": 0.2924804985523224, "learning_rate": 0.001, "loss": 1.6892, "step": 5403 }, { "epoch": 0.22861494204247398, "grad_norm": 0.20289921760559082, "learning_rate": 0.001, "loss": 2.1891, "step": 5404 }, { "epoch": 0.22865724680599037, "grad_norm": 0.2075899988412857, "learning_rate": 0.001, "loss": 2.1022, "step": 5405 }, { "epoch": 0.22869955156950672, "grad_norm": 0.19035397469997406, "learning_rate": 0.001, "loss": 1.5325, "step": 5406 }, { "epoch": 0.2287418563330231, "grad_norm": 9.618221282958984, "learning_rate": 0.001, "loss": 2.1168, "step": 5407 }, { "epoch": 0.22878416109653946, "grad_norm": 0.20112530887126923, "learning_rate": 0.001, "loss": 2.3236, "step": 5408 }, { "epoch": 0.22882646586005584, "grad_norm": 0.24566976726055145, "learning_rate": 0.001, "loss": 2.5824, "step": 5409 }, { "epoch": 0.22886877062357222, "grad_norm": 0.42258378863334656, "learning_rate": 0.001, "loss": 2.3928, "step": 5410 }, { "epoch": 0.22891107538708858, "grad_norm": 0.17424896359443665, "learning_rate": 0.001, "loss": 2.8459, "step": 5411 }, { "epoch": 0.22895338015060496, "grad_norm": 0.23018798232078552, "learning_rate": 0.001, "loss": 2.3751, "step": 5412 }, { "epoch": 0.22899568491412134, "grad_norm": 16.62144660949707, "learning_rate": 0.001, "loss": 1.8728, "step": 5413 }, { "epoch": 0.2290379896776377, "grad_norm": 0.19507117569446564, "learning_rate": 0.001, "loss": 2.0108, "step": 5414 }, { "epoch": 0.22908029444115408, "grad_norm": 0.18236559629440308, "learning_rate": 0.001, "loss": 1.877, "step": 5415 }, { "epoch": 0.22912259920467046, "grad_norm": 0.18296857178211212, "learning_rate": 0.001, "loss": 2.3096, "step": 5416 }, { "epoch": 0.2291649039681868, "grad_norm": 0.22751617431640625, "learning_rate": 0.001, "loss": 2.0818, "step": 5417 }, { "epoch": 0.2292072087317032, "grad_norm": 0.22728577256202698, "learning_rate": 0.001, "loss": 2.0094, "step": 5418 }, { "epoch": 0.22924951349521955, "grad_norm": 0.20583270490169525, "learning_rate": 0.001, "loss": 1.8739, "step": 5419 }, { "epoch": 0.22929181825873593, "grad_norm": 0.2076723724603653, "learning_rate": 0.001, "loss": 1.8793, "step": 5420 }, { "epoch": 0.2293341230222523, "grad_norm": 0.23529089987277985, "learning_rate": 0.001, "loss": 1.4192, "step": 5421 }, { "epoch": 0.22937642778576867, "grad_norm": 1.0901966094970703, "learning_rate": 0.001, "loss": 2.0787, "step": 5422 }, { "epoch": 0.22941873254928505, "grad_norm": 0.21631963551044464, "learning_rate": 0.001, "loss": 2.0501, "step": 5423 }, { "epoch": 0.22946103731280143, "grad_norm": 0.2734993100166321, "learning_rate": 0.001, "loss": 3.2331, "step": 5424 }, { "epoch": 0.22950334207631778, "grad_norm": 0.23572872579097748, "learning_rate": 0.001, "loss": 2.8713, "step": 5425 }, { "epoch": 0.22954564683983417, "grad_norm": 0.19519785046577454, "learning_rate": 0.001, "loss": 2.4311, "step": 5426 }, { "epoch": 0.22958795160335055, "grad_norm": 0.19540639221668243, "learning_rate": 0.001, "loss": 2.0317, "step": 5427 }, { "epoch": 0.2296302563668669, "grad_norm": 0.28334343433380127, "learning_rate": 0.001, "loss": 3.7543, "step": 5428 }, { "epoch": 0.22967256113038328, "grad_norm": 0.18688379228115082, "learning_rate": 0.001, "loss": 1.9003, "step": 5429 }, { "epoch": 0.22971486589389967, "grad_norm": 1.3220264911651611, "learning_rate": 0.001, "loss": 1.7229, "step": 5430 }, { "epoch": 0.22975717065741602, "grad_norm": 0.23925921320915222, "learning_rate": 0.001, "loss": 2.1974, "step": 5431 }, { "epoch": 0.2297994754209324, "grad_norm": 0.25730374455451965, "learning_rate": 0.001, "loss": 3.2443, "step": 5432 }, { "epoch": 0.22984178018444876, "grad_norm": 1.0473192930221558, "learning_rate": 0.001, "loss": 2.2703, "step": 5433 }, { "epoch": 0.22988408494796514, "grad_norm": 0.20525427162647247, "learning_rate": 0.001, "loss": 2.2747, "step": 5434 }, { "epoch": 0.22992638971148152, "grad_norm": 0.24831177294254303, "learning_rate": 0.001, "loss": 2.3047, "step": 5435 }, { "epoch": 0.22996869447499788, "grad_norm": 0.16241610050201416, "learning_rate": 0.001, "loss": 1.5626, "step": 5436 }, { "epoch": 0.23001099923851426, "grad_norm": 0.23656898736953735, "learning_rate": 0.001, "loss": 1.8401, "step": 5437 }, { "epoch": 0.23005330400203064, "grad_norm": 0.21393883228302002, "learning_rate": 0.001, "loss": 2.2871, "step": 5438 }, { "epoch": 0.230095608765547, "grad_norm": 0.471842497587204, "learning_rate": 0.001, "loss": 2.9926, "step": 5439 }, { "epoch": 0.23013791352906338, "grad_norm": 0.20878411829471588, "learning_rate": 0.001, "loss": 1.7661, "step": 5440 }, { "epoch": 0.23018021829257976, "grad_norm": 0.21290957927703857, "learning_rate": 0.001, "loss": 2.2779, "step": 5441 }, { "epoch": 0.2302225230560961, "grad_norm": 0.2180158644914627, "learning_rate": 0.001, "loss": 2.9909, "step": 5442 }, { "epoch": 0.2302648278196125, "grad_norm": 0.22294098138809204, "learning_rate": 0.001, "loss": 2.7471, "step": 5443 }, { "epoch": 0.23030713258312885, "grad_norm": 0.20272763073444366, "learning_rate": 0.001, "loss": 2.2887, "step": 5444 }, { "epoch": 0.23034943734664523, "grad_norm": 0.5208730697631836, "learning_rate": 0.001, "loss": 1.786, "step": 5445 }, { "epoch": 0.2303917421101616, "grad_norm": 0.23959921300411224, "learning_rate": 0.001, "loss": 2.5374, "step": 5446 }, { "epoch": 0.23043404687367797, "grad_norm": 0.22310377657413483, "learning_rate": 0.001, "loss": 2.9862, "step": 5447 }, { "epoch": 0.23047635163719435, "grad_norm": 0.27970150113105774, "learning_rate": 0.001, "loss": 2.2442, "step": 5448 }, { "epoch": 0.23051865640071073, "grad_norm": 0.230902299284935, "learning_rate": 0.001, "loss": 1.6546, "step": 5449 }, { "epoch": 0.23056096116422709, "grad_norm": 0.22653928399085999, "learning_rate": 0.001, "loss": 2.9463, "step": 5450 }, { "epoch": 0.23060326592774347, "grad_norm": 0.25787416100502014, "learning_rate": 0.001, "loss": 2.4843, "step": 5451 }, { "epoch": 0.23064557069125985, "grad_norm": 0.44561514258384705, "learning_rate": 0.001, "loss": 2.011, "step": 5452 }, { "epoch": 0.2306878754547762, "grad_norm": 0.27553293108940125, "learning_rate": 0.001, "loss": 2.7563, "step": 5453 }, { "epoch": 0.23073018021829259, "grad_norm": 0.2613121569156647, "learning_rate": 0.001, "loss": 2.2846, "step": 5454 }, { "epoch": 0.23077248498180894, "grad_norm": 0.36091411113739014, "learning_rate": 0.001, "loss": 3.0635, "step": 5455 }, { "epoch": 0.23081478974532532, "grad_norm": 0.19998879730701447, "learning_rate": 0.001, "loss": 2.578, "step": 5456 }, { "epoch": 0.2308570945088417, "grad_norm": 0.235711470246315, "learning_rate": 0.001, "loss": 2.1896, "step": 5457 }, { "epoch": 0.23089939927235806, "grad_norm": 0.7797324061393738, "learning_rate": 0.001, "loss": 2.4483, "step": 5458 }, { "epoch": 0.23094170403587444, "grad_norm": 0.17661023139953613, "learning_rate": 0.001, "loss": 2.4288, "step": 5459 }, { "epoch": 0.23098400879939082, "grad_norm": 0.18640023469924927, "learning_rate": 0.001, "loss": 2.1796, "step": 5460 }, { "epoch": 0.23102631356290718, "grad_norm": 7.270983695983887, "learning_rate": 0.001, "loss": 2.1537, "step": 5461 }, { "epoch": 0.23106861832642356, "grad_norm": 0.5781380534172058, "learning_rate": 0.001, "loss": 2.9149, "step": 5462 }, { "epoch": 0.23111092308993994, "grad_norm": 0.21426080167293549, "learning_rate": 0.001, "loss": 2.6075, "step": 5463 }, { "epoch": 0.2311532278534563, "grad_norm": 0.24475079774856567, "learning_rate": 0.001, "loss": 2.4721, "step": 5464 }, { "epoch": 0.23119553261697268, "grad_norm": 1.0542508363723755, "learning_rate": 0.001, "loss": 2.0507, "step": 5465 }, { "epoch": 0.23123783738048903, "grad_norm": 0.2142096906900406, "learning_rate": 0.001, "loss": 1.8177, "step": 5466 }, { "epoch": 0.2312801421440054, "grad_norm": 0.25511589646339417, "learning_rate": 0.001, "loss": 2.9547, "step": 5467 }, { "epoch": 0.2313224469075218, "grad_norm": 4.037811279296875, "learning_rate": 0.001, "loss": 2.0729, "step": 5468 }, { "epoch": 0.23136475167103815, "grad_norm": 0.42371609807014465, "learning_rate": 0.001, "loss": 2.684, "step": 5469 }, { "epoch": 0.23140705643455453, "grad_norm": 0.25769296288490295, "learning_rate": 0.001, "loss": 1.8104, "step": 5470 }, { "epoch": 0.2314493611980709, "grad_norm": 0.9849298596382141, "learning_rate": 0.001, "loss": 3.1677, "step": 5471 }, { "epoch": 0.23149166596158727, "grad_norm": 10.10745620727539, "learning_rate": 0.001, "loss": 2.3539, "step": 5472 }, { "epoch": 0.23153397072510365, "grad_norm": 0.28518936038017273, "learning_rate": 0.001, "loss": 3.5426, "step": 5473 }, { "epoch": 0.23157627548862003, "grad_norm": 0.40552154183387756, "learning_rate": 0.001, "loss": 2.9542, "step": 5474 }, { "epoch": 0.23161858025213639, "grad_norm": 0.26154863834381104, "learning_rate": 0.001, "loss": 1.9963, "step": 5475 }, { "epoch": 0.23166088501565277, "grad_norm": 0.2571628987789154, "learning_rate": 0.001, "loss": 1.95, "step": 5476 }, { "epoch": 0.23170318977916912, "grad_norm": 0.2619466185569763, "learning_rate": 0.001, "loss": 2.0637, "step": 5477 }, { "epoch": 0.2317454945426855, "grad_norm": 0.28519323468208313, "learning_rate": 0.001, "loss": 2.5762, "step": 5478 }, { "epoch": 0.23178779930620189, "grad_norm": 1.2424720525741577, "learning_rate": 0.001, "loss": 3.0967, "step": 5479 }, { "epoch": 0.23183010406971824, "grad_norm": 0.4895488917827606, "learning_rate": 0.001, "loss": 3.3258, "step": 5480 }, { "epoch": 0.23187240883323462, "grad_norm": 0.39501601457595825, "learning_rate": 0.001, "loss": 2.6346, "step": 5481 }, { "epoch": 0.231914713596751, "grad_norm": 0.2693521976470947, "learning_rate": 0.001, "loss": 2.1174, "step": 5482 }, { "epoch": 0.23195701836026736, "grad_norm": 0.2762487828731537, "learning_rate": 0.001, "loss": 2.0984, "step": 5483 }, { "epoch": 0.23199932312378374, "grad_norm": 0.2225947082042694, "learning_rate": 0.001, "loss": 1.9871, "step": 5484 }, { "epoch": 0.23204162788730012, "grad_norm": 0.38060519099235535, "learning_rate": 0.001, "loss": 2.3824, "step": 5485 }, { "epoch": 0.23208393265081648, "grad_norm": 0.21459133923053741, "learning_rate": 0.001, "loss": 2.0364, "step": 5486 }, { "epoch": 0.23212623741433286, "grad_norm": 0.5198157429695129, "learning_rate": 0.001, "loss": 3.1656, "step": 5487 }, { "epoch": 0.2321685421778492, "grad_norm": 0.675767183303833, "learning_rate": 0.001, "loss": 3.456, "step": 5488 }, { "epoch": 0.2322108469413656, "grad_norm": 0.22795476019382477, "learning_rate": 0.001, "loss": 3.3561, "step": 5489 }, { "epoch": 0.23225315170488198, "grad_norm": 0.9936857223510742, "learning_rate": 0.001, "loss": 2.4905, "step": 5490 }, { "epoch": 0.23229545646839833, "grad_norm": 0.3095644414424896, "learning_rate": 0.001, "loss": 2.0372, "step": 5491 }, { "epoch": 0.2323377612319147, "grad_norm": 0.18799656629562378, "learning_rate": 0.001, "loss": 1.9836, "step": 5492 }, { "epoch": 0.2323800659954311, "grad_norm": 0.21074536442756653, "learning_rate": 0.001, "loss": 2.2601, "step": 5493 }, { "epoch": 0.23242237075894745, "grad_norm": 0.32169005274772644, "learning_rate": 0.001, "loss": 3.0188, "step": 5494 }, { "epoch": 0.23246467552246383, "grad_norm": 0.21272505819797516, "learning_rate": 0.001, "loss": 2.1103, "step": 5495 }, { "epoch": 0.2325069802859802, "grad_norm": 0.4344709515571594, "learning_rate": 0.001, "loss": 3.0217, "step": 5496 }, { "epoch": 0.23254928504949657, "grad_norm": 1.6699187755584717, "learning_rate": 0.001, "loss": 2.5814, "step": 5497 }, { "epoch": 0.23259158981301295, "grad_norm": 0.22509506344795227, "learning_rate": 0.001, "loss": 2.8708, "step": 5498 }, { "epoch": 0.2326338945765293, "grad_norm": 0.24898000061511993, "learning_rate": 0.001, "loss": 2.8715, "step": 5499 }, { "epoch": 0.2326761993400457, "grad_norm": 0.7220833897590637, "learning_rate": 0.001, "loss": 2.1767, "step": 5500 }, { "epoch": 0.23271850410356207, "grad_norm": 0.25471919775009155, "learning_rate": 0.001, "loss": 3.0134, "step": 5501 }, { "epoch": 0.23276080886707842, "grad_norm": 0.2646358907222748, "learning_rate": 0.001, "loss": 2.0253, "step": 5502 }, { "epoch": 0.2328031136305948, "grad_norm": 0.20259220898151398, "learning_rate": 0.001, "loss": 2.053, "step": 5503 }, { "epoch": 0.2328454183941112, "grad_norm": 0.49306970834732056, "learning_rate": 0.001, "loss": 2.2513, "step": 5504 }, { "epoch": 0.23288772315762754, "grad_norm": 0.2066371738910675, "learning_rate": 0.001, "loss": 2.7308, "step": 5505 }, { "epoch": 0.23293002792114392, "grad_norm": 0.20641134679317474, "learning_rate": 0.001, "loss": 2.5732, "step": 5506 }, { "epoch": 0.2329723326846603, "grad_norm": 0.21827174723148346, "learning_rate": 0.001, "loss": 2.6558, "step": 5507 }, { "epoch": 0.23301463744817666, "grad_norm": 0.2925966680049896, "learning_rate": 0.001, "loss": 2.6788, "step": 5508 }, { "epoch": 0.23305694221169304, "grad_norm": 0.2853943109512329, "learning_rate": 0.001, "loss": 2.5224, "step": 5509 }, { "epoch": 0.2330992469752094, "grad_norm": 0.20848669111728668, "learning_rate": 0.001, "loss": 2.3025, "step": 5510 }, { "epoch": 0.23314155173872578, "grad_norm": 1.7982761859893799, "learning_rate": 0.001, "loss": 2.1487, "step": 5511 }, { "epoch": 0.23318385650224216, "grad_norm": 0.2628767490386963, "learning_rate": 0.001, "loss": 2.7454, "step": 5512 }, { "epoch": 0.2332261612657585, "grad_norm": 0.21487084031105042, "learning_rate": 0.001, "loss": 1.7897, "step": 5513 }, { "epoch": 0.2332684660292749, "grad_norm": 0.2565120756626129, "learning_rate": 0.001, "loss": 2.3647, "step": 5514 }, { "epoch": 0.23331077079279128, "grad_norm": 0.2229587584733963, "learning_rate": 0.001, "loss": 1.7196, "step": 5515 }, { "epoch": 0.23335307555630763, "grad_norm": 0.23122352361679077, "learning_rate": 0.001, "loss": 3.264, "step": 5516 }, { "epoch": 0.23339538031982401, "grad_norm": 0.2294238656759262, "learning_rate": 0.001, "loss": 2.1468, "step": 5517 }, { "epoch": 0.2334376850833404, "grad_norm": 1.3923436403274536, "learning_rate": 0.001, "loss": 2.8618, "step": 5518 }, { "epoch": 0.23347998984685675, "grad_norm": 4.941551685333252, "learning_rate": 0.001, "loss": 2.7653, "step": 5519 }, { "epoch": 0.23352229461037313, "grad_norm": 0.23686207830905914, "learning_rate": 0.001, "loss": 1.6694, "step": 5520 }, { "epoch": 0.2335645993738895, "grad_norm": 0.38587716221809387, "learning_rate": 0.001, "loss": 2.2298, "step": 5521 }, { "epoch": 0.23360690413740587, "grad_norm": 0.2625442147254944, "learning_rate": 0.001, "loss": 2.0646, "step": 5522 }, { "epoch": 0.23364920890092225, "grad_norm": 0.2582058906555176, "learning_rate": 0.001, "loss": 2.6844, "step": 5523 }, { "epoch": 0.2336915136644386, "grad_norm": 2.8828814029693604, "learning_rate": 0.001, "loss": 2.1936, "step": 5524 }, { "epoch": 0.233733818427955, "grad_norm": 0.2290409654378891, "learning_rate": 0.001, "loss": 1.9075, "step": 5525 }, { "epoch": 0.23377612319147137, "grad_norm": 0.1932823210954666, "learning_rate": 0.001, "loss": 3.3037, "step": 5526 }, { "epoch": 0.23381842795498772, "grad_norm": 0.20359812676906586, "learning_rate": 0.001, "loss": 2.209, "step": 5527 }, { "epoch": 0.2338607327185041, "grad_norm": 0.3113921582698822, "learning_rate": 0.001, "loss": 1.766, "step": 5528 }, { "epoch": 0.2339030374820205, "grad_norm": 20.93682098388672, "learning_rate": 0.001, "loss": 2.5909, "step": 5529 }, { "epoch": 0.23394534224553684, "grad_norm": 0.2424592673778534, "learning_rate": 0.001, "loss": 2.3092, "step": 5530 }, { "epoch": 0.23398764700905322, "grad_norm": 0.22494091093540192, "learning_rate": 0.001, "loss": 2.2942, "step": 5531 }, { "epoch": 0.23402995177256958, "grad_norm": 0.20457018911838531, "learning_rate": 0.001, "loss": 2.31, "step": 5532 }, { "epoch": 0.23407225653608596, "grad_norm": 0.32877421379089355, "learning_rate": 0.001, "loss": 1.8861, "step": 5533 }, { "epoch": 0.23411456129960234, "grad_norm": 0.42228415608406067, "learning_rate": 0.001, "loss": 2.7353, "step": 5534 }, { "epoch": 0.2341568660631187, "grad_norm": 0.47094154357910156, "learning_rate": 0.001, "loss": 3.8558, "step": 5535 }, { "epoch": 0.23419917082663508, "grad_norm": 0.2175481915473938, "learning_rate": 0.001, "loss": 2.0443, "step": 5536 }, { "epoch": 0.23424147559015146, "grad_norm": 3.194316864013672, "learning_rate": 0.001, "loss": 2.5488, "step": 5537 }, { "epoch": 0.23428378035366781, "grad_norm": 0.3026067912578583, "learning_rate": 0.001, "loss": 2.4307, "step": 5538 }, { "epoch": 0.2343260851171842, "grad_norm": 0.8778800368309021, "learning_rate": 0.001, "loss": 1.6685, "step": 5539 }, { "epoch": 0.23436838988070058, "grad_norm": 0.1916915625333786, "learning_rate": 0.001, "loss": 2.2532, "step": 5540 }, { "epoch": 0.23441069464421693, "grad_norm": 0.28568512201309204, "learning_rate": 0.001, "loss": 2.0453, "step": 5541 }, { "epoch": 0.23445299940773331, "grad_norm": 0.5260047316551208, "learning_rate": 0.001, "loss": 3.2691, "step": 5542 }, { "epoch": 0.23449530417124967, "grad_norm": 0.2400904893875122, "learning_rate": 0.001, "loss": 2.483, "step": 5543 }, { "epoch": 0.23453760893476605, "grad_norm": 0.232883483171463, "learning_rate": 0.001, "loss": 3.0007, "step": 5544 }, { "epoch": 0.23457991369828243, "grad_norm": 0.21904799342155457, "learning_rate": 0.001, "loss": 2.0221, "step": 5545 }, { "epoch": 0.2346222184617988, "grad_norm": 0.27808424830436707, "learning_rate": 0.001, "loss": 1.7361, "step": 5546 }, { "epoch": 0.23466452322531517, "grad_norm": 0.33674612641334534, "learning_rate": 0.001, "loss": 2.5554, "step": 5547 }, { "epoch": 0.23470682798883155, "grad_norm": 0.21071641147136688, "learning_rate": 0.001, "loss": 2.0207, "step": 5548 }, { "epoch": 0.2347491327523479, "grad_norm": 0.20552270114421844, "learning_rate": 0.001, "loss": 2.2293, "step": 5549 }, { "epoch": 0.2347914375158643, "grad_norm": 0.31367790699005127, "learning_rate": 0.001, "loss": 2.6712, "step": 5550 }, { "epoch": 0.23483374227938067, "grad_norm": 0.7364101409912109, "learning_rate": 0.001, "loss": 2.0776, "step": 5551 }, { "epoch": 0.23487604704289702, "grad_norm": 0.24621638655662537, "learning_rate": 0.001, "loss": 2.0778, "step": 5552 }, { "epoch": 0.2349183518064134, "grad_norm": 0.23604516685009003, "learning_rate": 0.001, "loss": 3.3079, "step": 5553 }, { "epoch": 0.2349606565699298, "grad_norm": 0.8118863105773926, "learning_rate": 0.001, "loss": 2.4182, "step": 5554 }, { "epoch": 0.23500296133344614, "grad_norm": 0.21892663836479187, "learning_rate": 0.001, "loss": 2.2327, "step": 5555 }, { "epoch": 0.23504526609696252, "grad_norm": 0.28527218103408813, "learning_rate": 0.001, "loss": 3.7358, "step": 5556 }, { "epoch": 0.23508757086047888, "grad_norm": 0.2317507416009903, "learning_rate": 0.001, "loss": 3.2241, "step": 5557 }, { "epoch": 0.23512987562399526, "grad_norm": 0.23719051480293274, "learning_rate": 0.001, "loss": 2.1869, "step": 5558 }, { "epoch": 0.23517218038751164, "grad_norm": 0.2504566013813019, "learning_rate": 0.001, "loss": 2.2632, "step": 5559 }, { "epoch": 0.235214485151028, "grad_norm": 0.2797103226184845, "learning_rate": 0.001, "loss": 2.0476, "step": 5560 }, { "epoch": 0.23525678991454438, "grad_norm": 0.223208948969841, "learning_rate": 0.001, "loss": 2.3264, "step": 5561 }, { "epoch": 0.23529909467806076, "grad_norm": 2.268007755279541, "learning_rate": 0.001, "loss": 3.4196, "step": 5562 }, { "epoch": 0.23534139944157711, "grad_norm": 0.20520193874835968, "learning_rate": 0.001, "loss": 2.1592, "step": 5563 }, { "epoch": 0.2353837042050935, "grad_norm": 0.1952827274799347, "learning_rate": 0.001, "loss": 2.2686, "step": 5564 }, { "epoch": 0.23542600896860988, "grad_norm": 0.24060063064098358, "learning_rate": 0.001, "loss": 2.6335, "step": 5565 }, { "epoch": 0.23546831373212623, "grad_norm": 0.18970201909542084, "learning_rate": 0.001, "loss": 2.1595, "step": 5566 }, { "epoch": 0.23551061849564262, "grad_norm": 0.20681913197040558, "learning_rate": 0.001, "loss": 2.7761, "step": 5567 }, { "epoch": 0.23555292325915897, "grad_norm": 1.009856104850769, "learning_rate": 0.001, "loss": 3.113, "step": 5568 }, { "epoch": 0.23559522802267535, "grad_norm": 0.20202496647834778, "learning_rate": 0.001, "loss": 2.7176, "step": 5569 }, { "epoch": 0.23563753278619173, "grad_norm": 0.2296670824289322, "learning_rate": 0.001, "loss": 1.9414, "step": 5570 }, { "epoch": 0.2356798375497081, "grad_norm": 2.0302107334136963, "learning_rate": 0.001, "loss": 3.5255, "step": 5571 }, { "epoch": 0.23572214231322447, "grad_norm": 0.2701435983181, "learning_rate": 0.001, "loss": 2.0376, "step": 5572 }, { "epoch": 0.23576444707674085, "grad_norm": 1.3323795795440674, "learning_rate": 0.001, "loss": 3.0352, "step": 5573 }, { "epoch": 0.2358067518402572, "grad_norm": 0.2649135887622833, "learning_rate": 0.001, "loss": 2.5785, "step": 5574 }, { "epoch": 0.2358490566037736, "grad_norm": 0.21373842656612396, "learning_rate": 0.001, "loss": 1.9535, "step": 5575 }, { "epoch": 0.23589136136728997, "grad_norm": 0.2295471578836441, "learning_rate": 0.001, "loss": 1.9545, "step": 5576 }, { "epoch": 0.23593366613080632, "grad_norm": 0.32685786485671997, "learning_rate": 0.001, "loss": 1.8934, "step": 5577 }, { "epoch": 0.2359759708943227, "grad_norm": 0.22259478271007538, "learning_rate": 0.001, "loss": 2.1198, "step": 5578 }, { "epoch": 0.23601827565783906, "grad_norm": 0.2274249643087387, "learning_rate": 0.001, "loss": 2.2864, "step": 5579 }, { "epoch": 0.23606058042135544, "grad_norm": 0.23607851564884186, "learning_rate": 0.001, "loss": 3.7908, "step": 5580 }, { "epoch": 0.23610288518487182, "grad_norm": 0.22319360077381134, "learning_rate": 0.001, "loss": 1.8991, "step": 5581 }, { "epoch": 0.23614518994838818, "grad_norm": 0.1910727173089981, "learning_rate": 0.001, "loss": 2.1673, "step": 5582 }, { "epoch": 0.23618749471190456, "grad_norm": 0.35509058833122253, "learning_rate": 0.001, "loss": 1.9815, "step": 5583 }, { "epoch": 0.23622979947542094, "grad_norm": 0.5298141837120056, "learning_rate": 0.001, "loss": 2.5921, "step": 5584 }, { "epoch": 0.2362721042389373, "grad_norm": 0.2098468840122223, "learning_rate": 0.001, "loss": 2.8481, "step": 5585 }, { "epoch": 0.23631440900245368, "grad_norm": 0.6692335605621338, "learning_rate": 0.001, "loss": 2.6247, "step": 5586 }, { "epoch": 0.23635671376597006, "grad_norm": 0.2933887541294098, "learning_rate": 0.001, "loss": 2.677, "step": 5587 }, { "epoch": 0.23639901852948642, "grad_norm": 1.0850200653076172, "learning_rate": 0.001, "loss": 2.6977, "step": 5588 }, { "epoch": 0.2364413232930028, "grad_norm": 8.362639427185059, "learning_rate": 0.001, "loss": 2.4108, "step": 5589 }, { "epoch": 0.23648362805651915, "grad_norm": 0.1976199746131897, "learning_rate": 0.001, "loss": 2.2232, "step": 5590 }, { "epoch": 0.23652593282003553, "grad_norm": 1.5596935749053955, "learning_rate": 0.001, "loss": 2.3909, "step": 5591 }, { "epoch": 0.23656823758355192, "grad_norm": 0.3760131895542145, "learning_rate": 0.001, "loss": 2.1015, "step": 5592 }, { "epoch": 0.23661054234706827, "grad_norm": 0.4867786169052124, "learning_rate": 0.001, "loss": 2.3708, "step": 5593 }, { "epoch": 0.23665284711058465, "grad_norm": 0.3288508355617523, "learning_rate": 0.001, "loss": 3.3302, "step": 5594 }, { "epoch": 0.23669515187410103, "grad_norm": 0.3380516767501831, "learning_rate": 0.001, "loss": 2.3632, "step": 5595 }, { "epoch": 0.2367374566376174, "grad_norm": 0.2778700590133667, "learning_rate": 0.001, "loss": 2.2904, "step": 5596 }, { "epoch": 0.23677976140113377, "grad_norm": 2.0466156005859375, "learning_rate": 0.001, "loss": 2.2768, "step": 5597 }, { "epoch": 0.23682206616465015, "grad_norm": 0.5960863828659058, "learning_rate": 0.001, "loss": 2.5767, "step": 5598 }, { "epoch": 0.2368643709281665, "grad_norm": 0.3480601906776428, "learning_rate": 0.001, "loss": 2.7427, "step": 5599 }, { "epoch": 0.2369066756916829, "grad_norm": 0.22647280991077423, "learning_rate": 0.001, "loss": 4.0171, "step": 5600 }, { "epoch": 0.23694898045519924, "grad_norm": 0.37474966049194336, "learning_rate": 0.001, "loss": 2.2962, "step": 5601 }, { "epoch": 0.23699128521871562, "grad_norm": 0.35157617926597595, "learning_rate": 0.001, "loss": 3.5374, "step": 5602 }, { "epoch": 0.237033589982232, "grad_norm": 1.5529965162277222, "learning_rate": 0.001, "loss": 3.0981, "step": 5603 }, { "epoch": 0.23707589474574836, "grad_norm": 0.4362848103046417, "learning_rate": 0.001, "loss": 3.007, "step": 5604 }, { "epoch": 0.23711819950926474, "grad_norm": 0.2729444205760956, "learning_rate": 0.001, "loss": 2.2965, "step": 5605 }, { "epoch": 0.23716050427278113, "grad_norm": 0.2660609781742096, "learning_rate": 0.001, "loss": 2.2928, "step": 5606 }, { "epoch": 0.23720280903629748, "grad_norm": 0.23978281021118164, "learning_rate": 0.001, "loss": 2.5681, "step": 5607 }, { "epoch": 0.23724511379981386, "grad_norm": 0.21553561091423035, "learning_rate": 0.001, "loss": 1.9796, "step": 5608 }, { "epoch": 0.23728741856333024, "grad_norm": 0.5897154808044434, "learning_rate": 0.001, "loss": 2.986, "step": 5609 }, { "epoch": 0.2373297233268466, "grad_norm": 0.22807319462299347, "learning_rate": 0.001, "loss": 3.1874, "step": 5610 }, { "epoch": 0.23737202809036298, "grad_norm": 0.22051000595092773, "learning_rate": 0.001, "loss": 1.8367, "step": 5611 }, { "epoch": 0.23741433285387933, "grad_norm": 59.09831237792969, "learning_rate": 0.001, "loss": 2.6667, "step": 5612 }, { "epoch": 0.23745663761739572, "grad_norm": 0.21315006911754608, "learning_rate": 0.001, "loss": 2.1231, "step": 5613 }, { "epoch": 0.2374989423809121, "grad_norm": 0.45930665731430054, "learning_rate": 0.001, "loss": 3.1726, "step": 5614 }, { "epoch": 0.23754124714442845, "grad_norm": 0.5922689437866211, "learning_rate": 0.001, "loss": 1.8162, "step": 5615 }, { "epoch": 0.23758355190794483, "grad_norm": 0.23892509937286377, "learning_rate": 0.001, "loss": 2.2867, "step": 5616 }, { "epoch": 0.23762585667146122, "grad_norm": 0.2679390013217926, "learning_rate": 0.001, "loss": 2.9999, "step": 5617 }, { "epoch": 0.23766816143497757, "grad_norm": 0.7704877257347107, "learning_rate": 0.001, "loss": 2.2267, "step": 5618 }, { "epoch": 0.23771046619849395, "grad_norm": 0.4141584038734436, "learning_rate": 0.001, "loss": 3.1586, "step": 5619 }, { "epoch": 0.23775277096201033, "grad_norm": 0.6287165284156799, "learning_rate": 0.001, "loss": 2.379, "step": 5620 }, { "epoch": 0.2377950757255267, "grad_norm": 0.19241921603679657, "learning_rate": 0.001, "loss": 1.791, "step": 5621 }, { "epoch": 0.23783738048904307, "grad_norm": 0.2662370502948761, "learning_rate": 0.001, "loss": 1.7727, "step": 5622 }, { "epoch": 0.23787968525255943, "grad_norm": 68.88314819335938, "learning_rate": 0.001, "loss": 2.168, "step": 5623 }, { "epoch": 0.2379219900160758, "grad_norm": 0.8763318061828613, "learning_rate": 0.001, "loss": 3.0477, "step": 5624 }, { "epoch": 0.2379642947795922, "grad_norm": 0.878894567489624, "learning_rate": 0.001, "loss": 1.9467, "step": 5625 }, { "epoch": 0.23800659954310854, "grad_norm": 0.4422621428966522, "learning_rate": 0.001, "loss": 2.7905, "step": 5626 }, { "epoch": 0.23804890430662493, "grad_norm": 5.424615383148193, "learning_rate": 0.001, "loss": 2.3152, "step": 5627 }, { "epoch": 0.2380912090701413, "grad_norm": 2.8828539848327637, "learning_rate": 0.001, "loss": 2.4996, "step": 5628 }, { "epoch": 0.23813351383365766, "grad_norm": 1.1331160068511963, "learning_rate": 0.001, "loss": 2.6441, "step": 5629 }, { "epoch": 0.23817581859717404, "grad_norm": 1.2550442218780518, "learning_rate": 0.001, "loss": 3.1826, "step": 5630 }, { "epoch": 0.23821812336069043, "grad_norm": 0.27109989523887634, "learning_rate": 0.001, "loss": 3.46, "step": 5631 }, { "epoch": 0.23826042812420678, "grad_norm": 0.2896735966205597, "learning_rate": 0.001, "loss": 2.7865, "step": 5632 }, { "epoch": 0.23830273288772316, "grad_norm": 0.2764877676963806, "learning_rate": 0.001, "loss": 2.0332, "step": 5633 }, { "epoch": 0.23834503765123952, "grad_norm": 1.915027141571045, "learning_rate": 0.001, "loss": 2.3694, "step": 5634 }, { "epoch": 0.2383873424147559, "grad_norm": 0.4358770549297333, "learning_rate": 0.001, "loss": 2.3084, "step": 5635 }, { "epoch": 0.23842964717827228, "grad_norm": 0.2749125063419342, "learning_rate": 0.001, "loss": 2.1625, "step": 5636 }, { "epoch": 0.23847195194178863, "grad_norm": 0.29137587547302246, "learning_rate": 0.001, "loss": 3.2571, "step": 5637 }, { "epoch": 0.23851425670530502, "grad_norm": 0.2710089385509491, "learning_rate": 0.001, "loss": 2.8085, "step": 5638 }, { "epoch": 0.2385565614688214, "grad_norm": 0.3743971288204193, "learning_rate": 0.001, "loss": 3.0382, "step": 5639 }, { "epoch": 0.23859886623233775, "grad_norm": 0.6103678345680237, "learning_rate": 0.001, "loss": 1.8752, "step": 5640 }, { "epoch": 0.23864117099585413, "grad_norm": 0.2807563543319702, "learning_rate": 0.001, "loss": 2.6665, "step": 5641 }, { "epoch": 0.23868347575937052, "grad_norm": 0.4093710780143738, "learning_rate": 0.001, "loss": 2.1668, "step": 5642 }, { "epoch": 0.23872578052288687, "grad_norm": 0.2517501711845398, "learning_rate": 0.001, "loss": 3.2762, "step": 5643 }, { "epoch": 0.23876808528640325, "grad_norm": 0.21582388877868652, "learning_rate": 0.001, "loss": 2.8667, "step": 5644 }, { "epoch": 0.2388103900499196, "grad_norm": 0.22406849265098572, "learning_rate": 0.001, "loss": 2.279, "step": 5645 }, { "epoch": 0.238852694813436, "grad_norm": 0.17993216216564178, "learning_rate": 0.001, "loss": 1.6104, "step": 5646 }, { "epoch": 0.23889499957695237, "grad_norm": 0.29950782656669617, "learning_rate": 0.001, "loss": 2.2527, "step": 5647 }, { "epoch": 0.23893730434046873, "grad_norm": 0.20286992192268372, "learning_rate": 0.001, "loss": 2.5939, "step": 5648 }, { "epoch": 0.2389796091039851, "grad_norm": 0.5291157960891724, "learning_rate": 0.001, "loss": 2.1778, "step": 5649 }, { "epoch": 0.2390219138675015, "grad_norm": 0.34574270248413086, "learning_rate": 0.001, "loss": 2.5221, "step": 5650 }, { "epoch": 0.23906421863101784, "grad_norm": 0.25159138441085815, "learning_rate": 0.001, "loss": 2.4263, "step": 5651 }, { "epoch": 0.23910652339453423, "grad_norm": 1.9903714656829834, "learning_rate": 0.001, "loss": 2.5326, "step": 5652 }, { "epoch": 0.2391488281580506, "grad_norm": 0.26104992628097534, "learning_rate": 0.001, "loss": 3.192, "step": 5653 }, { "epoch": 0.23919113292156696, "grad_norm": 0.21419936418533325, "learning_rate": 0.001, "loss": 2.0116, "step": 5654 }, { "epoch": 0.23923343768508334, "grad_norm": 0.49943071603775024, "learning_rate": 0.001, "loss": 1.9931, "step": 5655 }, { "epoch": 0.2392757424485997, "grad_norm": 0.23732519149780273, "learning_rate": 0.001, "loss": 2.1189, "step": 5656 }, { "epoch": 0.23931804721211608, "grad_norm": 0.2006048560142517, "learning_rate": 0.001, "loss": 2.1357, "step": 5657 }, { "epoch": 0.23936035197563246, "grad_norm": 0.2843586504459381, "learning_rate": 0.001, "loss": 2.3875, "step": 5658 }, { "epoch": 0.23940265673914882, "grad_norm": 0.16184455156326294, "learning_rate": 0.001, "loss": 2.2042, "step": 5659 }, { "epoch": 0.2394449615026652, "grad_norm": 0.1909385770559311, "learning_rate": 0.001, "loss": 2.8616, "step": 5660 }, { "epoch": 0.23948726626618158, "grad_norm": 0.18999890983104706, "learning_rate": 0.001, "loss": 1.9102, "step": 5661 }, { "epoch": 0.23952957102969794, "grad_norm": 2.6552658081054688, "learning_rate": 0.001, "loss": 2.8128, "step": 5662 }, { "epoch": 0.23957187579321432, "grad_norm": 0.21462197601795197, "learning_rate": 0.001, "loss": 2.404, "step": 5663 }, { "epoch": 0.2396141805567307, "grad_norm": 0.27352675795555115, "learning_rate": 0.001, "loss": 2.4358, "step": 5664 }, { "epoch": 0.23965648532024705, "grad_norm": 0.22441208362579346, "learning_rate": 0.001, "loss": 2.3097, "step": 5665 }, { "epoch": 0.23969879008376344, "grad_norm": 0.3627524673938751, "learning_rate": 0.001, "loss": 3.7734, "step": 5666 }, { "epoch": 0.23974109484727982, "grad_norm": 0.4762495756149292, "learning_rate": 0.001, "loss": 2.1284, "step": 5667 }, { "epoch": 0.23978339961079617, "grad_norm": 0.2010953575372696, "learning_rate": 0.001, "loss": 2.329, "step": 5668 }, { "epoch": 0.23982570437431255, "grad_norm": 0.24924032390117645, "learning_rate": 0.001, "loss": 3.1285, "step": 5669 }, { "epoch": 0.2398680091378289, "grad_norm": 0.6464244723320007, "learning_rate": 0.001, "loss": 2.4777, "step": 5670 }, { "epoch": 0.2399103139013453, "grad_norm": 0.1977522075176239, "learning_rate": 0.001, "loss": 2.1083, "step": 5671 }, { "epoch": 0.23995261866486167, "grad_norm": 0.1925719529390335, "learning_rate": 0.001, "loss": 2.2735, "step": 5672 }, { "epoch": 0.23999492342837803, "grad_norm": 0.18989835679531097, "learning_rate": 0.001, "loss": 2.142, "step": 5673 }, { "epoch": 0.2400372281918944, "grad_norm": 0.21661527454853058, "learning_rate": 0.001, "loss": 2.0072, "step": 5674 }, { "epoch": 0.2400795329554108, "grad_norm": 1.2266058921813965, "learning_rate": 0.001, "loss": 2.5989, "step": 5675 }, { "epoch": 0.24012183771892714, "grad_norm": 0.20844648778438568, "learning_rate": 0.001, "loss": 1.9061, "step": 5676 }, { "epoch": 0.24016414248244353, "grad_norm": 0.27534279227256775, "learning_rate": 0.001, "loss": 2.631, "step": 5677 }, { "epoch": 0.2402064472459599, "grad_norm": 0.2036500722169876, "learning_rate": 0.001, "loss": 1.604, "step": 5678 }, { "epoch": 0.24024875200947626, "grad_norm": 0.18914766609668732, "learning_rate": 0.001, "loss": 1.9546, "step": 5679 }, { "epoch": 0.24029105677299264, "grad_norm": 0.2073379009962082, "learning_rate": 0.001, "loss": 2.5842, "step": 5680 }, { "epoch": 0.240333361536509, "grad_norm": 0.32036593556404114, "learning_rate": 0.001, "loss": 2.5532, "step": 5681 }, { "epoch": 0.24037566630002538, "grad_norm": 0.1830267608165741, "learning_rate": 0.001, "loss": 2.4035, "step": 5682 }, { "epoch": 0.24041797106354176, "grad_norm": 0.1883174479007721, "learning_rate": 0.001, "loss": 2.1087, "step": 5683 }, { "epoch": 0.24046027582705812, "grad_norm": 0.2873155176639557, "learning_rate": 0.001, "loss": 2.2658, "step": 5684 }, { "epoch": 0.2405025805905745, "grad_norm": 0.22399446368217468, "learning_rate": 0.001, "loss": 1.8286, "step": 5685 }, { "epoch": 0.24054488535409088, "grad_norm": 0.7013282775878906, "learning_rate": 0.001, "loss": 2.4934, "step": 5686 }, { "epoch": 0.24058719011760724, "grad_norm": 0.21409347653388977, "learning_rate": 0.001, "loss": 2.2924, "step": 5687 }, { "epoch": 0.24062949488112362, "grad_norm": 2.542304515838623, "learning_rate": 0.001, "loss": 2.3646, "step": 5688 }, { "epoch": 0.24067179964464, "grad_norm": 0.39416250586509705, "learning_rate": 0.001, "loss": 2.9602, "step": 5689 }, { "epoch": 0.24071410440815635, "grad_norm": 0.307919979095459, "learning_rate": 0.001, "loss": 1.6349, "step": 5690 }, { "epoch": 0.24075640917167274, "grad_norm": 0.24086859822273254, "learning_rate": 0.001, "loss": 2.0097, "step": 5691 }, { "epoch": 0.2407987139351891, "grad_norm": 0.1972251832485199, "learning_rate": 0.001, "loss": 1.7443, "step": 5692 }, { "epoch": 0.24084101869870547, "grad_norm": 0.5142681002616882, "learning_rate": 0.001, "loss": 2.1825, "step": 5693 }, { "epoch": 0.24088332346222185, "grad_norm": 0.28559404611587524, "learning_rate": 0.001, "loss": 2.4702, "step": 5694 }, { "epoch": 0.2409256282257382, "grad_norm": 0.20487889647483826, "learning_rate": 0.001, "loss": 1.9676, "step": 5695 }, { "epoch": 0.2409679329892546, "grad_norm": 0.20150291919708252, "learning_rate": 0.001, "loss": 1.9557, "step": 5696 }, { "epoch": 0.24101023775277097, "grad_norm": 0.2249046266078949, "learning_rate": 0.001, "loss": 2.1542, "step": 5697 }, { "epoch": 0.24105254251628733, "grad_norm": 0.20710447430610657, "learning_rate": 0.001, "loss": 1.6919, "step": 5698 }, { "epoch": 0.2410948472798037, "grad_norm": 0.2290615290403366, "learning_rate": 0.001, "loss": 2.3559, "step": 5699 }, { "epoch": 0.2411371520433201, "grad_norm": 0.3252545893192291, "learning_rate": 0.001, "loss": 2.7864, "step": 5700 }, { "epoch": 0.24117945680683645, "grad_norm": 0.21582552790641785, "learning_rate": 0.001, "loss": 1.7943, "step": 5701 }, { "epoch": 0.24122176157035283, "grad_norm": 0.2762306332588196, "learning_rate": 0.001, "loss": 2.1902, "step": 5702 }, { "epoch": 0.24126406633386918, "grad_norm": 0.22092297673225403, "learning_rate": 0.001, "loss": 2.4124, "step": 5703 }, { "epoch": 0.24130637109738556, "grad_norm": 1.6592704057693481, "learning_rate": 0.001, "loss": 1.9375, "step": 5704 }, { "epoch": 0.24134867586090195, "grad_norm": 0.2536323666572571, "learning_rate": 0.001, "loss": 2.0439, "step": 5705 }, { "epoch": 0.2413909806244183, "grad_norm": 0.23811465501785278, "learning_rate": 0.001, "loss": 2.3419, "step": 5706 }, { "epoch": 0.24143328538793468, "grad_norm": 8.864980697631836, "learning_rate": 0.001, "loss": 3.1004, "step": 5707 }, { "epoch": 0.24147559015145106, "grad_norm": 0.23312745988368988, "learning_rate": 0.001, "loss": 2.076, "step": 5708 }, { "epoch": 0.24151789491496742, "grad_norm": 0.2500420808792114, "learning_rate": 0.001, "loss": 1.8015, "step": 5709 }, { "epoch": 0.2415601996784838, "grad_norm": 0.397246390581131, "learning_rate": 0.001, "loss": 2.0747, "step": 5710 }, { "epoch": 0.24160250444200018, "grad_norm": 0.256539523601532, "learning_rate": 0.001, "loss": 2.0406, "step": 5711 }, { "epoch": 0.24164480920551654, "grad_norm": 0.2969229817390442, "learning_rate": 0.001, "loss": 2.8112, "step": 5712 }, { "epoch": 0.24168711396903292, "grad_norm": 0.35546842217445374, "learning_rate": 0.001, "loss": 3.5403, "step": 5713 }, { "epoch": 0.24172941873254927, "grad_norm": 0.22269092500209808, "learning_rate": 0.001, "loss": 1.9222, "step": 5714 }, { "epoch": 0.24177172349606565, "grad_norm": 0.20512692630290985, "learning_rate": 0.001, "loss": 1.9088, "step": 5715 }, { "epoch": 0.24181402825958204, "grad_norm": 0.26039499044418335, "learning_rate": 0.001, "loss": 2.0708, "step": 5716 }, { "epoch": 0.2418563330230984, "grad_norm": 38.39719009399414, "learning_rate": 0.001, "loss": 1.9211, "step": 5717 }, { "epoch": 0.24189863778661477, "grad_norm": 0.3820383548736572, "learning_rate": 0.001, "loss": 3.4881, "step": 5718 }, { "epoch": 0.24194094255013116, "grad_norm": 0.3408060073852539, "learning_rate": 0.001, "loss": 2.9283, "step": 5719 }, { "epoch": 0.2419832473136475, "grad_norm": 0.36963334679603577, "learning_rate": 0.001, "loss": 3.4062, "step": 5720 }, { "epoch": 0.2420255520771639, "grad_norm": 1.3119021654129028, "learning_rate": 0.001, "loss": 3.7547, "step": 5721 }, { "epoch": 0.24206785684068027, "grad_norm": 0.27805081009864807, "learning_rate": 0.001, "loss": 3.2918, "step": 5722 }, { "epoch": 0.24211016160419663, "grad_norm": 0.3566412031650543, "learning_rate": 0.001, "loss": 2.1117, "step": 5723 }, { "epoch": 0.242152466367713, "grad_norm": 0.3360730707645416, "learning_rate": 0.001, "loss": 2.1076, "step": 5724 }, { "epoch": 0.24219477113122936, "grad_norm": 0.27928465604782104, "learning_rate": 0.001, "loss": 2.2339, "step": 5725 }, { "epoch": 0.24223707589474575, "grad_norm": 2.314484119415283, "learning_rate": 0.001, "loss": 2.1504, "step": 5726 }, { "epoch": 0.24227938065826213, "grad_norm": 0.2751530706882477, "learning_rate": 0.001, "loss": 2.5127, "step": 5727 }, { "epoch": 0.24232168542177848, "grad_norm": 0.20994529128074646, "learning_rate": 0.001, "loss": 2.1564, "step": 5728 }, { "epoch": 0.24236399018529486, "grad_norm": 0.246437668800354, "learning_rate": 0.001, "loss": 2.3628, "step": 5729 }, { "epoch": 0.24240629494881125, "grad_norm": 2.577066659927368, "learning_rate": 0.001, "loss": 2.6037, "step": 5730 }, { "epoch": 0.2424485997123276, "grad_norm": 0.2616536021232605, "learning_rate": 0.001, "loss": 2.1903, "step": 5731 }, { "epoch": 0.24249090447584398, "grad_norm": 0.24390681087970734, "learning_rate": 0.001, "loss": 2.4864, "step": 5732 }, { "epoch": 0.24253320923936036, "grad_norm": 0.18465809524059296, "learning_rate": 0.001, "loss": 1.9357, "step": 5733 }, { "epoch": 0.24257551400287672, "grad_norm": 1.6933213472366333, "learning_rate": 0.001, "loss": 1.7803, "step": 5734 }, { "epoch": 0.2426178187663931, "grad_norm": 0.25248533487319946, "learning_rate": 0.001, "loss": 2.9161, "step": 5735 }, { "epoch": 0.24266012352990945, "grad_norm": 0.20103050768375397, "learning_rate": 0.001, "loss": 1.9586, "step": 5736 }, { "epoch": 0.24270242829342584, "grad_norm": 0.8724969029426575, "learning_rate": 0.001, "loss": 1.979, "step": 5737 }, { "epoch": 0.24274473305694222, "grad_norm": 0.23048318922519684, "learning_rate": 0.001, "loss": 2.1012, "step": 5738 }, { "epoch": 0.24278703782045857, "grad_norm": 0.24367755651474, "learning_rate": 0.001, "loss": 2.41, "step": 5739 }, { "epoch": 0.24282934258397496, "grad_norm": 0.4339103102684021, "learning_rate": 0.001, "loss": 2.6809, "step": 5740 }, { "epoch": 0.24287164734749134, "grad_norm": 1.0650697946548462, "learning_rate": 0.001, "loss": 2.7325, "step": 5741 }, { "epoch": 0.2429139521110077, "grad_norm": 2.662815809249878, "learning_rate": 0.001, "loss": 1.9265, "step": 5742 }, { "epoch": 0.24295625687452407, "grad_norm": 0.1942417025566101, "learning_rate": 0.001, "loss": 1.7619, "step": 5743 }, { "epoch": 0.24299856163804046, "grad_norm": 0.28779444098472595, "learning_rate": 0.001, "loss": 2.8304, "step": 5744 }, { "epoch": 0.2430408664015568, "grad_norm": 0.28456562757492065, "learning_rate": 0.001, "loss": 3.3492, "step": 5745 }, { "epoch": 0.2430831711650732, "grad_norm": 2.139453172683716, "learning_rate": 0.001, "loss": 3.8633, "step": 5746 }, { "epoch": 0.24312547592858955, "grad_norm": 0.35128921270370483, "learning_rate": 0.001, "loss": 2.3143, "step": 5747 }, { "epoch": 0.24316778069210593, "grad_norm": 0.3761100172996521, "learning_rate": 0.001, "loss": 2.5602, "step": 5748 }, { "epoch": 0.2432100854556223, "grad_norm": 0.3312492370605469, "learning_rate": 0.001, "loss": 2.6397, "step": 5749 }, { "epoch": 0.24325239021913866, "grad_norm": 14.835386276245117, "learning_rate": 0.001, "loss": 2.1131, "step": 5750 }, { "epoch": 0.24329469498265505, "grad_norm": 0.33980563282966614, "learning_rate": 0.001, "loss": 3.0289, "step": 5751 }, { "epoch": 0.24333699974617143, "grad_norm": 0.3430834412574768, "learning_rate": 0.001, "loss": 2.298, "step": 5752 }, { "epoch": 0.24337930450968778, "grad_norm": 0.2788030207157135, "learning_rate": 0.001, "loss": 2.3714, "step": 5753 }, { "epoch": 0.24342160927320416, "grad_norm": 0.242362380027771, "learning_rate": 0.001, "loss": 2.3746, "step": 5754 }, { "epoch": 0.24346391403672055, "grad_norm": 0.4052402079105377, "learning_rate": 0.001, "loss": 3.5768, "step": 5755 }, { "epoch": 0.2435062188002369, "grad_norm": 0.24132952094078064, "learning_rate": 0.001, "loss": 3.8857, "step": 5756 }, { "epoch": 0.24354852356375328, "grad_norm": 1.1747184991836548, "learning_rate": 0.001, "loss": 2.5792, "step": 5757 }, { "epoch": 0.24359082832726964, "grad_norm": 0.3181629776954651, "learning_rate": 0.001, "loss": 2.4274, "step": 5758 }, { "epoch": 0.24363313309078602, "grad_norm": 0.22457624971866608, "learning_rate": 0.001, "loss": 2.6695, "step": 5759 }, { "epoch": 0.2436754378543024, "grad_norm": 0.24323022365570068, "learning_rate": 0.001, "loss": 2.0115, "step": 5760 }, { "epoch": 0.24371774261781876, "grad_norm": 0.2709481120109558, "learning_rate": 0.001, "loss": 2.3548, "step": 5761 }, { "epoch": 0.24376004738133514, "grad_norm": 0.20824141800403595, "learning_rate": 0.001, "loss": 1.701, "step": 5762 }, { "epoch": 0.24380235214485152, "grad_norm": 1.6470791101455688, "learning_rate": 0.001, "loss": 2.4078, "step": 5763 }, { "epoch": 0.24384465690836787, "grad_norm": 0.2733412981033325, "learning_rate": 0.001, "loss": 2.0431, "step": 5764 }, { "epoch": 0.24388696167188426, "grad_norm": 0.2784044146537781, "learning_rate": 0.001, "loss": 2.6605, "step": 5765 }, { "epoch": 0.24392926643540064, "grad_norm": 0.661558210849762, "learning_rate": 0.001, "loss": 2.7723, "step": 5766 }, { "epoch": 0.243971571198917, "grad_norm": 0.24639226496219635, "learning_rate": 0.001, "loss": 2.2698, "step": 5767 }, { "epoch": 0.24401387596243337, "grad_norm": 0.2241000235080719, "learning_rate": 0.001, "loss": 2.6614, "step": 5768 }, { "epoch": 0.24405618072594973, "grad_norm": 0.23688313364982605, "learning_rate": 0.001, "loss": 3.4856, "step": 5769 }, { "epoch": 0.2440984854894661, "grad_norm": 0.22975680232048035, "learning_rate": 0.001, "loss": 2.7368, "step": 5770 }, { "epoch": 0.2441407902529825, "grad_norm": 0.2196754515171051, "learning_rate": 0.001, "loss": 2.1829, "step": 5771 }, { "epoch": 0.24418309501649885, "grad_norm": 0.21035362780094147, "learning_rate": 0.001, "loss": 3.0376, "step": 5772 }, { "epoch": 0.24422539978001523, "grad_norm": 0.19896003603935242, "learning_rate": 0.001, "loss": 1.9874, "step": 5773 }, { "epoch": 0.2442677045435316, "grad_norm": 0.24322247505187988, "learning_rate": 0.001, "loss": 3.4208, "step": 5774 }, { "epoch": 0.24431000930704797, "grad_norm": 0.20282556116580963, "learning_rate": 0.001, "loss": 2.0628, "step": 5775 }, { "epoch": 0.24435231407056435, "grad_norm": 0.19425134360790253, "learning_rate": 0.001, "loss": 2.9513, "step": 5776 }, { "epoch": 0.24439461883408073, "grad_norm": 0.26466214656829834, "learning_rate": 0.001, "loss": 2.5617, "step": 5777 }, { "epoch": 0.24443692359759708, "grad_norm": 0.2062908262014389, "learning_rate": 0.001, "loss": 2.3014, "step": 5778 }, { "epoch": 0.24447922836111347, "grad_norm": 0.17950376868247986, "learning_rate": 0.001, "loss": 2.2402, "step": 5779 }, { "epoch": 0.24452153312462982, "grad_norm": 0.17766666412353516, "learning_rate": 0.001, "loss": 1.9251, "step": 5780 }, { "epoch": 0.2445638378881462, "grad_norm": 0.20533357560634613, "learning_rate": 0.001, "loss": 1.8011, "step": 5781 }, { "epoch": 0.24460614265166258, "grad_norm": 0.21567344665527344, "learning_rate": 0.001, "loss": 2.074, "step": 5782 }, { "epoch": 0.24464844741517894, "grad_norm": 0.17948715388774872, "learning_rate": 0.001, "loss": 2.2158, "step": 5783 }, { "epoch": 0.24469075217869532, "grad_norm": 0.2008078396320343, "learning_rate": 0.001, "loss": 1.9838, "step": 5784 }, { "epoch": 0.2447330569422117, "grad_norm": 0.6070753931999207, "learning_rate": 0.001, "loss": 2.0619, "step": 5785 }, { "epoch": 0.24477536170572806, "grad_norm": 0.21907269954681396, "learning_rate": 0.001, "loss": 2.5838, "step": 5786 }, { "epoch": 0.24481766646924444, "grad_norm": 0.46992072463035583, "learning_rate": 0.001, "loss": 3.1865, "step": 5787 }, { "epoch": 0.24485997123276082, "grad_norm": 0.16599377989768982, "learning_rate": 0.001, "loss": 1.9173, "step": 5788 }, { "epoch": 0.24490227599627717, "grad_norm": 0.1835167109966278, "learning_rate": 0.001, "loss": 2.0218, "step": 5789 }, { "epoch": 0.24494458075979356, "grad_norm": 3.3056118488311768, "learning_rate": 0.001, "loss": 2.8621, "step": 5790 }, { "epoch": 0.24498688552330994, "grad_norm": 0.4746275842189789, "learning_rate": 0.001, "loss": 2.476, "step": 5791 }, { "epoch": 0.2450291902868263, "grad_norm": 0.48032623529434204, "learning_rate": 0.001, "loss": 2.3223, "step": 5792 }, { "epoch": 0.24507149505034267, "grad_norm": 0.31363826990127563, "learning_rate": 0.001, "loss": 3.0721, "step": 5793 }, { "epoch": 0.24511379981385903, "grad_norm": 0.1737576723098755, "learning_rate": 0.001, "loss": 2.0079, "step": 5794 }, { "epoch": 0.2451561045773754, "grad_norm": 0.8084182143211365, "learning_rate": 0.001, "loss": 2.1507, "step": 5795 }, { "epoch": 0.2451984093408918, "grad_norm": 0.1980573832988739, "learning_rate": 0.001, "loss": 2.65, "step": 5796 }, { "epoch": 0.24524071410440815, "grad_norm": 0.32529035210609436, "learning_rate": 0.001, "loss": 2.2491, "step": 5797 }, { "epoch": 0.24528301886792453, "grad_norm": 0.2029356062412262, "learning_rate": 0.001, "loss": 2.3983, "step": 5798 }, { "epoch": 0.2453253236314409, "grad_norm": 0.1999257355928421, "learning_rate": 0.001, "loss": 2.4885, "step": 5799 }, { "epoch": 0.24536762839495727, "grad_norm": 0.34168320894241333, "learning_rate": 0.001, "loss": 2.712, "step": 5800 }, { "epoch": 0.24540993315847365, "grad_norm": 0.24313102662563324, "learning_rate": 0.001, "loss": 2.064, "step": 5801 }, { "epoch": 0.24545223792199003, "grad_norm": 0.2077663093805313, "learning_rate": 0.001, "loss": 2.3874, "step": 5802 }, { "epoch": 0.24549454268550638, "grad_norm": 0.21080611646175385, "learning_rate": 0.001, "loss": 4.459, "step": 5803 }, { "epoch": 0.24553684744902277, "grad_norm": 0.22150756418704987, "learning_rate": 0.001, "loss": 2.0071, "step": 5804 }, { "epoch": 0.24557915221253912, "grad_norm": 0.27595874667167664, "learning_rate": 0.001, "loss": 2.2498, "step": 5805 }, { "epoch": 0.2456214569760555, "grad_norm": 38.239009857177734, "learning_rate": 0.001, "loss": 1.7931, "step": 5806 }, { "epoch": 0.24566376173957188, "grad_norm": 0.24801005423069, "learning_rate": 0.001, "loss": 2.6051, "step": 5807 }, { "epoch": 0.24570606650308824, "grad_norm": 0.26121652126312256, "learning_rate": 0.001, "loss": 2.9589, "step": 5808 }, { "epoch": 0.24574837126660462, "grad_norm": 0.18741196393966675, "learning_rate": 0.001, "loss": 1.4772, "step": 5809 }, { "epoch": 0.245790676030121, "grad_norm": 0.2526096701622009, "learning_rate": 0.001, "loss": 1.857, "step": 5810 }, { "epoch": 0.24583298079363736, "grad_norm": 0.28198450803756714, "learning_rate": 0.001, "loss": 2.0914, "step": 5811 }, { "epoch": 0.24587528555715374, "grad_norm": 0.22132883965969086, "learning_rate": 0.001, "loss": 2.4616, "step": 5812 }, { "epoch": 0.24591759032067012, "grad_norm": 0.26563504338264465, "learning_rate": 0.001, "loss": 2.2087, "step": 5813 }, { "epoch": 0.24595989508418648, "grad_norm": 0.35990941524505615, "learning_rate": 0.001, "loss": 2.3027, "step": 5814 }, { "epoch": 0.24600219984770286, "grad_norm": 0.20027977228164673, "learning_rate": 0.001, "loss": 3.1369, "step": 5815 }, { "epoch": 0.2460445046112192, "grad_norm": 0.9017431735992432, "learning_rate": 0.001, "loss": 2.6157, "step": 5816 }, { "epoch": 0.2460868093747356, "grad_norm": 1.3316782712936401, "learning_rate": 0.001, "loss": 2.9245, "step": 5817 }, { "epoch": 0.24612911413825198, "grad_norm": 0.24641363322734833, "learning_rate": 0.001, "loss": 2.1923, "step": 5818 }, { "epoch": 0.24617141890176833, "grad_norm": 0.3345772325992584, "learning_rate": 0.001, "loss": 2.3912, "step": 5819 }, { "epoch": 0.2462137236652847, "grad_norm": 0.23146621882915497, "learning_rate": 0.001, "loss": 2.2022, "step": 5820 }, { "epoch": 0.2462560284288011, "grad_norm": 0.3035587966442108, "learning_rate": 0.001, "loss": 2.5982, "step": 5821 }, { "epoch": 0.24629833319231745, "grad_norm": 0.4304903447628021, "learning_rate": 0.001, "loss": 2.566, "step": 5822 }, { "epoch": 0.24634063795583383, "grad_norm": 0.21231508255004883, "learning_rate": 0.001, "loss": 2.6924, "step": 5823 }, { "epoch": 0.2463829427193502, "grad_norm": 0.21332456171512604, "learning_rate": 0.001, "loss": 2.458, "step": 5824 }, { "epoch": 0.24642524748286657, "grad_norm": 0.24201619625091553, "learning_rate": 0.001, "loss": 2.1323, "step": 5825 }, { "epoch": 0.24646755224638295, "grad_norm": 0.20796683430671692, "learning_rate": 0.001, "loss": 2.2657, "step": 5826 }, { "epoch": 0.2465098570098993, "grad_norm": 0.3387661278247833, "learning_rate": 0.001, "loss": 1.8694, "step": 5827 }, { "epoch": 0.24655216177341568, "grad_norm": 0.5283912420272827, "learning_rate": 0.001, "loss": 2.0767, "step": 5828 }, { "epoch": 0.24659446653693207, "grad_norm": 0.21991148591041565, "learning_rate": 0.001, "loss": 3.0532, "step": 5829 }, { "epoch": 0.24663677130044842, "grad_norm": 0.2817951738834381, "learning_rate": 0.001, "loss": 2.8324, "step": 5830 }, { "epoch": 0.2466790760639648, "grad_norm": 0.2231799066066742, "learning_rate": 0.001, "loss": 3.0957, "step": 5831 }, { "epoch": 0.24672138082748118, "grad_norm": 0.23108860850334167, "learning_rate": 0.001, "loss": 2.1957, "step": 5832 }, { "epoch": 0.24676368559099754, "grad_norm": 0.47794216871261597, "learning_rate": 0.001, "loss": 2.4318, "step": 5833 }, { "epoch": 0.24680599035451392, "grad_norm": 0.3009633421897888, "learning_rate": 0.001, "loss": 2.801, "step": 5834 }, { "epoch": 0.2468482951180303, "grad_norm": 0.1829899400472641, "learning_rate": 0.001, "loss": 1.762, "step": 5835 }, { "epoch": 0.24689059988154666, "grad_norm": 0.25043395161628723, "learning_rate": 0.001, "loss": 2.103, "step": 5836 }, { "epoch": 0.24693290464506304, "grad_norm": 1.0401873588562012, "learning_rate": 0.001, "loss": 2.5898, "step": 5837 }, { "epoch": 0.2469752094085794, "grad_norm": 0.22603413462638855, "learning_rate": 0.001, "loss": 3.3657, "step": 5838 }, { "epoch": 0.24701751417209578, "grad_norm": 0.25399866700172424, "learning_rate": 0.001, "loss": 2.7811, "step": 5839 }, { "epoch": 0.24705981893561216, "grad_norm": 0.24066664278507233, "learning_rate": 0.001, "loss": 3.1812, "step": 5840 }, { "epoch": 0.2471021236991285, "grad_norm": 0.27932751178741455, "learning_rate": 0.001, "loss": 2.5269, "step": 5841 }, { "epoch": 0.2471444284626449, "grad_norm": 0.2165471911430359, "learning_rate": 0.001, "loss": 1.9364, "step": 5842 }, { "epoch": 0.24718673322616128, "grad_norm": 0.1947283148765564, "learning_rate": 0.001, "loss": 1.9661, "step": 5843 }, { "epoch": 0.24722903798967763, "grad_norm": 0.245658740401268, "learning_rate": 0.001, "loss": 2.2399, "step": 5844 }, { "epoch": 0.247271342753194, "grad_norm": 0.21546167135238647, "learning_rate": 0.001, "loss": 2.4518, "step": 5845 }, { "epoch": 0.2473136475167104, "grad_norm": 0.18316084146499634, "learning_rate": 0.001, "loss": 1.8123, "step": 5846 }, { "epoch": 0.24735595228022675, "grad_norm": 2.9340388774871826, "learning_rate": 0.001, "loss": 2.8578, "step": 5847 }, { "epoch": 0.24739825704374313, "grad_norm": 0.16483551263809204, "learning_rate": 0.001, "loss": 1.7181, "step": 5848 }, { "epoch": 0.24744056180725948, "grad_norm": 0.18957403302192688, "learning_rate": 0.001, "loss": 2.1544, "step": 5849 }, { "epoch": 0.24748286657077587, "grad_norm": 0.19052253663539886, "learning_rate": 0.001, "loss": 1.8171, "step": 5850 }, { "epoch": 0.24752517133429225, "grad_norm": 0.19511641561985016, "learning_rate": 0.001, "loss": 1.7506, "step": 5851 }, { "epoch": 0.2475674760978086, "grad_norm": 0.7128787040710449, "learning_rate": 0.001, "loss": 1.6148, "step": 5852 }, { "epoch": 0.24760978086132499, "grad_norm": 0.1971040964126587, "learning_rate": 0.001, "loss": 2.5899, "step": 5853 }, { "epoch": 0.24765208562484137, "grad_norm": 0.3237464129924774, "learning_rate": 0.001, "loss": 1.9698, "step": 5854 }, { "epoch": 0.24769439038835772, "grad_norm": 0.20334038138389587, "learning_rate": 0.001, "loss": 1.8541, "step": 5855 }, { "epoch": 0.2477366951518741, "grad_norm": 0.19235415756702423, "learning_rate": 0.001, "loss": 2.1102, "step": 5856 }, { "epoch": 0.24777899991539049, "grad_norm": 0.18635153770446777, "learning_rate": 0.001, "loss": 1.7164, "step": 5857 }, { "epoch": 0.24782130467890684, "grad_norm": 0.8393286466598511, "learning_rate": 0.001, "loss": 2.0415, "step": 5858 }, { "epoch": 0.24786360944242322, "grad_norm": 0.20402997732162476, "learning_rate": 0.001, "loss": 2.5329, "step": 5859 }, { "epoch": 0.24790591420593958, "grad_norm": 0.20956696569919586, "learning_rate": 0.001, "loss": 1.7601, "step": 5860 }, { "epoch": 0.24794821896945596, "grad_norm": 0.3180994391441345, "learning_rate": 0.001, "loss": 2.319, "step": 5861 }, { "epoch": 0.24799052373297234, "grad_norm": 0.4629594087600708, "learning_rate": 0.001, "loss": 2.763, "step": 5862 }, { "epoch": 0.2480328284964887, "grad_norm": 0.2035917043685913, "learning_rate": 0.001, "loss": 2.7616, "step": 5863 }, { "epoch": 0.24807513326000508, "grad_norm": 0.20831398665905, "learning_rate": 0.001, "loss": 3.2368, "step": 5864 }, { "epoch": 0.24811743802352146, "grad_norm": 0.24462851881980896, "learning_rate": 0.001, "loss": 2.8819, "step": 5865 }, { "epoch": 0.2481597427870378, "grad_norm": 0.72509765625, "learning_rate": 0.001, "loss": 1.4765, "step": 5866 }, { "epoch": 0.2482020475505542, "grad_norm": 0.2004188746213913, "learning_rate": 0.001, "loss": 2.7114, "step": 5867 }, { "epoch": 0.24824435231407058, "grad_norm": 0.16426634788513184, "learning_rate": 0.001, "loss": 2.2513, "step": 5868 }, { "epoch": 0.24828665707758693, "grad_norm": 0.23938223719596863, "learning_rate": 0.001, "loss": 2.5919, "step": 5869 }, { "epoch": 0.2483289618411033, "grad_norm": 0.20206694304943085, "learning_rate": 0.001, "loss": 1.8613, "step": 5870 }, { "epoch": 0.24837126660461967, "grad_norm": 0.20341122150421143, "learning_rate": 0.001, "loss": 1.6956, "step": 5871 }, { "epoch": 0.24841357136813605, "grad_norm": 0.16674411296844482, "learning_rate": 0.001, "loss": 2.3249, "step": 5872 }, { "epoch": 0.24845587613165243, "grad_norm": 0.17640060186386108, "learning_rate": 0.001, "loss": 1.5507, "step": 5873 }, { "epoch": 0.24849818089516879, "grad_norm": 0.2345527559518814, "learning_rate": 0.001, "loss": 2.1983, "step": 5874 }, { "epoch": 0.24854048565868517, "grad_norm": 2.3376219272613525, "learning_rate": 0.001, "loss": 2.1126, "step": 5875 }, { "epoch": 0.24858279042220155, "grad_norm": 0.24027837812900543, "learning_rate": 0.001, "loss": 1.968, "step": 5876 }, { "epoch": 0.2486250951857179, "grad_norm": 0.22010159492492676, "learning_rate": 0.001, "loss": 1.8924, "step": 5877 }, { "epoch": 0.24866739994923429, "grad_norm": 0.1861301064491272, "learning_rate": 0.001, "loss": 2.1468, "step": 5878 }, { "epoch": 0.24870970471275067, "grad_norm": 0.2004723697900772, "learning_rate": 0.001, "loss": 2.5531, "step": 5879 }, { "epoch": 0.24875200947626702, "grad_norm": 0.33933591842651367, "learning_rate": 0.001, "loss": 2.7241, "step": 5880 }, { "epoch": 0.2487943142397834, "grad_norm": 0.26953572034835815, "learning_rate": 0.001, "loss": 2.3623, "step": 5881 }, { "epoch": 0.24883661900329976, "grad_norm": 0.1680213063955307, "learning_rate": 0.001, "loss": 2.5205, "step": 5882 }, { "epoch": 0.24887892376681614, "grad_norm": 0.17806370556354523, "learning_rate": 0.001, "loss": 2.0251, "step": 5883 }, { "epoch": 0.24892122853033252, "grad_norm": 0.44884923100471497, "learning_rate": 0.001, "loss": 1.8082, "step": 5884 }, { "epoch": 0.24896353329384888, "grad_norm": 0.28582075238227844, "learning_rate": 0.001, "loss": 3.7704, "step": 5885 }, { "epoch": 0.24900583805736526, "grad_norm": 0.21283996105194092, "learning_rate": 0.001, "loss": 1.7666, "step": 5886 }, { "epoch": 0.24904814282088164, "grad_norm": 0.22247281670570374, "learning_rate": 0.001, "loss": 2.6567, "step": 5887 }, { "epoch": 0.249090447584398, "grad_norm": 0.2375125139951706, "learning_rate": 0.001, "loss": 2.3062, "step": 5888 }, { "epoch": 0.24913275234791438, "grad_norm": 0.3201584219932556, "learning_rate": 0.001, "loss": 3.1853, "step": 5889 }, { "epoch": 0.24917505711143076, "grad_norm": 0.21060162782669067, "learning_rate": 0.001, "loss": 2.4099, "step": 5890 }, { "epoch": 0.2492173618749471, "grad_norm": 0.2675623595714569, "learning_rate": 0.001, "loss": 2.5034, "step": 5891 }, { "epoch": 0.2492596666384635, "grad_norm": 3.282320022583008, "learning_rate": 0.001, "loss": 1.659, "step": 5892 }, { "epoch": 0.24930197140197985, "grad_norm": 0.28002822399139404, "learning_rate": 0.001, "loss": 2.2949, "step": 5893 }, { "epoch": 0.24934427616549623, "grad_norm": 0.17490912973880768, "learning_rate": 0.001, "loss": 2.7314, "step": 5894 }, { "epoch": 0.2493865809290126, "grad_norm": 0.5958810448646545, "learning_rate": 0.001, "loss": 2.6676, "step": 5895 }, { "epoch": 0.24942888569252897, "grad_norm": 0.18583674728870392, "learning_rate": 0.001, "loss": 2.1634, "step": 5896 }, { "epoch": 0.24947119045604535, "grad_norm": 0.2628713548183441, "learning_rate": 0.001, "loss": 2.3261, "step": 5897 }, { "epoch": 0.24951349521956173, "grad_norm": 0.1902725249528885, "learning_rate": 0.001, "loss": 2.037, "step": 5898 }, { "epoch": 0.24955579998307809, "grad_norm": 0.8447865843772888, "learning_rate": 0.001, "loss": 1.9191, "step": 5899 }, { "epoch": 0.24959810474659447, "grad_norm": 0.25084027647972107, "learning_rate": 0.001, "loss": 2.6923, "step": 5900 }, { "epoch": 0.24964040951011085, "grad_norm": 0.2469928115606308, "learning_rate": 0.001, "loss": 2.6025, "step": 5901 }, { "epoch": 0.2496827142736272, "grad_norm": 0.18557626008987427, "learning_rate": 0.001, "loss": 1.8872, "step": 5902 }, { "epoch": 0.2497250190371436, "grad_norm": 0.22026236355304718, "learning_rate": 0.001, "loss": 2.0827, "step": 5903 }, { "epoch": 0.24976732380065994, "grad_norm": 0.21095015108585358, "learning_rate": 0.001, "loss": 2.2376, "step": 5904 }, { "epoch": 0.24980962856417632, "grad_norm": 0.18303923308849335, "learning_rate": 0.001, "loss": 3.1426, "step": 5905 }, { "epoch": 0.2498519333276927, "grad_norm": 0.9932180643081665, "learning_rate": 0.001, "loss": 2.3002, "step": 5906 }, { "epoch": 0.24989423809120906, "grad_norm": 0.2075134962797165, "learning_rate": 0.001, "loss": 2.2085, "step": 5907 }, { "epoch": 0.24993654285472544, "grad_norm": 0.1895306408405304, "learning_rate": 0.001, "loss": 1.8905, "step": 5908 }, { "epoch": 0.24997884761824182, "grad_norm": 0.2975296378135681, "learning_rate": 0.001, "loss": 3.0346, "step": 5909 }, { "epoch": 0.2500211523817582, "grad_norm": 0.19449001550674438, "learning_rate": 0.001, "loss": 2.3004, "step": 5910 }, { "epoch": 0.25006345714527456, "grad_norm": 0.24991470575332642, "learning_rate": 0.001, "loss": 3.1309, "step": 5911 }, { "epoch": 0.2501057619087909, "grad_norm": 0.35687020421028137, "learning_rate": 0.001, "loss": 2.2765, "step": 5912 }, { "epoch": 0.2501480666723073, "grad_norm": 0.17699697613716125, "learning_rate": 0.001, "loss": 2.4377, "step": 5913 }, { "epoch": 0.2501903714358237, "grad_norm": 0.2586126923561096, "learning_rate": 0.001, "loss": 3.4109, "step": 5914 }, { "epoch": 0.25023267619934003, "grad_norm": 0.21270103752613068, "learning_rate": 0.001, "loss": 1.8928, "step": 5915 }, { "epoch": 0.25027498096285644, "grad_norm": 0.225282222032547, "learning_rate": 0.001, "loss": 2.3448, "step": 5916 }, { "epoch": 0.2503172857263728, "grad_norm": 0.18407504260540009, "learning_rate": 0.001, "loss": 2.0276, "step": 5917 }, { "epoch": 0.25035959048988915, "grad_norm": 0.22186896204948425, "learning_rate": 0.001, "loss": 2.0891, "step": 5918 }, { "epoch": 0.25040189525340556, "grad_norm": 2.6034348011016846, "learning_rate": 0.001, "loss": 2.0726, "step": 5919 }, { "epoch": 0.2504442000169219, "grad_norm": 0.20303837954998016, "learning_rate": 0.001, "loss": 2.3935, "step": 5920 }, { "epoch": 0.25048650478043827, "grad_norm": 0.18995042145252228, "learning_rate": 0.001, "loss": 2.0791, "step": 5921 }, { "epoch": 0.2505288095439546, "grad_norm": 0.17359282076358795, "learning_rate": 0.001, "loss": 2.0846, "step": 5922 }, { "epoch": 0.25057111430747103, "grad_norm": 0.2554149031639099, "learning_rate": 0.001, "loss": 1.8015, "step": 5923 }, { "epoch": 0.2506134190709874, "grad_norm": 0.4858115315437317, "learning_rate": 0.001, "loss": 2.3795, "step": 5924 }, { "epoch": 0.25065572383450374, "grad_norm": 0.3626372516155243, "learning_rate": 0.001, "loss": 2.0548, "step": 5925 }, { "epoch": 0.25069802859802015, "grad_norm": 0.17694664001464844, "learning_rate": 0.001, "loss": 1.7527, "step": 5926 }, { "epoch": 0.2507403333615365, "grad_norm": 0.5203558206558228, "learning_rate": 0.001, "loss": 3.2289, "step": 5927 }, { "epoch": 0.25078263812505286, "grad_norm": 0.1976146101951599, "learning_rate": 0.001, "loss": 2.4577, "step": 5928 }, { "epoch": 0.25082494288856927, "grad_norm": 0.2331826388835907, "learning_rate": 0.001, "loss": 3.3164, "step": 5929 }, { "epoch": 0.2508672476520856, "grad_norm": 0.4622613787651062, "learning_rate": 0.001, "loss": 1.578, "step": 5930 }, { "epoch": 0.250909552415602, "grad_norm": 0.20590519905090332, "learning_rate": 0.001, "loss": 2.2907, "step": 5931 }, { "epoch": 0.2509518571791184, "grad_norm": 0.20743978023529053, "learning_rate": 0.001, "loss": 2.0356, "step": 5932 }, { "epoch": 0.25099416194263474, "grad_norm": 4.0422892570495605, "learning_rate": 0.001, "loss": 2.584, "step": 5933 }, { "epoch": 0.2510364667061511, "grad_norm": 0.4900633990764618, "learning_rate": 0.001, "loss": 3.1356, "step": 5934 }, { "epoch": 0.2510787714696675, "grad_norm": 0.19140180945396423, "learning_rate": 0.001, "loss": 2.1572, "step": 5935 }, { "epoch": 0.25112107623318386, "grad_norm": 0.2231162041425705, "learning_rate": 0.001, "loss": 2.8386, "step": 5936 }, { "epoch": 0.2511633809967002, "grad_norm": 1.3613148927688599, "learning_rate": 0.001, "loss": 2.2125, "step": 5937 }, { "epoch": 0.2512056857602166, "grad_norm": 0.23480670154094696, "learning_rate": 0.001, "loss": 2.3708, "step": 5938 }, { "epoch": 0.251247990523733, "grad_norm": 0.5639317035675049, "learning_rate": 0.001, "loss": 2.2903, "step": 5939 }, { "epoch": 0.25129029528724933, "grad_norm": 1.7350952625274658, "learning_rate": 0.001, "loss": 2.4496, "step": 5940 }, { "epoch": 0.25133260005076574, "grad_norm": 0.1928754597902298, "learning_rate": 0.001, "loss": 2.6528, "step": 5941 }, { "epoch": 0.2513749048142821, "grad_norm": 0.27500829100608826, "learning_rate": 0.001, "loss": 2.7894, "step": 5942 }, { "epoch": 0.25141720957779845, "grad_norm": 0.2504851520061493, "learning_rate": 0.001, "loss": 3.2059, "step": 5943 }, { "epoch": 0.2514595143413148, "grad_norm": 0.45656251907348633, "learning_rate": 0.001, "loss": 2.6401, "step": 5944 }, { "epoch": 0.2515018191048312, "grad_norm": 0.2430989146232605, "learning_rate": 0.001, "loss": 2.3806, "step": 5945 }, { "epoch": 0.25154412386834757, "grad_norm": 0.44327855110168457, "learning_rate": 0.001, "loss": 1.9777, "step": 5946 }, { "epoch": 0.2515864286318639, "grad_norm": 0.2197687029838562, "learning_rate": 0.001, "loss": 2.3825, "step": 5947 }, { "epoch": 0.25162873339538033, "grad_norm": 0.24564971029758453, "learning_rate": 0.001, "loss": 2.022, "step": 5948 }, { "epoch": 0.2516710381588967, "grad_norm": 0.41810470819473267, "learning_rate": 0.001, "loss": 2.4189, "step": 5949 }, { "epoch": 0.25171334292241304, "grad_norm": 0.2302566021680832, "learning_rate": 0.001, "loss": 2.1172, "step": 5950 }, { "epoch": 0.25175564768592945, "grad_norm": 0.5698367357254028, "learning_rate": 0.001, "loss": 1.8955, "step": 5951 }, { "epoch": 0.2517979524494458, "grad_norm": 0.20264704525470734, "learning_rate": 0.001, "loss": 2.55, "step": 5952 }, { "epoch": 0.25184025721296216, "grad_norm": 0.19961802661418915, "learning_rate": 0.001, "loss": 2.1735, "step": 5953 }, { "epoch": 0.25188256197647857, "grad_norm": 0.24171172082424164, "learning_rate": 0.001, "loss": 2.9635, "step": 5954 }, { "epoch": 0.2519248667399949, "grad_norm": 0.19584928452968597, "learning_rate": 0.001, "loss": 2.0816, "step": 5955 }, { "epoch": 0.2519671715035113, "grad_norm": 0.7652029991149902, "learning_rate": 0.001, "loss": 2.8902, "step": 5956 }, { "epoch": 0.2520094762670277, "grad_norm": 0.22400104999542236, "learning_rate": 0.001, "loss": 2.0901, "step": 5957 }, { "epoch": 0.25205178103054404, "grad_norm": 0.21903732419013977, "learning_rate": 0.001, "loss": 2.0618, "step": 5958 }, { "epoch": 0.2520940857940604, "grad_norm": 0.24086321890354156, "learning_rate": 0.001, "loss": 2.9158, "step": 5959 }, { "epoch": 0.2521363905575768, "grad_norm": 2.2688310146331787, "learning_rate": 0.001, "loss": 2.2415, "step": 5960 }, { "epoch": 0.25217869532109316, "grad_norm": 0.2769038677215576, "learning_rate": 0.001, "loss": 2.5179, "step": 5961 }, { "epoch": 0.2522210000846095, "grad_norm": 0.19732657074928284, "learning_rate": 0.001, "loss": 1.5416, "step": 5962 }, { "epoch": 0.2522633048481259, "grad_norm": 0.21410557627677917, "learning_rate": 0.001, "loss": 1.9668, "step": 5963 }, { "epoch": 0.2523056096116423, "grad_norm": 0.20264627039432526, "learning_rate": 0.001, "loss": 1.8539, "step": 5964 }, { "epoch": 0.25234791437515863, "grad_norm": 0.9442425966262817, "learning_rate": 0.001, "loss": 2.2133, "step": 5965 }, { "epoch": 0.252390219138675, "grad_norm": 0.17984417080879211, "learning_rate": 0.001, "loss": 1.9855, "step": 5966 }, { "epoch": 0.2524325239021914, "grad_norm": 0.3779272735118866, "learning_rate": 0.001, "loss": 3.2348, "step": 5967 }, { "epoch": 0.25247482866570775, "grad_norm": 0.23306319117546082, "learning_rate": 0.001, "loss": 2.9164, "step": 5968 }, { "epoch": 0.2525171334292241, "grad_norm": 0.21737343072891235, "learning_rate": 0.001, "loss": 2.4458, "step": 5969 }, { "epoch": 0.2525594381927405, "grad_norm": 0.38391655683517456, "learning_rate": 0.001, "loss": 1.901, "step": 5970 }, { "epoch": 0.25260174295625687, "grad_norm": 0.21682848036289215, "learning_rate": 0.001, "loss": 2.745, "step": 5971 }, { "epoch": 0.2526440477197732, "grad_norm": 0.1871444135904312, "learning_rate": 0.001, "loss": 2.6208, "step": 5972 }, { "epoch": 0.25268635248328963, "grad_norm": 0.23921331763267517, "learning_rate": 0.001, "loss": 2.7021, "step": 5973 }, { "epoch": 0.252728657246806, "grad_norm": 0.17137141525745392, "learning_rate": 0.001, "loss": 2.0486, "step": 5974 }, { "epoch": 0.25277096201032234, "grad_norm": 0.5864259600639343, "learning_rate": 0.001, "loss": 1.7707, "step": 5975 }, { "epoch": 0.25281326677383875, "grad_norm": 0.18779319524765015, "learning_rate": 0.001, "loss": 1.8549, "step": 5976 }, { "epoch": 0.2528555715373551, "grad_norm": 0.2662065625190735, "learning_rate": 0.001, "loss": 2.9272, "step": 5977 }, { "epoch": 0.25289787630087146, "grad_norm": 0.23567529022693634, "learning_rate": 0.001, "loss": 2.6869, "step": 5978 }, { "epoch": 0.25294018106438787, "grad_norm": 0.1899128556251526, "learning_rate": 0.001, "loss": 1.992, "step": 5979 }, { "epoch": 0.2529824858279042, "grad_norm": 0.23404709994792938, "learning_rate": 0.001, "loss": 2.3842, "step": 5980 }, { "epoch": 0.2530247905914206, "grad_norm": 0.5312284827232361, "learning_rate": 0.001, "loss": 2.2135, "step": 5981 }, { "epoch": 0.253067095354937, "grad_norm": 0.2125319540500641, "learning_rate": 0.001, "loss": 2.3937, "step": 5982 }, { "epoch": 0.25310940011845334, "grad_norm": 0.20524631440639496, "learning_rate": 0.001, "loss": 2.5639, "step": 5983 }, { "epoch": 0.2531517048819697, "grad_norm": 0.20020252466201782, "learning_rate": 0.001, "loss": 2.6334, "step": 5984 }, { "epoch": 0.2531940096454861, "grad_norm": 0.2192360907793045, "learning_rate": 0.001, "loss": 2.2578, "step": 5985 }, { "epoch": 0.25323631440900246, "grad_norm": 0.16975408792495728, "learning_rate": 0.001, "loss": 1.8028, "step": 5986 }, { "epoch": 0.2532786191725188, "grad_norm": 1.4105056524276733, "learning_rate": 0.001, "loss": 3.1986, "step": 5987 }, { "epoch": 0.2533209239360352, "grad_norm": 1.0389015674591064, "learning_rate": 0.001, "loss": 2.7766, "step": 5988 }, { "epoch": 0.2533632286995516, "grad_norm": 1.0410923957824707, "learning_rate": 0.001, "loss": 2.4953, "step": 5989 }, { "epoch": 0.25340553346306793, "grad_norm": 0.24617451429367065, "learning_rate": 0.001, "loss": 1.7532, "step": 5990 }, { "epoch": 0.2534478382265843, "grad_norm": 0.22218583524227142, "learning_rate": 0.001, "loss": 1.8373, "step": 5991 }, { "epoch": 0.2534901429901007, "grad_norm": 0.3823182284832001, "learning_rate": 0.001, "loss": 1.8441, "step": 5992 }, { "epoch": 0.25353244775361705, "grad_norm": 0.2810841202735901, "learning_rate": 0.001, "loss": 1.8284, "step": 5993 }, { "epoch": 0.2535747525171334, "grad_norm": 0.21739636361598969, "learning_rate": 0.001, "loss": 2.66, "step": 5994 }, { "epoch": 0.2536170572806498, "grad_norm": 0.8973679542541504, "learning_rate": 0.001, "loss": 2.4392, "step": 5995 }, { "epoch": 0.25365936204416617, "grad_norm": 0.24056527018547058, "learning_rate": 0.001, "loss": 2.4884, "step": 5996 }, { "epoch": 0.2537016668076825, "grad_norm": 0.7096660733222961, "learning_rate": 0.001, "loss": 2.4973, "step": 5997 }, { "epoch": 0.25374397157119893, "grad_norm": 3.935514450073242, "learning_rate": 0.001, "loss": 2.5481, "step": 5998 }, { "epoch": 0.2537862763347153, "grad_norm": 0.22361649572849274, "learning_rate": 0.001, "loss": 1.8946, "step": 5999 }, { "epoch": 0.25382858109823164, "grad_norm": 1.974503755569458, "learning_rate": 0.001, "loss": 2.4045, "step": 6000 }, { "epoch": 0.25387088586174805, "grad_norm": 0.44096559286117554, "learning_rate": 0.001, "loss": 2.4101, "step": 6001 }, { "epoch": 0.2539131906252644, "grad_norm": 0.4442439079284668, "learning_rate": 0.001, "loss": 2.372, "step": 6002 }, { "epoch": 0.25395549538878076, "grad_norm": 0.24630646407604218, "learning_rate": 0.001, "loss": 2.3218, "step": 6003 }, { "epoch": 0.25399780015229717, "grad_norm": 0.32622864842414856, "learning_rate": 0.001, "loss": 3.0714, "step": 6004 }, { "epoch": 0.2540401049158135, "grad_norm": 0.2776721119880676, "learning_rate": 0.001, "loss": 2.2819, "step": 6005 }, { "epoch": 0.2540824096793299, "grad_norm": 0.22410140931606293, "learning_rate": 0.001, "loss": 2.2823, "step": 6006 }, { "epoch": 0.2541247144428463, "grad_norm": 0.7463712096214294, "learning_rate": 0.001, "loss": 2.9199, "step": 6007 }, { "epoch": 0.25416701920636264, "grad_norm": 0.3214509189128876, "learning_rate": 0.001, "loss": 3.1198, "step": 6008 }, { "epoch": 0.254209323969879, "grad_norm": 0.2701822817325592, "learning_rate": 0.001, "loss": 1.9467, "step": 6009 }, { "epoch": 0.2542516287333954, "grad_norm": 0.28340399265289307, "learning_rate": 0.001, "loss": 1.888, "step": 6010 }, { "epoch": 0.25429393349691176, "grad_norm": 0.3803146779537201, "learning_rate": 0.001, "loss": 3.7349, "step": 6011 }, { "epoch": 0.2543362382604281, "grad_norm": 0.2079993635416031, "learning_rate": 0.001, "loss": 2.412, "step": 6012 }, { "epoch": 0.25437854302394447, "grad_norm": 0.2278539538383484, "learning_rate": 0.001, "loss": 2.1716, "step": 6013 }, { "epoch": 0.2544208477874609, "grad_norm": 0.23987050354480743, "learning_rate": 0.001, "loss": 2.7047, "step": 6014 }, { "epoch": 0.25446315255097723, "grad_norm": 0.32305946946144104, "learning_rate": 0.001, "loss": 2.1134, "step": 6015 }, { "epoch": 0.2545054573144936, "grad_norm": 0.3197750449180603, "learning_rate": 0.001, "loss": 2.5648, "step": 6016 }, { "epoch": 0.25454776207801, "grad_norm": 0.2103656828403473, "learning_rate": 0.001, "loss": 2.1189, "step": 6017 }, { "epoch": 0.25459006684152635, "grad_norm": 0.6218228936195374, "learning_rate": 0.001, "loss": 1.9693, "step": 6018 }, { "epoch": 0.2546323716050427, "grad_norm": 0.6921700835227966, "learning_rate": 0.001, "loss": 2.4787, "step": 6019 }, { "epoch": 0.2546746763685591, "grad_norm": 0.20332172513008118, "learning_rate": 0.001, "loss": 1.9037, "step": 6020 }, { "epoch": 0.25471698113207547, "grad_norm": 0.5180513262748718, "learning_rate": 0.001, "loss": 2.5619, "step": 6021 }, { "epoch": 0.2547592858955918, "grad_norm": 0.199415922164917, "learning_rate": 0.001, "loss": 2.1282, "step": 6022 }, { "epoch": 0.25480159065910823, "grad_norm": 0.20138099789619446, "learning_rate": 0.001, "loss": 1.8072, "step": 6023 }, { "epoch": 0.2548438954226246, "grad_norm": 0.24892349541187286, "learning_rate": 0.001, "loss": 2.2655, "step": 6024 }, { "epoch": 0.25488620018614094, "grad_norm": 0.4697567820549011, "learning_rate": 0.001, "loss": 2.9607, "step": 6025 }, { "epoch": 0.25492850494965735, "grad_norm": 0.21022146940231323, "learning_rate": 0.001, "loss": 2.0065, "step": 6026 }, { "epoch": 0.2549708097131737, "grad_norm": 2.630333423614502, "learning_rate": 0.001, "loss": 1.862, "step": 6027 }, { "epoch": 0.25501311447669006, "grad_norm": 0.2536892890930176, "learning_rate": 0.001, "loss": 2.4433, "step": 6028 }, { "epoch": 0.25505541924020647, "grad_norm": 0.212021142244339, "learning_rate": 0.001, "loss": 2.0887, "step": 6029 }, { "epoch": 0.2550977240037228, "grad_norm": 0.2172902524471283, "learning_rate": 0.001, "loss": 1.958, "step": 6030 }, { "epoch": 0.2551400287672392, "grad_norm": 0.22086498141288757, "learning_rate": 0.001, "loss": 2.5838, "step": 6031 }, { "epoch": 0.2551823335307556, "grad_norm": 0.2510300576686859, "learning_rate": 0.001, "loss": 1.6274, "step": 6032 }, { "epoch": 0.25522463829427194, "grad_norm": 1.1860932111740112, "learning_rate": 0.001, "loss": 3.0628, "step": 6033 }, { "epoch": 0.2552669430577883, "grad_norm": 7.115164756774902, "learning_rate": 0.001, "loss": 3.3898, "step": 6034 }, { "epoch": 0.25530924782130465, "grad_norm": 0.2606843411922455, "learning_rate": 0.001, "loss": 2.2712, "step": 6035 }, { "epoch": 0.25535155258482106, "grad_norm": 0.25481027364730835, "learning_rate": 0.001, "loss": 2.5696, "step": 6036 }, { "epoch": 0.2553938573483374, "grad_norm": 0.19426991045475006, "learning_rate": 0.001, "loss": 1.8278, "step": 6037 }, { "epoch": 0.25543616211185377, "grad_norm": 0.23399263620376587, "learning_rate": 0.001, "loss": 2.0001, "step": 6038 }, { "epoch": 0.2554784668753702, "grad_norm": 0.2326452136039734, "learning_rate": 0.001, "loss": 1.9532, "step": 6039 }, { "epoch": 0.25552077163888653, "grad_norm": 0.5606713891029358, "learning_rate": 0.001, "loss": 2.2031, "step": 6040 }, { "epoch": 0.2555630764024029, "grad_norm": 1.2523069381713867, "learning_rate": 0.001, "loss": 1.9169, "step": 6041 }, { "epoch": 0.2556053811659193, "grad_norm": 0.21582403779029846, "learning_rate": 0.001, "loss": 2.3978, "step": 6042 }, { "epoch": 0.25564768592943565, "grad_norm": 0.3164418339729309, "learning_rate": 0.001, "loss": 2.7011, "step": 6043 }, { "epoch": 0.255689990692952, "grad_norm": 1.014667272567749, "learning_rate": 0.001, "loss": 2.3931, "step": 6044 }, { "epoch": 0.2557322954564684, "grad_norm": 0.2742740213871002, "learning_rate": 0.001, "loss": 2.705, "step": 6045 }, { "epoch": 0.25577460021998477, "grad_norm": 0.3064813017845154, "learning_rate": 0.001, "loss": 2.5047, "step": 6046 }, { "epoch": 0.2558169049835011, "grad_norm": 0.2514945864677429, "learning_rate": 0.001, "loss": 1.743, "step": 6047 }, { "epoch": 0.25585920974701754, "grad_norm": 0.48261088132858276, "learning_rate": 0.001, "loss": 2.7783, "step": 6048 }, { "epoch": 0.2559015145105339, "grad_norm": 0.2113606333732605, "learning_rate": 0.001, "loss": 2.6006, "step": 6049 }, { "epoch": 0.25594381927405024, "grad_norm": 0.21159514784812927, "learning_rate": 0.001, "loss": 1.9746, "step": 6050 }, { "epoch": 0.25598612403756665, "grad_norm": 0.30281734466552734, "learning_rate": 0.001, "loss": 2.6729, "step": 6051 }, { "epoch": 0.256028428801083, "grad_norm": 0.19452209770679474, "learning_rate": 0.001, "loss": 1.9967, "step": 6052 }, { "epoch": 0.25607073356459936, "grad_norm": 0.19158554077148438, "learning_rate": 0.001, "loss": 2.0234, "step": 6053 }, { "epoch": 0.25611303832811577, "grad_norm": 0.3292371332645416, "learning_rate": 0.001, "loss": 1.8973, "step": 6054 }, { "epoch": 0.2561553430916321, "grad_norm": 0.1782860904932022, "learning_rate": 0.001, "loss": 1.786, "step": 6055 }, { "epoch": 0.2561976478551485, "grad_norm": 0.2003399133682251, "learning_rate": 0.001, "loss": 2.1325, "step": 6056 }, { "epoch": 0.25623995261866483, "grad_norm": 3.0092291831970215, "learning_rate": 0.001, "loss": 2.6524, "step": 6057 }, { "epoch": 0.25628225738218124, "grad_norm": 0.21355977654457092, "learning_rate": 0.001, "loss": 2.2556, "step": 6058 }, { "epoch": 0.2563245621456976, "grad_norm": 0.2934946119785309, "learning_rate": 0.001, "loss": 2.8166, "step": 6059 }, { "epoch": 0.25636686690921395, "grad_norm": 0.1829323172569275, "learning_rate": 0.001, "loss": 1.8922, "step": 6060 }, { "epoch": 0.25640917167273036, "grad_norm": 0.2807469069957733, "learning_rate": 0.001, "loss": 2.3787, "step": 6061 }, { "epoch": 0.2564514764362467, "grad_norm": 0.22693084180355072, "learning_rate": 0.001, "loss": 2.1324, "step": 6062 }, { "epoch": 0.25649378119976307, "grad_norm": 0.1689741611480713, "learning_rate": 0.001, "loss": 2.1221, "step": 6063 }, { "epoch": 0.2565360859632795, "grad_norm": 1.339165449142456, "learning_rate": 0.001, "loss": 1.8906, "step": 6064 }, { "epoch": 0.25657839072679584, "grad_norm": 1.1260621547698975, "learning_rate": 0.001, "loss": 2.2244, "step": 6065 }, { "epoch": 0.2566206954903122, "grad_norm": 0.2071322202682495, "learning_rate": 0.001, "loss": 2.0834, "step": 6066 }, { "epoch": 0.2566630002538286, "grad_norm": 0.2302442193031311, "learning_rate": 0.001, "loss": 1.8294, "step": 6067 }, { "epoch": 0.25670530501734495, "grad_norm": 0.4180930554866791, "learning_rate": 0.001, "loss": 2.1456, "step": 6068 }, { "epoch": 0.2567476097808613, "grad_norm": 0.2579159736633301, "learning_rate": 0.001, "loss": 2.7813, "step": 6069 }, { "epoch": 0.2567899145443777, "grad_norm": 0.3067325949668884, "learning_rate": 0.001, "loss": 2.3948, "step": 6070 }, { "epoch": 0.25683221930789407, "grad_norm": 0.2110936939716339, "learning_rate": 0.001, "loss": 2.5168, "step": 6071 }, { "epoch": 0.2568745240714104, "grad_norm": 0.4867793023586273, "learning_rate": 0.001, "loss": 3.3979, "step": 6072 }, { "epoch": 0.25691682883492684, "grad_norm": 0.22111758589744568, "learning_rate": 0.001, "loss": 1.9811, "step": 6073 }, { "epoch": 0.2569591335984432, "grad_norm": 0.7423004508018494, "learning_rate": 0.001, "loss": 2.1127, "step": 6074 }, { "epoch": 0.25700143836195954, "grad_norm": 0.32565468549728394, "learning_rate": 0.001, "loss": 1.619, "step": 6075 }, { "epoch": 0.25704374312547595, "grad_norm": 5.436065673828125, "learning_rate": 0.001, "loss": 2.3115, "step": 6076 }, { "epoch": 0.2570860478889923, "grad_norm": 0.3309813141822815, "learning_rate": 0.001, "loss": 2.1493, "step": 6077 }, { "epoch": 0.25712835265250866, "grad_norm": 0.7553781270980835, "learning_rate": 0.001, "loss": 3.0408, "step": 6078 }, { "epoch": 0.257170657416025, "grad_norm": 0.20468690991401672, "learning_rate": 0.001, "loss": 2.1414, "step": 6079 }, { "epoch": 0.2572129621795414, "grad_norm": 3.904078483581543, "learning_rate": 0.001, "loss": 2.7939, "step": 6080 }, { "epoch": 0.2572552669430578, "grad_norm": 0.3017195165157318, "learning_rate": 0.001, "loss": 2.5183, "step": 6081 }, { "epoch": 0.25729757170657414, "grad_norm": 0.3187876343727112, "learning_rate": 0.001, "loss": 3.2043, "step": 6082 }, { "epoch": 0.25733987647009055, "grad_norm": 0.29611068964004517, "learning_rate": 0.001, "loss": 2.7729, "step": 6083 }, { "epoch": 0.2573821812336069, "grad_norm": 0.26517346501350403, "learning_rate": 0.001, "loss": 3.1814, "step": 6084 }, { "epoch": 0.25742448599712325, "grad_norm": 0.22469617426395416, "learning_rate": 0.001, "loss": 2.2341, "step": 6085 }, { "epoch": 0.25746679076063966, "grad_norm": 0.7284866571426392, "learning_rate": 0.001, "loss": 2.2107, "step": 6086 }, { "epoch": 0.257509095524156, "grad_norm": 0.35012027621269226, "learning_rate": 0.001, "loss": 2.9378, "step": 6087 }, { "epoch": 0.25755140028767237, "grad_norm": 0.23448063433170319, "learning_rate": 0.001, "loss": 2.6047, "step": 6088 }, { "epoch": 0.2575937050511888, "grad_norm": 0.185800239443779, "learning_rate": 0.001, "loss": 3.7792, "step": 6089 }, { "epoch": 0.25763600981470514, "grad_norm": 0.8078601956367493, "learning_rate": 0.001, "loss": 2.4504, "step": 6090 }, { "epoch": 0.2576783145782215, "grad_norm": 0.22115640342235565, "learning_rate": 0.001, "loss": 2.1107, "step": 6091 }, { "epoch": 0.2577206193417379, "grad_norm": 0.2682878375053406, "learning_rate": 0.001, "loss": 2.3305, "step": 6092 }, { "epoch": 0.25776292410525425, "grad_norm": 1.0689104795455933, "learning_rate": 0.001, "loss": 2.6872, "step": 6093 }, { "epoch": 0.2578052288687706, "grad_norm": 0.22226876020431519, "learning_rate": 0.001, "loss": 2.9591, "step": 6094 }, { "epoch": 0.257847533632287, "grad_norm": 0.2324611246585846, "learning_rate": 0.001, "loss": 1.431, "step": 6095 }, { "epoch": 0.2578898383958034, "grad_norm": 0.2755308449268341, "learning_rate": 0.001, "loss": 2.6437, "step": 6096 }, { "epoch": 0.2579321431593197, "grad_norm": 0.39930108189582825, "learning_rate": 0.001, "loss": 2.093, "step": 6097 }, { "epoch": 0.25797444792283614, "grad_norm": 0.5387210845947266, "learning_rate": 0.001, "loss": 4.0456, "step": 6098 }, { "epoch": 0.2580167526863525, "grad_norm": 0.5283278822898865, "learning_rate": 0.001, "loss": 2.4854, "step": 6099 }, { "epoch": 0.25805905744986884, "grad_norm": 0.313729465007782, "learning_rate": 0.001, "loss": 2.2843, "step": 6100 }, { "epoch": 0.25810136221338525, "grad_norm": 0.21948851644992828, "learning_rate": 0.001, "loss": 2.1515, "step": 6101 }, { "epoch": 0.2581436669769016, "grad_norm": 0.21910326182842255, "learning_rate": 0.001, "loss": 2.5378, "step": 6102 }, { "epoch": 0.25818597174041796, "grad_norm": 0.22086754441261292, "learning_rate": 0.001, "loss": 2.3505, "step": 6103 }, { "epoch": 0.2582282765039343, "grad_norm": 0.23505359888076782, "learning_rate": 0.001, "loss": 2.2017, "step": 6104 }, { "epoch": 0.2582705812674507, "grad_norm": 0.23428547382354736, "learning_rate": 0.001, "loss": 1.8068, "step": 6105 }, { "epoch": 0.2583128860309671, "grad_norm": 0.16871504485607147, "learning_rate": 0.001, "loss": 1.9511, "step": 6106 }, { "epoch": 0.25835519079448344, "grad_norm": 0.19622443616390228, "learning_rate": 0.001, "loss": 2.0708, "step": 6107 }, { "epoch": 0.25839749555799985, "grad_norm": 0.2113541215658188, "learning_rate": 0.001, "loss": 2.1059, "step": 6108 }, { "epoch": 0.2584398003215162, "grad_norm": 0.2662128806114197, "learning_rate": 0.001, "loss": 2.7546, "step": 6109 }, { "epoch": 0.25848210508503255, "grad_norm": 0.18668299913406372, "learning_rate": 0.001, "loss": 1.9433, "step": 6110 }, { "epoch": 0.25852440984854896, "grad_norm": 0.31358543038368225, "learning_rate": 0.001, "loss": 1.6306, "step": 6111 }, { "epoch": 0.2585667146120653, "grad_norm": 0.23096436262130737, "learning_rate": 0.001, "loss": 1.8918, "step": 6112 }, { "epoch": 0.2586090193755817, "grad_norm": 0.21152295172214508, "learning_rate": 0.001, "loss": 2.1265, "step": 6113 }, { "epoch": 0.2586513241390981, "grad_norm": 0.40861940383911133, "learning_rate": 0.001, "loss": 1.994, "step": 6114 }, { "epoch": 0.25869362890261444, "grad_norm": 0.21992965042591095, "learning_rate": 0.001, "loss": 2.5881, "step": 6115 }, { "epoch": 0.2587359336661308, "grad_norm": 0.43303796648979187, "learning_rate": 0.001, "loss": 3.6542, "step": 6116 }, { "epoch": 0.2587782384296472, "grad_norm": 0.2100251019001007, "learning_rate": 0.001, "loss": 2.3924, "step": 6117 }, { "epoch": 0.25882054319316355, "grad_norm": 0.18890580534934998, "learning_rate": 0.001, "loss": 2.7952, "step": 6118 }, { "epoch": 0.2588628479566799, "grad_norm": 2.6557726860046387, "learning_rate": 0.001, "loss": 1.9914, "step": 6119 }, { "epoch": 0.2589051527201963, "grad_norm": 0.16840679943561554, "learning_rate": 0.001, "loss": 3.2511, "step": 6120 }, { "epoch": 0.2589474574837127, "grad_norm": 0.2200114130973816, "learning_rate": 0.001, "loss": 1.7654, "step": 6121 }, { "epoch": 0.258989762247229, "grad_norm": 0.2384369820356369, "learning_rate": 0.001, "loss": 2.5738, "step": 6122 }, { "epoch": 0.25903206701074544, "grad_norm": 0.2304677963256836, "learning_rate": 0.001, "loss": 1.8705, "step": 6123 }, { "epoch": 0.2590743717742618, "grad_norm": 0.23483799397945404, "learning_rate": 0.001, "loss": 2.1473, "step": 6124 }, { "epoch": 0.25911667653777815, "grad_norm": 0.3107430040836334, "learning_rate": 0.001, "loss": 2.4566, "step": 6125 }, { "epoch": 0.2591589813012945, "grad_norm": 0.4102107882499695, "learning_rate": 0.001, "loss": 2.1083, "step": 6126 }, { "epoch": 0.2592012860648109, "grad_norm": 0.28709524869918823, "learning_rate": 0.001, "loss": 2.6238, "step": 6127 }, { "epoch": 0.25924359082832726, "grad_norm": 0.23573820292949677, "learning_rate": 0.001, "loss": 1.296, "step": 6128 }, { "epoch": 0.2592858955918436, "grad_norm": 0.19910579919815063, "learning_rate": 0.001, "loss": 3.5181, "step": 6129 }, { "epoch": 0.25932820035536003, "grad_norm": 0.2777842879295349, "learning_rate": 0.001, "loss": 2.2883, "step": 6130 }, { "epoch": 0.2593705051188764, "grad_norm": 0.2618696391582489, "learning_rate": 0.001, "loss": 2.7957, "step": 6131 }, { "epoch": 0.25941280988239274, "grad_norm": 0.23511484265327454, "learning_rate": 0.001, "loss": 2.1526, "step": 6132 }, { "epoch": 0.25945511464590915, "grad_norm": 0.36623239517211914, "learning_rate": 0.001, "loss": 1.851, "step": 6133 }, { "epoch": 0.2594974194094255, "grad_norm": 2.1750786304473877, "learning_rate": 0.001, "loss": 2.3166, "step": 6134 }, { "epoch": 0.25953972417294185, "grad_norm": 4.039839744567871, "learning_rate": 0.001, "loss": 2.4438, "step": 6135 }, { "epoch": 0.25958202893645826, "grad_norm": 0.30647915601730347, "learning_rate": 0.001, "loss": 2.255, "step": 6136 }, { "epoch": 0.2596243336999746, "grad_norm": 0.23755225539207458, "learning_rate": 0.001, "loss": 2.0286, "step": 6137 }, { "epoch": 0.259666638463491, "grad_norm": 4.9150919914245605, "learning_rate": 0.001, "loss": 2.212, "step": 6138 }, { "epoch": 0.2597089432270074, "grad_norm": 0.3311949670314789, "learning_rate": 0.001, "loss": 3.6409, "step": 6139 }, { "epoch": 0.25975124799052374, "grad_norm": 0.3053383529186249, "learning_rate": 0.001, "loss": 2.218, "step": 6140 }, { "epoch": 0.2597935527540401, "grad_norm": 0.47039172053337097, "learning_rate": 0.001, "loss": 2.3472, "step": 6141 }, { "epoch": 0.2598358575175565, "grad_norm": 0.27455034852027893, "learning_rate": 0.001, "loss": 2.3315, "step": 6142 }, { "epoch": 0.25987816228107286, "grad_norm": 0.34887149930000305, "learning_rate": 0.001, "loss": 2.2812, "step": 6143 }, { "epoch": 0.2599204670445892, "grad_norm": 0.20073916018009186, "learning_rate": 0.001, "loss": 1.674, "step": 6144 }, { "epoch": 0.2599627718081056, "grad_norm": 0.5015227794647217, "learning_rate": 0.001, "loss": 3.2171, "step": 6145 }, { "epoch": 0.260005076571622, "grad_norm": 12.801533699035645, "learning_rate": 0.001, "loss": 2.7793, "step": 6146 }, { "epoch": 0.26004738133513833, "grad_norm": 0.8890368938446045, "learning_rate": 0.001, "loss": 2.3024, "step": 6147 }, { "epoch": 0.2600896860986547, "grad_norm": 0.24711109697818756, "learning_rate": 0.001, "loss": 2.2968, "step": 6148 }, { "epoch": 0.2601319908621711, "grad_norm": 2.7126057147979736, "learning_rate": 0.001, "loss": 1.9963, "step": 6149 }, { "epoch": 0.26017429562568745, "grad_norm": 0.18857067823410034, "learning_rate": 0.001, "loss": 2.1632, "step": 6150 }, { "epoch": 0.2602166003892038, "grad_norm": 0.2537596821784973, "learning_rate": 0.001, "loss": 1.9813, "step": 6151 }, { "epoch": 0.2602589051527202, "grad_norm": 0.2444206178188324, "learning_rate": 0.001, "loss": 3.3214, "step": 6152 }, { "epoch": 0.26030120991623656, "grad_norm": 0.19725318253040314, "learning_rate": 0.001, "loss": 1.9918, "step": 6153 }, { "epoch": 0.2603435146797529, "grad_norm": 0.38273319602012634, "learning_rate": 0.001, "loss": 3.036, "step": 6154 }, { "epoch": 0.26038581944326933, "grad_norm": 1.0224730968475342, "learning_rate": 0.001, "loss": 1.9985, "step": 6155 }, { "epoch": 0.2604281242067857, "grad_norm": 0.6665830016136169, "learning_rate": 0.001, "loss": 3.0693, "step": 6156 }, { "epoch": 0.26047042897030204, "grad_norm": 0.20416006445884705, "learning_rate": 0.001, "loss": 2.3206, "step": 6157 }, { "epoch": 0.26051273373381845, "grad_norm": 0.3051769733428955, "learning_rate": 0.001, "loss": 3.7681, "step": 6158 }, { "epoch": 0.2605550384973348, "grad_norm": 0.2127608060836792, "learning_rate": 0.001, "loss": 2.4234, "step": 6159 }, { "epoch": 0.26059734326085116, "grad_norm": 0.28691282868385315, "learning_rate": 0.001, "loss": 1.7813, "step": 6160 }, { "epoch": 0.26063964802436757, "grad_norm": 2.052556276321411, "learning_rate": 0.001, "loss": 3.2356, "step": 6161 }, { "epoch": 0.2606819527878839, "grad_norm": 0.20295457541942596, "learning_rate": 0.001, "loss": 2.1718, "step": 6162 }, { "epoch": 0.2607242575514003, "grad_norm": 0.19287841022014618, "learning_rate": 0.001, "loss": 1.7287, "step": 6163 }, { "epoch": 0.2607665623149167, "grad_norm": 0.3149377405643463, "learning_rate": 0.001, "loss": 3.1882, "step": 6164 }, { "epoch": 0.26080886707843304, "grad_norm": 0.21928805112838745, "learning_rate": 0.001, "loss": 2.8839, "step": 6165 }, { "epoch": 0.2608511718419494, "grad_norm": 0.30192065238952637, "learning_rate": 0.001, "loss": 2.0906, "step": 6166 }, { "epoch": 0.2608934766054658, "grad_norm": 0.22280140221118927, "learning_rate": 0.001, "loss": 2.7337, "step": 6167 }, { "epoch": 0.26093578136898216, "grad_norm": 0.19599604606628418, "learning_rate": 0.001, "loss": 2.2517, "step": 6168 }, { "epoch": 0.2609780861324985, "grad_norm": 0.20323944091796875, "learning_rate": 0.001, "loss": 1.906, "step": 6169 }, { "epoch": 0.26102039089601486, "grad_norm": 0.5633019804954529, "learning_rate": 0.001, "loss": 2.1333, "step": 6170 }, { "epoch": 0.2610626956595313, "grad_norm": 0.20369386672973633, "learning_rate": 0.001, "loss": 1.9435, "step": 6171 }, { "epoch": 0.26110500042304763, "grad_norm": 0.37366142868995667, "learning_rate": 0.001, "loss": 2.2786, "step": 6172 }, { "epoch": 0.261147305186564, "grad_norm": 0.21656358242034912, "learning_rate": 0.001, "loss": 2.4738, "step": 6173 }, { "epoch": 0.2611896099500804, "grad_norm": 0.1992012858390808, "learning_rate": 0.001, "loss": 2.1982, "step": 6174 }, { "epoch": 0.26123191471359675, "grad_norm": 0.32912105321884155, "learning_rate": 0.001, "loss": 2.3972, "step": 6175 }, { "epoch": 0.2612742194771131, "grad_norm": 0.16803164780139923, "learning_rate": 0.001, "loss": 1.9239, "step": 6176 }, { "epoch": 0.2613165242406295, "grad_norm": 0.2005642205476761, "learning_rate": 0.001, "loss": 2.1696, "step": 6177 }, { "epoch": 0.26135882900414587, "grad_norm": 0.22105731070041656, "learning_rate": 0.001, "loss": 3.1925, "step": 6178 }, { "epoch": 0.2614011337676622, "grad_norm": 0.20076580345630646, "learning_rate": 0.001, "loss": 2.1953, "step": 6179 }, { "epoch": 0.26144343853117863, "grad_norm": 0.19129331409931183, "learning_rate": 0.001, "loss": 2.6739, "step": 6180 }, { "epoch": 0.261485743294695, "grad_norm": 0.17908786237239838, "learning_rate": 0.001, "loss": 1.8419, "step": 6181 }, { "epoch": 0.26152804805821134, "grad_norm": 0.2999635636806488, "learning_rate": 0.001, "loss": 2.5631, "step": 6182 }, { "epoch": 0.26157035282172775, "grad_norm": 0.1999271810054779, "learning_rate": 0.001, "loss": 2.2973, "step": 6183 }, { "epoch": 0.2616126575852441, "grad_norm": 0.19798393547534943, "learning_rate": 0.001, "loss": 2.4701, "step": 6184 }, { "epoch": 0.26165496234876046, "grad_norm": 0.195541650056839, "learning_rate": 0.001, "loss": 3.1255, "step": 6185 }, { "epoch": 0.26169726711227687, "grad_norm": 0.18188412487506866, "learning_rate": 0.001, "loss": 2.1795, "step": 6186 }, { "epoch": 0.2617395718757932, "grad_norm": 0.22832369804382324, "learning_rate": 0.001, "loss": 3.4992, "step": 6187 }, { "epoch": 0.2617818766393096, "grad_norm": 0.20645490288734436, "learning_rate": 0.001, "loss": 2.6053, "step": 6188 }, { "epoch": 0.261824181402826, "grad_norm": 0.20869730412960052, "learning_rate": 0.001, "loss": 2.9876, "step": 6189 }, { "epoch": 0.26186648616634234, "grad_norm": 1.8209075927734375, "learning_rate": 0.001, "loss": 1.7739, "step": 6190 }, { "epoch": 0.2619087909298587, "grad_norm": 0.24617734551429749, "learning_rate": 0.001, "loss": 2.9264, "step": 6191 }, { "epoch": 0.26195109569337505, "grad_norm": 0.5746178030967712, "learning_rate": 0.001, "loss": 2.4101, "step": 6192 }, { "epoch": 0.26199340045689146, "grad_norm": 0.3251841366291046, "learning_rate": 0.001, "loss": 1.9637, "step": 6193 }, { "epoch": 0.2620357052204078, "grad_norm": 0.19260448217391968, "learning_rate": 0.001, "loss": 2.7791, "step": 6194 }, { "epoch": 0.26207800998392417, "grad_norm": 0.1845528781414032, "learning_rate": 0.001, "loss": 2.2625, "step": 6195 }, { "epoch": 0.2621203147474406, "grad_norm": 0.230647012591362, "learning_rate": 0.001, "loss": 3.0717, "step": 6196 }, { "epoch": 0.26216261951095693, "grad_norm": 0.2902037799358368, "learning_rate": 0.001, "loss": 1.7595, "step": 6197 }, { "epoch": 0.2622049242744733, "grad_norm": 0.20350608229637146, "learning_rate": 0.001, "loss": 2.0974, "step": 6198 }, { "epoch": 0.2622472290379897, "grad_norm": 0.6259804964065552, "learning_rate": 0.001, "loss": 2.2856, "step": 6199 }, { "epoch": 0.26228953380150605, "grad_norm": 0.16990482807159424, "learning_rate": 0.001, "loss": 2.5884, "step": 6200 }, { "epoch": 0.2623318385650224, "grad_norm": 0.2967141568660736, "learning_rate": 0.001, "loss": 2.3306, "step": 6201 }, { "epoch": 0.2623741433285388, "grad_norm": 0.3053082823753357, "learning_rate": 0.001, "loss": 2.2518, "step": 6202 }, { "epoch": 0.26241644809205517, "grad_norm": 0.9330534338951111, "learning_rate": 0.001, "loss": 2.6071, "step": 6203 }, { "epoch": 0.2624587528555715, "grad_norm": 0.19737285375595093, "learning_rate": 0.001, "loss": 2.0726, "step": 6204 }, { "epoch": 0.26250105761908793, "grad_norm": 0.23269009590148926, "learning_rate": 0.001, "loss": 1.6949, "step": 6205 }, { "epoch": 0.2625433623826043, "grad_norm": 0.1986543983221054, "learning_rate": 0.001, "loss": 2.53, "step": 6206 }, { "epoch": 0.26258566714612064, "grad_norm": 1.0258678197860718, "learning_rate": 0.001, "loss": 2.4325, "step": 6207 }, { "epoch": 0.26262797190963705, "grad_norm": 0.6095445156097412, "learning_rate": 0.001, "loss": 2.5046, "step": 6208 }, { "epoch": 0.2626702766731534, "grad_norm": 1.8578591346740723, "learning_rate": 0.001, "loss": 2.3447, "step": 6209 }, { "epoch": 0.26271258143666976, "grad_norm": 5.6780619621276855, "learning_rate": 0.001, "loss": 2.2678, "step": 6210 }, { "epoch": 0.26275488620018617, "grad_norm": 0.2798073887825012, "learning_rate": 0.001, "loss": 2.0733, "step": 6211 }, { "epoch": 0.2627971909637025, "grad_norm": 1.4034373760223389, "learning_rate": 0.001, "loss": 1.8523, "step": 6212 }, { "epoch": 0.2628394957272189, "grad_norm": 0.198062464594841, "learning_rate": 0.001, "loss": 1.4889, "step": 6213 }, { "epoch": 0.26288180049073523, "grad_norm": 0.2564472556114197, "learning_rate": 0.001, "loss": 2.4133, "step": 6214 }, { "epoch": 0.26292410525425164, "grad_norm": 2.446117877960205, "learning_rate": 0.001, "loss": 1.8318, "step": 6215 }, { "epoch": 0.262966410017768, "grad_norm": 0.28336429595947266, "learning_rate": 0.001, "loss": 3.2593, "step": 6216 }, { "epoch": 0.26300871478128435, "grad_norm": 0.29390016198158264, "learning_rate": 0.001, "loss": 2.292, "step": 6217 }, { "epoch": 0.26305101954480076, "grad_norm": 0.6104854941368103, "learning_rate": 0.001, "loss": 1.9547, "step": 6218 }, { "epoch": 0.2630933243083171, "grad_norm": 1.6165142059326172, "learning_rate": 0.001, "loss": 2.2211, "step": 6219 }, { "epoch": 0.26313562907183347, "grad_norm": 2.914968252182007, "learning_rate": 0.001, "loss": 2.4817, "step": 6220 }, { "epoch": 0.2631779338353499, "grad_norm": 0.24600031971931458, "learning_rate": 0.001, "loss": 2.0624, "step": 6221 }, { "epoch": 0.26322023859886623, "grad_norm": 0.23349520564079285, "learning_rate": 0.001, "loss": 1.9119, "step": 6222 }, { "epoch": 0.2632625433623826, "grad_norm": 0.2566237151622772, "learning_rate": 0.001, "loss": 3.1059, "step": 6223 }, { "epoch": 0.263304848125899, "grad_norm": 0.27912285923957825, "learning_rate": 0.001, "loss": 2.2333, "step": 6224 }, { "epoch": 0.26334715288941535, "grad_norm": 0.2630113661289215, "learning_rate": 0.001, "loss": 2.0277, "step": 6225 }, { "epoch": 0.2633894576529317, "grad_norm": 0.34040457010269165, "learning_rate": 0.001, "loss": 2.6417, "step": 6226 }, { "epoch": 0.2634317624164481, "grad_norm": 0.7498903870582581, "learning_rate": 0.001, "loss": 2.513, "step": 6227 }, { "epoch": 0.26347406717996447, "grad_norm": 0.4051353633403778, "learning_rate": 0.001, "loss": 2.2107, "step": 6228 }, { "epoch": 0.2635163719434808, "grad_norm": 0.288908451795578, "learning_rate": 0.001, "loss": 3.0754, "step": 6229 }, { "epoch": 0.26355867670699723, "grad_norm": 0.40644702315330505, "learning_rate": 0.001, "loss": 3.2407, "step": 6230 }, { "epoch": 0.2636009814705136, "grad_norm": 0.19740119576454163, "learning_rate": 0.001, "loss": 2.0704, "step": 6231 }, { "epoch": 0.26364328623402994, "grad_norm": 0.22680628299713135, "learning_rate": 0.001, "loss": 2.6613, "step": 6232 }, { "epoch": 0.26368559099754635, "grad_norm": 0.34792256355285645, "learning_rate": 0.001, "loss": 1.6583, "step": 6233 }, { "epoch": 0.2637278957610627, "grad_norm": 0.57359379529953, "learning_rate": 0.001, "loss": 3.129, "step": 6234 }, { "epoch": 0.26377020052457906, "grad_norm": 0.3252086937427521, "learning_rate": 0.001, "loss": 4.0645, "step": 6235 }, { "epoch": 0.26381250528809547, "grad_norm": 0.7273129224777222, "learning_rate": 0.001, "loss": 2.7411, "step": 6236 }, { "epoch": 0.2638548100516118, "grad_norm": 0.22891153395175934, "learning_rate": 0.001, "loss": 2.2799, "step": 6237 }, { "epoch": 0.2638971148151282, "grad_norm": 0.4040292501449585, "learning_rate": 0.001, "loss": 2.375, "step": 6238 }, { "epoch": 0.26393941957864453, "grad_norm": 20.426698684692383, "learning_rate": 0.001, "loss": 1.8637, "step": 6239 }, { "epoch": 0.26398172434216094, "grad_norm": 0.26762643456459045, "learning_rate": 0.001, "loss": 2.1107, "step": 6240 }, { "epoch": 0.2640240291056773, "grad_norm": 1.0472038984298706, "learning_rate": 0.001, "loss": 2.1314, "step": 6241 }, { "epoch": 0.26406633386919365, "grad_norm": 0.7407483458518982, "learning_rate": 0.001, "loss": 2.1423, "step": 6242 }, { "epoch": 0.26410863863271006, "grad_norm": 0.45358023047447205, "learning_rate": 0.001, "loss": 2.017, "step": 6243 }, { "epoch": 0.2641509433962264, "grad_norm": 0.40108269453048706, "learning_rate": 0.001, "loss": 2.1408, "step": 6244 }, { "epoch": 0.26419324815974277, "grad_norm": 0.20317232608795166, "learning_rate": 0.001, "loss": 2.2763, "step": 6245 }, { "epoch": 0.2642355529232592, "grad_norm": 0.21886859834194183, "learning_rate": 0.001, "loss": 2.9445, "step": 6246 }, { "epoch": 0.26427785768677553, "grad_norm": 0.2402205467224121, "learning_rate": 0.001, "loss": 3.2153, "step": 6247 }, { "epoch": 0.2643201624502919, "grad_norm": 0.4614834487438202, "learning_rate": 0.001, "loss": 2.5112, "step": 6248 }, { "epoch": 0.2643624672138083, "grad_norm": 0.4552881717681885, "learning_rate": 0.001, "loss": 2.1616, "step": 6249 }, { "epoch": 0.26440477197732465, "grad_norm": 0.20050367712974548, "learning_rate": 0.001, "loss": 2.7497, "step": 6250 }, { "epoch": 0.264447076740841, "grad_norm": 0.21264247596263885, "learning_rate": 0.001, "loss": 2.3534, "step": 6251 }, { "epoch": 0.2644893815043574, "grad_norm": 0.9750521183013916, "learning_rate": 0.001, "loss": 1.9891, "step": 6252 }, { "epoch": 0.26453168626787377, "grad_norm": 0.7031003832817078, "learning_rate": 0.001, "loss": 3.7157, "step": 6253 }, { "epoch": 0.2645739910313901, "grad_norm": 1.4832327365875244, "learning_rate": 0.001, "loss": 2.1368, "step": 6254 }, { "epoch": 0.26461629579490653, "grad_norm": 0.6920870542526245, "learning_rate": 0.001, "loss": 2.8061, "step": 6255 }, { "epoch": 0.2646586005584229, "grad_norm": 0.5719873309135437, "learning_rate": 0.001, "loss": 2.5427, "step": 6256 }, { "epoch": 0.26470090532193924, "grad_norm": 0.23134736716747284, "learning_rate": 0.001, "loss": 2.2727, "step": 6257 }, { "epoch": 0.26474321008545565, "grad_norm": 0.1953703761100769, "learning_rate": 0.001, "loss": 1.9514, "step": 6258 }, { "epoch": 0.264785514848972, "grad_norm": 0.27613356709480286, "learning_rate": 0.001, "loss": 2.8069, "step": 6259 }, { "epoch": 0.26482781961248836, "grad_norm": 33.50883102416992, "learning_rate": 0.001, "loss": 2.0961, "step": 6260 }, { "epoch": 0.2648701243760047, "grad_norm": 0.2973005771636963, "learning_rate": 0.001, "loss": 3.3931, "step": 6261 }, { "epoch": 0.2649124291395211, "grad_norm": 1.8105061054229736, "learning_rate": 0.001, "loss": 2.3645, "step": 6262 }, { "epoch": 0.2649547339030375, "grad_norm": 0.9858757853507996, "learning_rate": 0.001, "loss": 2.1567, "step": 6263 }, { "epoch": 0.26499703866655383, "grad_norm": 0.3192642629146576, "learning_rate": 0.001, "loss": 2.5659, "step": 6264 }, { "epoch": 0.26503934343007024, "grad_norm": 0.21430915594100952, "learning_rate": 0.001, "loss": 2.5984, "step": 6265 }, { "epoch": 0.2650816481935866, "grad_norm": 0.45341306924819946, "learning_rate": 0.001, "loss": 2.8938, "step": 6266 }, { "epoch": 0.26512395295710295, "grad_norm": 0.2323470115661621, "learning_rate": 0.001, "loss": 1.904, "step": 6267 }, { "epoch": 0.26516625772061936, "grad_norm": 0.1812925934791565, "learning_rate": 0.001, "loss": 1.8184, "step": 6268 }, { "epoch": 0.2652085624841357, "grad_norm": 0.23729638755321503, "learning_rate": 0.001, "loss": 2.2054, "step": 6269 }, { "epoch": 0.26525086724765207, "grad_norm": 0.24241091310977936, "learning_rate": 0.001, "loss": 2.2426, "step": 6270 }, { "epoch": 0.2652931720111685, "grad_norm": 0.22026705741882324, "learning_rate": 0.001, "loss": 1.9184, "step": 6271 }, { "epoch": 0.26533547677468483, "grad_norm": 0.3513158857822418, "learning_rate": 0.001, "loss": 2.5154, "step": 6272 }, { "epoch": 0.2653777815382012, "grad_norm": 1.525829792022705, "learning_rate": 0.001, "loss": 1.7338, "step": 6273 }, { "epoch": 0.2654200863017176, "grad_norm": 2.88657808303833, "learning_rate": 0.001, "loss": 2.0355, "step": 6274 }, { "epoch": 0.26546239106523395, "grad_norm": 1.638237476348877, "learning_rate": 0.001, "loss": 2.125, "step": 6275 }, { "epoch": 0.2655046958287503, "grad_norm": 0.27576902508735657, "learning_rate": 0.001, "loss": 2.2267, "step": 6276 }, { "epoch": 0.2655470005922667, "grad_norm": 1.6162000894546509, "learning_rate": 0.001, "loss": 2.4581, "step": 6277 }, { "epoch": 0.26558930535578307, "grad_norm": 0.23247942328453064, "learning_rate": 0.001, "loss": 2.4351, "step": 6278 }, { "epoch": 0.2656316101192994, "grad_norm": 0.28002870082855225, "learning_rate": 0.001, "loss": 2.1524, "step": 6279 }, { "epoch": 0.26567391488281583, "grad_norm": 0.23175565898418427, "learning_rate": 0.001, "loss": 2.1924, "step": 6280 }, { "epoch": 0.2657162196463322, "grad_norm": 0.7777692675590515, "learning_rate": 0.001, "loss": 2.6283, "step": 6281 }, { "epoch": 0.26575852440984854, "grad_norm": 1.0904306173324585, "learning_rate": 0.001, "loss": 2.1067, "step": 6282 }, { "epoch": 0.2658008291733649, "grad_norm": 0.6720946431159973, "learning_rate": 0.001, "loss": 1.938, "step": 6283 }, { "epoch": 0.2658431339368813, "grad_norm": 7.82070255279541, "learning_rate": 0.001, "loss": 2.4127, "step": 6284 }, { "epoch": 0.26588543870039766, "grad_norm": 0.4037260115146637, "learning_rate": 0.001, "loss": 3.3877, "step": 6285 }, { "epoch": 0.265927743463914, "grad_norm": 1.355433702468872, "learning_rate": 0.001, "loss": 1.7886, "step": 6286 }, { "epoch": 0.2659700482274304, "grad_norm": 0.42245426774024963, "learning_rate": 0.001, "loss": 3.5764, "step": 6287 }, { "epoch": 0.2660123529909468, "grad_norm": 0.24568170309066772, "learning_rate": 0.001, "loss": 1.8148, "step": 6288 }, { "epoch": 0.26605465775446313, "grad_norm": 6.263239860534668, "learning_rate": 0.001, "loss": 1.8413, "step": 6289 }, { "epoch": 0.26609696251797954, "grad_norm": 0.2733914256095886, "learning_rate": 0.001, "loss": 2.3739, "step": 6290 }, { "epoch": 0.2661392672814959, "grad_norm": 0.30786386132240295, "learning_rate": 0.001, "loss": 2.0434, "step": 6291 }, { "epoch": 0.26618157204501225, "grad_norm": 0.29940205812454224, "learning_rate": 0.001, "loss": 2.5419, "step": 6292 }, { "epoch": 0.26622387680852866, "grad_norm": 0.43194735050201416, "learning_rate": 0.001, "loss": 2.0004, "step": 6293 }, { "epoch": 0.266266181572045, "grad_norm": 0.3845593333244324, "learning_rate": 0.001, "loss": 2.1427, "step": 6294 }, { "epoch": 0.26630848633556137, "grad_norm": 4.627384185791016, "learning_rate": 0.001, "loss": 3.1407, "step": 6295 }, { "epoch": 0.2663507910990778, "grad_norm": 0.3965384066104889, "learning_rate": 0.001, "loss": 2.1406, "step": 6296 }, { "epoch": 0.26639309586259413, "grad_norm": 2.6226112842559814, "learning_rate": 0.001, "loss": 1.7126, "step": 6297 }, { "epoch": 0.2664354006261105, "grad_norm": 1.5446035861968994, "learning_rate": 0.001, "loss": 1.9526, "step": 6298 }, { "epoch": 0.2664777053896269, "grad_norm": 2.2716145515441895, "learning_rate": 0.001, "loss": 2.4368, "step": 6299 }, { "epoch": 0.26652001015314325, "grad_norm": 0.33402782678604126, "learning_rate": 0.001, "loss": 1.8137, "step": 6300 }, { "epoch": 0.2665623149166596, "grad_norm": 0.9563067555427551, "learning_rate": 0.001, "loss": 2.6016, "step": 6301 }, { "epoch": 0.266604619680176, "grad_norm": 0.3132352828979492, "learning_rate": 0.001, "loss": 2.9526, "step": 6302 }, { "epoch": 0.26664692444369237, "grad_norm": 0.6881158351898193, "learning_rate": 0.001, "loss": 2.4045, "step": 6303 }, { "epoch": 0.2666892292072087, "grad_norm": 1.5459450483322144, "learning_rate": 0.001, "loss": 2.9012, "step": 6304 }, { "epoch": 0.2667315339707251, "grad_norm": 0.40609946846961975, "learning_rate": 0.001, "loss": 1.9667, "step": 6305 }, { "epoch": 0.2667738387342415, "grad_norm": 0.35249534249305725, "learning_rate": 0.001, "loss": 3.4398, "step": 6306 }, { "epoch": 0.26681614349775784, "grad_norm": 0.981610894203186, "learning_rate": 0.001, "loss": 3.0861, "step": 6307 }, { "epoch": 0.2668584482612742, "grad_norm": 0.530106246471405, "learning_rate": 0.001, "loss": 2.5613, "step": 6308 }, { "epoch": 0.2669007530247906, "grad_norm": 2.370054244995117, "learning_rate": 0.001, "loss": 2.989, "step": 6309 }, { "epoch": 0.26694305778830696, "grad_norm": 0.7922685742378235, "learning_rate": 0.001, "loss": 2.8242, "step": 6310 }, { "epoch": 0.2669853625518233, "grad_norm": 0.472288578748703, "learning_rate": 0.001, "loss": 2.9797, "step": 6311 }, { "epoch": 0.2670276673153397, "grad_norm": 7.536009788513184, "learning_rate": 0.001, "loss": 3.7025, "step": 6312 }, { "epoch": 0.2670699720788561, "grad_norm": 0.35234659910202026, "learning_rate": 0.001, "loss": 2.187, "step": 6313 }, { "epoch": 0.26711227684237243, "grad_norm": 0.45496541261672974, "learning_rate": 0.001, "loss": 2.8294, "step": 6314 }, { "epoch": 0.26715458160588884, "grad_norm": 0.4241243302822113, "learning_rate": 0.001, "loss": 2.8042, "step": 6315 }, { "epoch": 0.2671968863694052, "grad_norm": 0.49575382471084595, "learning_rate": 0.001, "loss": 2.7069, "step": 6316 }, { "epoch": 0.26723919113292155, "grad_norm": 0.32094845175743103, "learning_rate": 0.001, "loss": 2.2842, "step": 6317 }, { "epoch": 0.26728149589643796, "grad_norm": 2.19527268409729, "learning_rate": 0.001, "loss": 3.9412, "step": 6318 }, { "epoch": 0.2673238006599543, "grad_norm": 0.24995988607406616, "learning_rate": 0.001, "loss": 2.4964, "step": 6319 }, { "epoch": 0.26736610542347067, "grad_norm": 4.221808433532715, "learning_rate": 0.001, "loss": 2.6365, "step": 6320 }, { "epoch": 0.2674084101869871, "grad_norm": 0.434870183467865, "learning_rate": 0.001, "loss": 2.3938, "step": 6321 }, { "epoch": 0.26745071495050343, "grad_norm": 0.32494959235191345, "learning_rate": 0.001, "loss": 3.9073, "step": 6322 }, { "epoch": 0.2674930197140198, "grad_norm": 0.314816951751709, "learning_rate": 0.001, "loss": 3.404, "step": 6323 }, { "epoch": 0.2675353244775362, "grad_norm": 0.3015631139278412, "learning_rate": 0.001, "loss": 2.1681, "step": 6324 }, { "epoch": 0.26757762924105255, "grad_norm": 0.23250603675842285, "learning_rate": 0.001, "loss": 2.4507, "step": 6325 }, { "epoch": 0.2676199340045689, "grad_norm": 0.77018141746521, "learning_rate": 0.001, "loss": 3.565, "step": 6326 }, { "epoch": 0.26766223876808526, "grad_norm": 2.2710511684417725, "learning_rate": 0.001, "loss": 2.027, "step": 6327 }, { "epoch": 0.26770454353160167, "grad_norm": 0.3492465317249298, "learning_rate": 0.001, "loss": 3.2861, "step": 6328 }, { "epoch": 0.267746848295118, "grad_norm": 2.129352331161499, "learning_rate": 0.001, "loss": 1.7412, "step": 6329 }, { "epoch": 0.2677891530586344, "grad_norm": 0.8317650556564331, "learning_rate": 0.001, "loss": 3.3198, "step": 6330 }, { "epoch": 0.2678314578221508, "grad_norm": 0.3606610894203186, "learning_rate": 0.001, "loss": 3.1525, "step": 6331 }, { "epoch": 0.26787376258566714, "grad_norm": 0.2575969696044922, "learning_rate": 0.001, "loss": 1.9891, "step": 6332 }, { "epoch": 0.2679160673491835, "grad_norm": 0.26098647713661194, "learning_rate": 0.001, "loss": 2.6293, "step": 6333 }, { "epoch": 0.2679583721126999, "grad_norm": 0.30769988894462585, "learning_rate": 0.001, "loss": 2.5751, "step": 6334 }, { "epoch": 0.26800067687621626, "grad_norm": 0.9987971186637878, "learning_rate": 0.001, "loss": 2.2541, "step": 6335 }, { "epoch": 0.2680429816397326, "grad_norm": 0.20395010709762573, "learning_rate": 0.001, "loss": 1.7506, "step": 6336 }, { "epoch": 0.268085286403249, "grad_norm": 0.23569338023662567, "learning_rate": 0.001, "loss": 1.9746, "step": 6337 }, { "epoch": 0.2681275911667654, "grad_norm": 0.3824636936187744, "learning_rate": 0.001, "loss": 3.2225, "step": 6338 }, { "epoch": 0.26816989593028173, "grad_norm": 0.46429404616355896, "learning_rate": 0.001, "loss": 3.0844, "step": 6339 }, { "epoch": 0.26821220069379814, "grad_norm": 0.6086142063140869, "learning_rate": 0.001, "loss": 3.0908, "step": 6340 }, { "epoch": 0.2682545054573145, "grad_norm": 0.24936416745185852, "learning_rate": 0.001, "loss": 2.1276, "step": 6341 }, { "epoch": 0.26829681022083085, "grad_norm": 0.41220951080322266, "learning_rate": 0.001, "loss": 2.7437, "step": 6342 }, { "epoch": 0.26833911498434726, "grad_norm": 0.2830599546432495, "learning_rate": 0.001, "loss": 2.2482, "step": 6343 }, { "epoch": 0.2683814197478636, "grad_norm": 0.2458266317844391, "learning_rate": 0.001, "loss": 2.1775, "step": 6344 }, { "epoch": 0.26842372451137997, "grad_norm": 0.33611828088760376, "learning_rate": 0.001, "loss": 2.4973, "step": 6345 }, { "epoch": 0.2684660292748964, "grad_norm": 0.7442197799682617, "learning_rate": 0.001, "loss": 3.3098, "step": 6346 }, { "epoch": 0.26850833403841273, "grad_norm": 0.23901230096817017, "learning_rate": 0.001, "loss": 2.303, "step": 6347 }, { "epoch": 0.2685506388019291, "grad_norm": 0.22979353368282318, "learning_rate": 0.001, "loss": 1.7865, "step": 6348 }, { "epoch": 0.2685929435654455, "grad_norm": 0.3396184742450714, "learning_rate": 0.001, "loss": 3.8322, "step": 6349 }, { "epoch": 0.26863524832896185, "grad_norm": 0.2517753541469574, "learning_rate": 0.001, "loss": 2.2335, "step": 6350 }, { "epoch": 0.2686775530924782, "grad_norm": 0.4144297242164612, "learning_rate": 0.001, "loss": 3.4627, "step": 6351 }, { "epoch": 0.26871985785599456, "grad_norm": 0.891348123550415, "learning_rate": 0.001, "loss": 2.9963, "step": 6352 }, { "epoch": 0.26876216261951097, "grad_norm": 0.18582716584205627, "learning_rate": 0.001, "loss": 2.2934, "step": 6353 }, { "epoch": 0.2688044673830273, "grad_norm": 0.27463603019714355, "learning_rate": 0.001, "loss": 2.0024, "step": 6354 }, { "epoch": 0.2688467721465437, "grad_norm": 7.595641613006592, "learning_rate": 0.001, "loss": 2.3449, "step": 6355 }, { "epoch": 0.2688890769100601, "grad_norm": 1.0186480283737183, "learning_rate": 0.001, "loss": 2.4801, "step": 6356 }, { "epoch": 0.26893138167357644, "grad_norm": 0.25770995020866394, "learning_rate": 0.001, "loss": 3.5221, "step": 6357 }, { "epoch": 0.2689736864370928, "grad_norm": 0.5055752396583557, "learning_rate": 0.001, "loss": 3.0373, "step": 6358 }, { "epoch": 0.2690159912006092, "grad_norm": 0.25883978605270386, "learning_rate": 0.001, "loss": 2.6142, "step": 6359 }, { "epoch": 0.26905829596412556, "grad_norm": 0.2824064791202545, "learning_rate": 0.001, "loss": 2.4801, "step": 6360 }, { "epoch": 0.2691006007276419, "grad_norm": 0.3055369555950165, "learning_rate": 0.001, "loss": 2.5225, "step": 6361 }, { "epoch": 0.2691429054911583, "grad_norm": 0.27755987644195557, "learning_rate": 0.001, "loss": 2.5036, "step": 6362 }, { "epoch": 0.2691852102546747, "grad_norm": 0.24744181334972382, "learning_rate": 0.001, "loss": 1.9392, "step": 6363 }, { "epoch": 0.26922751501819103, "grad_norm": 0.23263895511627197, "learning_rate": 0.001, "loss": 1.7806, "step": 6364 }, { "epoch": 0.26926981978170744, "grad_norm": 0.30707335472106934, "learning_rate": 0.001, "loss": 2.3249, "step": 6365 }, { "epoch": 0.2693121245452238, "grad_norm": 0.17739908397197723, "learning_rate": 0.001, "loss": 1.8609, "step": 6366 }, { "epoch": 0.26935442930874015, "grad_norm": 0.299203097820282, "learning_rate": 0.001, "loss": 2.6187, "step": 6367 }, { "epoch": 0.26939673407225656, "grad_norm": 0.27614837884902954, "learning_rate": 0.001, "loss": 2.5299, "step": 6368 }, { "epoch": 0.2694390388357729, "grad_norm": 1.2516486644744873, "learning_rate": 0.001, "loss": 2.1565, "step": 6369 }, { "epoch": 0.26948134359928927, "grad_norm": 0.43937593698501587, "learning_rate": 0.001, "loss": 3.1544, "step": 6370 }, { "epoch": 0.2695236483628057, "grad_norm": 0.272873193025589, "learning_rate": 0.001, "loss": 2.6493, "step": 6371 }, { "epoch": 0.26956595312632203, "grad_norm": 0.3647904694080353, "learning_rate": 0.001, "loss": 2.4063, "step": 6372 }, { "epoch": 0.2696082578898384, "grad_norm": 1.3242371082305908, "learning_rate": 0.001, "loss": 3.4809, "step": 6373 }, { "epoch": 0.26965056265335474, "grad_norm": 0.23816105723381042, "learning_rate": 0.001, "loss": 1.8286, "step": 6374 }, { "epoch": 0.26969286741687115, "grad_norm": 0.22475877404212952, "learning_rate": 0.001, "loss": 2.1492, "step": 6375 }, { "epoch": 0.2697351721803875, "grad_norm": 0.33347398042678833, "learning_rate": 0.001, "loss": 2.8582, "step": 6376 }, { "epoch": 0.26977747694390386, "grad_norm": 0.22079631686210632, "learning_rate": 0.001, "loss": 2.6284, "step": 6377 }, { "epoch": 0.26981978170742027, "grad_norm": 0.24591165781021118, "learning_rate": 0.001, "loss": 2.1861, "step": 6378 }, { "epoch": 0.2698620864709366, "grad_norm": 0.25623226165771484, "learning_rate": 0.001, "loss": 2.7529, "step": 6379 }, { "epoch": 0.269904391234453, "grad_norm": 0.3840174078941345, "learning_rate": 0.001, "loss": 2.0479, "step": 6380 }, { "epoch": 0.2699466959979694, "grad_norm": 0.29531073570251465, "learning_rate": 0.001, "loss": 2.1461, "step": 6381 }, { "epoch": 0.26998900076148574, "grad_norm": 0.22676776349544525, "learning_rate": 0.001, "loss": 2.2194, "step": 6382 }, { "epoch": 0.2700313055250021, "grad_norm": 0.233690544962883, "learning_rate": 0.001, "loss": 1.6935, "step": 6383 }, { "epoch": 0.2700736102885185, "grad_norm": 0.5040211081504822, "learning_rate": 0.001, "loss": 3.0911, "step": 6384 }, { "epoch": 0.27011591505203486, "grad_norm": 0.4640330374240875, "learning_rate": 0.001, "loss": 1.8026, "step": 6385 }, { "epoch": 0.2701582198155512, "grad_norm": 0.21546930074691772, "learning_rate": 0.001, "loss": 3.1936, "step": 6386 }, { "epoch": 0.2702005245790676, "grad_norm": 0.22518888115882874, "learning_rate": 0.001, "loss": 2.1559, "step": 6387 }, { "epoch": 0.270242829342584, "grad_norm": 0.3616427183151245, "learning_rate": 0.001, "loss": 2.9413, "step": 6388 }, { "epoch": 0.27028513410610033, "grad_norm": 0.22333014011383057, "learning_rate": 0.001, "loss": 2.1103, "step": 6389 }, { "epoch": 0.27032743886961674, "grad_norm": 3.191633462905884, "learning_rate": 0.001, "loss": 2.508, "step": 6390 }, { "epoch": 0.2703697436331331, "grad_norm": 0.24529823660850525, "learning_rate": 0.001, "loss": 2.4031, "step": 6391 }, { "epoch": 0.27041204839664945, "grad_norm": 0.21744905412197113, "learning_rate": 0.001, "loss": 3.51, "step": 6392 }, { "epoch": 0.27045435316016586, "grad_norm": 0.422944575548172, "learning_rate": 0.001, "loss": 3.0605, "step": 6393 }, { "epoch": 0.2704966579236822, "grad_norm": 0.29786035418510437, "learning_rate": 0.001, "loss": 2.2543, "step": 6394 }, { "epoch": 0.27053896268719857, "grad_norm": 0.5101473927497864, "learning_rate": 0.001, "loss": 3.697, "step": 6395 }, { "epoch": 0.2705812674507149, "grad_norm": 0.2863130569458008, "learning_rate": 0.001, "loss": 2.2927, "step": 6396 }, { "epoch": 0.27062357221423133, "grad_norm": 0.21719864010810852, "learning_rate": 0.001, "loss": 1.9034, "step": 6397 }, { "epoch": 0.2706658769777477, "grad_norm": 0.5843220353126526, "learning_rate": 0.001, "loss": 2.0929, "step": 6398 }, { "epoch": 0.27070818174126404, "grad_norm": 0.2397383600473404, "learning_rate": 0.001, "loss": 1.5715, "step": 6399 }, { "epoch": 0.27075048650478045, "grad_norm": 0.29469746351242065, "learning_rate": 0.001, "loss": 3.1697, "step": 6400 }, { "epoch": 0.2707927912682968, "grad_norm": 0.4289894998073578, "learning_rate": 0.001, "loss": 2.8917, "step": 6401 }, { "epoch": 0.27083509603181316, "grad_norm": 0.40637311339378357, "learning_rate": 0.001, "loss": 2.5034, "step": 6402 }, { "epoch": 0.27087740079532957, "grad_norm": 1.2302122116088867, "learning_rate": 0.001, "loss": 2.5266, "step": 6403 }, { "epoch": 0.2709197055588459, "grad_norm": 0.7081871628761292, "learning_rate": 0.001, "loss": 2.5219, "step": 6404 }, { "epoch": 0.2709620103223623, "grad_norm": 0.1945783644914627, "learning_rate": 0.001, "loss": 2.5327, "step": 6405 }, { "epoch": 0.2710043150858787, "grad_norm": 2.336282968521118, "learning_rate": 0.001, "loss": 2.8066, "step": 6406 }, { "epoch": 0.27104661984939504, "grad_norm": 0.2778642177581787, "learning_rate": 0.001, "loss": 2.8345, "step": 6407 }, { "epoch": 0.2710889246129114, "grad_norm": 0.3541242778301239, "learning_rate": 0.001, "loss": 2.5382, "step": 6408 }, { "epoch": 0.2711312293764278, "grad_norm": 3.476750373840332, "learning_rate": 0.001, "loss": 2.3818, "step": 6409 }, { "epoch": 0.27117353413994416, "grad_norm": 0.2823469936847687, "learning_rate": 0.001, "loss": 2.2147, "step": 6410 }, { "epoch": 0.2712158389034605, "grad_norm": 0.2130715698003769, "learning_rate": 0.001, "loss": 2.3666, "step": 6411 }, { "epoch": 0.2712581436669769, "grad_norm": 0.210503488779068, "learning_rate": 0.001, "loss": 1.7467, "step": 6412 }, { "epoch": 0.2713004484304933, "grad_norm": 0.22151534259319305, "learning_rate": 0.001, "loss": 2.9495, "step": 6413 }, { "epoch": 0.27134275319400963, "grad_norm": 0.24885092675685883, "learning_rate": 0.001, "loss": 2.2498, "step": 6414 }, { "epoch": 0.27138505795752604, "grad_norm": 0.23562543094158173, "learning_rate": 0.001, "loss": 2.2867, "step": 6415 }, { "epoch": 0.2714273627210424, "grad_norm": 0.20105452835559845, "learning_rate": 0.001, "loss": 2.7994, "step": 6416 }, { "epoch": 0.27146966748455875, "grad_norm": 0.2746374011039734, "learning_rate": 0.001, "loss": 1.9023, "step": 6417 }, { "epoch": 0.2715119722480751, "grad_norm": 1.5132687091827393, "learning_rate": 0.001, "loss": 1.8792, "step": 6418 }, { "epoch": 0.2715542770115915, "grad_norm": 0.24032434821128845, "learning_rate": 0.001, "loss": 1.9926, "step": 6419 }, { "epoch": 0.27159658177510787, "grad_norm": 0.37214019894599915, "learning_rate": 0.001, "loss": 2.6278, "step": 6420 }, { "epoch": 0.2716388865386242, "grad_norm": 0.2632519006729126, "learning_rate": 0.001, "loss": 2.4603, "step": 6421 }, { "epoch": 0.27168119130214063, "grad_norm": 1.0849531888961792, "learning_rate": 0.001, "loss": 2.8918, "step": 6422 }, { "epoch": 0.271723496065657, "grad_norm": 3.291983127593994, "learning_rate": 0.001, "loss": 2.1872, "step": 6423 }, { "epoch": 0.27176580082917334, "grad_norm": 0.26407113671302795, "learning_rate": 0.001, "loss": 1.9061, "step": 6424 }, { "epoch": 0.27180810559268975, "grad_norm": 0.2467721402645111, "learning_rate": 0.001, "loss": 2.1413, "step": 6425 }, { "epoch": 0.2718504103562061, "grad_norm": 2.2249884605407715, "learning_rate": 0.001, "loss": 2.9308, "step": 6426 }, { "epoch": 0.27189271511972246, "grad_norm": 0.866671621799469, "learning_rate": 0.001, "loss": 2.4567, "step": 6427 }, { "epoch": 0.27193501988323887, "grad_norm": 0.32258787751197815, "learning_rate": 0.001, "loss": 3.198, "step": 6428 }, { "epoch": 0.2719773246467552, "grad_norm": 2.03613018989563, "learning_rate": 0.001, "loss": 3.3261, "step": 6429 }, { "epoch": 0.2720196294102716, "grad_norm": 0.9766329526901245, "learning_rate": 0.001, "loss": 2.2824, "step": 6430 }, { "epoch": 0.272061934173788, "grad_norm": 0.31005382537841797, "learning_rate": 0.001, "loss": 3.2165, "step": 6431 }, { "epoch": 0.27210423893730434, "grad_norm": 0.32901322841644287, "learning_rate": 0.001, "loss": 1.9921, "step": 6432 }, { "epoch": 0.2721465437008207, "grad_norm": 0.24402910470962524, "learning_rate": 0.001, "loss": 3.7147, "step": 6433 }, { "epoch": 0.2721888484643371, "grad_norm": 0.24346059560775757, "learning_rate": 0.001, "loss": 2.2568, "step": 6434 }, { "epoch": 0.27223115322785346, "grad_norm": 0.27896568179130554, "learning_rate": 0.001, "loss": 2.3955, "step": 6435 }, { "epoch": 0.2722734579913698, "grad_norm": 0.7607430219650269, "learning_rate": 0.001, "loss": 2.6034, "step": 6436 }, { "epoch": 0.2723157627548862, "grad_norm": 0.5905636548995972, "learning_rate": 0.001, "loss": 2.1869, "step": 6437 }, { "epoch": 0.2723580675184026, "grad_norm": 0.24976111948490143, "learning_rate": 0.001, "loss": 2.6681, "step": 6438 }, { "epoch": 0.27240037228191893, "grad_norm": 0.4961627423763275, "learning_rate": 0.001, "loss": 3.1772, "step": 6439 }, { "epoch": 0.2724426770454353, "grad_norm": 0.23998108506202698, "learning_rate": 0.001, "loss": 2.0783, "step": 6440 }, { "epoch": 0.2724849818089517, "grad_norm": 1.2053577899932861, "learning_rate": 0.001, "loss": 3.2813, "step": 6441 }, { "epoch": 0.27252728657246805, "grad_norm": 0.2748907804489136, "learning_rate": 0.001, "loss": 2.2498, "step": 6442 }, { "epoch": 0.2725695913359844, "grad_norm": 0.31637445092201233, "learning_rate": 0.001, "loss": 2.2979, "step": 6443 }, { "epoch": 0.2726118960995008, "grad_norm": 1.9223753213882446, "learning_rate": 0.001, "loss": 2.6186, "step": 6444 }, { "epoch": 0.27265420086301717, "grad_norm": 0.6588495969772339, "learning_rate": 0.001, "loss": 2.2728, "step": 6445 }, { "epoch": 0.2726965056265335, "grad_norm": 4.742616176605225, "learning_rate": 0.001, "loss": 2.0405, "step": 6446 }, { "epoch": 0.27273881039004993, "grad_norm": 0.25673648715019226, "learning_rate": 0.001, "loss": 2.5662, "step": 6447 }, { "epoch": 0.2727811151535663, "grad_norm": 0.22692641615867615, "learning_rate": 0.001, "loss": 1.8061, "step": 6448 }, { "epoch": 0.27282341991708264, "grad_norm": 0.33545932173728943, "learning_rate": 0.001, "loss": 1.6433, "step": 6449 }, { "epoch": 0.27286572468059905, "grad_norm": 0.2357272207736969, "learning_rate": 0.001, "loss": 2.672, "step": 6450 }, { "epoch": 0.2729080294441154, "grad_norm": 0.28300443291664124, "learning_rate": 0.001, "loss": 2.1577, "step": 6451 }, { "epoch": 0.27295033420763176, "grad_norm": 0.2952788472175598, "learning_rate": 0.001, "loss": 2.1633, "step": 6452 }, { "epoch": 0.27299263897114817, "grad_norm": 0.2804337441921234, "learning_rate": 0.001, "loss": 1.9271, "step": 6453 }, { "epoch": 0.2730349437346645, "grad_norm": 0.24086658656597137, "learning_rate": 0.001, "loss": 2.162, "step": 6454 }, { "epoch": 0.2730772484981809, "grad_norm": 0.26707324385643005, "learning_rate": 0.001, "loss": 2.1178, "step": 6455 }, { "epoch": 0.2731195532616973, "grad_norm": 0.26330190896987915, "learning_rate": 0.001, "loss": 2.5571, "step": 6456 }, { "epoch": 0.27316185802521364, "grad_norm": 1.0483336448669434, "learning_rate": 0.001, "loss": 2.2618, "step": 6457 }, { "epoch": 0.27320416278873, "grad_norm": 0.28875404596328735, "learning_rate": 0.001, "loss": 2.8507, "step": 6458 }, { "epoch": 0.2732464675522464, "grad_norm": 0.3092382550239563, "learning_rate": 0.001, "loss": 2.0755, "step": 6459 }, { "epoch": 0.27328877231576276, "grad_norm": 0.24076971411705017, "learning_rate": 0.001, "loss": 2.2558, "step": 6460 }, { "epoch": 0.2733310770792791, "grad_norm": 0.26437196135520935, "learning_rate": 0.001, "loss": 3.098, "step": 6461 }, { "epoch": 0.2733733818427955, "grad_norm": 0.2675298750400543, "learning_rate": 0.001, "loss": 3.5501, "step": 6462 }, { "epoch": 0.2734156866063119, "grad_norm": 0.3043130338191986, "learning_rate": 0.001, "loss": 2.3021, "step": 6463 }, { "epoch": 0.27345799136982823, "grad_norm": 1.052797555923462, "learning_rate": 0.001, "loss": 2.6146, "step": 6464 }, { "epoch": 0.2735002961333446, "grad_norm": 0.23038017749786377, "learning_rate": 0.001, "loss": 1.6309, "step": 6465 }, { "epoch": 0.273542600896861, "grad_norm": 0.3373502194881439, "learning_rate": 0.001, "loss": 2.1514, "step": 6466 }, { "epoch": 0.27358490566037735, "grad_norm": 0.23279638588428497, "learning_rate": 0.001, "loss": 2.5891, "step": 6467 }, { "epoch": 0.2736272104238937, "grad_norm": 0.507875919342041, "learning_rate": 0.001, "loss": 2.5316, "step": 6468 }, { "epoch": 0.2736695151874101, "grad_norm": 0.21502485871315002, "learning_rate": 0.001, "loss": 2.2808, "step": 6469 }, { "epoch": 0.27371181995092647, "grad_norm": 0.19854310154914856, "learning_rate": 0.001, "loss": 3.3566, "step": 6470 }, { "epoch": 0.2737541247144428, "grad_norm": 0.2249358594417572, "learning_rate": 0.001, "loss": 1.8404, "step": 6471 }, { "epoch": 0.27379642947795924, "grad_norm": 0.2288307249546051, "learning_rate": 0.001, "loss": 1.924, "step": 6472 }, { "epoch": 0.2738387342414756, "grad_norm": 0.24865660071372986, "learning_rate": 0.001, "loss": 2.3673, "step": 6473 }, { "epoch": 0.27388103900499194, "grad_norm": 0.24267597496509552, "learning_rate": 0.001, "loss": 1.9281, "step": 6474 }, { "epoch": 0.27392334376850835, "grad_norm": 0.2626087963581085, "learning_rate": 0.001, "loss": 2.4203, "step": 6475 }, { "epoch": 0.2739656485320247, "grad_norm": 0.8168473839759827, "learning_rate": 0.001, "loss": 2.6164, "step": 6476 }, { "epoch": 0.27400795329554106, "grad_norm": 0.18421576917171478, "learning_rate": 0.001, "loss": 1.9259, "step": 6477 }, { "epoch": 0.27405025805905747, "grad_norm": 0.2525613307952881, "learning_rate": 0.001, "loss": 2.3877, "step": 6478 }, { "epoch": 0.2740925628225738, "grad_norm": 0.20929360389709473, "learning_rate": 0.001, "loss": 1.6114, "step": 6479 }, { "epoch": 0.2741348675860902, "grad_norm": 0.4461418688297272, "learning_rate": 0.001, "loss": 2.563, "step": 6480 }, { "epoch": 0.2741771723496066, "grad_norm": 0.2533968389034271, "learning_rate": 0.001, "loss": 2.7533, "step": 6481 }, { "epoch": 0.27421947711312294, "grad_norm": 1.817432165145874, "learning_rate": 0.001, "loss": 2.2708, "step": 6482 }, { "epoch": 0.2742617818766393, "grad_norm": 1.1035431623458862, "learning_rate": 0.001, "loss": 2.2485, "step": 6483 }, { "epoch": 0.2743040866401557, "grad_norm": 0.4007866680622101, "learning_rate": 0.001, "loss": 2.566, "step": 6484 }, { "epoch": 0.27434639140367206, "grad_norm": 0.569590151309967, "learning_rate": 0.001, "loss": 1.9645, "step": 6485 }, { "epoch": 0.2743886961671884, "grad_norm": 0.2620263695716858, "learning_rate": 0.001, "loss": 2.1197, "step": 6486 }, { "epoch": 0.27443100093070477, "grad_norm": 0.6177977323532104, "learning_rate": 0.001, "loss": 2.6985, "step": 6487 }, { "epoch": 0.2744733056942212, "grad_norm": 0.22808204591274261, "learning_rate": 0.001, "loss": 3.8149, "step": 6488 }, { "epoch": 0.27451561045773754, "grad_norm": 0.2721755802631378, "learning_rate": 0.001, "loss": 2.389, "step": 6489 }, { "epoch": 0.2745579152212539, "grad_norm": 0.35955166816711426, "learning_rate": 0.001, "loss": 2.1531, "step": 6490 }, { "epoch": 0.2746002199847703, "grad_norm": 0.2631028890609741, "learning_rate": 0.001, "loss": 1.944, "step": 6491 }, { "epoch": 0.27464252474828665, "grad_norm": 0.40836191177368164, "learning_rate": 0.001, "loss": 2.219, "step": 6492 }, { "epoch": 0.274684829511803, "grad_norm": 0.29734811186790466, "learning_rate": 0.001, "loss": 2.4754, "step": 6493 }, { "epoch": 0.2747271342753194, "grad_norm": 0.3713732063770294, "learning_rate": 0.001, "loss": 3.1679, "step": 6494 }, { "epoch": 0.27476943903883577, "grad_norm": 0.23647406697273254, "learning_rate": 0.001, "loss": 2.3628, "step": 6495 }, { "epoch": 0.2748117438023521, "grad_norm": 0.19687795639038086, "learning_rate": 0.001, "loss": 2.8735, "step": 6496 }, { "epoch": 0.27485404856586854, "grad_norm": 2.3540210723876953, "learning_rate": 0.001, "loss": 2.1741, "step": 6497 }, { "epoch": 0.2748963533293849, "grad_norm": 0.5432139039039612, "learning_rate": 0.001, "loss": 2.0557, "step": 6498 }, { "epoch": 0.27493865809290124, "grad_norm": 0.4527101516723633, "learning_rate": 0.001, "loss": 3.2012, "step": 6499 }, { "epoch": 0.27498096285641765, "grad_norm": 0.22198386490345, "learning_rate": 0.001, "loss": 2.1226, "step": 6500 }, { "epoch": 0.275023267619934, "grad_norm": 0.2468535304069519, "learning_rate": 0.001, "loss": 2.4435, "step": 6501 }, { "epoch": 0.27506557238345036, "grad_norm": 0.22474850714206696, "learning_rate": 0.001, "loss": 2.0643, "step": 6502 }, { "epoch": 0.2751078771469668, "grad_norm": 0.38860535621643066, "learning_rate": 0.001, "loss": 2.0566, "step": 6503 }, { "epoch": 0.2751501819104831, "grad_norm": 0.4149872064590454, "learning_rate": 0.001, "loss": 2.1151, "step": 6504 }, { "epoch": 0.2751924866739995, "grad_norm": 0.8003644943237305, "learning_rate": 0.001, "loss": 3.1268, "step": 6505 }, { "epoch": 0.2752347914375159, "grad_norm": 0.2186277061700821, "learning_rate": 0.001, "loss": 1.8286, "step": 6506 }, { "epoch": 0.27527709620103225, "grad_norm": 0.2277793288230896, "learning_rate": 0.001, "loss": 2.1968, "step": 6507 }, { "epoch": 0.2753194009645486, "grad_norm": 0.2010040283203125, "learning_rate": 0.001, "loss": 2.1456, "step": 6508 }, { "epoch": 0.27536170572806495, "grad_norm": 0.9116567373275757, "learning_rate": 0.001, "loss": 2.6635, "step": 6509 }, { "epoch": 0.27540401049158136, "grad_norm": 0.20740988850593567, "learning_rate": 0.001, "loss": 2.2698, "step": 6510 }, { "epoch": 0.2754463152550977, "grad_norm": 0.22273097932338715, "learning_rate": 0.001, "loss": 2.3287, "step": 6511 }, { "epoch": 0.27548862001861407, "grad_norm": 0.28017768263816833, "learning_rate": 0.001, "loss": 2.3398, "step": 6512 }, { "epoch": 0.2755309247821305, "grad_norm": 0.19713011384010315, "learning_rate": 0.001, "loss": 1.8994, "step": 6513 }, { "epoch": 0.27557322954564684, "grad_norm": 0.19678883254528046, "learning_rate": 0.001, "loss": 2.0113, "step": 6514 }, { "epoch": 0.2756155343091632, "grad_norm": 25.500926971435547, "learning_rate": 0.001, "loss": 1.8373, "step": 6515 }, { "epoch": 0.2756578390726796, "grad_norm": 2.405385732650757, "learning_rate": 0.001, "loss": 2.077, "step": 6516 }, { "epoch": 0.27570014383619595, "grad_norm": 0.9735079407691956, "learning_rate": 0.001, "loss": 2.5276, "step": 6517 }, { "epoch": 0.2757424485997123, "grad_norm": 0.3777754604816437, "learning_rate": 0.001, "loss": 2.6313, "step": 6518 }, { "epoch": 0.2757847533632287, "grad_norm": 0.25702303647994995, "learning_rate": 0.001, "loss": 2.9321, "step": 6519 }, { "epoch": 0.2758270581267451, "grad_norm": 1.9004207849502563, "learning_rate": 0.001, "loss": 1.8612, "step": 6520 }, { "epoch": 0.2758693628902614, "grad_norm": 0.2935592532157898, "learning_rate": 0.001, "loss": 3.1381, "step": 6521 }, { "epoch": 0.27591166765377784, "grad_norm": 0.7922021150588989, "learning_rate": 0.001, "loss": 2.1313, "step": 6522 }, { "epoch": 0.2759539724172942, "grad_norm": 0.2765513062477112, "learning_rate": 0.001, "loss": 2.7907, "step": 6523 }, { "epoch": 0.27599627718081055, "grad_norm": 0.24716876447200775, "learning_rate": 0.001, "loss": 2.1889, "step": 6524 }, { "epoch": 0.27603858194432696, "grad_norm": 0.24158848822116852, "learning_rate": 0.001, "loss": 1.8922, "step": 6525 }, { "epoch": 0.2760808867078433, "grad_norm": 0.20398221909999847, "learning_rate": 0.001, "loss": 2.2493, "step": 6526 }, { "epoch": 0.27612319147135966, "grad_norm": 0.22118747234344482, "learning_rate": 0.001, "loss": 2.6995, "step": 6527 }, { "epoch": 0.2761654962348761, "grad_norm": 0.21023960411548615, "learning_rate": 0.001, "loss": 2.1655, "step": 6528 }, { "epoch": 0.2762078009983924, "grad_norm": 0.2214033603668213, "learning_rate": 0.001, "loss": 1.7361, "step": 6529 }, { "epoch": 0.2762501057619088, "grad_norm": 0.4276762306690216, "learning_rate": 0.001, "loss": 2.3999, "step": 6530 }, { "epoch": 0.27629241052542514, "grad_norm": 0.2657619118690491, "learning_rate": 0.001, "loss": 3.2196, "step": 6531 }, { "epoch": 0.27633471528894155, "grad_norm": 0.47785693407058716, "learning_rate": 0.001, "loss": 1.933, "step": 6532 }, { "epoch": 0.2763770200524579, "grad_norm": 0.6065228581428528, "learning_rate": 0.001, "loss": 2.8416, "step": 6533 }, { "epoch": 0.27641932481597425, "grad_norm": 0.18479658663272858, "learning_rate": 0.001, "loss": 1.9656, "step": 6534 }, { "epoch": 0.27646162957949066, "grad_norm": 0.2248462438583374, "learning_rate": 0.001, "loss": 2.1931, "step": 6535 }, { "epoch": 0.276503934343007, "grad_norm": 0.23013022541999817, "learning_rate": 0.001, "loss": 3.1781, "step": 6536 }, { "epoch": 0.2765462391065234, "grad_norm": 0.23272770643234253, "learning_rate": 0.001, "loss": 2.345, "step": 6537 }, { "epoch": 0.2765885438700398, "grad_norm": 6.299062252044678, "learning_rate": 0.001, "loss": 2.0466, "step": 6538 }, { "epoch": 0.27663084863355614, "grad_norm": 0.2376430779695511, "learning_rate": 0.001, "loss": 2.2214, "step": 6539 }, { "epoch": 0.2766731533970725, "grad_norm": 4.812066555023193, "learning_rate": 0.001, "loss": 2.6048, "step": 6540 }, { "epoch": 0.2767154581605889, "grad_norm": 2.5265612602233887, "learning_rate": 0.001, "loss": 2.3992, "step": 6541 }, { "epoch": 0.27675776292410526, "grad_norm": 0.3985212445259094, "learning_rate": 0.001, "loss": 3.2, "step": 6542 }, { "epoch": 0.2768000676876216, "grad_norm": 4.070377349853516, "learning_rate": 0.001, "loss": 2.1483, "step": 6543 }, { "epoch": 0.276842372451138, "grad_norm": 0.7375186681747437, "learning_rate": 0.001, "loss": 1.9092, "step": 6544 }, { "epoch": 0.2768846772146544, "grad_norm": 0.2642837464809418, "learning_rate": 0.001, "loss": 2.5145, "step": 6545 }, { "epoch": 0.2769269819781707, "grad_norm": 0.23197656869888306, "learning_rate": 0.001, "loss": 2.6426, "step": 6546 }, { "epoch": 0.27696928674168714, "grad_norm": 0.4172825813293457, "learning_rate": 0.001, "loss": 1.7941, "step": 6547 }, { "epoch": 0.2770115915052035, "grad_norm": 0.1864205151796341, "learning_rate": 0.001, "loss": 1.6709, "step": 6548 }, { "epoch": 0.27705389626871985, "grad_norm": 1.1688998937606812, "learning_rate": 0.001, "loss": 2.374, "step": 6549 }, { "epoch": 0.27709620103223626, "grad_norm": 0.26885920763015747, "learning_rate": 0.001, "loss": 2.6946, "step": 6550 }, { "epoch": 0.2771385057957526, "grad_norm": 0.6076298952102661, "learning_rate": 0.001, "loss": 2.1671, "step": 6551 }, { "epoch": 0.27718081055926896, "grad_norm": 333.4671325683594, "learning_rate": 0.001, "loss": 2.3423, "step": 6552 }, { "epoch": 0.2772231153227853, "grad_norm": 0.7827109098434448, "learning_rate": 0.001, "loss": 2.1145, "step": 6553 }, { "epoch": 0.27726542008630173, "grad_norm": 2.1822445392608643, "learning_rate": 0.001, "loss": 1.8533, "step": 6554 }, { "epoch": 0.2773077248498181, "grad_norm": 81.6081314086914, "learning_rate": 0.001, "loss": 2.4029, "step": 6555 }, { "epoch": 0.27735002961333444, "grad_norm": 1.5109821557998657, "learning_rate": 0.001, "loss": 2.849, "step": 6556 }, { "epoch": 0.27739233437685085, "grad_norm": 3.2216641902923584, "learning_rate": 0.001, "loss": 2.4335, "step": 6557 }, { "epoch": 0.2774346391403672, "grad_norm": 0.28260374069213867, "learning_rate": 0.001, "loss": 2.8565, "step": 6558 }, { "epoch": 0.27747694390388356, "grad_norm": 0.2404414415359497, "learning_rate": 0.001, "loss": 2.3991, "step": 6559 }, { "epoch": 0.27751924866739996, "grad_norm": 0.2975144386291504, "learning_rate": 0.001, "loss": 2.8319, "step": 6560 }, { "epoch": 0.2775615534309163, "grad_norm": 8.33987045288086, "learning_rate": 0.001, "loss": 2.9683, "step": 6561 }, { "epoch": 0.2776038581944327, "grad_norm": 1.107771396636963, "learning_rate": 0.001, "loss": 2.3978, "step": 6562 }, { "epoch": 0.2776461629579491, "grad_norm": 0.24816367030143738, "learning_rate": 0.001, "loss": 2.367, "step": 6563 }, { "epoch": 0.27768846772146544, "grad_norm": 1.1018351316452026, "learning_rate": 0.001, "loss": 2.62, "step": 6564 }, { "epoch": 0.2777307724849818, "grad_norm": 1.640609860420227, "learning_rate": 0.001, "loss": 1.867, "step": 6565 }, { "epoch": 0.2777730772484982, "grad_norm": 0.2782337963581085, "learning_rate": 0.001, "loss": 2.3716, "step": 6566 }, { "epoch": 0.27781538201201456, "grad_norm": 1.2613625526428223, "learning_rate": 0.001, "loss": 2.6911, "step": 6567 }, { "epoch": 0.2778576867755309, "grad_norm": 3.5542197227478027, "learning_rate": 0.001, "loss": 2.6192, "step": 6568 }, { "epoch": 0.2778999915390473, "grad_norm": 0.22724595665931702, "learning_rate": 0.001, "loss": 1.9024, "step": 6569 }, { "epoch": 0.2779422963025637, "grad_norm": 0.21525609493255615, "learning_rate": 0.001, "loss": 2.0792, "step": 6570 }, { "epoch": 0.27798460106608003, "grad_norm": 1.333457589149475, "learning_rate": 0.001, "loss": 2.5817, "step": 6571 }, { "epoch": 0.27802690582959644, "grad_norm": 0.2638329863548279, "learning_rate": 0.001, "loss": 1.8625, "step": 6572 }, { "epoch": 0.2780692105931128, "grad_norm": 0.5335271954536438, "learning_rate": 0.001, "loss": 2.255, "step": 6573 }, { "epoch": 0.27811151535662915, "grad_norm": 0.8093989491462708, "learning_rate": 0.001, "loss": 2.6137, "step": 6574 }, { "epoch": 0.2781538201201455, "grad_norm": 0.2589428126811981, "learning_rate": 0.001, "loss": 2.4491, "step": 6575 }, { "epoch": 0.2781961248836619, "grad_norm": 1.0892338752746582, "learning_rate": 0.001, "loss": 2.1675, "step": 6576 }, { "epoch": 0.27823842964717826, "grad_norm": 0.3066917657852173, "learning_rate": 0.001, "loss": 2.3011, "step": 6577 }, { "epoch": 0.2782807344106946, "grad_norm": 0.6517425179481506, "learning_rate": 0.001, "loss": 2.241, "step": 6578 }, { "epoch": 0.27832303917421103, "grad_norm": 0.7544508576393127, "learning_rate": 0.001, "loss": 2.6692, "step": 6579 }, { "epoch": 0.2783653439377274, "grad_norm": 0.2718650698661804, "learning_rate": 0.001, "loss": 2.6371, "step": 6580 }, { "epoch": 0.27840764870124374, "grad_norm": 1.2337608337402344, "learning_rate": 0.001, "loss": 2.6096, "step": 6581 }, { "epoch": 0.27844995346476015, "grad_norm": 0.36760374903678894, "learning_rate": 0.001, "loss": 2.2351, "step": 6582 }, { "epoch": 0.2784922582282765, "grad_norm": 0.25954657793045044, "learning_rate": 0.001, "loss": 2.1766, "step": 6583 }, { "epoch": 0.27853456299179286, "grad_norm": 0.606953501701355, "learning_rate": 0.001, "loss": 2.1137, "step": 6584 }, { "epoch": 0.27857686775530927, "grad_norm": 0.2433394342660904, "learning_rate": 0.001, "loss": 1.8739, "step": 6585 }, { "epoch": 0.2786191725188256, "grad_norm": 0.2681048810482025, "learning_rate": 0.001, "loss": 2.2242, "step": 6586 }, { "epoch": 0.278661477282342, "grad_norm": 0.37554433941841125, "learning_rate": 0.001, "loss": 2.5414, "step": 6587 }, { "epoch": 0.2787037820458584, "grad_norm": 0.37288689613342285, "learning_rate": 0.001, "loss": 2.7379, "step": 6588 }, { "epoch": 0.27874608680937474, "grad_norm": 1.6531394720077515, "learning_rate": 0.001, "loss": 2.3285, "step": 6589 }, { "epoch": 0.2787883915728911, "grad_norm": 0.4495515823364258, "learning_rate": 0.001, "loss": 1.9095, "step": 6590 }, { "epoch": 0.2788306963364075, "grad_norm": 0.22591541707515717, "learning_rate": 0.001, "loss": 1.9806, "step": 6591 }, { "epoch": 0.27887300109992386, "grad_norm": 0.4093262255191803, "learning_rate": 0.001, "loss": 2.9568, "step": 6592 }, { "epoch": 0.2789153058634402, "grad_norm": 0.4655383825302124, "learning_rate": 0.001, "loss": 2.2092, "step": 6593 }, { "epoch": 0.2789576106269566, "grad_norm": 0.1933087557554245, "learning_rate": 0.001, "loss": 2.6561, "step": 6594 }, { "epoch": 0.278999915390473, "grad_norm": 0.7289586663246155, "learning_rate": 0.001, "loss": 2.2029, "step": 6595 }, { "epoch": 0.27904222015398933, "grad_norm": 0.33073756098747253, "learning_rate": 0.001, "loss": 2.1711, "step": 6596 }, { "epoch": 0.27908452491750574, "grad_norm": 0.1785137802362442, "learning_rate": 0.001, "loss": 1.7439, "step": 6597 }, { "epoch": 0.2791268296810221, "grad_norm": 0.23087824881076813, "learning_rate": 0.001, "loss": 2.3974, "step": 6598 }, { "epoch": 0.27916913444453845, "grad_norm": 0.2256430834531784, "learning_rate": 0.001, "loss": 2.225, "step": 6599 }, { "epoch": 0.2792114392080548, "grad_norm": 0.27692413330078125, "learning_rate": 0.001, "loss": 1.9553, "step": 6600 }, { "epoch": 0.2792537439715712, "grad_norm": 0.28464028239250183, "learning_rate": 0.001, "loss": 3.7032, "step": 6601 }, { "epoch": 0.27929604873508757, "grad_norm": 0.22122737765312195, "learning_rate": 0.001, "loss": 1.9469, "step": 6602 }, { "epoch": 0.2793383534986039, "grad_norm": 0.2612067461013794, "learning_rate": 0.001, "loss": 1.7212, "step": 6603 }, { "epoch": 0.27938065826212033, "grad_norm": 0.21582959592342377, "learning_rate": 0.001, "loss": 2.3386, "step": 6604 }, { "epoch": 0.2794229630256367, "grad_norm": 0.258429616689682, "learning_rate": 0.001, "loss": 3.188, "step": 6605 }, { "epoch": 0.27946526778915304, "grad_norm": 0.23203891515731812, "learning_rate": 0.001, "loss": 2.3323, "step": 6606 }, { "epoch": 0.27950757255266945, "grad_norm": 0.37351715564727783, "learning_rate": 0.001, "loss": 3.2925, "step": 6607 }, { "epoch": 0.2795498773161858, "grad_norm": 0.2870527505874634, "learning_rate": 0.001, "loss": 2.128, "step": 6608 }, { "epoch": 0.27959218207970216, "grad_norm": 0.2394646555185318, "learning_rate": 0.001, "loss": 2.1182, "step": 6609 }, { "epoch": 0.27963448684321857, "grad_norm": 1.8435173034667969, "learning_rate": 0.001, "loss": 2.1496, "step": 6610 }, { "epoch": 0.2796767916067349, "grad_norm": 0.689439058303833, "learning_rate": 0.001, "loss": 1.6171, "step": 6611 }, { "epoch": 0.2797190963702513, "grad_norm": 0.3655761182308197, "learning_rate": 0.001, "loss": 1.8437, "step": 6612 }, { "epoch": 0.2797614011337677, "grad_norm": 0.21056896448135376, "learning_rate": 0.001, "loss": 3.201, "step": 6613 }, { "epoch": 0.27980370589728404, "grad_norm": 0.20994161069393158, "learning_rate": 0.001, "loss": 2.0407, "step": 6614 }, { "epoch": 0.2798460106608004, "grad_norm": 0.20594006776809692, "learning_rate": 0.001, "loss": 2.0774, "step": 6615 }, { "epoch": 0.2798883154243168, "grad_norm": 0.24466033279895782, "learning_rate": 0.001, "loss": 2.101, "step": 6616 }, { "epoch": 0.27993062018783316, "grad_norm": 0.23356914520263672, "learning_rate": 0.001, "loss": 3.0406, "step": 6617 }, { "epoch": 0.2799729249513495, "grad_norm": 0.3080406188964844, "learning_rate": 0.001, "loss": 1.7894, "step": 6618 }, { "epoch": 0.2800152297148659, "grad_norm": 0.28324589133262634, "learning_rate": 0.001, "loss": 2.5605, "step": 6619 }, { "epoch": 0.2800575344783823, "grad_norm": 0.4697299599647522, "learning_rate": 0.001, "loss": 2.2976, "step": 6620 }, { "epoch": 0.28009983924189863, "grad_norm": 0.7480176091194153, "learning_rate": 0.001, "loss": 3.3983, "step": 6621 }, { "epoch": 0.280142144005415, "grad_norm": 1.2225714921951294, "learning_rate": 0.001, "loss": 2.6653, "step": 6622 }, { "epoch": 0.2801844487689314, "grad_norm": 1.5693175792694092, "learning_rate": 0.001, "loss": 2.5905, "step": 6623 }, { "epoch": 0.28022675353244775, "grad_norm": 1.6206374168395996, "learning_rate": 0.001, "loss": 2.0437, "step": 6624 }, { "epoch": 0.2802690582959641, "grad_norm": 0.19221502542495728, "learning_rate": 0.001, "loss": 2.4707, "step": 6625 }, { "epoch": 0.2803113630594805, "grad_norm": 0.8016247153282166, "learning_rate": 0.001, "loss": 2.6747, "step": 6626 }, { "epoch": 0.28035366782299687, "grad_norm": 0.21207623183727264, "learning_rate": 0.001, "loss": 2.1313, "step": 6627 }, { "epoch": 0.2803959725865132, "grad_norm": 0.22111967206001282, "learning_rate": 0.001, "loss": 3.0506, "step": 6628 }, { "epoch": 0.28043827735002963, "grad_norm": 0.21571913361549377, "learning_rate": 0.001, "loss": 1.8644, "step": 6629 }, { "epoch": 0.280480582113546, "grad_norm": 0.23307397961616516, "learning_rate": 0.001, "loss": 2.8528, "step": 6630 }, { "epoch": 0.28052288687706234, "grad_norm": 1.7991856336593628, "learning_rate": 0.001, "loss": 2.2838, "step": 6631 }, { "epoch": 0.28056519164057875, "grad_norm": 0.23802727460861206, "learning_rate": 0.001, "loss": 2.0143, "step": 6632 }, { "epoch": 0.2806074964040951, "grad_norm": 0.19570757448673248, "learning_rate": 0.001, "loss": 1.7036, "step": 6633 }, { "epoch": 0.28064980116761146, "grad_norm": 0.27703866362571716, "learning_rate": 0.001, "loss": 4.0551, "step": 6634 }, { "epoch": 0.28069210593112787, "grad_norm": 0.30882275104522705, "learning_rate": 0.001, "loss": 2.4837, "step": 6635 }, { "epoch": 0.2807344106946442, "grad_norm": 0.2575119137763977, "learning_rate": 0.001, "loss": 2.4555, "step": 6636 }, { "epoch": 0.2807767154581606, "grad_norm": 3.0365078449249268, "learning_rate": 0.001, "loss": 1.8518, "step": 6637 }, { "epoch": 0.280819020221677, "grad_norm": 0.21521303057670593, "learning_rate": 0.001, "loss": 2.1552, "step": 6638 }, { "epoch": 0.28086132498519334, "grad_norm": 0.2531343698501587, "learning_rate": 0.001, "loss": 2.9282, "step": 6639 }, { "epoch": 0.2809036297487097, "grad_norm": 0.1944054663181305, "learning_rate": 0.001, "loss": 2.3841, "step": 6640 }, { "epoch": 0.2809459345122261, "grad_norm": 0.18977178633213043, "learning_rate": 0.001, "loss": 1.9523, "step": 6641 }, { "epoch": 0.28098823927574246, "grad_norm": 0.20107170939445496, "learning_rate": 0.001, "loss": 1.9213, "step": 6642 }, { "epoch": 0.2810305440392588, "grad_norm": 1.5466896295547485, "learning_rate": 0.001, "loss": 2.2963, "step": 6643 }, { "epoch": 0.28107284880277517, "grad_norm": 0.20323142409324646, "learning_rate": 0.001, "loss": 2.3657, "step": 6644 }, { "epoch": 0.2811151535662916, "grad_norm": 0.20698612928390503, "learning_rate": 0.001, "loss": 2.1832, "step": 6645 }, { "epoch": 0.28115745832980793, "grad_norm": 0.265480101108551, "learning_rate": 0.001, "loss": 4.3758, "step": 6646 }, { "epoch": 0.2811997630933243, "grad_norm": 0.21881890296936035, "learning_rate": 0.001, "loss": 1.9424, "step": 6647 }, { "epoch": 0.2812420678568407, "grad_norm": 0.2621309459209442, "learning_rate": 0.001, "loss": 3.2508, "step": 6648 }, { "epoch": 0.28128437262035705, "grad_norm": 0.2738354206085205, "learning_rate": 0.001, "loss": 2.7365, "step": 6649 }, { "epoch": 0.2813266773838734, "grad_norm": 0.44590675830841064, "learning_rate": 0.001, "loss": 2.2821, "step": 6650 }, { "epoch": 0.2813689821473898, "grad_norm": 0.25638794898986816, "learning_rate": 0.001, "loss": 2.7701, "step": 6651 }, { "epoch": 0.28141128691090617, "grad_norm": 0.275796115398407, "learning_rate": 0.001, "loss": 2.6728, "step": 6652 }, { "epoch": 0.2814535916744225, "grad_norm": 0.2530985176563263, "learning_rate": 0.001, "loss": 1.9915, "step": 6653 }, { "epoch": 0.28149589643793893, "grad_norm": 0.1915893405675888, "learning_rate": 0.001, "loss": 1.9204, "step": 6654 }, { "epoch": 0.2815382012014553, "grad_norm": 2.445066213607788, "learning_rate": 0.001, "loss": 2.5217, "step": 6655 }, { "epoch": 0.28158050596497164, "grad_norm": 3.7785122394561768, "learning_rate": 0.001, "loss": 1.6211, "step": 6656 }, { "epoch": 0.28162281072848805, "grad_norm": 0.21273264288902283, "learning_rate": 0.001, "loss": 1.958, "step": 6657 }, { "epoch": 0.2816651154920044, "grad_norm": 0.29316845536231995, "learning_rate": 0.001, "loss": 2.8814, "step": 6658 }, { "epoch": 0.28170742025552076, "grad_norm": 16.398019790649414, "learning_rate": 0.001, "loss": 2.0424, "step": 6659 }, { "epoch": 0.28174972501903717, "grad_norm": 0.5399495363235474, "learning_rate": 0.001, "loss": 3.4691, "step": 6660 }, { "epoch": 0.2817920297825535, "grad_norm": 0.29314282536506653, "learning_rate": 0.001, "loss": 1.9568, "step": 6661 }, { "epoch": 0.2818343345460699, "grad_norm": 0.27456262707710266, "learning_rate": 0.001, "loss": 2.6013, "step": 6662 }, { "epoch": 0.2818766393095863, "grad_norm": 0.5269389152526855, "learning_rate": 0.001, "loss": 2.4911, "step": 6663 }, { "epoch": 0.28191894407310264, "grad_norm": 0.5170745849609375, "learning_rate": 0.001, "loss": 3.7857, "step": 6664 }, { "epoch": 0.281961248836619, "grad_norm": 0.21741123497486115, "learning_rate": 0.001, "loss": 2.2259, "step": 6665 }, { "epoch": 0.28200355360013535, "grad_norm": 0.19433678686618805, "learning_rate": 0.001, "loss": 2.3927, "step": 6666 }, { "epoch": 0.28204585836365176, "grad_norm": 0.2336796671152115, "learning_rate": 0.001, "loss": 2.2329, "step": 6667 }, { "epoch": 0.2820881631271681, "grad_norm": 0.3010079264640808, "learning_rate": 0.001, "loss": 2.8767, "step": 6668 }, { "epoch": 0.28213046789068447, "grad_norm": 23.44135856628418, "learning_rate": 0.001, "loss": 2.6002, "step": 6669 }, { "epoch": 0.2821727726542009, "grad_norm": 0.3416197597980499, "learning_rate": 0.001, "loss": 2.3501, "step": 6670 }, { "epoch": 0.28221507741771723, "grad_norm": 0.20520153641700745, "learning_rate": 0.001, "loss": 1.875, "step": 6671 }, { "epoch": 0.2822573821812336, "grad_norm": 0.2823079526424408, "learning_rate": 0.001, "loss": 2.699, "step": 6672 }, { "epoch": 0.28229968694475, "grad_norm": 0.23430481553077698, "learning_rate": 0.001, "loss": 3.0939, "step": 6673 }, { "epoch": 0.28234199170826635, "grad_norm": 0.2834431231021881, "learning_rate": 0.001, "loss": 2.2496, "step": 6674 }, { "epoch": 0.2823842964717827, "grad_norm": 0.5793172717094421, "learning_rate": 0.001, "loss": 2.2915, "step": 6675 }, { "epoch": 0.2824266012352991, "grad_norm": 0.4027354121208191, "learning_rate": 0.001, "loss": 2.1231, "step": 6676 }, { "epoch": 0.28246890599881547, "grad_norm": 0.24830132722854614, "learning_rate": 0.001, "loss": 1.8969, "step": 6677 }, { "epoch": 0.2825112107623318, "grad_norm": 0.27415531873703003, "learning_rate": 0.001, "loss": 2.1313, "step": 6678 }, { "epoch": 0.28255351552584823, "grad_norm": 0.21020923554897308, "learning_rate": 0.001, "loss": 1.6612, "step": 6679 }, { "epoch": 0.2825958202893646, "grad_norm": 0.4596908986568451, "learning_rate": 0.001, "loss": 3.1641, "step": 6680 }, { "epoch": 0.28263812505288094, "grad_norm": 0.2096545398235321, "learning_rate": 0.001, "loss": 2.3925, "step": 6681 }, { "epoch": 0.28268042981639735, "grad_norm": 0.21853958070278168, "learning_rate": 0.001, "loss": 1.7605, "step": 6682 }, { "epoch": 0.2827227345799137, "grad_norm": 0.2005593180656433, "learning_rate": 0.001, "loss": 2.0397, "step": 6683 }, { "epoch": 0.28276503934343006, "grad_norm": 9.324366569519043, "learning_rate": 0.001, "loss": 1.8011, "step": 6684 }, { "epoch": 0.28280734410694647, "grad_norm": 0.23955844342708588, "learning_rate": 0.001, "loss": 2.4135, "step": 6685 }, { "epoch": 0.2828496488704628, "grad_norm": 0.240536168217659, "learning_rate": 0.001, "loss": 2.2006, "step": 6686 }, { "epoch": 0.2828919536339792, "grad_norm": 0.2282586544752121, "learning_rate": 0.001, "loss": 2.4402, "step": 6687 }, { "epoch": 0.28293425839749553, "grad_norm": 0.23527540266513824, "learning_rate": 0.001, "loss": 2.5282, "step": 6688 }, { "epoch": 0.28297656316101194, "grad_norm": 0.2016860693693161, "learning_rate": 0.001, "loss": 2.9175, "step": 6689 }, { "epoch": 0.2830188679245283, "grad_norm": 0.46961989998817444, "learning_rate": 0.001, "loss": 1.9682, "step": 6690 }, { "epoch": 0.28306117268804465, "grad_norm": 3.572152614593506, "learning_rate": 0.001, "loss": 2.3511, "step": 6691 }, { "epoch": 0.28310347745156106, "grad_norm": 0.18426565825939178, "learning_rate": 0.001, "loss": 2.563, "step": 6692 }, { "epoch": 0.2831457822150774, "grad_norm": 0.20007674396038055, "learning_rate": 0.001, "loss": 2.182, "step": 6693 }, { "epoch": 0.28318808697859377, "grad_norm": 0.2859644591808319, "learning_rate": 0.001, "loss": 2.4287, "step": 6694 }, { "epoch": 0.2832303917421102, "grad_norm": 0.2305973470211029, "learning_rate": 0.001, "loss": 2.3868, "step": 6695 }, { "epoch": 0.28327269650562653, "grad_norm": 0.21478715538978577, "learning_rate": 0.001, "loss": 2.2786, "step": 6696 }, { "epoch": 0.2833150012691429, "grad_norm": 0.2607964277267456, "learning_rate": 0.001, "loss": 2.7508, "step": 6697 }, { "epoch": 0.2833573060326593, "grad_norm": 0.2294250726699829, "learning_rate": 0.001, "loss": 1.9693, "step": 6698 }, { "epoch": 0.28339961079617565, "grad_norm": 0.382492333650589, "learning_rate": 0.001, "loss": 3.9725, "step": 6699 }, { "epoch": 0.283441915559692, "grad_norm": 0.2415558397769928, "learning_rate": 0.001, "loss": 2.6939, "step": 6700 }, { "epoch": 0.2834842203232084, "grad_norm": 0.18394537270069122, "learning_rate": 0.001, "loss": 2.3632, "step": 6701 }, { "epoch": 0.28352652508672477, "grad_norm": 0.2569441795349121, "learning_rate": 0.001, "loss": 2.3462, "step": 6702 }, { "epoch": 0.2835688298502411, "grad_norm": 0.19975893199443817, "learning_rate": 0.001, "loss": 1.6878, "step": 6703 }, { "epoch": 0.28361113461375753, "grad_norm": 2.8916850090026855, "learning_rate": 0.001, "loss": 1.7501, "step": 6704 }, { "epoch": 0.2836534393772739, "grad_norm": 0.24953538179397583, "learning_rate": 0.001, "loss": 2.7791, "step": 6705 }, { "epoch": 0.28369574414079024, "grad_norm": 0.6636295318603516, "learning_rate": 0.001, "loss": 2.2578, "step": 6706 }, { "epoch": 0.28373804890430665, "grad_norm": 2.3027749061584473, "learning_rate": 0.001, "loss": 1.9202, "step": 6707 }, { "epoch": 0.283780353667823, "grad_norm": 0.17140471935272217, "learning_rate": 0.001, "loss": 1.9173, "step": 6708 }, { "epoch": 0.28382265843133936, "grad_norm": 0.1749950796365738, "learning_rate": 0.001, "loss": 1.5205, "step": 6709 }, { "epoch": 0.28386496319485577, "grad_norm": 0.8592566251754761, "learning_rate": 0.001, "loss": 3.4644, "step": 6710 }, { "epoch": 0.2839072679583721, "grad_norm": 0.17556998133659363, "learning_rate": 0.001, "loss": 1.9657, "step": 6711 }, { "epoch": 0.2839495727218885, "grad_norm": 0.2964968681335449, "learning_rate": 0.001, "loss": 2.8859, "step": 6712 }, { "epoch": 0.28399187748540483, "grad_norm": 0.6098200678825378, "learning_rate": 0.001, "loss": 1.7954, "step": 6713 }, { "epoch": 0.28403418224892124, "grad_norm": 0.43372607231140137, "learning_rate": 0.001, "loss": 3.3326, "step": 6714 }, { "epoch": 0.2840764870124376, "grad_norm": 0.4292704463005066, "learning_rate": 0.001, "loss": 2.5526, "step": 6715 }, { "epoch": 0.28411879177595395, "grad_norm": 0.21659818291664124, "learning_rate": 0.001, "loss": 1.9127, "step": 6716 }, { "epoch": 0.28416109653947036, "grad_norm": 0.4214901030063629, "learning_rate": 0.001, "loss": 1.7826, "step": 6717 }, { "epoch": 0.2842034013029867, "grad_norm": 0.3387224078178406, "learning_rate": 0.001, "loss": 2.7385, "step": 6718 }, { "epoch": 0.28424570606650307, "grad_norm": 0.5583015084266663, "learning_rate": 0.001, "loss": 3.1142, "step": 6719 }, { "epoch": 0.2842880108300195, "grad_norm": 0.19958928227424622, "learning_rate": 0.001, "loss": 3.1751, "step": 6720 }, { "epoch": 0.28433031559353583, "grad_norm": 0.22173653542995453, "learning_rate": 0.001, "loss": 3.0606, "step": 6721 }, { "epoch": 0.2843726203570522, "grad_norm": 0.24500377476215363, "learning_rate": 0.001, "loss": 2.0964, "step": 6722 }, { "epoch": 0.2844149251205686, "grad_norm": 0.21380500495433807, "learning_rate": 0.001, "loss": 2.1937, "step": 6723 }, { "epoch": 0.28445722988408495, "grad_norm": 0.21296393871307373, "learning_rate": 0.001, "loss": 2.1262, "step": 6724 }, { "epoch": 0.2844995346476013, "grad_norm": 0.19265830516815186, "learning_rate": 0.001, "loss": 2.587, "step": 6725 }, { "epoch": 0.2845418394111177, "grad_norm": 0.19577600061893463, "learning_rate": 0.001, "loss": 1.7467, "step": 6726 }, { "epoch": 0.28458414417463407, "grad_norm": 0.25337207317352295, "learning_rate": 0.001, "loss": 2.1288, "step": 6727 }, { "epoch": 0.2846264489381504, "grad_norm": 0.4300183653831482, "learning_rate": 0.001, "loss": 2.7892, "step": 6728 }, { "epoch": 0.28466875370166683, "grad_norm": 0.8722933530807495, "learning_rate": 0.001, "loss": 2.3566, "step": 6729 }, { "epoch": 0.2847110584651832, "grad_norm": 0.17936961352825165, "learning_rate": 0.001, "loss": 2.3686, "step": 6730 }, { "epoch": 0.28475336322869954, "grad_norm": 0.18349401652812958, "learning_rate": 0.001, "loss": 1.7784, "step": 6731 }, { "epoch": 0.28479566799221595, "grad_norm": 1.0016671419143677, "learning_rate": 0.001, "loss": 1.9157, "step": 6732 }, { "epoch": 0.2848379727557323, "grad_norm": 0.14717590808868408, "learning_rate": 0.001, "loss": 1.3068, "step": 6733 }, { "epoch": 0.28488027751924866, "grad_norm": 4.4579243659973145, "learning_rate": 0.001, "loss": 2.4236, "step": 6734 }, { "epoch": 0.284922582282765, "grad_norm": 0.2059265822172165, "learning_rate": 0.001, "loss": 1.9139, "step": 6735 }, { "epoch": 0.2849648870462814, "grad_norm": 0.2114630937576294, "learning_rate": 0.001, "loss": 3.1348, "step": 6736 }, { "epoch": 0.2850071918097978, "grad_norm": 0.2629503011703491, "learning_rate": 0.001, "loss": 2.6543, "step": 6737 }, { "epoch": 0.28504949657331413, "grad_norm": 2.027836799621582, "learning_rate": 0.001, "loss": 2.1327, "step": 6738 }, { "epoch": 0.28509180133683054, "grad_norm": 0.9196799993515015, "learning_rate": 0.001, "loss": 2.5672, "step": 6739 }, { "epoch": 0.2851341061003469, "grad_norm": 5.23358154296875, "learning_rate": 0.001, "loss": 2.0236, "step": 6740 }, { "epoch": 0.28517641086386325, "grad_norm": 0.17935171723365784, "learning_rate": 0.001, "loss": 2.6343, "step": 6741 }, { "epoch": 0.28521871562737966, "grad_norm": 1.6367027759552002, "learning_rate": 0.001, "loss": 1.7514, "step": 6742 }, { "epoch": 0.285261020390896, "grad_norm": 0.20048488676548004, "learning_rate": 0.001, "loss": 2.6243, "step": 6743 }, { "epoch": 0.28530332515441237, "grad_norm": 0.37594085931777954, "learning_rate": 0.001, "loss": 3.2875, "step": 6744 }, { "epoch": 0.2853456299179288, "grad_norm": 0.18674291670322418, "learning_rate": 0.001, "loss": 1.8737, "step": 6745 }, { "epoch": 0.28538793468144513, "grad_norm": 0.40229612588882446, "learning_rate": 0.001, "loss": 2.3867, "step": 6746 }, { "epoch": 0.2854302394449615, "grad_norm": 2.3481996059417725, "learning_rate": 0.001, "loss": 3.3213, "step": 6747 }, { "epoch": 0.2854725442084779, "grad_norm": 0.23974232375621796, "learning_rate": 0.001, "loss": 3.0777, "step": 6748 }, { "epoch": 0.28551484897199425, "grad_norm": 0.22430221736431122, "learning_rate": 0.001, "loss": 2.5951, "step": 6749 }, { "epoch": 0.2855571537355106, "grad_norm": 1.6026118993759155, "learning_rate": 0.001, "loss": 2.3724, "step": 6750 }, { "epoch": 0.285599458499027, "grad_norm": 2.978184938430786, "learning_rate": 0.001, "loss": 2.1911, "step": 6751 }, { "epoch": 0.28564176326254337, "grad_norm": 11.393836975097656, "learning_rate": 0.001, "loss": 3.553, "step": 6752 }, { "epoch": 0.2856840680260597, "grad_norm": 0.254300057888031, "learning_rate": 0.001, "loss": 2.9947, "step": 6753 }, { "epoch": 0.28572637278957613, "grad_norm": 0.5926849246025085, "learning_rate": 0.001, "loss": 2.2195, "step": 6754 }, { "epoch": 0.2857686775530925, "grad_norm": 6.964447975158691, "learning_rate": 0.001, "loss": 2.2015, "step": 6755 }, { "epoch": 0.28581098231660884, "grad_norm": 4.038476943969727, "learning_rate": 0.001, "loss": 2.186, "step": 6756 }, { "epoch": 0.2858532870801252, "grad_norm": 3.579437494277954, "learning_rate": 0.001, "loss": 2.3797, "step": 6757 }, { "epoch": 0.2858955918436416, "grad_norm": 4.091229438781738, "learning_rate": 0.001, "loss": 2.5496, "step": 6758 }, { "epoch": 0.28593789660715796, "grad_norm": 3.153745412826538, "learning_rate": 0.001, "loss": 2.2255, "step": 6759 }, { "epoch": 0.2859802013706743, "grad_norm": 1.0637880563735962, "learning_rate": 0.001, "loss": 2.5076, "step": 6760 }, { "epoch": 0.2860225061341907, "grad_norm": 1.7310835123062134, "learning_rate": 0.001, "loss": 2.6743, "step": 6761 }, { "epoch": 0.2860648108977071, "grad_norm": 0.3798615336418152, "learning_rate": 0.001, "loss": 2.1774, "step": 6762 }, { "epoch": 0.28610711566122343, "grad_norm": 0.35704922676086426, "learning_rate": 0.001, "loss": 1.6369, "step": 6763 }, { "epoch": 0.28614942042473984, "grad_norm": 0.22196084260940552, "learning_rate": 0.001, "loss": 2.2773, "step": 6764 }, { "epoch": 0.2861917251882562, "grad_norm": 0.2564329504966736, "learning_rate": 0.001, "loss": 2.0616, "step": 6765 }, { "epoch": 0.28623402995177255, "grad_norm": 0.662453830242157, "learning_rate": 0.001, "loss": 2.425, "step": 6766 }, { "epoch": 0.28627633471528896, "grad_norm": 0.275669127702713, "learning_rate": 0.001, "loss": 2.1394, "step": 6767 }, { "epoch": 0.2863186394788053, "grad_norm": 1.1164103746414185, "learning_rate": 0.001, "loss": 3.1801, "step": 6768 }, { "epoch": 0.28636094424232167, "grad_norm": 0.28561368584632874, "learning_rate": 0.001, "loss": 3.4721, "step": 6769 }, { "epoch": 0.2864032490058381, "grad_norm": 0.44134315848350525, "learning_rate": 0.001, "loss": 2.4825, "step": 6770 }, { "epoch": 0.28644555376935443, "grad_norm": 0.6540394425392151, "learning_rate": 0.001, "loss": 2.4181, "step": 6771 }, { "epoch": 0.2864878585328708, "grad_norm": 0.9629650712013245, "learning_rate": 0.001, "loss": 1.801, "step": 6772 }, { "epoch": 0.2865301632963872, "grad_norm": 1.2992072105407715, "learning_rate": 0.001, "loss": 2.4131, "step": 6773 }, { "epoch": 0.28657246805990355, "grad_norm": 0.8524438142776489, "learning_rate": 0.001, "loss": 2.1917, "step": 6774 }, { "epoch": 0.2866147728234199, "grad_norm": 18.05820083618164, "learning_rate": 0.001, "loss": 3.5782, "step": 6775 }, { "epoch": 0.2866570775869363, "grad_norm": 0.2133456915616989, "learning_rate": 0.001, "loss": 2.8237, "step": 6776 }, { "epoch": 0.28669938235045267, "grad_norm": 29.985143661499023, "learning_rate": 0.001, "loss": 1.9714, "step": 6777 }, { "epoch": 0.286741687113969, "grad_norm": 0.23235024511814117, "learning_rate": 0.001, "loss": 2.2575, "step": 6778 }, { "epoch": 0.2867839918774854, "grad_norm": 0.2286916971206665, "learning_rate": 0.001, "loss": 2.4285, "step": 6779 }, { "epoch": 0.2868262966410018, "grad_norm": 0.20938080549240112, "learning_rate": 0.001, "loss": 1.8264, "step": 6780 }, { "epoch": 0.28686860140451814, "grad_norm": 0.1856824904680252, "learning_rate": 0.001, "loss": 2.2964, "step": 6781 }, { "epoch": 0.2869109061680345, "grad_norm": 0.18935956060886383, "learning_rate": 0.001, "loss": 2.8474, "step": 6782 }, { "epoch": 0.2869532109315509, "grad_norm": 0.24744349718093872, "learning_rate": 0.001, "loss": 2.2822, "step": 6783 }, { "epoch": 0.28699551569506726, "grad_norm": 0.26295721530914307, "learning_rate": 0.001, "loss": 2.3905, "step": 6784 }, { "epoch": 0.2870378204585836, "grad_norm": 0.23481005430221558, "learning_rate": 0.001, "loss": 2.0109, "step": 6785 }, { "epoch": 0.2870801252221, "grad_norm": 0.17823268473148346, "learning_rate": 0.001, "loss": 2.6713, "step": 6786 }, { "epoch": 0.2871224299856164, "grad_norm": 0.28128448128700256, "learning_rate": 0.001, "loss": 2.5553, "step": 6787 }, { "epoch": 0.28716473474913273, "grad_norm": 0.23149077594280243, "learning_rate": 0.001, "loss": 1.8728, "step": 6788 }, { "epoch": 0.28720703951264914, "grad_norm": 0.21473278105258942, "learning_rate": 0.001, "loss": 2.493, "step": 6789 }, { "epoch": 0.2872493442761655, "grad_norm": 0.19875210523605347, "learning_rate": 0.001, "loss": 1.7056, "step": 6790 }, { "epoch": 0.28729164903968185, "grad_norm": 0.18944133818149567, "learning_rate": 0.001, "loss": 2.243, "step": 6791 }, { "epoch": 0.28733395380319826, "grad_norm": 0.17879356443881989, "learning_rate": 0.001, "loss": 1.9253, "step": 6792 }, { "epoch": 0.2873762585667146, "grad_norm": 0.39051762223243713, "learning_rate": 0.001, "loss": 2.477, "step": 6793 }, { "epoch": 0.28741856333023097, "grad_norm": 0.2168491631746292, "learning_rate": 0.001, "loss": 2.2378, "step": 6794 }, { "epoch": 0.2874608680937474, "grad_norm": 0.20587210357189178, "learning_rate": 0.001, "loss": 2.9123, "step": 6795 }, { "epoch": 0.28750317285726373, "grad_norm": 0.16420647501945496, "learning_rate": 0.001, "loss": 1.4775, "step": 6796 }, { "epoch": 0.2875454776207801, "grad_norm": 0.18433904647827148, "learning_rate": 0.001, "loss": 2.6196, "step": 6797 }, { "epoch": 0.2875877823842965, "grad_norm": 0.18781892955303192, "learning_rate": 0.001, "loss": 2.1033, "step": 6798 }, { "epoch": 0.28763008714781285, "grad_norm": 0.17372409999370575, "learning_rate": 0.001, "loss": 2.12, "step": 6799 }, { "epoch": 0.2876723919113292, "grad_norm": 0.18070591986179352, "learning_rate": 0.001, "loss": 2.02, "step": 6800 }, { "epoch": 0.28771469667484556, "grad_norm": 0.1879878044128418, "learning_rate": 0.001, "loss": 1.6728, "step": 6801 }, { "epoch": 0.28775700143836197, "grad_norm": 0.18465609848499298, "learning_rate": 0.001, "loss": 2.1977, "step": 6802 }, { "epoch": 0.2877993062018783, "grad_norm": 0.19959770143032074, "learning_rate": 0.001, "loss": 2.1808, "step": 6803 }, { "epoch": 0.2878416109653947, "grad_norm": 0.2111247032880783, "learning_rate": 0.001, "loss": 2.2085, "step": 6804 }, { "epoch": 0.2878839157289111, "grad_norm": 0.25699108839035034, "learning_rate": 0.001, "loss": 2.784, "step": 6805 }, { "epoch": 0.28792622049242744, "grad_norm": 0.17606748640537262, "learning_rate": 0.001, "loss": 1.9087, "step": 6806 }, { "epoch": 0.2879685252559438, "grad_norm": 0.17687219381332397, "learning_rate": 0.001, "loss": 1.848, "step": 6807 }, { "epoch": 0.2880108300194602, "grad_norm": 0.22173765301704407, "learning_rate": 0.001, "loss": 2.3972, "step": 6808 }, { "epoch": 0.28805313478297656, "grad_norm": 0.18182872235774994, "learning_rate": 0.001, "loss": 1.7431, "step": 6809 }, { "epoch": 0.2880954395464929, "grad_norm": 0.25314095616340637, "learning_rate": 0.001, "loss": 1.7966, "step": 6810 }, { "epoch": 0.2881377443100093, "grad_norm": 0.20727166533470154, "learning_rate": 0.001, "loss": 2.3798, "step": 6811 }, { "epoch": 0.2881800490735257, "grad_norm": 0.9613415002822876, "learning_rate": 0.001, "loss": 2.2462, "step": 6812 }, { "epoch": 0.28822235383704203, "grad_norm": 1.1291375160217285, "learning_rate": 0.001, "loss": 2.3434, "step": 6813 }, { "epoch": 0.28826465860055844, "grad_norm": 0.1664966493844986, "learning_rate": 0.001, "loss": 1.8012, "step": 6814 }, { "epoch": 0.2883069633640748, "grad_norm": 0.1893632560968399, "learning_rate": 0.001, "loss": 1.7286, "step": 6815 }, { "epoch": 0.28834926812759115, "grad_norm": 0.2506442070007324, "learning_rate": 0.001, "loss": 2.2874, "step": 6816 }, { "epoch": 0.28839157289110756, "grad_norm": 0.20881643891334534, "learning_rate": 0.001, "loss": 2.5897, "step": 6817 }, { "epoch": 0.2884338776546239, "grad_norm": 3.2081472873687744, "learning_rate": 0.001, "loss": 3.7027, "step": 6818 }, { "epoch": 0.28847618241814027, "grad_norm": 0.17766347527503967, "learning_rate": 0.001, "loss": 2.7331, "step": 6819 }, { "epoch": 0.2885184871816567, "grad_norm": 0.1828794777393341, "learning_rate": 0.001, "loss": 1.878, "step": 6820 }, { "epoch": 0.28856079194517303, "grad_norm": 0.2575604021549225, "learning_rate": 0.001, "loss": 2.9143, "step": 6821 }, { "epoch": 0.2886030967086894, "grad_norm": 0.21907007694244385, "learning_rate": 0.001, "loss": 2.0439, "step": 6822 }, { "epoch": 0.28864540147220574, "grad_norm": 0.19140586256980896, "learning_rate": 0.001, "loss": 1.9638, "step": 6823 }, { "epoch": 0.28868770623572215, "grad_norm": 0.829940915107727, "learning_rate": 0.001, "loss": 2.1286, "step": 6824 }, { "epoch": 0.2887300109992385, "grad_norm": 0.1640363186597824, "learning_rate": 0.001, "loss": 1.6881, "step": 6825 }, { "epoch": 0.28877231576275486, "grad_norm": 0.22927752137184143, "learning_rate": 0.001, "loss": 2.1967, "step": 6826 }, { "epoch": 0.28881462052627127, "grad_norm": 0.21113528311252594, "learning_rate": 0.001, "loss": 2.4409, "step": 6827 }, { "epoch": 0.2888569252897876, "grad_norm": 0.1978234350681305, "learning_rate": 0.001, "loss": 2.0009, "step": 6828 }, { "epoch": 0.288899230053304, "grad_norm": 0.43694573640823364, "learning_rate": 0.001, "loss": 2.7192, "step": 6829 }, { "epoch": 0.2889415348168204, "grad_norm": 0.20024238526821136, "learning_rate": 0.001, "loss": 2.2269, "step": 6830 }, { "epoch": 0.28898383958033674, "grad_norm": 0.17758913338184357, "learning_rate": 0.001, "loss": 1.9114, "step": 6831 }, { "epoch": 0.2890261443438531, "grad_norm": 0.2091837078332901, "learning_rate": 0.001, "loss": 2.1373, "step": 6832 }, { "epoch": 0.2890684491073695, "grad_norm": 0.28614622354507446, "learning_rate": 0.001, "loss": 2.3553, "step": 6833 }, { "epoch": 0.28911075387088586, "grad_norm": 0.22399480640888214, "learning_rate": 0.001, "loss": 2.0642, "step": 6834 }, { "epoch": 0.2891530586344022, "grad_norm": 0.24540039896965027, "learning_rate": 0.001, "loss": 1.8619, "step": 6835 }, { "epoch": 0.2891953633979186, "grad_norm": 0.6220614314079285, "learning_rate": 0.001, "loss": 1.9019, "step": 6836 }, { "epoch": 0.289237668161435, "grad_norm": 0.15628793835639954, "learning_rate": 0.001, "loss": 2.5514, "step": 6837 }, { "epoch": 0.28927997292495133, "grad_norm": 2.0576629638671875, "learning_rate": 0.001, "loss": 2.5113, "step": 6838 }, { "epoch": 0.28932227768846774, "grad_norm": 0.19702525436878204, "learning_rate": 0.001, "loss": 2.2104, "step": 6839 }, { "epoch": 0.2893645824519841, "grad_norm": 0.20886345207691193, "learning_rate": 0.001, "loss": 3.0677, "step": 6840 }, { "epoch": 0.28940688721550045, "grad_norm": 0.18030396103858948, "learning_rate": 0.001, "loss": 2.0693, "step": 6841 }, { "epoch": 0.28944919197901686, "grad_norm": 0.16617818176746368, "learning_rate": 0.001, "loss": 1.7641, "step": 6842 }, { "epoch": 0.2894914967425332, "grad_norm": 0.22310234606266022, "learning_rate": 0.001, "loss": 1.9379, "step": 6843 }, { "epoch": 0.28953380150604957, "grad_norm": 0.19781459867954254, "learning_rate": 0.001, "loss": 1.9957, "step": 6844 }, { "epoch": 0.289576106269566, "grad_norm": 0.9519227743148804, "learning_rate": 0.001, "loss": 2.2693, "step": 6845 }, { "epoch": 0.28961841103308233, "grad_norm": 0.69413161277771, "learning_rate": 0.001, "loss": 2.4312, "step": 6846 }, { "epoch": 0.2896607157965987, "grad_norm": 0.23835915327072144, "learning_rate": 0.001, "loss": 2.2305, "step": 6847 }, { "epoch": 0.28970302056011504, "grad_norm": 0.16248169541358948, "learning_rate": 0.001, "loss": 1.9366, "step": 6848 }, { "epoch": 0.28974532532363145, "grad_norm": 1.342553973197937, "learning_rate": 0.001, "loss": 2.4784, "step": 6849 }, { "epoch": 0.2897876300871478, "grad_norm": 0.2539532482624054, "learning_rate": 0.001, "loss": 2.0127, "step": 6850 }, { "epoch": 0.28982993485066416, "grad_norm": 0.3332529664039612, "learning_rate": 0.001, "loss": 2.0516, "step": 6851 }, { "epoch": 0.28987223961418057, "grad_norm": 0.25316402316093445, "learning_rate": 0.001, "loss": 2.1151, "step": 6852 }, { "epoch": 0.2899145443776969, "grad_norm": 0.2206842005252838, "learning_rate": 0.001, "loss": 2.1286, "step": 6853 }, { "epoch": 0.2899568491412133, "grad_norm": 0.18992824852466583, "learning_rate": 0.001, "loss": 2.1739, "step": 6854 }, { "epoch": 0.2899991539047297, "grad_norm": 0.22509132325649261, "learning_rate": 0.001, "loss": 1.8315, "step": 6855 }, { "epoch": 0.29004145866824604, "grad_norm": 0.22096151113510132, "learning_rate": 0.001, "loss": 2.0061, "step": 6856 }, { "epoch": 0.2900837634317624, "grad_norm": 0.27027440071105957, "learning_rate": 0.001, "loss": 2.488, "step": 6857 }, { "epoch": 0.2901260681952788, "grad_norm": 0.2823043465614319, "learning_rate": 0.001, "loss": 2.5708, "step": 6858 }, { "epoch": 0.29016837295879516, "grad_norm": 0.2073088437318802, "learning_rate": 0.001, "loss": 2.6952, "step": 6859 }, { "epoch": 0.2902106777223115, "grad_norm": 0.18988396227359772, "learning_rate": 0.001, "loss": 2.1142, "step": 6860 }, { "epoch": 0.2902529824858279, "grad_norm": 0.1997257024049759, "learning_rate": 0.001, "loss": 2.0511, "step": 6861 }, { "epoch": 0.2902952872493443, "grad_norm": 0.17761564254760742, "learning_rate": 0.001, "loss": 1.7944, "step": 6862 }, { "epoch": 0.29033759201286063, "grad_norm": 0.258060485124588, "learning_rate": 0.001, "loss": 3.3614, "step": 6863 }, { "epoch": 0.29037989677637704, "grad_norm": 0.17017389833927155, "learning_rate": 0.001, "loss": 1.5792, "step": 6864 }, { "epoch": 0.2904222015398934, "grad_norm": 0.4084915518760681, "learning_rate": 0.001, "loss": 1.8688, "step": 6865 }, { "epoch": 0.29046450630340975, "grad_norm": 0.2108771950006485, "learning_rate": 0.001, "loss": 2.1209, "step": 6866 }, { "epoch": 0.29050681106692616, "grad_norm": 0.42666342854499817, "learning_rate": 0.001, "loss": 2.7563, "step": 6867 }, { "epoch": 0.2905491158304425, "grad_norm": 0.18676477670669556, "learning_rate": 0.001, "loss": 2.3573, "step": 6868 }, { "epoch": 0.29059142059395887, "grad_norm": 0.586787760257721, "learning_rate": 0.001, "loss": 2.2616, "step": 6869 }, { "epoch": 0.2906337253574752, "grad_norm": 2.2827725410461426, "learning_rate": 0.001, "loss": 1.948, "step": 6870 }, { "epoch": 0.29067603012099164, "grad_norm": 0.1944393664598465, "learning_rate": 0.001, "loss": 2.7219, "step": 6871 }, { "epoch": 0.290718334884508, "grad_norm": 2.946514844894409, "learning_rate": 0.001, "loss": 2.3146, "step": 6872 }, { "epoch": 0.29076063964802434, "grad_norm": 0.410149484872818, "learning_rate": 0.001, "loss": 2.6918, "step": 6873 }, { "epoch": 0.29080294441154075, "grad_norm": 0.18303263187408447, "learning_rate": 0.001, "loss": 1.4522, "step": 6874 }, { "epoch": 0.2908452491750571, "grad_norm": 0.8593881726264954, "learning_rate": 0.001, "loss": 2.6251, "step": 6875 }, { "epoch": 0.29088755393857346, "grad_norm": 2.545989751815796, "learning_rate": 0.001, "loss": 2.0312, "step": 6876 }, { "epoch": 0.29092985870208987, "grad_norm": 0.30340850353240967, "learning_rate": 0.001, "loss": 2.2144, "step": 6877 }, { "epoch": 0.2909721634656062, "grad_norm": 0.23748478293418884, "learning_rate": 0.001, "loss": 3.4675, "step": 6878 }, { "epoch": 0.2910144682291226, "grad_norm": 18.90664291381836, "learning_rate": 0.001, "loss": 2.7135, "step": 6879 }, { "epoch": 0.291056772992639, "grad_norm": 0.2496711015701294, "learning_rate": 0.001, "loss": 2.5996, "step": 6880 }, { "epoch": 0.29109907775615534, "grad_norm": 1.5730305910110474, "learning_rate": 0.001, "loss": 3.1332, "step": 6881 }, { "epoch": 0.2911413825196717, "grad_norm": 0.4019278585910797, "learning_rate": 0.001, "loss": 1.94, "step": 6882 }, { "epoch": 0.2911836872831881, "grad_norm": 2.1472578048706055, "learning_rate": 0.001, "loss": 2.5725, "step": 6883 }, { "epoch": 0.29122599204670446, "grad_norm": 0.42344650626182556, "learning_rate": 0.001, "loss": 2.5348, "step": 6884 }, { "epoch": 0.2912682968102208, "grad_norm": 0.7056704759597778, "learning_rate": 0.001, "loss": 2.6225, "step": 6885 }, { "epoch": 0.2913106015737372, "grad_norm": 0.6706409454345703, "learning_rate": 0.001, "loss": 2.3904, "step": 6886 }, { "epoch": 0.2913529063372536, "grad_norm": 0.35459625720977783, "learning_rate": 0.001, "loss": 2.1607, "step": 6887 }, { "epoch": 0.29139521110076994, "grad_norm": 0.3938932418823242, "learning_rate": 0.001, "loss": 2.4787, "step": 6888 }, { "epoch": 0.29143751586428635, "grad_norm": 0.2852618992328644, "learning_rate": 0.001, "loss": 2.3739, "step": 6889 }, { "epoch": 0.2914798206278027, "grad_norm": 1.7088735103607178, "learning_rate": 0.001, "loss": 2.6956, "step": 6890 }, { "epoch": 0.29152212539131905, "grad_norm": 0.21869906783103943, "learning_rate": 0.001, "loss": 2.1744, "step": 6891 }, { "epoch": 0.2915644301548354, "grad_norm": 0.40579238533973694, "learning_rate": 0.001, "loss": 2.3586, "step": 6892 }, { "epoch": 0.2916067349183518, "grad_norm": 0.26919931173324585, "learning_rate": 0.001, "loss": 1.6718, "step": 6893 }, { "epoch": 0.29164903968186817, "grad_norm": 0.33402884006500244, "learning_rate": 0.001, "loss": 3.2099, "step": 6894 }, { "epoch": 0.2916913444453845, "grad_norm": 5.1144490242004395, "learning_rate": 0.001, "loss": 2.5433, "step": 6895 }, { "epoch": 0.29173364920890094, "grad_norm": 27.068161010742188, "learning_rate": 0.001, "loss": 2.5207, "step": 6896 }, { "epoch": 0.2917759539724173, "grad_norm": 1.7865883111953735, "learning_rate": 0.001, "loss": 3.3911, "step": 6897 }, { "epoch": 0.29181825873593364, "grad_norm": 2.192460298538208, "learning_rate": 0.001, "loss": 2.4101, "step": 6898 }, { "epoch": 0.29186056349945005, "grad_norm": 0.3550018072128296, "learning_rate": 0.001, "loss": 2.3135, "step": 6899 }, { "epoch": 0.2919028682629664, "grad_norm": 0.24947704374790192, "learning_rate": 0.001, "loss": 3.6274, "step": 6900 }, { "epoch": 0.29194517302648276, "grad_norm": 0.23705358803272247, "learning_rate": 0.001, "loss": 3.0381, "step": 6901 }, { "epoch": 0.2919874777899992, "grad_norm": 1.2429789304733276, "learning_rate": 0.001, "loss": 2.1528, "step": 6902 }, { "epoch": 0.2920297825535155, "grad_norm": 0.28556519746780396, "learning_rate": 0.001, "loss": 1.9076, "step": 6903 }, { "epoch": 0.2920720873170319, "grad_norm": 3.0117995738983154, "learning_rate": 0.001, "loss": 2.4768, "step": 6904 }, { "epoch": 0.2921143920805483, "grad_norm": 0.43369370698928833, "learning_rate": 0.001, "loss": 1.9007, "step": 6905 }, { "epoch": 0.29215669684406465, "grad_norm": 0.2250383198261261, "learning_rate": 0.001, "loss": 2.0614, "step": 6906 }, { "epoch": 0.292199001607581, "grad_norm": 0.26837220788002014, "learning_rate": 0.001, "loss": 2.5939, "step": 6907 }, { "epoch": 0.2922413063710974, "grad_norm": 0.20127150416374207, "learning_rate": 0.001, "loss": 2.2609, "step": 6908 }, { "epoch": 0.29228361113461376, "grad_norm": 0.9822574257850647, "learning_rate": 0.001, "loss": 1.8011, "step": 6909 }, { "epoch": 0.2923259158981301, "grad_norm": 0.2298155575990677, "learning_rate": 0.001, "loss": 1.9496, "step": 6910 }, { "epoch": 0.2923682206616465, "grad_norm": 1.8333818912506104, "learning_rate": 0.001, "loss": 2.3752, "step": 6911 }, { "epoch": 0.2924105254251629, "grad_norm": 0.18054918944835663, "learning_rate": 0.001, "loss": 2.273, "step": 6912 }, { "epoch": 0.29245283018867924, "grad_norm": 0.22334930300712585, "learning_rate": 0.001, "loss": 1.8855, "step": 6913 }, { "epoch": 0.2924951349521956, "grad_norm": 0.22284521162509918, "learning_rate": 0.001, "loss": 1.9008, "step": 6914 }, { "epoch": 0.292537439715712, "grad_norm": 0.18738651275634766, "learning_rate": 0.001, "loss": 3.3555, "step": 6915 }, { "epoch": 0.29257974447922835, "grad_norm": 0.23615574836730957, "learning_rate": 0.001, "loss": 2.1617, "step": 6916 }, { "epoch": 0.2926220492427447, "grad_norm": 0.19093096256256104, "learning_rate": 0.001, "loss": 2.0053, "step": 6917 }, { "epoch": 0.2926643540062611, "grad_norm": 0.22282744944095612, "learning_rate": 0.001, "loss": 2.3443, "step": 6918 }, { "epoch": 0.2927066587697775, "grad_norm": 0.35610735416412354, "learning_rate": 0.001, "loss": 2.5339, "step": 6919 }, { "epoch": 0.2927489635332938, "grad_norm": 0.2231018990278244, "learning_rate": 0.001, "loss": 2.6499, "step": 6920 }, { "epoch": 0.29279126829681024, "grad_norm": 9.339323043823242, "learning_rate": 0.001, "loss": 2.2003, "step": 6921 }, { "epoch": 0.2928335730603266, "grad_norm": 0.27400538325309753, "learning_rate": 0.001, "loss": 2.139, "step": 6922 }, { "epoch": 0.29287587782384294, "grad_norm": 1.792051076889038, "learning_rate": 0.001, "loss": 3.2322, "step": 6923 }, { "epoch": 0.29291818258735935, "grad_norm": 0.20132683217525482, "learning_rate": 0.001, "loss": 1.7977, "step": 6924 }, { "epoch": 0.2929604873508757, "grad_norm": 0.22404009103775024, "learning_rate": 0.001, "loss": 2.1195, "step": 6925 }, { "epoch": 0.29300279211439206, "grad_norm": 0.32715481519699097, "learning_rate": 0.001, "loss": 2.158, "step": 6926 }, { "epoch": 0.2930450968779085, "grad_norm": 0.4097978472709656, "learning_rate": 0.001, "loss": 2.2345, "step": 6927 }, { "epoch": 0.2930874016414248, "grad_norm": 15.934835433959961, "learning_rate": 0.001, "loss": 2.2525, "step": 6928 }, { "epoch": 0.2931297064049412, "grad_norm": 0.20492605865001678, "learning_rate": 0.001, "loss": 2.2868, "step": 6929 }, { "epoch": 0.2931720111684576, "grad_norm": 0.22894705832004547, "learning_rate": 0.001, "loss": 3.1435, "step": 6930 }, { "epoch": 0.29321431593197395, "grad_norm": 0.19995182752609253, "learning_rate": 0.001, "loss": 2.3582, "step": 6931 }, { "epoch": 0.2932566206954903, "grad_norm": 13.218109130859375, "learning_rate": 0.001, "loss": 4.1725, "step": 6932 }, { "epoch": 0.2932989254590067, "grad_norm": 151.23451232910156, "learning_rate": 0.001, "loss": 1.6978, "step": 6933 }, { "epoch": 0.29334123022252306, "grad_norm": 0.6989339590072632, "learning_rate": 0.001, "loss": 1.5756, "step": 6934 }, { "epoch": 0.2933835349860394, "grad_norm": 11.406463623046875, "learning_rate": 0.001, "loss": 2.1872, "step": 6935 }, { "epoch": 0.2934258397495558, "grad_norm": 0.19447572529315948, "learning_rate": 0.001, "loss": 1.9257, "step": 6936 }, { "epoch": 0.2934681445130722, "grad_norm": 0.7553207874298096, "learning_rate": 0.001, "loss": 2.0954, "step": 6937 }, { "epoch": 0.29351044927658854, "grad_norm": 0.6458339095115662, "learning_rate": 0.001, "loss": 1.9439, "step": 6938 }, { "epoch": 0.2935527540401049, "grad_norm": 4.88887357711792, "learning_rate": 0.001, "loss": 2.7551, "step": 6939 }, { "epoch": 0.2935950588036213, "grad_norm": 0.16294671595096588, "learning_rate": 0.001, "loss": 2.1921, "step": 6940 }, { "epoch": 0.29363736356713765, "grad_norm": 0.1700448840856552, "learning_rate": 0.001, "loss": 2.3544, "step": 6941 }, { "epoch": 0.293679668330654, "grad_norm": 0.9373911023139954, "learning_rate": 0.001, "loss": 3.519, "step": 6942 }, { "epoch": 0.2937219730941704, "grad_norm": 0.16765141487121582, "learning_rate": 0.001, "loss": 1.753, "step": 6943 }, { "epoch": 0.2937642778576868, "grad_norm": 0.785889744758606, "learning_rate": 0.001, "loss": 2.0261, "step": 6944 }, { "epoch": 0.2938065826212031, "grad_norm": 0.36694565415382385, "learning_rate": 0.001, "loss": 2.8762, "step": 6945 }, { "epoch": 0.29384888738471954, "grad_norm": 2.4932944774627686, "learning_rate": 0.001, "loss": 3.5164, "step": 6946 }, { "epoch": 0.2938911921482359, "grad_norm": 0.3365854024887085, "learning_rate": 0.001, "loss": 2.5705, "step": 6947 }, { "epoch": 0.29393349691175225, "grad_norm": 0.18186473846435547, "learning_rate": 0.001, "loss": 2.2288, "step": 6948 }, { "epoch": 0.29397580167526866, "grad_norm": 0.18802011013031006, "learning_rate": 0.001, "loss": 1.6162, "step": 6949 }, { "epoch": 0.294018106438785, "grad_norm": 0.6282293796539307, "learning_rate": 0.001, "loss": 2.0641, "step": 6950 }, { "epoch": 0.29406041120230136, "grad_norm": 0.18980659544467926, "learning_rate": 0.001, "loss": 2.1692, "step": 6951 }, { "epoch": 0.2941027159658178, "grad_norm": 0.25274673104286194, "learning_rate": 0.001, "loss": 2.1386, "step": 6952 }, { "epoch": 0.29414502072933413, "grad_norm": 1.6487905979156494, "learning_rate": 0.001, "loss": 2.0647, "step": 6953 }, { "epoch": 0.2941873254928505, "grad_norm": 0.38848909735679626, "learning_rate": 0.001, "loss": 1.3862, "step": 6954 }, { "epoch": 0.2942296302563669, "grad_norm": 0.5868200063705444, "learning_rate": 0.001, "loss": 2.8731, "step": 6955 }, { "epoch": 0.29427193501988325, "grad_norm": 117.72666931152344, "learning_rate": 0.001, "loss": 3.0126, "step": 6956 }, { "epoch": 0.2943142397833996, "grad_norm": 0.19856488704681396, "learning_rate": 0.001, "loss": 2.3552, "step": 6957 }, { "epoch": 0.294356544546916, "grad_norm": 0.29041945934295654, "learning_rate": 0.001, "loss": 3.019, "step": 6958 }, { "epoch": 0.29439884931043236, "grad_norm": 0.22051405906677246, "learning_rate": 0.001, "loss": 2.6603, "step": 6959 }, { "epoch": 0.2944411540739487, "grad_norm": 0.1814699023962021, "learning_rate": 0.001, "loss": 1.5751, "step": 6960 }, { "epoch": 0.2944834588374651, "grad_norm": 8.469980239868164, "learning_rate": 0.001, "loss": 1.8405, "step": 6961 }, { "epoch": 0.2945257636009815, "grad_norm": 0.3591289222240448, "learning_rate": 0.001, "loss": 1.9221, "step": 6962 }, { "epoch": 0.29456806836449784, "grad_norm": 0.23890052735805511, "learning_rate": 0.001, "loss": 2.0497, "step": 6963 }, { "epoch": 0.2946103731280142, "grad_norm": 0.29208847880363464, "learning_rate": 0.001, "loss": 2.6245, "step": 6964 }, { "epoch": 0.2946526778915306, "grad_norm": 0.19358113408088684, "learning_rate": 0.001, "loss": 2.2433, "step": 6965 }, { "epoch": 0.29469498265504696, "grad_norm": 0.24694213271141052, "learning_rate": 0.001, "loss": 1.7421, "step": 6966 }, { "epoch": 0.2947372874185633, "grad_norm": 0.22472073137760162, "learning_rate": 0.001, "loss": 2.1198, "step": 6967 }, { "epoch": 0.2947795921820797, "grad_norm": 0.3076910376548767, "learning_rate": 0.001, "loss": 1.9558, "step": 6968 }, { "epoch": 0.2948218969455961, "grad_norm": 0.21519528329372406, "learning_rate": 0.001, "loss": 3.0599, "step": 6969 }, { "epoch": 0.29486420170911243, "grad_norm": 0.2277740240097046, "learning_rate": 0.001, "loss": 2.8818, "step": 6970 }, { "epoch": 0.29490650647262884, "grad_norm": 4.881925106048584, "learning_rate": 0.001, "loss": 1.9913, "step": 6971 }, { "epoch": 0.2949488112361452, "grad_norm": 0.2809251844882965, "learning_rate": 0.001, "loss": 2.8118, "step": 6972 }, { "epoch": 0.29499111599966155, "grad_norm": 0.322844535112381, "learning_rate": 0.001, "loss": 3.4413, "step": 6973 }, { "epoch": 0.29503342076317796, "grad_norm": 0.2614366114139557, "learning_rate": 0.001, "loss": 2.5353, "step": 6974 }, { "epoch": 0.2950757255266943, "grad_norm": 0.23935487866401672, "learning_rate": 0.001, "loss": 2.6036, "step": 6975 }, { "epoch": 0.29511803029021066, "grad_norm": 0.2764032185077667, "learning_rate": 0.001, "loss": 2.577, "step": 6976 }, { "epoch": 0.2951603350537271, "grad_norm": 0.2084626853466034, "learning_rate": 0.001, "loss": 2.0175, "step": 6977 }, { "epoch": 0.29520263981724343, "grad_norm": 0.39110100269317627, "learning_rate": 0.001, "loss": 3.6172, "step": 6978 }, { "epoch": 0.2952449445807598, "grad_norm": 0.5247201919555664, "learning_rate": 0.001, "loss": 3.2458, "step": 6979 }, { "epoch": 0.2952872493442762, "grad_norm": 0.2991565465927124, "learning_rate": 0.001, "loss": 3.4346, "step": 6980 }, { "epoch": 0.29532955410779255, "grad_norm": 0.218788743019104, "learning_rate": 0.001, "loss": 2.654, "step": 6981 }, { "epoch": 0.2953718588713089, "grad_norm": 0.19704671204090118, "learning_rate": 0.001, "loss": 2.1183, "step": 6982 }, { "epoch": 0.29541416363482526, "grad_norm": 0.1847701370716095, "learning_rate": 0.001, "loss": 2.2644, "step": 6983 }, { "epoch": 0.29545646839834167, "grad_norm": 0.20297656953334808, "learning_rate": 0.001, "loss": 1.8673, "step": 6984 }, { "epoch": 0.295498773161858, "grad_norm": 1.8230969905853271, "learning_rate": 0.001, "loss": 1.7627, "step": 6985 }, { "epoch": 0.2955410779253744, "grad_norm": 0.22176900506019592, "learning_rate": 0.001, "loss": 2.2876, "step": 6986 }, { "epoch": 0.2955833826888908, "grad_norm": 0.1842503547668457, "learning_rate": 0.001, "loss": 2.764, "step": 6987 }, { "epoch": 0.29562568745240714, "grad_norm": 0.2775016725063324, "learning_rate": 0.001, "loss": 1.8702, "step": 6988 }, { "epoch": 0.2956679922159235, "grad_norm": 0.21142591536045074, "learning_rate": 0.001, "loss": 3.3746, "step": 6989 }, { "epoch": 0.2957102969794399, "grad_norm": 0.2473433017730713, "learning_rate": 0.001, "loss": 2.1965, "step": 6990 }, { "epoch": 0.29575260174295626, "grad_norm": 0.2217579185962677, "learning_rate": 0.001, "loss": 3.064, "step": 6991 }, { "epoch": 0.2957949065064726, "grad_norm": 0.2861359417438507, "learning_rate": 0.001, "loss": 2.5936, "step": 6992 }, { "epoch": 0.295837211269989, "grad_norm": 0.6411170363426208, "learning_rate": 0.001, "loss": 2.0748, "step": 6993 }, { "epoch": 0.2958795160335054, "grad_norm": 0.20746761560440063, "learning_rate": 0.001, "loss": 1.8649, "step": 6994 }, { "epoch": 0.29592182079702173, "grad_norm": 0.19803252816200256, "learning_rate": 0.001, "loss": 2.6345, "step": 6995 }, { "epoch": 0.29596412556053814, "grad_norm": 0.19498823583126068, "learning_rate": 0.001, "loss": 2.8652, "step": 6996 }, { "epoch": 0.2960064303240545, "grad_norm": 0.18126071989536285, "learning_rate": 0.001, "loss": 1.7674, "step": 6997 }, { "epoch": 0.29604873508757085, "grad_norm": 0.17771077156066895, "learning_rate": 0.001, "loss": 1.8077, "step": 6998 }, { "epoch": 0.29609103985108726, "grad_norm": 0.2599290609359741, "learning_rate": 0.001, "loss": 2.336, "step": 6999 }, { "epoch": 0.2961333446146036, "grad_norm": 0.3005380928516388, "learning_rate": 0.001, "loss": 2.4787, "step": 7000 }, { "epoch": 0.29617564937811997, "grad_norm": 0.18659712374210358, "learning_rate": 0.001, "loss": 2.1932, "step": 7001 }, { "epoch": 0.2962179541416364, "grad_norm": 0.19324898719787598, "learning_rate": 0.001, "loss": 2.0008, "step": 7002 }, { "epoch": 0.29626025890515273, "grad_norm": 0.3487750291824341, "learning_rate": 0.001, "loss": 2.2476, "step": 7003 }, { "epoch": 0.2963025636686691, "grad_norm": 0.20598524808883667, "learning_rate": 0.001, "loss": 2.0175, "step": 7004 }, { "epoch": 0.29634486843218544, "grad_norm": 0.20581454038619995, "learning_rate": 0.001, "loss": 2.5366, "step": 7005 }, { "epoch": 0.29638717319570185, "grad_norm": 0.19093303382396698, "learning_rate": 0.001, "loss": 2.4369, "step": 7006 }, { "epoch": 0.2964294779592182, "grad_norm": 0.1642286777496338, "learning_rate": 0.001, "loss": 2.6144, "step": 7007 }, { "epoch": 0.29647178272273456, "grad_norm": 0.5392095446586609, "learning_rate": 0.001, "loss": 2.2483, "step": 7008 }, { "epoch": 0.29651408748625097, "grad_norm": 0.22686930000782013, "learning_rate": 0.001, "loss": 1.9133, "step": 7009 }, { "epoch": 0.2965563922497673, "grad_norm": 0.22740709781646729, "learning_rate": 0.001, "loss": 1.4307, "step": 7010 }, { "epoch": 0.2965986970132837, "grad_norm": 0.19837690889835358, "learning_rate": 0.001, "loss": 1.8655, "step": 7011 }, { "epoch": 0.2966410017768001, "grad_norm": 0.32683438062667847, "learning_rate": 0.001, "loss": 2.1361, "step": 7012 }, { "epoch": 0.29668330654031644, "grad_norm": 4.837608814239502, "learning_rate": 0.001, "loss": 3.105, "step": 7013 }, { "epoch": 0.2967256113038328, "grad_norm": 0.1853974610567093, "learning_rate": 0.001, "loss": 1.8663, "step": 7014 }, { "epoch": 0.2967679160673492, "grad_norm": 3.4521446228027344, "learning_rate": 0.001, "loss": 2.0969, "step": 7015 }, { "epoch": 0.29681022083086556, "grad_norm": 11.476473808288574, "learning_rate": 0.001, "loss": 2.1661, "step": 7016 }, { "epoch": 0.2968525255943819, "grad_norm": 0.2935603857040405, "learning_rate": 0.001, "loss": 2.8407, "step": 7017 }, { "epoch": 0.2968948303578983, "grad_norm": 0.22967737913131714, "learning_rate": 0.001, "loss": 2.347, "step": 7018 }, { "epoch": 0.2969371351214147, "grad_norm": 0.24542132019996643, "learning_rate": 0.001, "loss": 2.7476, "step": 7019 }, { "epoch": 0.29697943988493103, "grad_norm": 0.3539610207080841, "learning_rate": 0.001, "loss": 3.4266, "step": 7020 }, { "epoch": 0.29702174464844744, "grad_norm": 2.1936635971069336, "learning_rate": 0.001, "loss": 2.4714, "step": 7021 }, { "epoch": 0.2970640494119638, "grad_norm": 36.311824798583984, "learning_rate": 0.001, "loss": 2.2848, "step": 7022 }, { "epoch": 0.29710635417548015, "grad_norm": 0.7815760970115662, "learning_rate": 0.001, "loss": 3.0078, "step": 7023 }, { "epoch": 0.29714865893899656, "grad_norm": 0.25058576464653015, "learning_rate": 0.001, "loss": 1.7609, "step": 7024 }, { "epoch": 0.2971909637025129, "grad_norm": 0.32843679189682007, "learning_rate": 0.001, "loss": 2.2564, "step": 7025 }, { "epoch": 0.29723326846602927, "grad_norm": 0.31752750277519226, "learning_rate": 0.001, "loss": 3.8093, "step": 7026 }, { "epoch": 0.2972755732295456, "grad_norm": 0.3511388301849365, "learning_rate": 0.001, "loss": 2.9753, "step": 7027 }, { "epoch": 0.29731787799306203, "grad_norm": 0.18950346112251282, "learning_rate": 0.001, "loss": 1.8184, "step": 7028 }, { "epoch": 0.2973601827565784, "grad_norm": 1.3205153942108154, "learning_rate": 0.001, "loss": 2.4554, "step": 7029 }, { "epoch": 0.29740248752009474, "grad_norm": 0.5435582995414734, "learning_rate": 0.001, "loss": 2.9064, "step": 7030 }, { "epoch": 0.29744479228361115, "grad_norm": 0.6610992550849915, "learning_rate": 0.001, "loss": 2.4811, "step": 7031 }, { "epoch": 0.2974870970471275, "grad_norm": 0.43292543292045593, "learning_rate": 0.001, "loss": 2.1861, "step": 7032 }, { "epoch": 0.29752940181064386, "grad_norm": 0.29178565740585327, "learning_rate": 0.001, "loss": 2.0465, "step": 7033 }, { "epoch": 0.29757170657416027, "grad_norm": 0.4427778720855713, "learning_rate": 0.001, "loss": 2.1454, "step": 7034 }, { "epoch": 0.2976140113376766, "grad_norm": 0.36568501591682434, "learning_rate": 0.001, "loss": 2.513, "step": 7035 }, { "epoch": 0.297656316101193, "grad_norm": 0.7165125608444214, "learning_rate": 0.001, "loss": 1.9268, "step": 7036 }, { "epoch": 0.2976986208647094, "grad_norm": 0.23342393338680267, "learning_rate": 0.001, "loss": 2.8504, "step": 7037 }, { "epoch": 0.29774092562822574, "grad_norm": 0.24309587478637695, "learning_rate": 0.001, "loss": 2.6698, "step": 7038 }, { "epoch": 0.2977832303917421, "grad_norm": 1.0469741821289062, "learning_rate": 0.001, "loss": 4.0021, "step": 7039 }, { "epoch": 0.2978255351552585, "grad_norm": 4.104727268218994, "learning_rate": 0.001, "loss": 2.4115, "step": 7040 }, { "epoch": 0.29786783991877486, "grad_norm": 2.293821096420288, "learning_rate": 0.001, "loss": 2.3395, "step": 7041 }, { "epoch": 0.2979101446822912, "grad_norm": 0.2298813909292221, "learning_rate": 0.001, "loss": 1.913, "step": 7042 }, { "epoch": 0.2979524494458076, "grad_norm": 0.5691928267478943, "learning_rate": 0.001, "loss": 2.0014, "step": 7043 }, { "epoch": 0.297994754209324, "grad_norm": 0.4775361120700836, "learning_rate": 0.001, "loss": 2.5362, "step": 7044 }, { "epoch": 0.29803705897284033, "grad_norm": 0.364666223526001, "learning_rate": 0.001, "loss": 2.8652, "step": 7045 }, { "epoch": 0.29807936373635674, "grad_norm": 0.9387134313583374, "learning_rate": 0.001, "loss": 2.1165, "step": 7046 }, { "epoch": 0.2981216684998731, "grad_norm": 0.2410208135843277, "learning_rate": 0.001, "loss": 3.0745, "step": 7047 }, { "epoch": 0.29816397326338945, "grad_norm": 0.2509887218475342, "learning_rate": 0.001, "loss": 1.7976, "step": 7048 }, { "epoch": 0.2982062780269058, "grad_norm": 1.8324601650238037, "learning_rate": 0.001, "loss": 2.6896, "step": 7049 }, { "epoch": 0.2982485827904222, "grad_norm": 0.342830091714859, "learning_rate": 0.001, "loss": 2.2083, "step": 7050 }, { "epoch": 0.29829088755393857, "grad_norm": 0.263128399848938, "learning_rate": 0.001, "loss": 2.8128, "step": 7051 }, { "epoch": 0.2983331923174549, "grad_norm": 0.2615799903869629, "learning_rate": 0.001, "loss": 1.9154, "step": 7052 }, { "epoch": 0.29837549708097133, "grad_norm": 0.3741340935230255, "learning_rate": 0.001, "loss": 2.8024, "step": 7053 }, { "epoch": 0.2984178018444877, "grad_norm": 0.21344539523124695, "learning_rate": 0.001, "loss": 2.0821, "step": 7054 }, { "epoch": 0.29846010660800404, "grad_norm": 7.319907188415527, "learning_rate": 0.001, "loss": 2.5422, "step": 7055 }, { "epoch": 0.29850241137152045, "grad_norm": 0.1762029081583023, "learning_rate": 0.001, "loss": 1.8821, "step": 7056 }, { "epoch": 0.2985447161350368, "grad_norm": 0.21324655413627625, "learning_rate": 0.001, "loss": 1.4527, "step": 7057 }, { "epoch": 0.29858702089855316, "grad_norm": 0.35666510462760925, "learning_rate": 0.001, "loss": 3.0456, "step": 7058 }, { "epoch": 0.29862932566206957, "grad_norm": 0.21469701826572418, "learning_rate": 0.001, "loss": 2.0336, "step": 7059 }, { "epoch": 0.2986716304255859, "grad_norm": 0.39980030059814453, "learning_rate": 0.001, "loss": 3.3548, "step": 7060 }, { "epoch": 0.2987139351891023, "grad_norm": 0.23621757328510284, "learning_rate": 0.001, "loss": 2.3665, "step": 7061 }, { "epoch": 0.2987562399526187, "grad_norm": 0.26678037643432617, "learning_rate": 0.001, "loss": 2.1806, "step": 7062 }, { "epoch": 0.29879854471613504, "grad_norm": 0.5849965810775757, "learning_rate": 0.001, "loss": 1.8085, "step": 7063 }, { "epoch": 0.2988408494796514, "grad_norm": 0.4421706199645996, "learning_rate": 0.001, "loss": 2.1855, "step": 7064 }, { "epoch": 0.2988831542431678, "grad_norm": 0.2031574845314026, "learning_rate": 0.001, "loss": 1.6082, "step": 7065 }, { "epoch": 0.29892545900668416, "grad_norm": 0.24453617632389069, "learning_rate": 0.001, "loss": 1.9515, "step": 7066 }, { "epoch": 0.2989677637702005, "grad_norm": 0.28984296321868896, "learning_rate": 0.001, "loss": 2.3851, "step": 7067 }, { "epoch": 0.2990100685337169, "grad_norm": 0.22082221508026123, "learning_rate": 0.001, "loss": 1.8154, "step": 7068 }, { "epoch": 0.2990523732972333, "grad_norm": 0.20017731189727783, "learning_rate": 0.001, "loss": 1.9377, "step": 7069 }, { "epoch": 0.29909467806074963, "grad_norm": 0.1992572695016861, "learning_rate": 0.001, "loss": 1.5338, "step": 7070 }, { "epoch": 0.29913698282426604, "grad_norm": 3.3462400436401367, "learning_rate": 0.001, "loss": 2.8609, "step": 7071 }, { "epoch": 0.2991792875877824, "grad_norm": 0.1810547411441803, "learning_rate": 0.001, "loss": 2.7506, "step": 7072 }, { "epoch": 0.29922159235129875, "grad_norm": 0.351933091878891, "learning_rate": 0.001, "loss": 1.9723, "step": 7073 }, { "epoch": 0.2992638971148151, "grad_norm": 0.2719806134700775, "learning_rate": 0.001, "loss": 2.5537, "step": 7074 }, { "epoch": 0.2993062018783315, "grad_norm": 1.2375730276107788, "learning_rate": 0.001, "loss": 2.7143, "step": 7075 }, { "epoch": 0.29934850664184787, "grad_norm": 0.27189958095550537, "learning_rate": 0.001, "loss": 2.8949, "step": 7076 }, { "epoch": 0.2993908114053642, "grad_norm": 2.186654567718506, "learning_rate": 0.001, "loss": 3.3108, "step": 7077 }, { "epoch": 0.29943311616888063, "grad_norm": 0.2271018624305725, "learning_rate": 0.001, "loss": 1.9664, "step": 7078 }, { "epoch": 0.299475420932397, "grad_norm": 0.20025469362735748, "learning_rate": 0.001, "loss": 2.14, "step": 7079 }, { "epoch": 0.29951772569591334, "grad_norm": 1.0243349075317383, "learning_rate": 0.001, "loss": 1.9008, "step": 7080 }, { "epoch": 0.29956003045942975, "grad_norm": 0.27229562401771545, "learning_rate": 0.001, "loss": 3.3795, "step": 7081 }, { "epoch": 0.2996023352229461, "grad_norm": 0.20607994496822357, "learning_rate": 0.001, "loss": 2.6859, "step": 7082 }, { "epoch": 0.29964463998646246, "grad_norm": 0.18946023285388947, "learning_rate": 0.001, "loss": 1.9361, "step": 7083 }, { "epoch": 0.29968694474997887, "grad_norm": 0.22694577276706696, "learning_rate": 0.001, "loss": 2.872, "step": 7084 }, { "epoch": 0.2997292495134952, "grad_norm": 0.19697017967700958, "learning_rate": 0.001, "loss": 1.8643, "step": 7085 }, { "epoch": 0.2997715542770116, "grad_norm": 0.20350822806358337, "learning_rate": 0.001, "loss": 1.8414, "step": 7086 }, { "epoch": 0.299813859040528, "grad_norm": 1.2988654375076294, "learning_rate": 0.001, "loss": 2.5616, "step": 7087 }, { "epoch": 0.29985616380404434, "grad_norm": 7.280746936798096, "learning_rate": 0.001, "loss": 2.5211, "step": 7088 }, { "epoch": 0.2998984685675607, "grad_norm": 0.9409200549125671, "learning_rate": 0.001, "loss": 1.8807, "step": 7089 }, { "epoch": 0.2999407733310771, "grad_norm": 0.20737378299236298, "learning_rate": 0.001, "loss": 2.2614, "step": 7090 }, { "epoch": 0.29998307809459346, "grad_norm": 0.19399072229862213, "learning_rate": 0.001, "loss": 1.5657, "step": 7091 }, { "epoch": 0.3000253828581098, "grad_norm": 2.7120447158813477, "learning_rate": 0.001, "loss": 3.1643, "step": 7092 }, { "epoch": 0.3000676876216262, "grad_norm": 0.23883989453315735, "learning_rate": 0.001, "loss": 1.4332, "step": 7093 }, { "epoch": 0.3001099923851426, "grad_norm": 0.24009618163108826, "learning_rate": 0.001, "loss": 2.0072, "step": 7094 }, { "epoch": 0.30015229714865893, "grad_norm": 0.27275872230529785, "learning_rate": 0.001, "loss": 1.8004, "step": 7095 }, { "epoch": 0.3001946019121753, "grad_norm": 0.3240390419960022, "learning_rate": 0.001, "loss": 2.4354, "step": 7096 }, { "epoch": 0.3002369066756917, "grad_norm": 0.2155575454235077, "learning_rate": 0.001, "loss": 2.4924, "step": 7097 }, { "epoch": 0.30027921143920805, "grad_norm": 0.26140278577804565, "learning_rate": 0.001, "loss": 3.0696, "step": 7098 }, { "epoch": 0.3003215162027244, "grad_norm": 0.20411476492881775, "learning_rate": 0.001, "loss": 2.8706, "step": 7099 }, { "epoch": 0.3003638209662408, "grad_norm": 0.26278483867645264, "learning_rate": 0.001, "loss": 2.6213, "step": 7100 }, { "epoch": 0.30040612572975717, "grad_norm": 0.23542357981204987, "learning_rate": 0.001, "loss": 2.4739, "step": 7101 }, { "epoch": 0.3004484304932735, "grad_norm": 1.478679895401001, "learning_rate": 0.001, "loss": 2.8704, "step": 7102 }, { "epoch": 0.30049073525678993, "grad_norm": 0.22880683839321136, "learning_rate": 0.001, "loss": 2.2796, "step": 7103 }, { "epoch": 0.3005330400203063, "grad_norm": 0.22094206511974335, "learning_rate": 0.001, "loss": 2.1502, "step": 7104 }, { "epoch": 0.30057534478382264, "grad_norm": 0.2112313210964203, "learning_rate": 0.001, "loss": 2.5717, "step": 7105 }, { "epoch": 0.30061764954733905, "grad_norm": 0.19635865092277527, "learning_rate": 0.001, "loss": 1.7046, "step": 7106 }, { "epoch": 0.3006599543108554, "grad_norm": 0.42038843035697937, "learning_rate": 0.001, "loss": 3.0465, "step": 7107 }, { "epoch": 0.30070225907437176, "grad_norm": 0.2118614763021469, "learning_rate": 0.001, "loss": 2.0218, "step": 7108 }, { "epoch": 0.30074456383788817, "grad_norm": 0.1884838491678238, "learning_rate": 0.001, "loss": 1.6187, "step": 7109 }, { "epoch": 0.3007868686014045, "grad_norm": 1.542157530784607, "learning_rate": 0.001, "loss": 2.7272, "step": 7110 }, { "epoch": 0.3008291733649209, "grad_norm": 0.19439256191253662, "learning_rate": 0.001, "loss": 1.8408, "step": 7111 }, { "epoch": 0.3008714781284373, "grad_norm": 0.21478168666362762, "learning_rate": 0.001, "loss": 2.5348, "step": 7112 }, { "epoch": 0.30091378289195364, "grad_norm": 0.2651658058166504, "learning_rate": 0.001, "loss": 2.2955, "step": 7113 }, { "epoch": 0.30095608765547, "grad_norm": 0.20375876128673553, "learning_rate": 0.001, "loss": 2.7385, "step": 7114 }, { "epoch": 0.3009983924189864, "grad_norm": 1.0855168104171753, "learning_rate": 0.001, "loss": 1.7983, "step": 7115 }, { "epoch": 0.30104069718250276, "grad_norm": 1.9149056673049927, "learning_rate": 0.001, "loss": 2.4025, "step": 7116 }, { "epoch": 0.3010830019460191, "grad_norm": 0.2241235226392746, "learning_rate": 0.001, "loss": 2.6799, "step": 7117 }, { "epoch": 0.30112530670953547, "grad_norm": 0.21922898292541504, "learning_rate": 0.001, "loss": 2.2658, "step": 7118 }, { "epoch": 0.3011676114730519, "grad_norm": 0.2675611078739166, "learning_rate": 0.001, "loss": 1.4285, "step": 7119 }, { "epoch": 0.30120991623656823, "grad_norm": 0.5068369507789612, "learning_rate": 0.001, "loss": 2.1726, "step": 7120 }, { "epoch": 0.3012522210000846, "grad_norm": 0.2993006110191345, "learning_rate": 0.001, "loss": 2.4697, "step": 7121 }, { "epoch": 0.301294525763601, "grad_norm": 0.31761422753334045, "learning_rate": 0.001, "loss": 2.6937, "step": 7122 }, { "epoch": 0.30133683052711735, "grad_norm": 0.2203313410282135, "learning_rate": 0.001, "loss": 2.4712, "step": 7123 }, { "epoch": 0.3013791352906337, "grad_norm": 0.2073364555835724, "learning_rate": 0.001, "loss": 2.3447, "step": 7124 }, { "epoch": 0.3014214400541501, "grad_norm": 0.27055227756500244, "learning_rate": 0.001, "loss": 1.9593, "step": 7125 }, { "epoch": 0.30146374481766647, "grad_norm": 0.19267384707927704, "learning_rate": 0.001, "loss": 2.5415, "step": 7126 }, { "epoch": 0.3015060495811828, "grad_norm": 0.21630926430225372, "learning_rate": 0.001, "loss": 3.0923, "step": 7127 }, { "epoch": 0.30154835434469923, "grad_norm": 0.17075297236442566, "learning_rate": 0.001, "loss": 1.9153, "step": 7128 }, { "epoch": 0.3015906591082156, "grad_norm": 2.4271461963653564, "learning_rate": 0.001, "loss": 2.0476, "step": 7129 }, { "epoch": 0.30163296387173194, "grad_norm": 0.1639959067106247, "learning_rate": 0.001, "loss": 1.7935, "step": 7130 }, { "epoch": 0.30167526863524835, "grad_norm": 0.17364169657230377, "learning_rate": 0.001, "loss": 2.065, "step": 7131 }, { "epoch": 0.3017175733987647, "grad_norm": 0.19003529846668243, "learning_rate": 0.001, "loss": 2.4537, "step": 7132 }, { "epoch": 0.30175987816228106, "grad_norm": 6.679607391357422, "learning_rate": 0.001, "loss": 2.5314, "step": 7133 }, { "epoch": 0.30180218292579747, "grad_norm": 0.20183472335338593, "learning_rate": 0.001, "loss": 2.6579, "step": 7134 }, { "epoch": 0.3018444876893138, "grad_norm": 2.5921237468719482, "learning_rate": 0.001, "loss": 2.0, "step": 7135 }, { "epoch": 0.3018867924528302, "grad_norm": 0.23701635003089905, "learning_rate": 0.001, "loss": 2.559, "step": 7136 }, { "epoch": 0.3019290972163466, "grad_norm": 0.5344852805137634, "learning_rate": 0.001, "loss": 2.0646, "step": 7137 }, { "epoch": 0.30197140197986294, "grad_norm": 0.20276984572410583, "learning_rate": 0.001, "loss": 2.3369, "step": 7138 }, { "epoch": 0.3020137067433793, "grad_norm": 0.23550361394882202, "learning_rate": 0.001, "loss": 2.3764, "step": 7139 }, { "epoch": 0.30205601150689565, "grad_norm": 0.1865498572587967, "learning_rate": 0.001, "loss": 1.8937, "step": 7140 }, { "epoch": 0.30209831627041206, "grad_norm": 0.18712198734283447, "learning_rate": 0.001, "loss": 2.3899, "step": 7141 }, { "epoch": 0.3021406210339284, "grad_norm": 1.8322774171829224, "learning_rate": 0.001, "loss": 1.7824, "step": 7142 }, { "epoch": 0.30218292579744477, "grad_norm": 1.2885193824768066, "learning_rate": 0.001, "loss": 1.8025, "step": 7143 }, { "epoch": 0.3022252305609612, "grad_norm": 1.9234724044799805, "learning_rate": 0.001, "loss": 3.0237, "step": 7144 }, { "epoch": 0.30226753532447753, "grad_norm": 0.21429021656513214, "learning_rate": 0.001, "loss": 2.3753, "step": 7145 }, { "epoch": 0.3023098400879939, "grad_norm": 0.1841968446969986, "learning_rate": 0.001, "loss": 1.9292, "step": 7146 }, { "epoch": 0.3023521448515103, "grad_norm": 1.093137264251709, "learning_rate": 0.001, "loss": 1.7962, "step": 7147 }, { "epoch": 0.30239444961502665, "grad_norm": 0.3102741539478302, "learning_rate": 0.001, "loss": 2.7395, "step": 7148 }, { "epoch": 0.302436754378543, "grad_norm": 1.5967626571655273, "learning_rate": 0.001, "loss": 2.026, "step": 7149 }, { "epoch": 0.3024790591420594, "grad_norm": 0.1948966681957245, "learning_rate": 0.001, "loss": 2.3974, "step": 7150 }, { "epoch": 0.30252136390557577, "grad_norm": 5.692415714263916, "learning_rate": 0.001, "loss": 1.7977, "step": 7151 }, { "epoch": 0.3025636686690921, "grad_norm": 0.3406045436859131, "learning_rate": 0.001, "loss": 3.7244, "step": 7152 }, { "epoch": 0.30260597343260853, "grad_norm": 0.19864249229431152, "learning_rate": 0.001, "loss": 2.0721, "step": 7153 }, { "epoch": 0.3026482781961249, "grad_norm": 0.22038595378398895, "learning_rate": 0.001, "loss": 1.618, "step": 7154 }, { "epoch": 0.30269058295964124, "grad_norm": 0.17631886899471283, "learning_rate": 0.001, "loss": 2.1884, "step": 7155 }, { "epoch": 0.30273288772315765, "grad_norm": 0.1953791379928589, "learning_rate": 0.001, "loss": 1.4864, "step": 7156 }, { "epoch": 0.302775192486674, "grad_norm": 0.3843629062175751, "learning_rate": 0.001, "loss": 2.1935, "step": 7157 }, { "epoch": 0.30281749725019036, "grad_norm": 0.562131941318512, "learning_rate": 0.001, "loss": 2.6914, "step": 7158 }, { "epoch": 0.30285980201370677, "grad_norm": 0.30789318680763245, "learning_rate": 0.001, "loss": 3.5069, "step": 7159 }, { "epoch": 0.3029021067772231, "grad_norm": 5.395096778869629, "learning_rate": 0.001, "loss": 2.2879, "step": 7160 }, { "epoch": 0.3029444115407395, "grad_norm": 0.22398367524147034, "learning_rate": 0.001, "loss": 2.5926, "step": 7161 }, { "epoch": 0.30298671630425583, "grad_norm": 0.3233366012573242, "learning_rate": 0.001, "loss": 2.6733, "step": 7162 }, { "epoch": 0.30302902106777224, "grad_norm": 0.20136740803718567, "learning_rate": 0.001, "loss": 1.6234, "step": 7163 }, { "epoch": 0.3030713258312886, "grad_norm": 0.20825202763080597, "learning_rate": 0.001, "loss": 2.3597, "step": 7164 }, { "epoch": 0.30311363059480495, "grad_norm": 1.0632126331329346, "learning_rate": 0.001, "loss": 1.722, "step": 7165 }, { "epoch": 0.30315593535832136, "grad_norm": 0.22308948636054993, "learning_rate": 0.001, "loss": 2.4832, "step": 7166 }, { "epoch": 0.3031982401218377, "grad_norm": 0.22294947504997253, "learning_rate": 0.001, "loss": 2.4689, "step": 7167 }, { "epoch": 0.30324054488535407, "grad_norm": 0.18253259360790253, "learning_rate": 0.001, "loss": 2.1326, "step": 7168 }, { "epoch": 0.3032828496488705, "grad_norm": 2.8974242210388184, "learning_rate": 0.001, "loss": 2.3822, "step": 7169 }, { "epoch": 0.30332515441238683, "grad_norm": 0.19481533765792847, "learning_rate": 0.001, "loss": 2.0672, "step": 7170 }, { "epoch": 0.3033674591759032, "grad_norm": 0.42890578508377075, "learning_rate": 0.001, "loss": 2.517, "step": 7171 }, { "epoch": 0.3034097639394196, "grad_norm": 0.24793508648872375, "learning_rate": 0.001, "loss": 1.7478, "step": 7172 }, { "epoch": 0.30345206870293595, "grad_norm": 0.1969098150730133, "learning_rate": 0.001, "loss": 1.9163, "step": 7173 }, { "epoch": 0.3034943734664523, "grad_norm": 0.37608563899993896, "learning_rate": 0.001, "loss": 2.0317, "step": 7174 }, { "epoch": 0.3035366782299687, "grad_norm": 0.30871760845184326, "learning_rate": 0.001, "loss": 1.9419, "step": 7175 }, { "epoch": 0.30357898299348507, "grad_norm": 0.18646669387817383, "learning_rate": 0.001, "loss": 1.7309, "step": 7176 }, { "epoch": 0.3036212877570014, "grad_norm": 0.17259854078292847, "learning_rate": 0.001, "loss": 1.7845, "step": 7177 }, { "epoch": 0.30366359252051783, "grad_norm": 0.17856760323047638, "learning_rate": 0.001, "loss": 2.4272, "step": 7178 }, { "epoch": 0.3037058972840342, "grad_norm": 2.3033905029296875, "learning_rate": 0.001, "loss": 2.732, "step": 7179 }, { "epoch": 0.30374820204755054, "grad_norm": 0.4790777266025543, "learning_rate": 0.001, "loss": 2.2848, "step": 7180 }, { "epoch": 0.30379050681106695, "grad_norm": 0.2084704339504242, "learning_rate": 0.001, "loss": 3.398, "step": 7181 }, { "epoch": 0.3038328115745833, "grad_norm": 0.3605867028236389, "learning_rate": 0.001, "loss": 3.1206, "step": 7182 }, { "epoch": 0.30387511633809966, "grad_norm": 0.17509806156158447, "learning_rate": 0.001, "loss": 1.7837, "step": 7183 }, { "epoch": 0.303917421101616, "grad_norm": 0.22098810970783234, "learning_rate": 0.001, "loss": 2.0828, "step": 7184 }, { "epoch": 0.3039597258651324, "grad_norm": 3.9246859550476074, "learning_rate": 0.001, "loss": 3.2388, "step": 7185 }, { "epoch": 0.3040020306286488, "grad_norm": 0.1589023768901825, "learning_rate": 0.001, "loss": 2.3912, "step": 7186 }, { "epoch": 0.30404433539216513, "grad_norm": 0.20874576270580292, "learning_rate": 0.001, "loss": 2.8516, "step": 7187 }, { "epoch": 0.30408664015568154, "grad_norm": 1.3450440168380737, "learning_rate": 0.001, "loss": 1.8097, "step": 7188 }, { "epoch": 0.3041289449191979, "grad_norm": 1.1989301443099976, "learning_rate": 0.001, "loss": 1.8467, "step": 7189 }, { "epoch": 0.30417124968271425, "grad_norm": 0.21062646806240082, "learning_rate": 0.001, "loss": 2.1306, "step": 7190 }, { "epoch": 0.30421355444623066, "grad_norm": 4.8882012367248535, "learning_rate": 0.001, "loss": 2.5283, "step": 7191 }, { "epoch": 0.304255859209747, "grad_norm": 0.22382013499736786, "learning_rate": 0.001, "loss": 2.0692, "step": 7192 }, { "epoch": 0.30429816397326337, "grad_norm": 0.6270066499710083, "learning_rate": 0.001, "loss": 1.8903, "step": 7193 }, { "epoch": 0.3043404687367798, "grad_norm": 0.21127991378307343, "learning_rate": 0.001, "loss": 3.1819, "step": 7194 }, { "epoch": 0.30438277350029613, "grad_norm": 0.20672987401485443, "learning_rate": 0.001, "loss": 3.1049, "step": 7195 }, { "epoch": 0.3044250782638125, "grad_norm": 0.5075833201408386, "learning_rate": 0.001, "loss": 1.8776, "step": 7196 }, { "epoch": 0.3044673830273289, "grad_norm": 0.2034783810377121, "learning_rate": 0.001, "loss": 1.9582, "step": 7197 }, { "epoch": 0.30450968779084525, "grad_norm": 0.2227412313222885, "learning_rate": 0.001, "loss": 2.315, "step": 7198 }, { "epoch": 0.3045519925543616, "grad_norm": 0.38511255383491516, "learning_rate": 0.001, "loss": 2.3305, "step": 7199 }, { "epoch": 0.304594297317878, "grad_norm": 0.48111987113952637, "learning_rate": 0.001, "loss": 2.6835, "step": 7200 }, { "epoch": 0.30463660208139437, "grad_norm": 0.39257773756980896, "learning_rate": 0.001, "loss": 2.4686, "step": 7201 }, { "epoch": 0.3046789068449107, "grad_norm": 0.24036632478237152, "learning_rate": 0.001, "loss": 3.5692, "step": 7202 }, { "epoch": 0.30472121160842713, "grad_norm": 1.5374091863632202, "learning_rate": 0.001, "loss": 2.1117, "step": 7203 }, { "epoch": 0.3047635163719435, "grad_norm": 0.21697300672531128, "learning_rate": 0.001, "loss": 3.4893, "step": 7204 }, { "epoch": 0.30480582113545984, "grad_norm": 0.2033626139163971, "learning_rate": 0.001, "loss": 2.6569, "step": 7205 }, { "epoch": 0.30484812589897625, "grad_norm": 0.5761461853981018, "learning_rate": 0.001, "loss": 2.5943, "step": 7206 }, { "epoch": 0.3048904306624926, "grad_norm": 0.17697875201702118, "learning_rate": 0.001, "loss": 1.7242, "step": 7207 }, { "epoch": 0.30493273542600896, "grad_norm": 0.20617154240608215, "learning_rate": 0.001, "loss": 1.9209, "step": 7208 }, { "epoch": 0.3049750401895253, "grad_norm": 1.6445116996765137, "learning_rate": 0.001, "loss": 2.3285, "step": 7209 }, { "epoch": 0.3050173449530417, "grad_norm": 0.2126307338476181, "learning_rate": 0.001, "loss": 2.9291, "step": 7210 }, { "epoch": 0.3050596497165581, "grad_norm": 0.2812674939632416, "learning_rate": 0.001, "loss": 2.0021, "step": 7211 }, { "epoch": 0.30510195448007443, "grad_norm": 0.19099974632263184, "learning_rate": 0.001, "loss": 1.9334, "step": 7212 }, { "epoch": 0.30514425924359084, "grad_norm": 0.23043900728225708, "learning_rate": 0.001, "loss": 3.7938, "step": 7213 }, { "epoch": 0.3051865640071072, "grad_norm": 0.8108448386192322, "learning_rate": 0.001, "loss": 2.4988, "step": 7214 }, { "epoch": 0.30522886877062355, "grad_norm": 0.22695882618427277, "learning_rate": 0.001, "loss": 2.3641, "step": 7215 }, { "epoch": 0.30527117353413996, "grad_norm": 0.2140110582113266, "learning_rate": 0.001, "loss": 2.7109, "step": 7216 }, { "epoch": 0.3053134782976563, "grad_norm": 0.18746429681777954, "learning_rate": 0.001, "loss": 1.5037, "step": 7217 }, { "epoch": 0.30535578306117267, "grad_norm": 0.5185785293579102, "learning_rate": 0.001, "loss": 2.1417, "step": 7218 }, { "epoch": 0.3053980878246891, "grad_norm": 0.22843578457832336, "learning_rate": 0.001, "loss": 2.9488, "step": 7219 }, { "epoch": 0.30544039258820543, "grad_norm": 0.22175458073616028, "learning_rate": 0.001, "loss": 1.8912, "step": 7220 }, { "epoch": 0.3054826973517218, "grad_norm": 0.3052138388156891, "learning_rate": 0.001, "loss": 2.7883, "step": 7221 }, { "epoch": 0.3055250021152382, "grad_norm": 0.20926591753959656, "learning_rate": 0.001, "loss": 2.2513, "step": 7222 }, { "epoch": 0.30556730687875455, "grad_norm": 0.5725785493850708, "learning_rate": 0.001, "loss": 2.0737, "step": 7223 }, { "epoch": 0.3056096116422709, "grad_norm": 0.2553465962409973, "learning_rate": 0.001, "loss": 2.8104, "step": 7224 }, { "epoch": 0.3056519164057873, "grad_norm": 0.17359638214111328, "learning_rate": 0.001, "loss": 1.9327, "step": 7225 }, { "epoch": 0.30569422116930367, "grad_norm": 0.38395628333091736, "learning_rate": 0.001, "loss": 3.3217, "step": 7226 }, { "epoch": 0.30573652593282, "grad_norm": 0.4329422414302826, "learning_rate": 0.001, "loss": 3.0564, "step": 7227 }, { "epoch": 0.30577883069633643, "grad_norm": 0.19598042964935303, "learning_rate": 0.001, "loss": 2.1527, "step": 7228 }, { "epoch": 0.3058211354598528, "grad_norm": 4.16204309463501, "learning_rate": 0.001, "loss": 2.1645, "step": 7229 }, { "epoch": 0.30586344022336914, "grad_norm": 0.9371064901351929, "learning_rate": 0.001, "loss": 2.5554, "step": 7230 }, { "epoch": 0.3059057449868855, "grad_norm": 89.23577117919922, "learning_rate": 0.001, "loss": 1.957, "step": 7231 }, { "epoch": 0.3059480497504019, "grad_norm": 2.158701181411743, "learning_rate": 0.001, "loss": 2.8319, "step": 7232 }, { "epoch": 0.30599035451391826, "grad_norm": 0.17827636003494263, "learning_rate": 0.001, "loss": 2.1969, "step": 7233 }, { "epoch": 0.3060326592774346, "grad_norm": 0.21226316690444946, "learning_rate": 0.001, "loss": 1.4754, "step": 7234 }, { "epoch": 0.306074964040951, "grad_norm": 0.6273579001426697, "learning_rate": 0.001, "loss": 1.9398, "step": 7235 }, { "epoch": 0.3061172688044674, "grad_norm": 3.001936197280884, "learning_rate": 0.001, "loss": 2.9792, "step": 7236 }, { "epoch": 0.30615957356798373, "grad_norm": 0.8433371186256409, "learning_rate": 0.001, "loss": 2.1398, "step": 7237 }, { "epoch": 0.30620187833150014, "grad_norm": 0.518166720867157, "learning_rate": 0.001, "loss": 2.1598, "step": 7238 }, { "epoch": 0.3062441830950165, "grad_norm": 0.25381919741630554, "learning_rate": 0.001, "loss": 2.3477, "step": 7239 }, { "epoch": 0.30628648785853285, "grad_norm": 0.3235541880130768, "learning_rate": 0.001, "loss": 2.4599, "step": 7240 }, { "epoch": 0.30632879262204926, "grad_norm": 0.46788913011550903, "learning_rate": 0.001, "loss": 2.0962, "step": 7241 }, { "epoch": 0.3063710973855656, "grad_norm": 0.26095959544181824, "learning_rate": 0.001, "loss": 3.9055, "step": 7242 }, { "epoch": 0.30641340214908197, "grad_norm": 0.44010379910469055, "learning_rate": 0.001, "loss": 2.2192, "step": 7243 }, { "epoch": 0.3064557069125984, "grad_norm": 0.24772557616233826, "learning_rate": 0.001, "loss": 1.9465, "step": 7244 }, { "epoch": 0.30649801167611473, "grad_norm": 0.22386042773723602, "learning_rate": 0.001, "loss": 2.0204, "step": 7245 }, { "epoch": 0.3065403164396311, "grad_norm": 0.1983170062303543, "learning_rate": 0.001, "loss": 1.712, "step": 7246 }, { "epoch": 0.3065826212031475, "grad_norm": 0.2919590175151825, "learning_rate": 0.001, "loss": 2.2712, "step": 7247 }, { "epoch": 0.30662492596666385, "grad_norm": 1.4634429216384888, "learning_rate": 0.001, "loss": 1.5808, "step": 7248 }, { "epoch": 0.3066672307301802, "grad_norm": 0.24671746790409088, "learning_rate": 0.001, "loss": 3.1394, "step": 7249 }, { "epoch": 0.3067095354936966, "grad_norm": 4.342376708984375, "learning_rate": 0.001, "loss": 1.5847, "step": 7250 }, { "epoch": 0.30675184025721297, "grad_norm": 0.6360762715339661, "learning_rate": 0.001, "loss": 2.1356, "step": 7251 }, { "epoch": 0.3067941450207293, "grad_norm": 0.19539184868335724, "learning_rate": 0.001, "loss": 3.3058, "step": 7252 }, { "epoch": 0.3068364497842457, "grad_norm": 0.22951632738113403, "learning_rate": 0.001, "loss": 2.0753, "step": 7253 }, { "epoch": 0.3068787545477621, "grad_norm": 0.3070700764656067, "learning_rate": 0.001, "loss": 1.9365, "step": 7254 }, { "epoch": 0.30692105931127844, "grad_norm": 0.36672693490982056, "learning_rate": 0.001, "loss": 3.0584, "step": 7255 }, { "epoch": 0.3069633640747948, "grad_norm": 0.8749989867210388, "learning_rate": 0.001, "loss": 1.9865, "step": 7256 }, { "epoch": 0.3070056688383112, "grad_norm": 0.4010057747364044, "learning_rate": 0.001, "loss": 2.2224, "step": 7257 }, { "epoch": 0.30704797360182756, "grad_norm": 0.19571296870708466, "learning_rate": 0.001, "loss": 1.8307, "step": 7258 }, { "epoch": 0.3070902783653439, "grad_norm": 0.6532837748527527, "learning_rate": 0.001, "loss": 2.183, "step": 7259 }, { "epoch": 0.3071325831288603, "grad_norm": 0.22160778939723969, "learning_rate": 0.001, "loss": 2.2073, "step": 7260 }, { "epoch": 0.3071748878923767, "grad_norm": 0.24020320177078247, "learning_rate": 0.001, "loss": 2.6333, "step": 7261 }, { "epoch": 0.30721719265589303, "grad_norm": 0.1901521235704422, "learning_rate": 0.001, "loss": 2.0718, "step": 7262 }, { "epoch": 0.30725949741940944, "grad_norm": 0.19890253245830536, "learning_rate": 0.001, "loss": 1.6972, "step": 7263 }, { "epoch": 0.3073018021829258, "grad_norm": 0.17532768845558167, "learning_rate": 0.001, "loss": 2.0171, "step": 7264 }, { "epoch": 0.30734410694644215, "grad_norm": 0.21217653155326843, "learning_rate": 0.001, "loss": 3.6026, "step": 7265 }, { "epoch": 0.30738641170995856, "grad_norm": 0.23952527344226837, "learning_rate": 0.001, "loss": 1.9304, "step": 7266 }, { "epoch": 0.3074287164734749, "grad_norm": 0.2269655019044876, "learning_rate": 0.001, "loss": 2.0697, "step": 7267 }, { "epoch": 0.30747102123699127, "grad_norm": 0.21532459557056427, "learning_rate": 0.001, "loss": 2.6024, "step": 7268 }, { "epoch": 0.3075133260005077, "grad_norm": 0.20854906737804413, "learning_rate": 0.001, "loss": 2.4159, "step": 7269 }, { "epoch": 0.30755563076402403, "grad_norm": 0.2222970575094223, "learning_rate": 0.001, "loss": 3.1089, "step": 7270 }, { "epoch": 0.3075979355275404, "grad_norm": 0.8760474920272827, "learning_rate": 0.001, "loss": 1.7882, "step": 7271 }, { "epoch": 0.3076402402910568, "grad_norm": 0.18725240230560303, "learning_rate": 0.001, "loss": 2.9421, "step": 7272 }, { "epoch": 0.30768254505457315, "grad_norm": 0.9830304384231567, "learning_rate": 0.001, "loss": 1.7632, "step": 7273 }, { "epoch": 0.3077248498180895, "grad_norm": 0.2174868881702423, "learning_rate": 0.001, "loss": 3.1616, "step": 7274 }, { "epoch": 0.30776715458160586, "grad_norm": 0.2975056767463684, "learning_rate": 0.001, "loss": 2.1744, "step": 7275 }, { "epoch": 0.30780945934512227, "grad_norm": 0.23097103834152222, "learning_rate": 0.001, "loss": 2.6325, "step": 7276 }, { "epoch": 0.3078517641086386, "grad_norm": 0.20248940587043762, "learning_rate": 0.001, "loss": 2.3101, "step": 7277 }, { "epoch": 0.307894068872155, "grad_norm": 0.21180301904678345, "learning_rate": 0.001, "loss": 2.1885, "step": 7278 }, { "epoch": 0.3079363736356714, "grad_norm": 0.20690077543258667, "learning_rate": 0.001, "loss": 2.6467, "step": 7279 }, { "epoch": 0.30797867839918774, "grad_norm": 0.19214750826358795, "learning_rate": 0.001, "loss": 2.5726, "step": 7280 }, { "epoch": 0.3080209831627041, "grad_norm": 0.6494995355606079, "learning_rate": 0.001, "loss": 2.6384, "step": 7281 }, { "epoch": 0.3080632879262205, "grad_norm": 0.19168926775455475, "learning_rate": 0.001, "loss": 2.1541, "step": 7282 }, { "epoch": 0.30810559268973686, "grad_norm": 0.2198658436536789, "learning_rate": 0.001, "loss": 2.046, "step": 7283 }, { "epoch": 0.3081478974532532, "grad_norm": 0.23235653340816498, "learning_rate": 0.001, "loss": 2.2637, "step": 7284 }, { "epoch": 0.3081902022167696, "grad_norm": 0.48129093647003174, "learning_rate": 0.001, "loss": 2.2735, "step": 7285 }, { "epoch": 0.308232506980286, "grad_norm": 0.1928509920835495, "learning_rate": 0.001, "loss": 2.4181, "step": 7286 }, { "epoch": 0.30827481174380233, "grad_norm": 0.21180304884910583, "learning_rate": 0.001, "loss": 1.9391, "step": 7287 }, { "epoch": 0.30831711650731874, "grad_norm": 0.5490750074386597, "learning_rate": 0.001, "loss": 2.0321, "step": 7288 }, { "epoch": 0.3083594212708351, "grad_norm": 0.19193410873413086, "learning_rate": 0.001, "loss": 2.4427, "step": 7289 }, { "epoch": 0.30840172603435145, "grad_norm": 0.1852174997329712, "learning_rate": 0.001, "loss": 1.7946, "step": 7290 }, { "epoch": 0.30844403079786786, "grad_norm": 0.1935269683599472, "learning_rate": 0.001, "loss": 2.5008, "step": 7291 }, { "epoch": 0.3084863355613842, "grad_norm": 0.5893869400024414, "learning_rate": 0.001, "loss": 1.7927, "step": 7292 }, { "epoch": 0.30852864032490057, "grad_norm": 0.4908425807952881, "learning_rate": 0.001, "loss": 2.8926, "step": 7293 }, { "epoch": 0.308570945088417, "grad_norm": 0.20681232213974, "learning_rate": 0.001, "loss": 1.8814, "step": 7294 }, { "epoch": 0.30861324985193334, "grad_norm": 1.3799690008163452, "learning_rate": 0.001, "loss": 2.032, "step": 7295 }, { "epoch": 0.3086555546154497, "grad_norm": 0.26583728194236755, "learning_rate": 0.001, "loss": 2.3084, "step": 7296 }, { "epoch": 0.30869785937896604, "grad_norm": 0.21364301443099976, "learning_rate": 0.001, "loss": 2.5819, "step": 7297 }, { "epoch": 0.30874016414248245, "grad_norm": 0.1767185777425766, "learning_rate": 0.001, "loss": 2.2752, "step": 7298 }, { "epoch": 0.3087824689059988, "grad_norm": 0.19230739772319794, "learning_rate": 0.001, "loss": 2.4451, "step": 7299 }, { "epoch": 0.30882477366951516, "grad_norm": 0.19486257433891296, "learning_rate": 0.001, "loss": 2.663, "step": 7300 }, { "epoch": 0.30886707843303157, "grad_norm": 0.23837964236736298, "learning_rate": 0.001, "loss": 2.2232, "step": 7301 }, { "epoch": 0.3089093831965479, "grad_norm": 0.2171543687582016, "learning_rate": 0.001, "loss": 2.9941, "step": 7302 }, { "epoch": 0.3089516879600643, "grad_norm": 0.21793997287750244, "learning_rate": 0.001, "loss": 2.36, "step": 7303 }, { "epoch": 0.3089939927235807, "grad_norm": 0.22793272137641907, "learning_rate": 0.001, "loss": 1.9991, "step": 7304 }, { "epoch": 0.30903629748709704, "grad_norm": 0.23069630563259125, "learning_rate": 0.001, "loss": 1.94, "step": 7305 }, { "epoch": 0.3090786022506134, "grad_norm": 0.25365108251571655, "learning_rate": 0.001, "loss": 2.1211, "step": 7306 }, { "epoch": 0.3091209070141298, "grad_norm": 0.981309175491333, "learning_rate": 0.001, "loss": 2.3867, "step": 7307 }, { "epoch": 0.30916321177764616, "grad_norm": 0.2560966908931732, "learning_rate": 0.001, "loss": 3.7126, "step": 7308 }, { "epoch": 0.3092055165411625, "grad_norm": 0.18280810117721558, "learning_rate": 0.001, "loss": 1.8863, "step": 7309 }, { "epoch": 0.3092478213046789, "grad_norm": 1.1143485307693481, "learning_rate": 0.001, "loss": 2.3522, "step": 7310 }, { "epoch": 0.3092901260681953, "grad_norm": 0.42736926674842834, "learning_rate": 0.001, "loss": 2.6357, "step": 7311 }, { "epoch": 0.30933243083171164, "grad_norm": 0.3606073260307312, "learning_rate": 0.001, "loss": 3.6859, "step": 7312 }, { "epoch": 0.30937473559522805, "grad_norm": 0.5770615339279175, "learning_rate": 0.001, "loss": 2.6721, "step": 7313 }, { "epoch": 0.3094170403587444, "grad_norm": 0.3912082314491272, "learning_rate": 0.001, "loss": 2.4154, "step": 7314 }, { "epoch": 0.30945934512226075, "grad_norm": 0.21277593076229095, "learning_rate": 0.001, "loss": 1.8694, "step": 7315 }, { "epoch": 0.30950164988577716, "grad_norm": 0.17572276294231415, "learning_rate": 0.001, "loss": 2.1676, "step": 7316 }, { "epoch": 0.3095439546492935, "grad_norm": 0.20815758407115936, "learning_rate": 0.001, "loss": 3.3823, "step": 7317 }, { "epoch": 0.30958625941280987, "grad_norm": 0.745972752571106, "learning_rate": 0.001, "loss": 2.1945, "step": 7318 }, { "epoch": 0.3096285641763263, "grad_norm": 0.20606304705142975, "learning_rate": 0.001, "loss": 1.9767, "step": 7319 }, { "epoch": 0.30967086893984264, "grad_norm": 0.19630566239356995, "learning_rate": 0.001, "loss": 1.8691, "step": 7320 }, { "epoch": 0.309713173703359, "grad_norm": 0.48055750131607056, "learning_rate": 0.001, "loss": 1.9261, "step": 7321 }, { "epoch": 0.30975547846687534, "grad_norm": 2.7202658653259277, "learning_rate": 0.001, "loss": 2.2182, "step": 7322 }, { "epoch": 0.30979778323039175, "grad_norm": 0.5181911587715149, "learning_rate": 0.001, "loss": 4.2096, "step": 7323 }, { "epoch": 0.3098400879939081, "grad_norm": 0.17982327938079834, "learning_rate": 0.001, "loss": 1.9361, "step": 7324 }, { "epoch": 0.30988239275742446, "grad_norm": 0.19139717519283295, "learning_rate": 0.001, "loss": 2.3097, "step": 7325 }, { "epoch": 0.3099246975209409, "grad_norm": 0.2525288462638855, "learning_rate": 0.001, "loss": 2.176, "step": 7326 }, { "epoch": 0.3099670022844572, "grad_norm": 0.3544459640979767, "learning_rate": 0.001, "loss": 3.4766, "step": 7327 }, { "epoch": 0.3100093070479736, "grad_norm": 0.20294804871082306, "learning_rate": 0.001, "loss": 1.8903, "step": 7328 }, { "epoch": 0.31005161181149, "grad_norm": 2.3171162605285645, "learning_rate": 0.001, "loss": 2.4474, "step": 7329 }, { "epoch": 0.31009391657500635, "grad_norm": 0.22611065208911896, "learning_rate": 0.001, "loss": 2.5749, "step": 7330 }, { "epoch": 0.3101362213385227, "grad_norm": 1.3491861820220947, "learning_rate": 0.001, "loss": 1.9441, "step": 7331 }, { "epoch": 0.3101785261020391, "grad_norm": 0.22623848915100098, "learning_rate": 0.001, "loss": 2.951, "step": 7332 }, { "epoch": 0.31022083086555546, "grad_norm": 1.876600742340088, "learning_rate": 0.001, "loss": 1.9068, "step": 7333 }, { "epoch": 0.3102631356290718, "grad_norm": 3.5861880779266357, "learning_rate": 0.001, "loss": 2.7255, "step": 7334 }, { "epoch": 0.3103054403925882, "grad_norm": 0.2411908656358719, "learning_rate": 0.001, "loss": 1.5527, "step": 7335 }, { "epoch": 0.3103477451561046, "grad_norm": 1.0028561353683472, "learning_rate": 0.001, "loss": 2.5271, "step": 7336 }, { "epoch": 0.31039004991962094, "grad_norm": 0.23291008174419403, "learning_rate": 0.001, "loss": 2.0163, "step": 7337 }, { "epoch": 0.31043235468313735, "grad_norm": 0.2906794846057892, "learning_rate": 0.001, "loss": 2.2704, "step": 7338 }, { "epoch": 0.3104746594466537, "grad_norm": 0.7202196717262268, "learning_rate": 0.001, "loss": 2.9688, "step": 7339 }, { "epoch": 0.31051696421017005, "grad_norm": 0.2299274504184723, "learning_rate": 0.001, "loss": 1.9663, "step": 7340 }, { "epoch": 0.31055926897368646, "grad_norm": 0.25570148229599, "learning_rate": 0.001, "loss": 2.3401, "step": 7341 }, { "epoch": 0.3106015737372028, "grad_norm": 0.18588940799236298, "learning_rate": 0.001, "loss": 2.0283, "step": 7342 }, { "epoch": 0.3106438785007192, "grad_norm": 2.1442577838897705, "learning_rate": 0.001, "loss": 2.3488, "step": 7343 }, { "epoch": 0.3106861832642355, "grad_norm": 2.738429069519043, "learning_rate": 0.001, "loss": 1.9623, "step": 7344 }, { "epoch": 0.31072848802775194, "grad_norm": 0.20081764459609985, "learning_rate": 0.001, "loss": 2.3359, "step": 7345 }, { "epoch": 0.3107707927912683, "grad_norm": 0.44594720005989075, "learning_rate": 0.001, "loss": 2.2799, "step": 7346 }, { "epoch": 0.31081309755478465, "grad_norm": 0.26621145009994507, "learning_rate": 0.001, "loss": 3.0893, "step": 7347 }, { "epoch": 0.31085540231830106, "grad_norm": 1.0780677795410156, "learning_rate": 0.001, "loss": 2.2325, "step": 7348 }, { "epoch": 0.3108977070818174, "grad_norm": 0.33734557032585144, "learning_rate": 0.001, "loss": 1.9325, "step": 7349 }, { "epoch": 0.31094001184533376, "grad_norm": 0.7444893717765808, "learning_rate": 0.001, "loss": 2.5934, "step": 7350 }, { "epoch": 0.3109823166088502, "grad_norm": 0.2100495994091034, "learning_rate": 0.001, "loss": 2.7241, "step": 7351 }, { "epoch": 0.3110246213723665, "grad_norm": 0.2134474217891693, "learning_rate": 0.001, "loss": 3.2037, "step": 7352 }, { "epoch": 0.3110669261358829, "grad_norm": 0.21119053661823273, "learning_rate": 0.001, "loss": 2.3662, "step": 7353 }, { "epoch": 0.3111092308993993, "grad_norm": 2.0805516242980957, "learning_rate": 0.001, "loss": 2.2039, "step": 7354 }, { "epoch": 0.31115153566291565, "grad_norm": 0.245794877409935, "learning_rate": 0.001, "loss": 2.1602, "step": 7355 }, { "epoch": 0.311193840426432, "grad_norm": 0.25053372979164124, "learning_rate": 0.001, "loss": 2.8372, "step": 7356 }, { "epoch": 0.3112361451899484, "grad_norm": 0.2969318628311157, "learning_rate": 0.001, "loss": 2.893, "step": 7357 }, { "epoch": 0.31127844995346476, "grad_norm": 0.20529517531394958, "learning_rate": 0.001, "loss": 1.6362, "step": 7358 }, { "epoch": 0.3113207547169811, "grad_norm": 0.43395882844924927, "learning_rate": 0.001, "loss": 3.1913, "step": 7359 }, { "epoch": 0.31136305948049753, "grad_norm": 0.18605674803256989, "learning_rate": 0.001, "loss": 2.0649, "step": 7360 }, { "epoch": 0.3114053642440139, "grad_norm": 16.036762237548828, "learning_rate": 0.001, "loss": 1.9725, "step": 7361 }, { "epoch": 0.31144766900753024, "grad_norm": 0.7822217345237732, "learning_rate": 0.001, "loss": 2.0036, "step": 7362 }, { "epoch": 0.31148997377104665, "grad_norm": 2.8043062686920166, "learning_rate": 0.001, "loss": 2.5385, "step": 7363 }, { "epoch": 0.311532278534563, "grad_norm": 0.18811993300914764, "learning_rate": 0.001, "loss": 1.9102, "step": 7364 }, { "epoch": 0.31157458329807936, "grad_norm": 0.9021945595741272, "learning_rate": 0.001, "loss": 2.5355, "step": 7365 }, { "epoch": 0.3116168880615957, "grad_norm": 0.247837632894516, "learning_rate": 0.001, "loss": 1.8979, "step": 7366 }, { "epoch": 0.3116591928251121, "grad_norm": 0.4068886637687683, "learning_rate": 0.001, "loss": 2.5863, "step": 7367 }, { "epoch": 0.3117014975886285, "grad_norm": 0.3906690776348114, "learning_rate": 0.001, "loss": 1.8295, "step": 7368 }, { "epoch": 0.3117438023521448, "grad_norm": 0.18249556422233582, "learning_rate": 0.001, "loss": 2.2368, "step": 7369 }, { "epoch": 0.31178610711566124, "grad_norm": 0.22424672544002533, "learning_rate": 0.001, "loss": 1.736, "step": 7370 }, { "epoch": 0.3118284118791776, "grad_norm": 1.013100504875183, "learning_rate": 0.001, "loss": 2.3112, "step": 7371 }, { "epoch": 0.31187071664269395, "grad_norm": 0.5906060338020325, "learning_rate": 0.001, "loss": 2.3438, "step": 7372 }, { "epoch": 0.31191302140621036, "grad_norm": 0.22655218839645386, "learning_rate": 0.001, "loss": 1.9258, "step": 7373 }, { "epoch": 0.3119553261697267, "grad_norm": 0.8316282629966736, "learning_rate": 0.001, "loss": 2.0015, "step": 7374 }, { "epoch": 0.31199763093324306, "grad_norm": 0.3435095548629761, "learning_rate": 0.001, "loss": 2.3837, "step": 7375 }, { "epoch": 0.3120399356967595, "grad_norm": 1.1091632843017578, "learning_rate": 0.001, "loss": 2.3188, "step": 7376 }, { "epoch": 0.31208224046027583, "grad_norm": 0.35905349254608154, "learning_rate": 0.001, "loss": 2.6176, "step": 7377 }, { "epoch": 0.3121245452237922, "grad_norm": 0.2939832806587219, "learning_rate": 0.001, "loss": 2.6581, "step": 7378 }, { "epoch": 0.3121668499873086, "grad_norm": 0.21034960448741913, "learning_rate": 0.001, "loss": 1.7465, "step": 7379 }, { "epoch": 0.31220915475082495, "grad_norm": 0.23007825016975403, "learning_rate": 0.001, "loss": 2.6893, "step": 7380 }, { "epoch": 0.3122514595143413, "grad_norm": 0.1853979229927063, "learning_rate": 0.001, "loss": 1.961, "step": 7381 }, { "epoch": 0.3122937642778577, "grad_norm": 4.700438499450684, "learning_rate": 0.001, "loss": 1.9521, "step": 7382 }, { "epoch": 0.31233606904137406, "grad_norm": 0.210820272564888, "learning_rate": 0.001, "loss": 2.2501, "step": 7383 }, { "epoch": 0.3123783738048904, "grad_norm": 0.2408655881881714, "learning_rate": 0.001, "loss": 2.5988, "step": 7384 }, { "epoch": 0.31242067856840683, "grad_norm": 7.633557319641113, "learning_rate": 0.001, "loss": 2.0167, "step": 7385 }, { "epoch": 0.3124629833319232, "grad_norm": 0.8851701617240906, "learning_rate": 0.001, "loss": 2.1323, "step": 7386 }, { "epoch": 0.31250528809543954, "grad_norm": 0.4367370009422302, "learning_rate": 0.001, "loss": 2.7794, "step": 7387 }, { "epoch": 0.3125475928589559, "grad_norm": 0.24811141192913055, "learning_rate": 0.001, "loss": 1.8663, "step": 7388 }, { "epoch": 0.3125898976224723, "grad_norm": 0.24149896204471588, "learning_rate": 0.001, "loss": 2.0688, "step": 7389 }, { "epoch": 0.31263220238598866, "grad_norm": 0.17393232882022858, "learning_rate": 0.001, "loss": 2.2648, "step": 7390 }, { "epoch": 0.312674507149505, "grad_norm": 0.6203790903091431, "learning_rate": 0.001, "loss": 2.4404, "step": 7391 }, { "epoch": 0.3127168119130214, "grad_norm": 0.21590745449066162, "learning_rate": 0.001, "loss": 2.0125, "step": 7392 }, { "epoch": 0.3127591166765378, "grad_norm": 0.2051093429327011, "learning_rate": 0.001, "loss": 1.9774, "step": 7393 }, { "epoch": 0.31280142144005413, "grad_norm": 0.9935820698738098, "learning_rate": 0.001, "loss": 1.9072, "step": 7394 }, { "epoch": 0.31284372620357054, "grad_norm": 0.18654446303844452, "learning_rate": 0.001, "loss": 2.4551, "step": 7395 }, { "epoch": 0.3128860309670869, "grad_norm": 0.19215311110019684, "learning_rate": 0.001, "loss": 1.9348, "step": 7396 }, { "epoch": 0.31292833573060325, "grad_norm": 0.18548458814620972, "learning_rate": 0.001, "loss": 1.9123, "step": 7397 }, { "epoch": 0.31297064049411966, "grad_norm": 0.2123180329799652, "learning_rate": 0.001, "loss": 2.5744, "step": 7398 }, { "epoch": 0.313012945257636, "grad_norm": 0.20290523767471313, "learning_rate": 0.001, "loss": 2.0443, "step": 7399 }, { "epoch": 0.31305525002115236, "grad_norm": 0.2106354683637619, "learning_rate": 0.001, "loss": 2.2838, "step": 7400 }, { "epoch": 0.3130975547846688, "grad_norm": 0.9356090426445007, "learning_rate": 0.001, "loss": 2.6152, "step": 7401 }, { "epoch": 0.31313985954818513, "grad_norm": 0.190611332654953, "learning_rate": 0.001, "loss": 3.1069, "step": 7402 }, { "epoch": 0.3131821643117015, "grad_norm": 0.20556482672691345, "learning_rate": 0.001, "loss": 1.7953, "step": 7403 }, { "epoch": 0.3132244690752179, "grad_norm": 0.2476384937763214, "learning_rate": 0.001, "loss": 1.7635, "step": 7404 }, { "epoch": 0.31326677383873425, "grad_norm": 0.2387237846851349, "learning_rate": 0.001, "loss": 1.9521, "step": 7405 }, { "epoch": 0.3133090786022506, "grad_norm": 1.1636114120483398, "learning_rate": 0.001, "loss": 1.8156, "step": 7406 }, { "epoch": 0.313351383365767, "grad_norm": 0.36299633979797363, "learning_rate": 0.001, "loss": 1.984, "step": 7407 }, { "epoch": 0.31339368812928337, "grad_norm": 0.20854444801807404, "learning_rate": 0.001, "loss": 2.8094, "step": 7408 }, { "epoch": 0.3134359928927997, "grad_norm": 0.20307183265686035, "learning_rate": 0.001, "loss": 2.0501, "step": 7409 }, { "epoch": 0.3134782976563161, "grad_norm": 0.37564030289649963, "learning_rate": 0.001, "loss": 1.8919, "step": 7410 }, { "epoch": 0.3135206024198325, "grad_norm": 0.4626857340335846, "learning_rate": 0.001, "loss": 3.2498, "step": 7411 }, { "epoch": 0.31356290718334884, "grad_norm": 1.1579762697219849, "learning_rate": 0.001, "loss": 2.1231, "step": 7412 }, { "epoch": 0.3136052119468652, "grad_norm": 0.17268231511116028, "learning_rate": 0.001, "loss": 2.8643, "step": 7413 }, { "epoch": 0.3136475167103816, "grad_norm": 0.2048521339893341, "learning_rate": 0.001, "loss": 3.5636, "step": 7414 }, { "epoch": 0.31368982147389796, "grad_norm": 0.3608675003051758, "learning_rate": 0.001, "loss": 3.2036, "step": 7415 }, { "epoch": 0.3137321262374143, "grad_norm": 0.24735359847545624, "learning_rate": 0.001, "loss": 2.1055, "step": 7416 }, { "epoch": 0.3137744310009307, "grad_norm": 0.6630021333694458, "learning_rate": 0.001, "loss": 3.1071, "step": 7417 }, { "epoch": 0.3138167357644471, "grad_norm": 0.19361117482185364, "learning_rate": 0.001, "loss": 2.294, "step": 7418 }, { "epoch": 0.31385904052796343, "grad_norm": 0.26215800642967224, "learning_rate": 0.001, "loss": 2.9492, "step": 7419 }, { "epoch": 0.31390134529147984, "grad_norm": 0.20346727967262268, "learning_rate": 0.001, "loss": 2.087, "step": 7420 }, { "epoch": 0.3139436500549962, "grad_norm": 0.3235960304737091, "learning_rate": 0.001, "loss": 2.0558, "step": 7421 }, { "epoch": 0.31398595481851255, "grad_norm": 0.2161642611026764, "learning_rate": 0.001, "loss": 2.1842, "step": 7422 }, { "epoch": 0.31402825958202896, "grad_norm": 0.19181302189826965, "learning_rate": 0.001, "loss": 1.9424, "step": 7423 }, { "epoch": 0.3140705643455453, "grad_norm": 19.93857765197754, "learning_rate": 0.001, "loss": 2.7717, "step": 7424 }, { "epoch": 0.31411286910906167, "grad_norm": 0.2410607635974884, "learning_rate": 0.001, "loss": 2.7044, "step": 7425 }, { "epoch": 0.3141551738725781, "grad_norm": 1.307002067565918, "learning_rate": 0.001, "loss": 2.1455, "step": 7426 }, { "epoch": 0.31419747863609443, "grad_norm": 0.4844810962677002, "learning_rate": 0.001, "loss": 2.3331, "step": 7427 }, { "epoch": 0.3142397833996108, "grad_norm": 0.28429657220840454, "learning_rate": 0.001, "loss": 2.89, "step": 7428 }, { "epoch": 0.3142820881631272, "grad_norm": 0.8632631897926331, "learning_rate": 0.001, "loss": 1.8695, "step": 7429 }, { "epoch": 0.31432439292664355, "grad_norm": 0.3193078637123108, "learning_rate": 0.001, "loss": 3.2732, "step": 7430 }, { "epoch": 0.3143666976901599, "grad_norm": 0.20497079193592072, "learning_rate": 0.001, "loss": 2.1384, "step": 7431 }, { "epoch": 0.31440900245367626, "grad_norm": 0.2516421973705292, "learning_rate": 0.001, "loss": 2.2816, "step": 7432 }, { "epoch": 0.31445130721719267, "grad_norm": 1.0141093730926514, "learning_rate": 0.001, "loss": 2.6545, "step": 7433 }, { "epoch": 0.314493611980709, "grad_norm": 0.18999618291854858, "learning_rate": 0.001, "loss": 2.0399, "step": 7434 }, { "epoch": 0.3145359167442254, "grad_norm": 0.456870436668396, "learning_rate": 0.001, "loss": 2.8804, "step": 7435 }, { "epoch": 0.3145782215077418, "grad_norm": 0.46013543009757996, "learning_rate": 0.001, "loss": 2.5783, "step": 7436 }, { "epoch": 0.31462052627125814, "grad_norm": 0.20753584802150726, "learning_rate": 0.001, "loss": 2.4055, "step": 7437 }, { "epoch": 0.3146628310347745, "grad_norm": 0.23144669830799103, "learning_rate": 0.001, "loss": 2.9632, "step": 7438 }, { "epoch": 0.3147051357982909, "grad_norm": 0.3479333817958832, "learning_rate": 0.001, "loss": 1.792, "step": 7439 }, { "epoch": 0.31474744056180726, "grad_norm": 4.283021926879883, "learning_rate": 0.001, "loss": 2.1915, "step": 7440 }, { "epoch": 0.3147897453253236, "grad_norm": 7.6823649406433105, "learning_rate": 0.001, "loss": 2.5411, "step": 7441 }, { "epoch": 0.31483205008884, "grad_norm": 0.6836051344871521, "learning_rate": 0.001, "loss": 2.3475, "step": 7442 }, { "epoch": 0.3148743548523564, "grad_norm": 1.2467254400253296, "learning_rate": 0.001, "loss": 2.448, "step": 7443 }, { "epoch": 0.31491665961587273, "grad_norm": 0.4439103603363037, "learning_rate": 0.001, "loss": 2.1324, "step": 7444 }, { "epoch": 0.31495896437938914, "grad_norm": 0.23518876731395721, "learning_rate": 0.001, "loss": 2.3276, "step": 7445 }, { "epoch": 0.3150012691429055, "grad_norm": 0.5438497066497803, "learning_rate": 0.001, "loss": 1.9945, "step": 7446 }, { "epoch": 0.31504357390642185, "grad_norm": 1.6677125692367554, "learning_rate": 0.001, "loss": 1.5225, "step": 7447 }, { "epoch": 0.31508587866993826, "grad_norm": 0.20245139300823212, "learning_rate": 0.001, "loss": 1.973, "step": 7448 }, { "epoch": 0.3151281834334546, "grad_norm": 0.24195541441440582, "learning_rate": 0.001, "loss": 1.9298, "step": 7449 }, { "epoch": 0.31517048819697097, "grad_norm": 0.21246738731861115, "learning_rate": 0.001, "loss": 2.0905, "step": 7450 }, { "epoch": 0.3152127929604874, "grad_norm": 0.2219218611717224, "learning_rate": 0.001, "loss": 1.9244, "step": 7451 }, { "epoch": 0.31525509772400373, "grad_norm": 0.20514735579490662, "learning_rate": 0.001, "loss": 2.4173, "step": 7452 }, { "epoch": 0.3152974024875201, "grad_norm": 0.1975151151418686, "learning_rate": 0.001, "loss": 2.7215, "step": 7453 }, { "epoch": 0.3153397072510365, "grad_norm": 0.2547701299190521, "learning_rate": 0.001, "loss": 2.3066, "step": 7454 }, { "epoch": 0.31538201201455285, "grad_norm": 0.2303609549999237, "learning_rate": 0.001, "loss": 2.6705, "step": 7455 }, { "epoch": 0.3154243167780692, "grad_norm": 0.2129909247159958, "learning_rate": 0.001, "loss": 1.9109, "step": 7456 }, { "epoch": 0.31546662154158556, "grad_norm": 0.2366720288991928, "learning_rate": 0.001, "loss": 2.8221, "step": 7457 }, { "epoch": 0.31550892630510197, "grad_norm": 0.23550964891910553, "learning_rate": 0.001, "loss": 3.5912, "step": 7458 }, { "epoch": 0.3155512310686183, "grad_norm": 0.21231484413146973, "learning_rate": 0.001, "loss": 1.7912, "step": 7459 }, { "epoch": 0.3155935358321347, "grad_norm": 0.22212854027748108, "learning_rate": 0.001, "loss": 2.5323, "step": 7460 }, { "epoch": 0.3156358405956511, "grad_norm": 1.3847850561141968, "learning_rate": 0.001, "loss": 2.2599, "step": 7461 }, { "epoch": 0.31567814535916744, "grad_norm": 0.6198214292526245, "learning_rate": 0.001, "loss": 2.3198, "step": 7462 }, { "epoch": 0.3157204501226838, "grad_norm": 0.20347964763641357, "learning_rate": 0.001, "loss": 1.7696, "step": 7463 }, { "epoch": 0.3157627548862002, "grad_norm": 0.3482247292995453, "learning_rate": 0.001, "loss": 1.8097, "step": 7464 }, { "epoch": 0.31580505964971656, "grad_norm": 0.22890900075435638, "learning_rate": 0.001, "loss": 2.2547, "step": 7465 }, { "epoch": 0.3158473644132329, "grad_norm": 0.21213634312152863, "learning_rate": 0.001, "loss": 2.1473, "step": 7466 }, { "epoch": 0.3158896691767493, "grad_norm": 0.2835163176059723, "learning_rate": 0.001, "loss": 2.5594, "step": 7467 }, { "epoch": 0.3159319739402657, "grad_norm": 0.22125250101089478, "learning_rate": 0.001, "loss": 2.9808, "step": 7468 }, { "epoch": 0.31597427870378203, "grad_norm": 2.557072401046753, "learning_rate": 0.001, "loss": 3.2332, "step": 7469 }, { "epoch": 0.31601658346729844, "grad_norm": 4.832421779632568, "learning_rate": 0.001, "loss": 2.2446, "step": 7470 }, { "epoch": 0.3160588882308148, "grad_norm": 0.19227334856987, "learning_rate": 0.001, "loss": 2.5917, "step": 7471 }, { "epoch": 0.31610119299433115, "grad_norm": 0.23523429036140442, "learning_rate": 0.001, "loss": 2.5136, "step": 7472 }, { "epoch": 0.31614349775784756, "grad_norm": 0.22452674806118011, "learning_rate": 0.001, "loss": 2.0461, "step": 7473 }, { "epoch": 0.3161858025213639, "grad_norm": 0.43354332447052, "learning_rate": 0.001, "loss": 3.0801, "step": 7474 }, { "epoch": 0.31622810728488027, "grad_norm": 0.21420817077159882, "learning_rate": 0.001, "loss": 1.6362, "step": 7475 }, { "epoch": 0.3162704120483967, "grad_norm": 5.511373043060303, "learning_rate": 0.001, "loss": 2.1619, "step": 7476 }, { "epoch": 0.31631271681191303, "grad_norm": 0.647841215133667, "learning_rate": 0.001, "loss": 2.2618, "step": 7477 }, { "epoch": 0.3163550215754294, "grad_norm": 0.4985596537590027, "learning_rate": 0.001, "loss": 2.6925, "step": 7478 }, { "epoch": 0.31639732633894574, "grad_norm": 0.6579802632331848, "learning_rate": 0.001, "loss": 2.7019, "step": 7479 }, { "epoch": 0.31643963110246215, "grad_norm": 2.1017627716064453, "learning_rate": 0.001, "loss": 2.4639, "step": 7480 }, { "epoch": 0.3164819358659785, "grad_norm": 0.5756150484085083, "learning_rate": 0.001, "loss": 1.7458, "step": 7481 }, { "epoch": 0.31652424062949486, "grad_norm": 0.24982336163520813, "learning_rate": 0.001, "loss": 3.0768, "step": 7482 }, { "epoch": 0.31656654539301127, "grad_norm": 0.5327016711235046, "learning_rate": 0.001, "loss": 2.1362, "step": 7483 }, { "epoch": 0.3166088501565276, "grad_norm": 7.926521301269531, "learning_rate": 0.001, "loss": 2.0401, "step": 7484 }, { "epoch": 0.316651154920044, "grad_norm": 0.2336919903755188, "learning_rate": 0.001, "loss": 2.5418, "step": 7485 }, { "epoch": 0.3166934596835604, "grad_norm": 1.4902650117874146, "learning_rate": 0.001, "loss": 2.053, "step": 7486 }, { "epoch": 0.31673576444707674, "grad_norm": 0.3811624348163605, "learning_rate": 0.001, "loss": 1.8232, "step": 7487 }, { "epoch": 0.3167780692105931, "grad_norm": 0.37704822421073914, "learning_rate": 0.001, "loss": 2.9608, "step": 7488 }, { "epoch": 0.3168203739741095, "grad_norm": 0.35828453302383423, "learning_rate": 0.001, "loss": 1.9027, "step": 7489 }, { "epoch": 0.31686267873762586, "grad_norm": 0.22911661863327026, "learning_rate": 0.001, "loss": 2.7649, "step": 7490 }, { "epoch": 0.3169049835011422, "grad_norm": 0.2562866806983948, "learning_rate": 0.001, "loss": 2.4122, "step": 7491 }, { "epoch": 0.3169472882646586, "grad_norm": 0.24646058678627014, "learning_rate": 0.001, "loss": 2.0809, "step": 7492 }, { "epoch": 0.316989593028175, "grad_norm": 1.1058540344238281, "learning_rate": 0.001, "loss": 2.0834, "step": 7493 }, { "epoch": 0.31703189779169133, "grad_norm": 0.20626600086688995, "learning_rate": 0.001, "loss": 2.2533, "step": 7494 }, { "epoch": 0.31707420255520774, "grad_norm": 0.32516318559646606, "learning_rate": 0.001, "loss": 3.1424, "step": 7495 }, { "epoch": 0.3171165073187241, "grad_norm": 0.2439742237329483, "learning_rate": 0.001, "loss": 2.2094, "step": 7496 }, { "epoch": 0.31715881208224045, "grad_norm": 0.34792065620422363, "learning_rate": 0.001, "loss": 1.9122, "step": 7497 }, { "epoch": 0.31720111684575686, "grad_norm": 0.23733051121234894, "learning_rate": 0.001, "loss": 1.936, "step": 7498 }, { "epoch": 0.3172434216092732, "grad_norm": 0.24519434571266174, "learning_rate": 0.001, "loss": 2.036, "step": 7499 }, { "epoch": 0.31728572637278957, "grad_norm": 0.4781532883644104, "learning_rate": 0.001, "loss": 3.1208, "step": 7500 }, { "epoch": 0.3173280311363059, "grad_norm": 0.19272254407405853, "learning_rate": 0.001, "loss": 2.552, "step": 7501 }, { "epoch": 0.31737033589982233, "grad_norm": 0.32179877161979675, "learning_rate": 0.001, "loss": 2.8888, "step": 7502 }, { "epoch": 0.3174126406633387, "grad_norm": 0.7427499890327454, "learning_rate": 0.001, "loss": 2.615, "step": 7503 }, { "epoch": 0.31745494542685504, "grad_norm": 0.31697311997413635, "learning_rate": 0.001, "loss": 2.3646, "step": 7504 }, { "epoch": 0.31749725019037145, "grad_norm": 0.16490371525287628, "learning_rate": 0.001, "loss": 2.6595, "step": 7505 }, { "epoch": 0.3175395549538878, "grad_norm": 0.2627149224281311, "learning_rate": 0.001, "loss": 4.131, "step": 7506 }, { "epoch": 0.31758185971740416, "grad_norm": 0.8162776827812195, "learning_rate": 0.001, "loss": 1.8456, "step": 7507 }, { "epoch": 0.31762416448092057, "grad_norm": 0.3852151036262512, "learning_rate": 0.001, "loss": 3.4449, "step": 7508 }, { "epoch": 0.3176664692444369, "grad_norm": 1.2743273973464966, "learning_rate": 0.001, "loss": 2.073, "step": 7509 }, { "epoch": 0.3177087740079533, "grad_norm": 0.23382671177387238, "learning_rate": 0.001, "loss": 1.7473, "step": 7510 }, { "epoch": 0.3177510787714697, "grad_norm": 0.25078028440475464, "learning_rate": 0.001, "loss": 2.8671, "step": 7511 }, { "epoch": 0.31779338353498604, "grad_norm": 0.18183058500289917, "learning_rate": 0.001, "loss": 2.3517, "step": 7512 }, { "epoch": 0.3178356882985024, "grad_norm": 0.20492641627788544, "learning_rate": 0.001, "loss": 1.6892, "step": 7513 }, { "epoch": 0.3178779930620188, "grad_norm": 0.19157011806964874, "learning_rate": 0.001, "loss": 1.9524, "step": 7514 }, { "epoch": 0.31792029782553516, "grad_norm": 0.20687264204025269, "learning_rate": 0.001, "loss": 2.091, "step": 7515 }, { "epoch": 0.3179626025890515, "grad_norm": 0.3631476163864136, "learning_rate": 0.001, "loss": 2.5191, "step": 7516 }, { "epoch": 0.3180049073525679, "grad_norm": 2.8174641132354736, "learning_rate": 0.001, "loss": 2.2289, "step": 7517 }, { "epoch": 0.3180472121160843, "grad_norm": 0.21862949430942535, "learning_rate": 0.001, "loss": 2.8965, "step": 7518 }, { "epoch": 0.31808951687960063, "grad_norm": 1.085720419883728, "learning_rate": 0.001, "loss": 2.622, "step": 7519 }, { "epoch": 0.31813182164311704, "grad_norm": 0.18160761892795563, "learning_rate": 0.001, "loss": 2.2491, "step": 7520 }, { "epoch": 0.3181741264066334, "grad_norm": 0.8975410461425781, "learning_rate": 0.001, "loss": 2.8174, "step": 7521 }, { "epoch": 0.31821643117014975, "grad_norm": 1.3470760583877563, "learning_rate": 0.001, "loss": 1.7925, "step": 7522 }, { "epoch": 0.3182587359336661, "grad_norm": 1.8409106731414795, "learning_rate": 0.001, "loss": 2.8657, "step": 7523 }, { "epoch": 0.3183010406971825, "grad_norm": 0.17481787502765656, "learning_rate": 0.001, "loss": 2.4448, "step": 7524 }, { "epoch": 0.31834334546069887, "grad_norm": 0.2951485514640808, "learning_rate": 0.001, "loss": 2.678, "step": 7525 }, { "epoch": 0.3183856502242152, "grad_norm": 0.22341516613960266, "learning_rate": 0.001, "loss": 2.1617, "step": 7526 }, { "epoch": 0.31842795498773163, "grad_norm": 0.2601909935474396, "learning_rate": 0.001, "loss": 2.1188, "step": 7527 }, { "epoch": 0.318470259751248, "grad_norm": 0.2952847480773926, "learning_rate": 0.001, "loss": 2.1086, "step": 7528 }, { "epoch": 0.31851256451476434, "grad_norm": 0.4443856477737427, "learning_rate": 0.001, "loss": 3.2807, "step": 7529 }, { "epoch": 0.31855486927828075, "grad_norm": 0.235245943069458, "learning_rate": 0.001, "loss": 2.9082, "step": 7530 }, { "epoch": 0.3185971740417971, "grad_norm": 1.0847169160842896, "learning_rate": 0.001, "loss": 2.0038, "step": 7531 }, { "epoch": 0.31863947880531346, "grad_norm": 0.245633065700531, "learning_rate": 0.001, "loss": 2.6333, "step": 7532 }, { "epoch": 0.31868178356882987, "grad_norm": 0.2640596330165863, "learning_rate": 0.001, "loss": 3.2857, "step": 7533 }, { "epoch": 0.3187240883323462, "grad_norm": 0.5016381740570068, "learning_rate": 0.001, "loss": 3.0914, "step": 7534 }, { "epoch": 0.3187663930958626, "grad_norm": 0.19767212867736816, "learning_rate": 0.001, "loss": 1.7764, "step": 7535 }, { "epoch": 0.318808697859379, "grad_norm": 0.2402283400297165, "learning_rate": 0.001, "loss": 2.7833, "step": 7536 }, { "epoch": 0.31885100262289534, "grad_norm": 4.479025363922119, "learning_rate": 0.001, "loss": 2.2062, "step": 7537 }, { "epoch": 0.3188933073864117, "grad_norm": 0.584766685962677, "learning_rate": 0.001, "loss": 1.7729, "step": 7538 }, { "epoch": 0.3189356121499281, "grad_norm": 0.5840973258018494, "learning_rate": 0.001, "loss": 1.8462, "step": 7539 }, { "epoch": 0.31897791691344446, "grad_norm": 0.2561521828174591, "learning_rate": 0.001, "loss": 2.3039, "step": 7540 }, { "epoch": 0.3190202216769608, "grad_norm": 4.742803573608398, "learning_rate": 0.001, "loss": 2.0092, "step": 7541 }, { "epoch": 0.3190625264404772, "grad_norm": 0.24484741687774658, "learning_rate": 0.001, "loss": 2.2363, "step": 7542 }, { "epoch": 0.3191048312039936, "grad_norm": 0.70278000831604, "learning_rate": 0.001, "loss": 3.5423, "step": 7543 }, { "epoch": 0.31914713596750993, "grad_norm": 0.22903594374656677, "learning_rate": 0.001, "loss": 1.8414, "step": 7544 }, { "epoch": 0.3191894407310263, "grad_norm": 0.5062339901924133, "learning_rate": 0.001, "loss": 2.1526, "step": 7545 }, { "epoch": 0.3192317454945427, "grad_norm": 0.2158339023590088, "learning_rate": 0.001, "loss": 1.9886, "step": 7546 }, { "epoch": 0.31927405025805905, "grad_norm": 1.0523641109466553, "learning_rate": 0.001, "loss": 2.6195, "step": 7547 }, { "epoch": 0.3193163550215754, "grad_norm": 0.2197253257036209, "learning_rate": 0.001, "loss": 1.5022, "step": 7548 }, { "epoch": 0.3193586597850918, "grad_norm": 2.6306278705596924, "learning_rate": 0.001, "loss": 2.143, "step": 7549 }, { "epoch": 0.31940096454860817, "grad_norm": 0.7273126840591431, "learning_rate": 0.001, "loss": 2.4476, "step": 7550 }, { "epoch": 0.3194432693121245, "grad_norm": 0.29238003492355347, "learning_rate": 0.001, "loss": 1.9445, "step": 7551 }, { "epoch": 0.31948557407564093, "grad_norm": 0.6033421158790588, "learning_rate": 0.001, "loss": 2.5735, "step": 7552 }, { "epoch": 0.3195278788391573, "grad_norm": 0.723403811454773, "learning_rate": 0.001, "loss": 2.9264, "step": 7553 }, { "epoch": 0.31957018360267364, "grad_norm": 0.3378523886203766, "learning_rate": 0.001, "loss": 2.0737, "step": 7554 }, { "epoch": 0.31961248836619005, "grad_norm": 0.2882983684539795, "learning_rate": 0.001, "loss": 3.3238, "step": 7555 }, { "epoch": 0.3196547931297064, "grad_norm": 1.7969506978988647, "learning_rate": 0.001, "loss": 2.2955, "step": 7556 }, { "epoch": 0.31969709789322276, "grad_norm": 1.0519235134124756, "learning_rate": 0.001, "loss": 2.9223, "step": 7557 }, { "epoch": 0.31973940265673917, "grad_norm": 0.26675179600715637, "learning_rate": 0.001, "loss": 3.1042, "step": 7558 }, { "epoch": 0.3197817074202555, "grad_norm": 0.210018128156662, "learning_rate": 0.001, "loss": 2.4462, "step": 7559 }, { "epoch": 0.3198240121837719, "grad_norm": 0.24346289038658142, "learning_rate": 0.001, "loss": 2.5506, "step": 7560 }, { "epoch": 0.3198663169472883, "grad_norm": 0.2894313633441925, "learning_rate": 0.001, "loss": 2.6206, "step": 7561 }, { "epoch": 0.31990862171080464, "grad_norm": 0.23805563151836395, "learning_rate": 0.001, "loss": 2.1749, "step": 7562 }, { "epoch": 0.319950926474321, "grad_norm": 0.2397831827402115, "learning_rate": 0.001, "loss": 2.7243, "step": 7563 }, { "epoch": 0.3199932312378374, "grad_norm": 0.23759059607982635, "learning_rate": 0.001, "loss": 1.9441, "step": 7564 }, { "epoch": 0.32003553600135376, "grad_norm": 1.2827054262161255, "learning_rate": 0.001, "loss": 2.5177, "step": 7565 }, { "epoch": 0.3200778407648701, "grad_norm": 0.3205539882183075, "learning_rate": 0.001, "loss": 3.2377, "step": 7566 }, { "epoch": 0.3201201455283865, "grad_norm": 0.3002196252346039, "learning_rate": 0.001, "loss": 2.2226, "step": 7567 }, { "epoch": 0.3201624502919029, "grad_norm": 0.35749757289886475, "learning_rate": 0.001, "loss": 2.0588, "step": 7568 }, { "epoch": 0.32020475505541923, "grad_norm": 0.4479646384716034, "learning_rate": 0.001, "loss": 2.8293, "step": 7569 }, { "epoch": 0.3202470598189356, "grad_norm": 1.4561418294906616, "learning_rate": 0.001, "loss": 1.874, "step": 7570 }, { "epoch": 0.320289364582452, "grad_norm": 0.25509679317474365, "learning_rate": 0.001, "loss": 1.9602, "step": 7571 }, { "epoch": 0.32033166934596835, "grad_norm": 0.33940833806991577, "learning_rate": 0.001, "loss": 3.8938, "step": 7572 }, { "epoch": 0.3203739741094847, "grad_norm": 0.23563840985298157, "learning_rate": 0.001, "loss": 1.9939, "step": 7573 }, { "epoch": 0.3204162788730011, "grad_norm": 0.5354148745536804, "learning_rate": 0.001, "loss": 2.3731, "step": 7574 }, { "epoch": 0.32045858363651747, "grad_norm": 0.6357294917106628, "learning_rate": 0.001, "loss": 2.4846, "step": 7575 }, { "epoch": 0.3205008884000338, "grad_norm": 0.24302570521831512, "learning_rate": 0.001, "loss": 2.7975, "step": 7576 }, { "epoch": 0.32054319316355023, "grad_norm": 0.46470755338668823, "learning_rate": 0.001, "loss": 2.1249, "step": 7577 }, { "epoch": 0.3205854979270666, "grad_norm": 0.24483340978622437, "learning_rate": 0.001, "loss": 3.0459, "step": 7578 }, { "epoch": 0.32062780269058294, "grad_norm": 0.22136029601097107, "learning_rate": 0.001, "loss": 1.8347, "step": 7579 }, { "epoch": 0.32067010745409935, "grad_norm": 7.803838729858398, "learning_rate": 0.001, "loss": 1.7036, "step": 7580 }, { "epoch": 0.3207124122176157, "grad_norm": 0.2764716148376465, "learning_rate": 0.001, "loss": 2.6748, "step": 7581 }, { "epoch": 0.32075471698113206, "grad_norm": 0.44170138239860535, "learning_rate": 0.001, "loss": 2.1851, "step": 7582 }, { "epoch": 0.32079702174464847, "grad_norm": 0.6049759387969971, "learning_rate": 0.001, "loss": 2.012, "step": 7583 }, { "epoch": 0.3208393265081648, "grad_norm": 0.18830320239067078, "learning_rate": 0.001, "loss": 2.1773, "step": 7584 }, { "epoch": 0.3208816312716812, "grad_norm": 0.3015429377555847, "learning_rate": 0.001, "loss": 2.196, "step": 7585 }, { "epoch": 0.3209239360351976, "grad_norm": 0.20359709858894348, "learning_rate": 0.001, "loss": 2.4333, "step": 7586 }, { "epoch": 0.32096624079871394, "grad_norm": 0.4667384922504425, "learning_rate": 0.001, "loss": 2.0174, "step": 7587 }, { "epoch": 0.3210085455622303, "grad_norm": 1.0664136409759521, "learning_rate": 0.001, "loss": 2.0681, "step": 7588 }, { "epoch": 0.3210508503257467, "grad_norm": 0.37264055013656616, "learning_rate": 0.001, "loss": 2.3549, "step": 7589 }, { "epoch": 0.32109315508926306, "grad_norm": 0.2381688803434372, "learning_rate": 0.001, "loss": 2.0513, "step": 7590 }, { "epoch": 0.3211354598527794, "grad_norm": 1.4251641035079956, "learning_rate": 0.001, "loss": 2.5208, "step": 7591 }, { "epoch": 0.32117776461629577, "grad_norm": 0.6149337291717529, "learning_rate": 0.001, "loss": 2.0792, "step": 7592 }, { "epoch": 0.3212200693798122, "grad_norm": 0.18450385332107544, "learning_rate": 0.001, "loss": 3.026, "step": 7593 }, { "epoch": 0.32126237414332853, "grad_norm": 0.26451027393341064, "learning_rate": 0.001, "loss": 2.5821, "step": 7594 }, { "epoch": 0.3213046789068449, "grad_norm": 0.2782851755619049, "learning_rate": 0.001, "loss": 2.0855, "step": 7595 }, { "epoch": 0.3213469836703613, "grad_norm": 0.20386971533298492, "learning_rate": 0.001, "loss": 1.6014, "step": 7596 }, { "epoch": 0.32138928843387765, "grad_norm": 1.3683080673217773, "learning_rate": 0.001, "loss": 3.028, "step": 7597 }, { "epoch": 0.321431593197394, "grad_norm": 1.1451998949050903, "learning_rate": 0.001, "loss": 2.1981, "step": 7598 }, { "epoch": 0.3214738979609104, "grad_norm": 0.43921521306037903, "learning_rate": 0.001, "loss": 2.2234, "step": 7599 }, { "epoch": 0.32151620272442677, "grad_norm": 0.2476237416267395, "learning_rate": 0.001, "loss": 3.1997, "step": 7600 }, { "epoch": 0.3215585074879431, "grad_norm": 0.21488797664642334, "learning_rate": 0.001, "loss": 2.2913, "step": 7601 }, { "epoch": 0.32160081225145953, "grad_norm": 0.2706213891506195, "learning_rate": 0.001, "loss": 2.4037, "step": 7602 }, { "epoch": 0.3216431170149759, "grad_norm": 0.23860187828540802, "learning_rate": 0.001, "loss": 2.116, "step": 7603 }, { "epoch": 0.32168542177849224, "grad_norm": 0.23283398151397705, "learning_rate": 0.001, "loss": 2.6682, "step": 7604 }, { "epoch": 0.32172772654200865, "grad_norm": 0.4454127550125122, "learning_rate": 0.001, "loss": 2.2326, "step": 7605 }, { "epoch": 0.321770031305525, "grad_norm": 0.2188045084476471, "learning_rate": 0.001, "loss": 2.3459, "step": 7606 }, { "epoch": 0.32181233606904136, "grad_norm": 0.2107754945755005, "learning_rate": 0.001, "loss": 1.8558, "step": 7607 }, { "epoch": 0.32185464083255777, "grad_norm": 0.24413500726222992, "learning_rate": 0.001, "loss": 2.6748, "step": 7608 }, { "epoch": 0.3218969455960741, "grad_norm": 0.1848825365304947, "learning_rate": 0.001, "loss": 1.7373, "step": 7609 }, { "epoch": 0.3219392503595905, "grad_norm": 0.4209091365337372, "learning_rate": 0.001, "loss": 2.9954, "step": 7610 }, { "epoch": 0.3219815551231069, "grad_norm": 1.6776453256607056, "learning_rate": 0.001, "loss": 3.9771, "step": 7611 }, { "epoch": 0.32202385988662324, "grad_norm": 0.21313358843326569, "learning_rate": 0.001, "loss": 2.2837, "step": 7612 }, { "epoch": 0.3220661646501396, "grad_norm": 0.4032410979270935, "learning_rate": 0.001, "loss": 2.0786, "step": 7613 }, { "epoch": 0.32210846941365595, "grad_norm": 0.18987895548343658, "learning_rate": 0.001, "loss": 2.3311, "step": 7614 }, { "epoch": 0.32215077417717236, "grad_norm": 7.441311359405518, "learning_rate": 0.001, "loss": 2.2271, "step": 7615 }, { "epoch": 0.3221930789406887, "grad_norm": 0.20409175753593445, "learning_rate": 0.001, "loss": 2.9311, "step": 7616 }, { "epoch": 0.32223538370420507, "grad_norm": 0.19200804829597473, "learning_rate": 0.001, "loss": 1.5997, "step": 7617 }, { "epoch": 0.3222776884677215, "grad_norm": 0.2112921178340912, "learning_rate": 0.001, "loss": 1.853, "step": 7618 }, { "epoch": 0.32231999323123783, "grad_norm": 0.5670382976531982, "learning_rate": 0.001, "loss": 2.1103, "step": 7619 }, { "epoch": 0.3223622979947542, "grad_norm": 1.1795889139175415, "learning_rate": 0.001, "loss": 1.5708, "step": 7620 }, { "epoch": 0.3224046027582706, "grad_norm": 0.19757835566997528, "learning_rate": 0.001, "loss": 2.6667, "step": 7621 }, { "epoch": 0.32244690752178695, "grad_norm": 0.3224509060382843, "learning_rate": 0.001, "loss": 2.1601, "step": 7622 }, { "epoch": 0.3224892122853033, "grad_norm": 3.6661248207092285, "learning_rate": 0.001, "loss": 2.2917, "step": 7623 }, { "epoch": 0.3225315170488197, "grad_norm": 0.26291152834892273, "learning_rate": 0.001, "loss": 2.5686, "step": 7624 }, { "epoch": 0.32257382181233607, "grad_norm": 0.2524521052837372, "learning_rate": 0.001, "loss": 2.3755, "step": 7625 }, { "epoch": 0.3226161265758524, "grad_norm": 0.22929400205612183, "learning_rate": 0.001, "loss": 2.6761, "step": 7626 }, { "epoch": 0.32265843133936883, "grad_norm": 0.2434311956167221, "learning_rate": 0.001, "loss": 2.5041, "step": 7627 }, { "epoch": 0.3227007361028852, "grad_norm": 0.3644380569458008, "learning_rate": 0.001, "loss": 2.0181, "step": 7628 }, { "epoch": 0.32274304086640154, "grad_norm": 0.8881004452705383, "learning_rate": 0.001, "loss": 2.3105, "step": 7629 }, { "epoch": 0.32278534562991795, "grad_norm": 1.9201459884643555, "learning_rate": 0.001, "loss": 2.3565, "step": 7630 }, { "epoch": 0.3228276503934343, "grad_norm": 0.22108227014541626, "learning_rate": 0.001, "loss": 1.9022, "step": 7631 }, { "epoch": 0.32286995515695066, "grad_norm": 0.5766029953956604, "learning_rate": 0.001, "loss": 1.7875, "step": 7632 }, { "epoch": 0.32291225992046707, "grad_norm": 0.23156306147575378, "learning_rate": 0.001, "loss": 2.9351, "step": 7633 }, { "epoch": 0.3229545646839834, "grad_norm": 0.20917218923568726, "learning_rate": 0.001, "loss": 1.7399, "step": 7634 }, { "epoch": 0.3229968694474998, "grad_norm": 0.2092711329460144, "learning_rate": 0.001, "loss": 2.2154, "step": 7635 }, { "epoch": 0.32303917421101613, "grad_norm": 1.8786296844482422, "learning_rate": 0.001, "loss": 1.755, "step": 7636 }, { "epoch": 0.32308147897453254, "grad_norm": 5.879979133605957, "learning_rate": 0.001, "loss": 2.5399, "step": 7637 }, { "epoch": 0.3231237837380489, "grad_norm": 1.0984258651733398, "learning_rate": 0.001, "loss": 2.2887, "step": 7638 }, { "epoch": 0.32316608850156525, "grad_norm": 0.2462794929742813, "learning_rate": 0.001, "loss": 1.8517, "step": 7639 }, { "epoch": 0.32320839326508166, "grad_norm": 0.22659048438072205, "learning_rate": 0.001, "loss": 1.6135, "step": 7640 }, { "epoch": 0.323250698028598, "grad_norm": 0.2993631064891815, "learning_rate": 0.001, "loss": 1.9505, "step": 7641 }, { "epoch": 0.32329300279211437, "grad_norm": 0.43561097979545593, "learning_rate": 0.001, "loss": 2.3488, "step": 7642 }, { "epoch": 0.3233353075556308, "grad_norm": 0.2653340995311737, "learning_rate": 0.001, "loss": 3.7175, "step": 7643 }, { "epoch": 0.32337761231914713, "grad_norm": 0.2567252218723297, "learning_rate": 0.001, "loss": 2.252, "step": 7644 }, { "epoch": 0.3234199170826635, "grad_norm": 0.2634236514568329, "learning_rate": 0.001, "loss": 2.5914, "step": 7645 }, { "epoch": 0.3234622218461799, "grad_norm": 0.21972741186618805, "learning_rate": 0.001, "loss": 1.9794, "step": 7646 }, { "epoch": 0.32350452660969625, "grad_norm": 0.37712669372558594, "learning_rate": 0.001, "loss": 2.7509, "step": 7647 }, { "epoch": 0.3235468313732126, "grad_norm": 0.7490605711936951, "learning_rate": 0.001, "loss": 2.8407, "step": 7648 }, { "epoch": 0.323589136136729, "grad_norm": 0.38894638419151306, "learning_rate": 0.001, "loss": 2.2041, "step": 7649 }, { "epoch": 0.32363144090024537, "grad_norm": 0.4028621315956116, "learning_rate": 0.001, "loss": 2.0586, "step": 7650 }, { "epoch": 0.3236737456637617, "grad_norm": 2.477647542953491, "learning_rate": 0.001, "loss": 3.4487, "step": 7651 }, { "epoch": 0.32371605042727813, "grad_norm": 0.26158443093299866, "learning_rate": 0.001, "loss": 2.305, "step": 7652 }, { "epoch": 0.3237583551907945, "grad_norm": 0.2559559643268585, "learning_rate": 0.001, "loss": 2.3287, "step": 7653 }, { "epoch": 0.32380065995431084, "grad_norm": 0.2276749461889267, "learning_rate": 0.001, "loss": 2.2801, "step": 7654 }, { "epoch": 0.32384296471782725, "grad_norm": 0.2165883332490921, "learning_rate": 0.001, "loss": 2.4184, "step": 7655 }, { "epoch": 0.3238852694813436, "grad_norm": 1.6801161766052246, "learning_rate": 0.001, "loss": 2.2247, "step": 7656 }, { "epoch": 0.32392757424485996, "grad_norm": 0.23615668714046478, "learning_rate": 0.001, "loss": 3.3127, "step": 7657 }, { "epoch": 0.3239698790083763, "grad_norm": 0.20711363852024078, "learning_rate": 0.001, "loss": 2.502, "step": 7658 }, { "epoch": 0.3240121837718927, "grad_norm": 0.20527245104312897, "learning_rate": 0.001, "loss": 1.7657, "step": 7659 }, { "epoch": 0.3240544885354091, "grad_norm": 0.18716438114643097, "learning_rate": 0.001, "loss": 2.4437, "step": 7660 }, { "epoch": 0.32409679329892543, "grad_norm": 0.25125744938850403, "learning_rate": 0.001, "loss": 2.0741, "step": 7661 }, { "epoch": 0.32413909806244184, "grad_norm": 0.1993718147277832, "learning_rate": 0.001, "loss": 3.3968, "step": 7662 }, { "epoch": 0.3241814028259582, "grad_norm": 0.1872645914554596, "learning_rate": 0.001, "loss": 2.0158, "step": 7663 }, { "epoch": 0.32422370758947455, "grad_norm": 0.19530023634433746, "learning_rate": 0.001, "loss": 1.9639, "step": 7664 }, { "epoch": 0.32426601235299096, "grad_norm": 0.2583797872066498, "learning_rate": 0.001, "loss": 1.7254, "step": 7665 }, { "epoch": 0.3243083171165073, "grad_norm": 2.2203147411346436, "learning_rate": 0.001, "loss": 2.6183, "step": 7666 }, { "epoch": 0.32435062188002367, "grad_norm": 0.19957537949085236, "learning_rate": 0.001, "loss": 2.202, "step": 7667 }, { "epoch": 0.3243929266435401, "grad_norm": 0.2118930220603943, "learning_rate": 0.001, "loss": 3.7787, "step": 7668 }, { "epoch": 0.32443523140705643, "grad_norm": 0.21599611639976501, "learning_rate": 0.001, "loss": 2.5277, "step": 7669 }, { "epoch": 0.3244775361705728, "grad_norm": 3.4629387855529785, "learning_rate": 0.001, "loss": 1.6031, "step": 7670 }, { "epoch": 0.3245198409340892, "grad_norm": 0.29955390095710754, "learning_rate": 0.001, "loss": 1.6658, "step": 7671 }, { "epoch": 0.32456214569760555, "grad_norm": 0.20181111991405487, "learning_rate": 0.001, "loss": 1.9581, "step": 7672 }, { "epoch": 0.3246044504611219, "grad_norm": 0.8020063638687134, "learning_rate": 0.001, "loss": 2.6512, "step": 7673 }, { "epoch": 0.3246467552246383, "grad_norm": 0.17198266088962555, "learning_rate": 0.001, "loss": 2.0142, "step": 7674 }, { "epoch": 0.32468905998815467, "grad_norm": 0.25351399183273315, "learning_rate": 0.001, "loss": 2.301, "step": 7675 }, { "epoch": 0.324731364751671, "grad_norm": 0.28780749440193176, "learning_rate": 0.001, "loss": 1.8023, "step": 7676 }, { "epoch": 0.32477366951518744, "grad_norm": 8.679863929748535, "learning_rate": 0.001, "loss": 2.0141, "step": 7677 }, { "epoch": 0.3248159742787038, "grad_norm": 1.8151581287384033, "learning_rate": 0.001, "loss": 1.7588, "step": 7678 }, { "epoch": 0.32485827904222014, "grad_norm": 0.19262565672397614, "learning_rate": 0.001, "loss": 1.7654, "step": 7679 }, { "epoch": 0.32490058380573655, "grad_norm": 0.2888406217098236, "learning_rate": 0.001, "loss": 2.3853, "step": 7680 }, { "epoch": 0.3249428885692529, "grad_norm": 0.5525591373443604, "learning_rate": 0.001, "loss": 2.9946, "step": 7681 }, { "epoch": 0.32498519333276926, "grad_norm": 0.319832444190979, "learning_rate": 0.001, "loss": 2.0593, "step": 7682 }, { "epoch": 0.3250274980962856, "grad_norm": 0.3268459737300873, "learning_rate": 0.001, "loss": 2.1757, "step": 7683 }, { "epoch": 0.325069802859802, "grad_norm": 2.9600412845611572, "learning_rate": 0.001, "loss": 2.3893, "step": 7684 }, { "epoch": 0.3251121076233184, "grad_norm": 0.9362667202949524, "learning_rate": 0.001, "loss": 2.2399, "step": 7685 }, { "epoch": 0.32515441238683473, "grad_norm": 0.509442150592804, "learning_rate": 0.001, "loss": 2.1812, "step": 7686 }, { "epoch": 0.32519671715035114, "grad_norm": 0.6412566900253296, "learning_rate": 0.001, "loss": 2.117, "step": 7687 }, { "epoch": 0.3252390219138675, "grad_norm": 0.25562146306037903, "learning_rate": 0.001, "loss": 2.2778, "step": 7688 }, { "epoch": 0.32528132667738385, "grad_norm": 0.20876967906951904, "learning_rate": 0.001, "loss": 1.7797, "step": 7689 }, { "epoch": 0.32532363144090026, "grad_norm": 0.2410915642976761, "learning_rate": 0.001, "loss": 1.6797, "step": 7690 }, { "epoch": 0.3253659362044166, "grad_norm": 0.26328933238983154, "learning_rate": 0.001, "loss": 2.514, "step": 7691 }, { "epoch": 0.32540824096793297, "grad_norm": 0.23529529571533203, "learning_rate": 0.001, "loss": 2.1006, "step": 7692 }, { "epoch": 0.3254505457314494, "grad_norm": 0.31655412912368774, "learning_rate": 0.001, "loss": 2.7627, "step": 7693 }, { "epoch": 0.32549285049496574, "grad_norm": 0.6058570742607117, "learning_rate": 0.001, "loss": 2.5461, "step": 7694 }, { "epoch": 0.3255351552584821, "grad_norm": 0.21855738759040833, "learning_rate": 0.001, "loss": 1.8018, "step": 7695 }, { "epoch": 0.3255774600219985, "grad_norm": 0.22689220309257507, "learning_rate": 0.001, "loss": 2.122, "step": 7696 }, { "epoch": 0.32561976478551485, "grad_norm": 0.22445081174373627, "learning_rate": 0.001, "loss": 1.8022, "step": 7697 }, { "epoch": 0.3256620695490312, "grad_norm": 0.2299547791481018, "learning_rate": 0.001, "loss": 2.2955, "step": 7698 }, { "epoch": 0.3257043743125476, "grad_norm": 0.5000811219215393, "learning_rate": 0.001, "loss": 2.6577, "step": 7699 }, { "epoch": 0.32574667907606397, "grad_norm": 2.1569273471832275, "learning_rate": 0.001, "loss": 2.2027, "step": 7700 }, { "epoch": 0.3257889838395803, "grad_norm": 0.25455546379089355, "learning_rate": 0.001, "loss": 2.4252, "step": 7701 }, { "epoch": 0.32583128860309674, "grad_norm": 0.21060289442539215, "learning_rate": 0.001, "loss": 2.2641, "step": 7702 }, { "epoch": 0.3258735933666131, "grad_norm": 0.1968967467546463, "learning_rate": 0.001, "loss": 2.5999, "step": 7703 }, { "epoch": 0.32591589813012944, "grad_norm": 0.31110793352127075, "learning_rate": 0.001, "loss": 2.2596, "step": 7704 }, { "epoch": 0.3259582028936458, "grad_norm": 2.084595203399658, "learning_rate": 0.001, "loss": 2.7048, "step": 7705 }, { "epoch": 0.3260005076571622, "grad_norm": 0.22130149602890015, "learning_rate": 0.001, "loss": 1.5586, "step": 7706 }, { "epoch": 0.32604281242067856, "grad_norm": 0.2199217677116394, "learning_rate": 0.001, "loss": 1.9871, "step": 7707 }, { "epoch": 0.3260851171841949, "grad_norm": 0.19622403383255005, "learning_rate": 0.001, "loss": 1.8239, "step": 7708 }, { "epoch": 0.3261274219477113, "grad_norm": 0.23500365018844604, "learning_rate": 0.001, "loss": 2.7668, "step": 7709 }, { "epoch": 0.3261697267112277, "grad_norm": 0.4715394973754883, "learning_rate": 0.001, "loss": 3.1356, "step": 7710 }, { "epoch": 0.32621203147474404, "grad_norm": 0.6608024835586548, "learning_rate": 0.001, "loss": 2.1273, "step": 7711 }, { "epoch": 0.32625433623826045, "grad_norm": 0.2573659420013428, "learning_rate": 0.001, "loss": 2.0172, "step": 7712 }, { "epoch": 0.3262966410017768, "grad_norm": 3.757869243621826, "learning_rate": 0.001, "loss": 2.2053, "step": 7713 }, { "epoch": 0.32633894576529315, "grad_norm": 0.36734887957572937, "learning_rate": 0.001, "loss": 2.188, "step": 7714 }, { "epoch": 0.32638125052880956, "grad_norm": 0.8497750163078308, "learning_rate": 0.001, "loss": 3.191, "step": 7715 }, { "epoch": 0.3264235552923259, "grad_norm": 1.0639852285385132, "learning_rate": 0.001, "loss": 2.1099, "step": 7716 }, { "epoch": 0.32646586005584227, "grad_norm": 0.19022592902183533, "learning_rate": 0.001, "loss": 1.6883, "step": 7717 }, { "epoch": 0.3265081648193587, "grad_norm": 0.19042854011058807, "learning_rate": 0.001, "loss": 2.3408, "step": 7718 }, { "epoch": 0.32655046958287504, "grad_norm": 0.2183213233947754, "learning_rate": 0.001, "loss": 2.0902, "step": 7719 }, { "epoch": 0.3265927743463914, "grad_norm": 0.2381005436182022, "learning_rate": 0.001, "loss": 2.5141, "step": 7720 }, { "epoch": 0.3266350791099078, "grad_norm": 0.2116747945547104, "learning_rate": 0.001, "loss": 1.8768, "step": 7721 }, { "epoch": 0.32667738387342415, "grad_norm": 0.43030673265457153, "learning_rate": 0.001, "loss": 3.133, "step": 7722 }, { "epoch": 0.3267196886369405, "grad_norm": 3.445666551589966, "learning_rate": 0.001, "loss": 1.8229, "step": 7723 }, { "epoch": 0.3267619934004569, "grad_norm": 1.1962391138076782, "learning_rate": 0.001, "loss": 2.3559, "step": 7724 }, { "epoch": 0.3268042981639733, "grad_norm": 0.4430636763572693, "learning_rate": 0.001, "loss": 2.3892, "step": 7725 }, { "epoch": 0.3268466029274896, "grad_norm": 0.21626804769039154, "learning_rate": 0.001, "loss": 2.156, "step": 7726 }, { "epoch": 0.326888907691006, "grad_norm": 0.21447373926639557, "learning_rate": 0.001, "loss": 1.9008, "step": 7727 }, { "epoch": 0.3269312124545224, "grad_norm": 0.2664428949356079, "learning_rate": 0.001, "loss": 2.1011, "step": 7728 }, { "epoch": 0.32697351721803875, "grad_norm": 0.19551439583301544, "learning_rate": 0.001, "loss": 2.5206, "step": 7729 }, { "epoch": 0.3270158219815551, "grad_norm": 0.1879674643278122, "learning_rate": 0.001, "loss": 1.8628, "step": 7730 }, { "epoch": 0.3270581267450715, "grad_norm": 0.20081692934036255, "learning_rate": 0.001, "loss": 1.8979, "step": 7731 }, { "epoch": 0.32710043150858786, "grad_norm": 0.178413525223732, "learning_rate": 0.001, "loss": 2.3565, "step": 7732 }, { "epoch": 0.3271427362721042, "grad_norm": 0.384456992149353, "learning_rate": 0.001, "loss": 2.2038, "step": 7733 }, { "epoch": 0.3271850410356206, "grad_norm": 0.29158031940460205, "learning_rate": 0.001, "loss": 2.7088, "step": 7734 }, { "epoch": 0.327227345799137, "grad_norm": 0.17012235522270203, "learning_rate": 0.001, "loss": 1.5478, "step": 7735 }, { "epoch": 0.32726965056265334, "grad_norm": 0.39595428109169006, "learning_rate": 0.001, "loss": 2.7015, "step": 7736 }, { "epoch": 0.32731195532616975, "grad_norm": 0.58181232213974, "learning_rate": 0.001, "loss": 1.7588, "step": 7737 }, { "epoch": 0.3273542600896861, "grad_norm": 0.20064769685268402, "learning_rate": 0.001, "loss": 1.8287, "step": 7738 }, { "epoch": 0.32739656485320245, "grad_norm": 0.2265099585056305, "learning_rate": 0.001, "loss": 2.289, "step": 7739 }, { "epoch": 0.32743886961671886, "grad_norm": 0.2981756031513214, "learning_rate": 0.001, "loss": 2.1712, "step": 7740 }, { "epoch": 0.3274811743802352, "grad_norm": 1.7779628038406372, "learning_rate": 0.001, "loss": 3.4488, "step": 7741 }, { "epoch": 0.3275234791437516, "grad_norm": 0.2585592269897461, "learning_rate": 0.001, "loss": 2.4738, "step": 7742 }, { "epoch": 0.327565783907268, "grad_norm": 0.9747766256332397, "learning_rate": 0.001, "loss": 3.1085, "step": 7743 }, { "epoch": 0.32760808867078434, "grad_norm": 6.341303825378418, "learning_rate": 0.001, "loss": 2.3515, "step": 7744 }, { "epoch": 0.3276503934343007, "grad_norm": 0.28042072057724, "learning_rate": 0.001, "loss": 1.8256, "step": 7745 }, { "epoch": 0.3276926981978171, "grad_norm": 0.21017926931381226, "learning_rate": 0.001, "loss": 2.2271, "step": 7746 }, { "epoch": 0.32773500296133345, "grad_norm": 0.259328156709671, "learning_rate": 0.001, "loss": 2.6266, "step": 7747 }, { "epoch": 0.3277773077248498, "grad_norm": 0.16318392753601074, "learning_rate": 0.001, "loss": 1.8191, "step": 7748 }, { "epoch": 0.32781961248836616, "grad_norm": 0.27874764800071716, "learning_rate": 0.001, "loss": 1.9758, "step": 7749 }, { "epoch": 0.3278619172518826, "grad_norm": 0.2953718304634094, "learning_rate": 0.001, "loss": 3.011, "step": 7750 }, { "epoch": 0.3279042220153989, "grad_norm": 0.3343517780303955, "learning_rate": 0.001, "loss": 2.1521, "step": 7751 }, { "epoch": 0.3279465267789153, "grad_norm": 0.25767582654953003, "learning_rate": 0.001, "loss": 2.672, "step": 7752 }, { "epoch": 0.3279888315424317, "grad_norm": 0.21539786458015442, "learning_rate": 0.001, "loss": 2.3109, "step": 7753 }, { "epoch": 0.32803113630594805, "grad_norm": 0.22561384737491608, "learning_rate": 0.001, "loss": 2.5035, "step": 7754 }, { "epoch": 0.3280734410694644, "grad_norm": 0.2779701054096222, "learning_rate": 0.001, "loss": 2.5272, "step": 7755 }, { "epoch": 0.3281157458329808, "grad_norm": 0.19811490178108215, "learning_rate": 0.001, "loss": 1.7952, "step": 7756 }, { "epoch": 0.32815805059649716, "grad_norm": 0.20879384875297546, "learning_rate": 0.001, "loss": 1.2741, "step": 7757 }, { "epoch": 0.3282003553600135, "grad_norm": 0.8248985409736633, "learning_rate": 0.001, "loss": 1.9017, "step": 7758 }, { "epoch": 0.32824266012352993, "grad_norm": 0.20458753407001495, "learning_rate": 0.001, "loss": 1.9238, "step": 7759 }, { "epoch": 0.3282849648870463, "grad_norm": 0.32681992650032043, "learning_rate": 0.001, "loss": 3.2441, "step": 7760 }, { "epoch": 0.32832726965056264, "grad_norm": 6.789393424987793, "learning_rate": 0.001, "loss": 2.4838, "step": 7761 }, { "epoch": 0.32836957441407905, "grad_norm": 0.49927785992622375, "learning_rate": 0.001, "loss": 2.888, "step": 7762 }, { "epoch": 0.3284118791775954, "grad_norm": 2.3099589347839355, "learning_rate": 0.001, "loss": 2.6976, "step": 7763 }, { "epoch": 0.32845418394111175, "grad_norm": 2.1838667392730713, "learning_rate": 0.001, "loss": 2.2512, "step": 7764 }, { "epoch": 0.32849648870462816, "grad_norm": 0.39657530188560486, "learning_rate": 0.001, "loss": 2.7728, "step": 7765 }, { "epoch": 0.3285387934681445, "grad_norm": 0.2155158668756485, "learning_rate": 0.001, "loss": 2.2733, "step": 7766 }, { "epoch": 0.3285810982316609, "grad_norm": 0.46449387073516846, "learning_rate": 0.001, "loss": 3.3834, "step": 7767 }, { "epoch": 0.3286234029951773, "grad_norm": 0.20302799344062805, "learning_rate": 0.001, "loss": 2.1242, "step": 7768 }, { "epoch": 0.32866570775869364, "grad_norm": 0.1897967904806137, "learning_rate": 0.001, "loss": 2.2495, "step": 7769 }, { "epoch": 0.32870801252221, "grad_norm": 0.17885787785053253, "learning_rate": 0.001, "loss": 1.9588, "step": 7770 }, { "epoch": 0.32875031728572635, "grad_norm": 0.25244593620300293, "learning_rate": 0.001, "loss": 2.8224, "step": 7771 }, { "epoch": 0.32879262204924276, "grad_norm": 0.17659065127372742, "learning_rate": 0.001, "loss": 1.5998, "step": 7772 }, { "epoch": 0.3288349268127591, "grad_norm": 0.19181221723556519, "learning_rate": 0.001, "loss": 1.8639, "step": 7773 }, { "epoch": 0.32887723157627546, "grad_norm": 0.3274793028831482, "learning_rate": 0.001, "loss": 2.3154, "step": 7774 }, { "epoch": 0.3289195363397919, "grad_norm": 0.21050041913986206, "learning_rate": 0.001, "loss": 2.1191, "step": 7775 }, { "epoch": 0.32896184110330823, "grad_norm": 0.17837364971637726, "learning_rate": 0.001, "loss": 1.7701, "step": 7776 }, { "epoch": 0.3290041458668246, "grad_norm": 0.6773764491081238, "learning_rate": 0.001, "loss": 2.6309, "step": 7777 }, { "epoch": 0.329046450630341, "grad_norm": 2.140749216079712, "learning_rate": 0.001, "loss": 3.2878, "step": 7778 }, { "epoch": 0.32908875539385735, "grad_norm": 4.552440166473389, "learning_rate": 0.001, "loss": 2.2279, "step": 7779 }, { "epoch": 0.3291310601573737, "grad_norm": 0.37157878279685974, "learning_rate": 0.001, "loss": 2.3027, "step": 7780 }, { "epoch": 0.3291733649208901, "grad_norm": 0.25460559129714966, "learning_rate": 0.001, "loss": 2.0759, "step": 7781 }, { "epoch": 0.32921566968440646, "grad_norm": 1.6842424869537354, "learning_rate": 0.001, "loss": 2.857, "step": 7782 }, { "epoch": 0.3292579744479228, "grad_norm": 0.2335149198770523, "learning_rate": 0.001, "loss": 1.7248, "step": 7783 }, { "epoch": 0.32930027921143923, "grad_norm": 0.1995423585176468, "learning_rate": 0.001, "loss": 1.7434, "step": 7784 }, { "epoch": 0.3293425839749556, "grad_norm": 0.2805491089820862, "learning_rate": 0.001, "loss": 2.6196, "step": 7785 }, { "epoch": 0.32938488873847194, "grad_norm": 0.7963484525680542, "learning_rate": 0.001, "loss": 2.7678, "step": 7786 }, { "epoch": 0.32942719350198835, "grad_norm": 0.2746239900588989, "learning_rate": 0.001, "loss": 2.0567, "step": 7787 }, { "epoch": 0.3294694982655047, "grad_norm": 0.6149387955665588, "learning_rate": 0.001, "loss": 2.4013, "step": 7788 }, { "epoch": 0.32951180302902106, "grad_norm": 0.5365425944328308, "learning_rate": 0.001, "loss": 2.2887, "step": 7789 }, { "epoch": 0.32955410779253747, "grad_norm": 0.22529155015945435, "learning_rate": 0.001, "loss": 2.5844, "step": 7790 }, { "epoch": 0.3295964125560538, "grad_norm": 0.28875237703323364, "learning_rate": 0.001, "loss": 1.7827, "step": 7791 }, { "epoch": 0.3296387173195702, "grad_norm": 0.20266857743263245, "learning_rate": 0.001, "loss": 2.5674, "step": 7792 }, { "epoch": 0.32968102208308653, "grad_norm": 0.16716927289962769, "learning_rate": 0.001, "loss": 2.2583, "step": 7793 }, { "epoch": 0.32972332684660294, "grad_norm": 0.25711336731910706, "learning_rate": 0.001, "loss": 2.9724, "step": 7794 }, { "epoch": 0.3297656316101193, "grad_norm": 4.552552700042725, "learning_rate": 0.001, "loss": 2.6996, "step": 7795 }, { "epoch": 0.32980793637363565, "grad_norm": 0.5906350612640381, "learning_rate": 0.001, "loss": 2.1236, "step": 7796 }, { "epoch": 0.32985024113715206, "grad_norm": 0.2682677209377289, "learning_rate": 0.001, "loss": 2.4385, "step": 7797 }, { "epoch": 0.3298925459006684, "grad_norm": 0.25783124566078186, "learning_rate": 0.001, "loss": 2.3089, "step": 7798 }, { "epoch": 0.32993485066418476, "grad_norm": 0.18664343655109406, "learning_rate": 0.001, "loss": 2.0034, "step": 7799 }, { "epoch": 0.3299771554277012, "grad_norm": 0.1630748063325882, "learning_rate": 0.001, "loss": 1.6401, "step": 7800 }, { "epoch": 0.33001946019121753, "grad_norm": 0.30394893884658813, "learning_rate": 0.001, "loss": 2.3112, "step": 7801 }, { "epoch": 0.3300617649547339, "grad_norm": 0.19138090312480927, "learning_rate": 0.001, "loss": 2.5035, "step": 7802 }, { "epoch": 0.3301040697182503, "grad_norm": 0.16754819452762604, "learning_rate": 0.001, "loss": 1.6879, "step": 7803 }, { "epoch": 0.33014637448176665, "grad_norm": 0.8580173850059509, "learning_rate": 0.001, "loss": 2.4691, "step": 7804 }, { "epoch": 0.330188679245283, "grad_norm": 0.176096573472023, "learning_rate": 0.001, "loss": 1.8854, "step": 7805 }, { "epoch": 0.3302309840087994, "grad_norm": 0.20934566855430603, "learning_rate": 0.001, "loss": 2.0659, "step": 7806 }, { "epoch": 0.33027328877231577, "grad_norm": 5.532082557678223, "learning_rate": 0.001, "loss": 1.759, "step": 7807 }, { "epoch": 0.3303155935358321, "grad_norm": 0.19679847359657288, "learning_rate": 0.001, "loss": 2.3835, "step": 7808 }, { "epoch": 0.33035789829934853, "grad_norm": 0.19240596890449524, "learning_rate": 0.001, "loss": 2.0052, "step": 7809 }, { "epoch": 0.3304002030628649, "grad_norm": 0.19509436190128326, "learning_rate": 0.001, "loss": 2.426, "step": 7810 }, { "epoch": 0.33044250782638124, "grad_norm": 0.19828227162361145, "learning_rate": 0.001, "loss": 2.1933, "step": 7811 }, { "epoch": 0.33048481258989765, "grad_norm": 2.229938507080078, "learning_rate": 0.001, "loss": 1.7994, "step": 7812 }, { "epoch": 0.330527117353414, "grad_norm": 0.2188158929347992, "learning_rate": 0.001, "loss": 1.8493, "step": 7813 }, { "epoch": 0.33056942211693036, "grad_norm": 0.21723762154579163, "learning_rate": 0.001, "loss": 1.8718, "step": 7814 }, { "epoch": 0.33061172688044677, "grad_norm": 0.5078144669532776, "learning_rate": 0.001, "loss": 2.1031, "step": 7815 }, { "epoch": 0.3306540316439631, "grad_norm": 0.24922381341457367, "learning_rate": 0.001, "loss": 2.1853, "step": 7816 }, { "epoch": 0.3306963364074795, "grad_norm": 0.19677498936653137, "learning_rate": 0.001, "loss": 2.4259, "step": 7817 }, { "epoch": 0.33073864117099583, "grad_norm": 0.2011793553829193, "learning_rate": 0.001, "loss": 3.2597, "step": 7818 }, { "epoch": 0.33078094593451224, "grad_norm": 0.3408401608467102, "learning_rate": 0.001, "loss": 2.3159, "step": 7819 }, { "epoch": 0.3308232506980286, "grad_norm": 0.1871691644191742, "learning_rate": 0.001, "loss": 2.5773, "step": 7820 }, { "epoch": 0.33086555546154495, "grad_norm": 0.19966411590576172, "learning_rate": 0.001, "loss": 1.4954, "step": 7821 }, { "epoch": 0.33090786022506136, "grad_norm": 0.20927104353904724, "learning_rate": 0.001, "loss": 2.0749, "step": 7822 }, { "epoch": 0.3309501649885777, "grad_norm": 0.2717510759830475, "learning_rate": 0.001, "loss": 2.9177, "step": 7823 }, { "epoch": 0.33099246975209407, "grad_norm": 0.2813604176044464, "learning_rate": 0.001, "loss": 1.8495, "step": 7824 }, { "epoch": 0.3310347745156105, "grad_norm": 0.18087762594223022, "learning_rate": 0.001, "loss": 2.747, "step": 7825 }, { "epoch": 0.33107707927912683, "grad_norm": 29.72042465209961, "learning_rate": 0.001, "loss": 3.0675, "step": 7826 }, { "epoch": 0.3311193840426432, "grad_norm": 0.2071724832057953, "learning_rate": 0.001, "loss": 2.6322, "step": 7827 }, { "epoch": 0.3311616888061596, "grad_norm": 0.208586648106575, "learning_rate": 0.001, "loss": 2.4795, "step": 7828 }, { "epoch": 0.33120399356967595, "grad_norm": 0.19940099120140076, "learning_rate": 0.001, "loss": 2.5773, "step": 7829 }, { "epoch": 0.3312462983331923, "grad_norm": 2.280409097671509, "learning_rate": 0.001, "loss": 2.3525, "step": 7830 }, { "epoch": 0.3312886030967087, "grad_norm": 0.977155864238739, "learning_rate": 0.001, "loss": 1.8458, "step": 7831 }, { "epoch": 0.33133090786022507, "grad_norm": 0.33999037742614746, "learning_rate": 0.001, "loss": 1.8913, "step": 7832 }, { "epoch": 0.3313732126237414, "grad_norm": 0.23237483203411102, "learning_rate": 0.001, "loss": 2.69, "step": 7833 }, { "epoch": 0.33141551738725783, "grad_norm": 0.1733536273241043, "learning_rate": 0.001, "loss": 2.289, "step": 7834 }, { "epoch": 0.3314578221507742, "grad_norm": 0.19590957462787628, "learning_rate": 0.001, "loss": 2.3374, "step": 7835 }, { "epoch": 0.33150012691429054, "grad_norm": 0.22737683355808258, "learning_rate": 0.001, "loss": 2.8243, "step": 7836 }, { "epoch": 0.33154243167780695, "grad_norm": 0.22133977711200714, "learning_rate": 0.001, "loss": 1.9613, "step": 7837 }, { "epoch": 0.3315847364413233, "grad_norm": 0.24154724180698395, "learning_rate": 0.001, "loss": 2.0886, "step": 7838 }, { "epoch": 0.33162704120483966, "grad_norm": 0.1789299100637436, "learning_rate": 0.001, "loss": 2.6418, "step": 7839 }, { "epoch": 0.331669345968356, "grad_norm": 0.27288487553596497, "learning_rate": 0.001, "loss": 2.4128, "step": 7840 }, { "epoch": 0.3317116507318724, "grad_norm": 0.7873362302780151, "learning_rate": 0.001, "loss": 1.8761, "step": 7841 }, { "epoch": 0.3317539554953888, "grad_norm": 0.32761895656585693, "learning_rate": 0.001, "loss": 2.2242, "step": 7842 }, { "epoch": 0.33179626025890513, "grad_norm": 2.1952366828918457, "learning_rate": 0.001, "loss": 2.0018, "step": 7843 }, { "epoch": 0.33183856502242154, "grad_norm": 0.18359605967998505, "learning_rate": 0.001, "loss": 1.7927, "step": 7844 }, { "epoch": 0.3318808697859379, "grad_norm": 0.9010672569274902, "learning_rate": 0.001, "loss": 2.9856, "step": 7845 }, { "epoch": 0.33192317454945425, "grad_norm": 0.23249398171901703, "learning_rate": 0.001, "loss": 2.6271, "step": 7846 }, { "epoch": 0.33196547931297066, "grad_norm": 0.5741890072822571, "learning_rate": 0.001, "loss": 3.903, "step": 7847 }, { "epoch": 0.332007784076487, "grad_norm": 0.5828627347946167, "learning_rate": 0.001, "loss": 2.3399, "step": 7848 }, { "epoch": 0.33205008884000337, "grad_norm": 0.2302628457546234, "learning_rate": 0.001, "loss": 2.6404, "step": 7849 }, { "epoch": 0.3320923936035198, "grad_norm": 1.7967714071273804, "learning_rate": 0.001, "loss": 2.3163, "step": 7850 }, { "epoch": 0.33213469836703613, "grad_norm": 36.19169616699219, "learning_rate": 0.001, "loss": 2.172, "step": 7851 }, { "epoch": 0.3321770031305525, "grad_norm": 4.201509952545166, "learning_rate": 0.001, "loss": 2.2538, "step": 7852 }, { "epoch": 0.3322193078940689, "grad_norm": 0.23143364489078522, "learning_rate": 0.001, "loss": 3.6801, "step": 7853 }, { "epoch": 0.33226161265758525, "grad_norm": 0.4308498203754425, "learning_rate": 0.001, "loss": 2.6071, "step": 7854 }, { "epoch": 0.3323039174211016, "grad_norm": 0.1866970956325531, "learning_rate": 0.001, "loss": 3.0409, "step": 7855 }, { "epoch": 0.332346222184618, "grad_norm": 0.20423516631126404, "learning_rate": 0.001, "loss": 2.1872, "step": 7856 }, { "epoch": 0.33238852694813437, "grad_norm": 0.23156633973121643, "learning_rate": 0.001, "loss": 1.8005, "step": 7857 }, { "epoch": 0.3324308317116507, "grad_norm": 0.29893800616264343, "learning_rate": 0.001, "loss": 1.9907, "step": 7858 }, { "epoch": 0.33247313647516713, "grad_norm": 0.23436959087848663, "learning_rate": 0.001, "loss": 2.8521, "step": 7859 }, { "epoch": 0.3325154412386835, "grad_norm": 0.1933186650276184, "learning_rate": 0.001, "loss": 1.6054, "step": 7860 }, { "epoch": 0.33255774600219984, "grad_norm": 0.2026066929101944, "learning_rate": 0.001, "loss": 2.026, "step": 7861 }, { "epoch": 0.3326000507657162, "grad_norm": 0.21507464349269867, "learning_rate": 0.001, "loss": 2.9487, "step": 7862 }, { "epoch": 0.3326423555292326, "grad_norm": 0.2192848175764084, "learning_rate": 0.001, "loss": 1.9447, "step": 7863 }, { "epoch": 0.33268466029274896, "grad_norm": 2.8954203128814697, "learning_rate": 0.001, "loss": 1.8237, "step": 7864 }, { "epoch": 0.3327269650562653, "grad_norm": 0.30463677644729614, "learning_rate": 0.001, "loss": 2.1563, "step": 7865 }, { "epoch": 0.3327692698197817, "grad_norm": 0.45512622594833374, "learning_rate": 0.001, "loss": 2.8004, "step": 7866 }, { "epoch": 0.3328115745832981, "grad_norm": 0.32765403389930725, "learning_rate": 0.001, "loss": 2.4544, "step": 7867 }, { "epoch": 0.33285387934681443, "grad_norm": 0.21778345108032227, "learning_rate": 0.001, "loss": 2.2046, "step": 7868 }, { "epoch": 0.33289618411033084, "grad_norm": 0.2558639645576477, "learning_rate": 0.001, "loss": 3.3903, "step": 7869 }, { "epoch": 0.3329384888738472, "grad_norm": 0.27127140760421753, "learning_rate": 0.001, "loss": 2.1592, "step": 7870 }, { "epoch": 0.33298079363736355, "grad_norm": 0.2440665364265442, "learning_rate": 0.001, "loss": 3.0212, "step": 7871 }, { "epoch": 0.33302309840087996, "grad_norm": 0.23537136614322662, "learning_rate": 0.001, "loss": 2.1113, "step": 7872 }, { "epoch": 0.3330654031643963, "grad_norm": 0.5498445630073547, "learning_rate": 0.001, "loss": 2.9246, "step": 7873 }, { "epoch": 0.33310770792791267, "grad_norm": 0.19727638363838196, "learning_rate": 0.001, "loss": 1.9724, "step": 7874 }, { "epoch": 0.3331500126914291, "grad_norm": 0.19722647964954376, "learning_rate": 0.001, "loss": 1.92, "step": 7875 }, { "epoch": 0.33319231745494543, "grad_norm": 1.0840023756027222, "learning_rate": 0.001, "loss": 2.243, "step": 7876 }, { "epoch": 0.3332346222184618, "grad_norm": 1.0607739686965942, "learning_rate": 0.001, "loss": 2.2591, "step": 7877 }, { "epoch": 0.3332769269819782, "grad_norm": 0.19976572692394257, "learning_rate": 0.001, "loss": 2.5023, "step": 7878 }, { "epoch": 0.33331923174549455, "grad_norm": 0.20749805867671967, "learning_rate": 0.001, "loss": 2.4522, "step": 7879 }, { "epoch": 0.3333615365090109, "grad_norm": 0.1775732934474945, "learning_rate": 0.001, "loss": 2.4282, "step": 7880 }, { "epoch": 0.3334038412725273, "grad_norm": 0.6955072283744812, "learning_rate": 0.001, "loss": 1.5459, "step": 7881 }, { "epoch": 0.33344614603604367, "grad_norm": 2.1425440311431885, "learning_rate": 0.001, "loss": 1.8967, "step": 7882 }, { "epoch": 0.33348845079956, "grad_norm": 3.9106364250183105, "learning_rate": 0.001, "loss": 2.2198, "step": 7883 }, { "epoch": 0.3335307555630764, "grad_norm": 6.036744594573975, "learning_rate": 0.001, "loss": 2.4918, "step": 7884 }, { "epoch": 0.3335730603265928, "grad_norm": 0.19927839934825897, "learning_rate": 0.001, "loss": 2.5132, "step": 7885 }, { "epoch": 0.33361536509010914, "grad_norm": 1.190531849861145, "learning_rate": 0.001, "loss": 1.8756, "step": 7886 }, { "epoch": 0.3336576698536255, "grad_norm": 0.2305949628353119, "learning_rate": 0.001, "loss": 3.0, "step": 7887 }, { "epoch": 0.3336999746171419, "grad_norm": 0.26107683777809143, "learning_rate": 0.001, "loss": 2.8662, "step": 7888 }, { "epoch": 0.33374227938065826, "grad_norm": 0.3804430365562439, "learning_rate": 0.001, "loss": 3.1798, "step": 7889 }, { "epoch": 0.3337845841441746, "grad_norm": 0.32322415709495544, "learning_rate": 0.001, "loss": 2.1622, "step": 7890 }, { "epoch": 0.333826888907691, "grad_norm": 1.6569503545761108, "learning_rate": 0.001, "loss": 2.8064, "step": 7891 }, { "epoch": 0.3338691936712074, "grad_norm": 0.975721001625061, "learning_rate": 0.001, "loss": 2.7777, "step": 7892 }, { "epoch": 0.33391149843472373, "grad_norm": 1.4115006923675537, "learning_rate": 0.001, "loss": 2.3504, "step": 7893 }, { "epoch": 0.33395380319824014, "grad_norm": 0.6836025714874268, "learning_rate": 0.001, "loss": 2.1942, "step": 7894 }, { "epoch": 0.3339961079617565, "grad_norm": 0.35912907123565674, "learning_rate": 0.001, "loss": 3.1702, "step": 7895 }, { "epoch": 0.33403841272527285, "grad_norm": 1.0951977968215942, "learning_rate": 0.001, "loss": 3.3936, "step": 7896 }, { "epoch": 0.33408071748878926, "grad_norm": 0.24415375292301178, "learning_rate": 0.001, "loss": 2.4679, "step": 7897 }, { "epoch": 0.3341230222523056, "grad_norm": 0.23192666471004486, "learning_rate": 0.001, "loss": 1.486, "step": 7898 }, { "epoch": 0.33416532701582197, "grad_norm": 0.3416637182235718, "learning_rate": 0.001, "loss": 1.8632, "step": 7899 }, { "epoch": 0.3342076317793384, "grad_norm": 0.23700256645679474, "learning_rate": 0.001, "loss": 2.0956, "step": 7900 }, { "epoch": 0.33424993654285473, "grad_norm": 0.2494289129972458, "learning_rate": 0.001, "loss": 2.3394, "step": 7901 }, { "epoch": 0.3342922413063711, "grad_norm": 0.2167077511548996, "learning_rate": 0.001, "loss": 2.3218, "step": 7902 }, { "epoch": 0.3343345460698875, "grad_norm": 1.0088181495666504, "learning_rate": 0.001, "loss": 2.4546, "step": 7903 }, { "epoch": 0.33437685083340385, "grad_norm": 0.45194211602211, "learning_rate": 0.001, "loss": 1.9015, "step": 7904 }, { "epoch": 0.3344191555969202, "grad_norm": 0.20241999626159668, "learning_rate": 0.001, "loss": 1.8324, "step": 7905 }, { "epoch": 0.33446146036043656, "grad_norm": 0.26756224036216736, "learning_rate": 0.001, "loss": 3.2489, "step": 7906 }, { "epoch": 0.33450376512395297, "grad_norm": 0.49798786640167236, "learning_rate": 0.001, "loss": 3.2647, "step": 7907 }, { "epoch": 0.3345460698874693, "grad_norm": 0.2664455473423004, "learning_rate": 0.001, "loss": 2.7186, "step": 7908 }, { "epoch": 0.3345883746509857, "grad_norm": 0.3100428879261017, "learning_rate": 0.001, "loss": 2.275, "step": 7909 }, { "epoch": 0.3346306794145021, "grad_norm": 0.19534626603126526, "learning_rate": 0.001, "loss": 2.7456, "step": 7910 }, { "epoch": 0.33467298417801844, "grad_norm": 0.6475488543510437, "learning_rate": 0.001, "loss": 2.2076, "step": 7911 }, { "epoch": 0.3347152889415348, "grad_norm": 0.3301447927951813, "learning_rate": 0.001, "loss": 2.3495, "step": 7912 }, { "epoch": 0.3347575937050512, "grad_norm": 2.3587558269500732, "learning_rate": 0.001, "loss": 2.3851, "step": 7913 }, { "epoch": 0.33479989846856756, "grad_norm": 0.2016088366508484, "learning_rate": 0.001, "loss": 2.1589, "step": 7914 }, { "epoch": 0.3348422032320839, "grad_norm": 0.2044316530227661, "learning_rate": 0.001, "loss": 3.0746, "step": 7915 }, { "epoch": 0.3348845079956003, "grad_norm": 0.19156186282634735, "learning_rate": 0.001, "loss": 2.1999, "step": 7916 }, { "epoch": 0.3349268127591167, "grad_norm": 0.23087522387504578, "learning_rate": 0.001, "loss": 3.6112, "step": 7917 }, { "epoch": 0.33496911752263303, "grad_norm": 0.1780395656824112, "learning_rate": 0.001, "loss": 2.6512, "step": 7918 }, { "epoch": 0.33501142228614944, "grad_norm": 0.20307819545269012, "learning_rate": 0.001, "loss": 1.8293, "step": 7919 }, { "epoch": 0.3350537270496658, "grad_norm": 0.735317587852478, "learning_rate": 0.001, "loss": 1.861, "step": 7920 }, { "epoch": 0.33509603181318215, "grad_norm": 0.2308008074760437, "learning_rate": 0.001, "loss": 2.4375, "step": 7921 }, { "epoch": 0.33513833657669856, "grad_norm": 0.2518015205860138, "learning_rate": 0.001, "loss": 2.1128, "step": 7922 }, { "epoch": 0.3351806413402149, "grad_norm": 0.22979722917079926, "learning_rate": 0.001, "loss": 1.8036, "step": 7923 }, { "epoch": 0.33522294610373127, "grad_norm": 0.20202603936195374, "learning_rate": 0.001, "loss": 2.629, "step": 7924 }, { "epoch": 0.3352652508672477, "grad_norm": 0.19163471460342407, "learning_rate": 0.001, "loss": 2.8881, "step": 7925 }, { "epoch": 0.33530755563076403, "grad_norm": 0.17413891851902008, "learning_rate": 0.001, "loss": 1.7156, "step": 7926 }, { "epoch": 0.3353498603942804, "grad_norm": 0.3942449390888214, "learning_rate": 0.001, "loss": 1.8654, "step": 7927 }, { "epoch": 0.3353921651577968, "grad_norm": 20.90395164489746, "learning_rate": 0.001, "loss": 1.722, "step": 7928 }, { "epoch": 0.33543446992131315, "grad_norm": 2.177119016647339, "learning_rate": 0.001, "loss": 1.8256, "step": 7929 }, { "epoch": 0.3354767746848295, "grad_norm": 0.31585386395454407, "learning_rate": 0.001, "loss": 2.4039, "step": 7930 }, { "epoch": 0.33551907944834586, "grad_norm": 0.3052481710910797, "learning_rate": 0.001, "loss": 2.1275, "step": 7931 }, { "epoch": 0.33556138421186227, "grad_norm": 0.23809614777565002, "learning_rate": 0.001, "loss": 1.7413, "step": 7932 }, { "epoch": 0.3356036889753786, "grad_norm": 2.0751454830169678, "learning_rate": 0.001, "loss": 2.132, "step": 7933 }, { "epoch": 0.335645993738895, "grad_norm": 0.6498151421546936, "learning_rate": 0.001, "loss": 2.0706, "step": 7934 }, { "epoch": 0.3356882985024114, "grad_norm": 0.26023566722869873, "learning_rate": 0.001, "loss": 2.4264, "step": 7935 }, { "epoch": 0.33573060326592774, "grad_norm": 0.203081876039505, "learning_rate": 0.001, "loss": 2.4726, "step": 7936 }, { "epoch": 0.3357729080294441, "grad_norm": 0.2829737961292267, "learning_rate": 0.001, "loss": 3.3616, "step": 7937 }, { "epoch": 0.3358152127929605, "grad_norm": 1.6824712753295898, "learning_rate": 0.001, "loss": 1.6086, "step": 7938 }, { "epoch": 0.33585751755647686, "grad_norm": 0.19478239119052887, "learning_rate": 0.001, "loss": 2.579, "step": 7939 }, { "epoch": 0.3358998223199932, "grad_norm": 0.22429609298706055, "learning_rate": 0.001, "loss": 2.8092, "step": 7940 }, { "epoch": 0.3359421270835096, "grad_norm": 0.16416104137897491, "learning_rate": 0.001, "loss": 2.2778, "step": 7941 }, { "epoch": 0.335984431847026, "grad_norm": 0.27525100111961365, "learning_rate": 0.001, "loss": 2.1074, "step": 7942 }, { "epoch": 0.33602673661054233, "grad_norm": 0.219032421708107, "learning_rate": 0.001, "loss": 2.4779, "step": 7943 }, { "epoch": 0.33606904137405874, "grad_norm": 0.7257388830184937, "learning_rate": 0.001, "loss": 1.9499, "step": 7944 }, { "epoch": 0.3361113461375751, "grad_norm": 0.8192842602729797, "learning_rate": 0.001, "loss": 1.9096, "step": 7945 }, { "epoch": 0.33615365090109145, "grad_norm": 0.1924901306629181, "learning_rate": 0.001, "loss": 1.9039, "step": 7946 }, { "epoch": 0.33619595566460786, "grad_norm": 0.18553310632705688, "learning_rate": 0.001, "loss": 1.9586, "step": 7947 }, { "epoch": 0.3362382604281242, "grad_norm": 0.41920140385627747, "learning_rate": 0.001, "loss": 2.1416, "step": 7948 }, { "epoch": 0.33628056519164057, "grad_norm": 0.21451827883720398, "learning_rate": 0.001, "loss": 2.4585, "step": 7949 }, { "epoch": 0.336322869955157, "grad_norm": 0.2012207955121994, "learning_rate": 0.001, "loss": 2.3316, "step": 7950 }, { "epoch": 0.33636517471867333, "grad_norm": 0.1847204715013504, "learning_rate": 0.001, "loss": 2.8531, "step": 7951 }, { "epoch": 0.3364074794821897, "grad_norm": 0.2382209748029709, "learning_rate": 0.001, "loss": 2.1404, "step": 7952 }, { "epoch": 0.33644978424570604, "grad_norm": 0.19776912033557892, "learning_rate": 0.001, "loss": 2.2637, "step": 7953 }, { "epoch": 0.33649208900922245, "grad_norm": 0.3410530388355255, "learning_rate": 0.001, "loss": 2.5895, "step": 7954 }, { "epoch": 0.3365343937727388, "grad_norm": 0.4076724648475647, "learning_rate": 0.001, "loss": 2.3355, "step": 7955 }, { "epoch": 0.33657669853625516, "grad_norm": 0.24683041870594025, "learning_rate": 0.001, "loss": 2.0245, "step": 7956 }, { "epoch": 0.33661900329977157, "grad_norm": 0.1744687408208847, "learning_rate": 0.001, "loss": 2.8672, "step": 7957 }, { "epoch": 0.3366613080632879, "grad_norm": 0.1615762561559677, "learning_rate": 0.001, "loss": 2.8006, "step": 7958 }, { "epoch": 0.3367036128268043, "grad_norm": 6.103714942932129, "learning_rate": 0.001, "loss": 2.3797, "step": 7959 }, { "epoch": 0.3367459175903207, "grad_norm": 0.23423656821250916, "learning_rate": 0.001, "loss": 2.1547, "step": 7960 }, { "epoch": 0.33678822235383704, "grad_norm": 0.36211827397346497, "learning_rate": 0.001, "loss": 2.6871, "step": 7961 }, { "epoch": 0.3368305271173534, "grad_norm": 0.2331177294254303, "learning_rate": 0.001, "loss": 2.6157, "step": 7962 }, { "epoch": 0.3368728318808698, "grad_norm": 0.18622010946273804, "learning_rate": 0.001, "loss": 2.4594, "step": 7963 }, { "epoch": 0.33691513664438616, "grad_norm": 0.26407673954963684, "learning_rate": 0.001, "loss": 2.1465, "step": 7964 }, { "epoch": 0.3369574414079025, "grad_norm": 0.20621120929718018, "learning_rate": 0.001, "loss": 1.7513, "step": 7965 }, { "epoch": 0.3369997461714189, "grad_norm": 0.2361491471529007, "learning_rate": 0.001, "loss": 1.8199, "step": 7966 }, { "epoch": 0.3370420509349353, "grad_norm": 0.20442907512187958, "learning_rate": 0.001, "loss": 3.1949, "step": 7967 }, { "epoch": 0.33708435569845163, "grad_norm": 0.23592479526996613, "learning_rate": 0.001, "loss": 1.7339, "step": 7968 }, { "epoch": 0.33712666046196804, "grad_norm": 19.943355560302734, "learning_rate": 0.001, "loss": 1.6594, "step": 7969 }, { "epoch": 0.3371689652254844, "grad_norm": 4.2921223640441895, "learning_rate": 0.001, "loss": 2.8318, "step": 7970 }, { "epoch": 0.33721126998900075, "grad_norm": 0.3829931616783142, "learning_rate": 0.001, "loss": 2.4734, "step": 7971 }, { "epoch": 0.33725357475251716, "grad_norm": 1.2138727903366089, "learning_rate": 0.001, "loss": 3.164, "step": 7972 }, { "epoch": 0.3372958795160335, "grad_norm": 0.18562185764312744, "learning_rate": 0.001, "loss": 2.0124, "step": 7973 }, { "epoch": 0.33733818427954987, "grad_norm": 1.363898754119873, "learning_rate": 0.001, "loss": 2.4757, "step": 7974 }, { "epoch": 0.3373804890430662, "grad_norm": 0.16648072004318237, "learning_rate": 0.001, "loss": 2.2298, "step": 7975 }, { "epoch": 0.33742279380658263, "grad_norm": 0.27364203333854675, "learning_rate": 0.001, "loss": 2.1235, "step": 7976 }, { "epoch": 0.337465098570099, "grad_norm": 0.2676752209663391, "learning_rate": 0.001, "loss": 2.7993, "step": 7977 }, { "epoch": 0.33750740333361534, "grad_norm": 0.21944323182106018, "learning_rate": 0.001, "loss": 2.7508, "step": 7978 }, { "epoch": 0.33754970809713175, "grad_norm": 0.18799695372581482, "learning_rate": 0.001, "loss": 1.9509, "step": 7979 }, { "epoch": 0.3375920128606481, "grad_norm": 0.17753717303276062, "learning_rate": 0.001, "loss": 2.3597, "step": 7980 }, { "epoch": 0.33763431762416446, "grad_norm": 0.22551682591438293, "learning_rate": 0.001, "loss": 1.6755, "step": 7981 }, { "epoch": 0.33767662238768087, "grad_norm": 0.21229778230190277, "learning_rate": 0.001, "loss": 2.2954, "step": 7982 }, { "epoch": 0.3377189271511972, "grad_norm": 0.21653328835964203, "learning_rate": 0.001, "loss": 2.2161, "step": 7983 }, { "epoch": 0.3377612319147136, "grad_norm": 0.17863360047340393, "learning_rate": 0.001, "loss": 2.0211, "step": 7984 }, { "epoch": 0.33780353667823, "grad_norm": 0.3650844693183899, "learning_rate": 0.001, "loss": 2.7883, "step": 7985 }, { "epoch": 0.33784584144174634, "grad_norm": 0.31294921040534973, "learning_rate": 0.001, "loss": 2.2396, "step": 7986 }, { "epoch": 0.3378881462052627, "grad_norm": 0.8418748378753662, "learning_rate": 0.001, "loss": 1.9066, "step": 7987 }, { "epoch": 0.3379304509687791, "grad_norm": 1.4910551309585571, "learning_rate": 0.001, "loss": 1.5615, "step": 7988 }, { "epoch": 0.33797275573229546, "grad_norm": 0.7362372875213623, "learning_rate": 0.001, "loss": 2.7537, "step": 7989 }, { "epoch": 0.3380150604958118, "grad_norm": 0.27851244807243347, "learning_rate": 0.001, "loss": 2.1922, "step": 7990 }, { "epoch": 0.3380573652593282, "grad_norm": 0.18572275340557098, "learning_rate": 0.001, "loss": 1.9722, "step": 7991 }, { "epoch": 0.3380996700228446, "grad_norm": 0.32508203387260437, "learning_rate": 0.001, "loss": 2.7969, "step": 7992 }, { "epoch": 0.33814197478636093, "grad_norm": 0.22849464416503906, "learning_rate": 0.001, "loss": 1.5679, "step": 7993 }, { "epoch": 0.33818427954987734, "grad_norm": 0.18476389348506927, "learning_rate": 0.001, "loss": 2.1825, "step": 7994 }, { "epoch": 0.3382265843133937, "grad_norm": 0.32618439197540283, "learning_rate": 0.001, "loss": 1.9877, "step": 7995 }, { "epoch": 0.33826888907691005, "grad_norm": 0.20857444405555725, "learning_rate": 0.001, "loss": 1.8781, "step": 7996 }, { "epoch": 0.3383111938404264, "grad_norm": 0.1851375550031662, "learning_rate": 0.001, "loss": 1.5724, "step": 7997 }, { "epoch": 0.3383534986039428, "grad_norm": 0.5002090930938721, "learning_rate": 0.001, "loss": 2.1482, "step": 7998 }, { "epoch": 0.33839580336745917, "grad_norm": 0.18497441709041595, "learning_rate": 0.001, "loss": 2.2619, "step": 7999 }, { "epoch": 0.3384381081309755, "grad_norm": 0.2192855030298233, "learning_rate": 0.001, "loss": 2.4961, "step": 8000 }, { "epoch": 0.33848041289449193, "grad_norm": 3.4979212284088135, "learning_rate": 0.001, "loss": 2.0733, "step": 8001 }, { "epoch": 0.3385227176580083, "grad_norm": 0.2794019877910614, "learning_rate": 0.001, "loss": 2.9049, "step": 8002 }, { "epoch": 0.33856502242152464, "grad_norm": 0.26850298047065735, "learning_rate": 0.001, "loss": 2.1762, "step": 8003 }, { "epoch": 0.33860732718504105, "grad_norm": 0.19548161327838898, "learning_rate": 0.001, "loss": 1.9791, "step": 8004 }, { "epoch": 0.3386496319485574, "grad_norm": 0.20981860160827637, "learning_rate": 0.001, "loss": 2.0878, "step": 8005 }, { "epoch": 0.33869193671207376, "grad_norm": 0.3214268088340759, "learning_rate": 0.001, "loss": 1.628, "step": 8006 }, { "epoch": 0.33873424147559017, "grad_norm": 0.2680968940258026, "learning_rate": 0.001, "loss": 1.6191, "step": 8007 }, { "epoch": 0.3387765462391065, "grad_norm": 5.1096930503845215, "learning_rate": 0.001, "loss": 2.2251, "step": 8008 }, { "epoch": 0.3388188510026229, "grad_norm": 0.2013072371482849, "learning_rate": 0.001, "loss": 2.0241, "step": 8009 }, { "epoch": 0.3388611557661393, "grad_norm": 0.2397328019142151, "learning_rate": 0.001, "loss": 1.7325, "step": 8010 }, { "epoch": 0.33890346052965564, "grad_norm": 0.20630115270614624, "learning_rate": 0.001, "loss": 1.7987, "step": 8011 }, { "epoch": 0.338945765293172, "grad_norm": 0.3187551200389862, "learning_rate": 0.001, "loss": 2.6951, "step": 8012 }, { "epoch": 0.3389880700566884, "grad_norm": 0.2622925937175751, "learning_rate": 0.001, "loss": 1.9729, "step": 8013 }, { "epoch": 0.33903037482020476, "grad_norm": 0.20265188813209534, "learning_rate": 0.001, "loss": 3.4582, "step": 8014 }, { "epoch": 0.3390726795837211, "grad_norm": 0.44326838850975037, "learning_rate": 0.001, "loss": 2.0444, "step": 8015 }, { "epoch": 0.3391149843472375, "grad_norm": 6.31563663482666, "learning_rate": 0.001, "loss": 1.9386, "step": 8016 }, { "epoch": 0.3391572891107539, "grad_norm": 0.7522348761558533, "learning_rate": 0.001, "loss": 2.9402, "step": 8017 }, { "epoch": 0.33919959387427023, "grad_norm": 0.27529436349868774, "learning_rate": 0.001, "loss": 1.899, "step": 8018 }, { "epoch": 0.3392418986377866, "grad_norm": 0.304376482963562, "learning_rate": 0.001, "loss": 2.4614, "step": 8019 }, { "epoch": 0.339284203401303, "grad_norm": 0.2474507987499237, "learning_rate": 0.001, "loss": 2.5785, "step": 8020 }, { "epoch": 0.33932650816481935, "grad_norm": 0.27842018008232117, "learning_rate": 0.001, "loss": 2.2808, "step": 8021 }, { "epoch": 0.3393688129283357, "grad_norm": 0.2625916302204132, "learning_rate": 0.001, "loss": 2.1915, "step": 8022 }, { "epoch": 0.3394111176918521, "grad_norm": 0.2093726396560669, "learning_rate": 0.001, "loss": 2.1816, "step": 8023 }, { "epoch": 0.33945342245536847, "grad_norm": 0.2079465538263321, "learning_rate": 0.001, "loss": 1.8849, "step": 8024 }, { "epoch": 0.3394957272188848, "grad_norm": 0.7054566740989685, "learning_rate": 0.001, "loss": 1.8009, "step": 8025 }, { "epoch": 0.33953803198240123, "grad_norm": 0.24773012101650238, "learning_rate": 0.001, "loss": 3.0748, "step": 8026 }, { "epoch": 0.3395803367459176, "grad_norm": 0.23611260950565338, "learning_rate": 0.001, "loss": 2.0335, "step": 8027 }, { "epoch": 0.33962264150943394, "grad_norm": 1.1430414915084839, "learning_rate": 0.001, "loss": 3.0462, "step": 8028 }, { "epoch": 0.33966494627295035, "grad_norm": 0.22795584797859192, "learning_rate": 0.001, "loss": 2.0713, "step": 8029 }, { "epoch": 0.3397072510364667, "grad_norm": 0.2508586347103119, "learning_rate": 0.001, "loss": 3.3238, "step": 8030 }, { "epoch": 0.33974955579998306, "grad_norm": 0.24516475200653076, "learning_rate": 0.001, "loss": 1.9439, "step": 8031 }, { "epoch": 0.33979186056349947, "grad_norm": 0.1642148196697235, "learning_rate": 0.001, "loss": 2.5945, "step": 8032 }, { "epoch": 0.3398341653270158, "grad_norm": 2.4271435737609863, "learning_rate": 0.001, "loss": 2.6635, "step": 8033 }, { "epoch": 0.3398764700905322, "grad_norm": 0.8231348991394043, "learning_rate": 0.001, "loss": 2.5651, "step": 8034 }, { "epoch": 0.3399187748540486, "grad_norm": 0.23157507181167603, "learning_rate": 0.001, "loss": 2.0043, "step": 8035 }, { "epoch": 0.33996107961756494, "grad_norm": 0.24356773495674133, "learning_rate": 0.001, "loss": 2.0861, "step": 8036 }, { "epoch": 0.3400033843810813, "grad_norm": 0.23287416994571686, "learning_rate": 0.001, "loss": 2.1728, "step": 8037 }, { "epoch": 0.3400456891445977, "grad_norm": 0.1699216365814209, "learning_rate": 0.001, "loss": 1.8013, "step": 8038 }, { "epoch": 0.34008799390811406, "grad_norm": 0.19102227687835693, "learning_rate": 0.001, "loss": 2.2718, "step": 8039 }, { "epoch": 0.3401302986716304, "grad_norm": 0.2611904442310333, "learning_rate": 0.001, "loss": 3.0925, "step": 8040 }, { "epoch": 0.34017260343514677, "grad_norm": 0.8404667973518372, "learning_rate": 0.001, "loss": 1.8287, "step": 8041 }, { "epoch": 0.3402149081986632, "grad_norm": 0.3977033793926239, "learning_rate": 0.001, "loss": 2.1804, "step": 8042 }, { "epoch": 0.34025721296217953, "grad_norm": 0.21507540345191956, "learning_rate": 0.001, "loss": 2.6397, "step": 8043 }, { "epoch": 0.3402995177256959, "grad_norm": 0.22415781021118164, "learning_rate": 0.001, "loss": 2.7939, "step": 8044 }, { "epoch": 0.3403418224892123, "grad_norm": 2.2323434352874756, "learning_rate": 0.001, "loss": 2.3274, "step": 8045 }, { "epoch": 0.34038412725272865, "grad_norm": 5.563083171844482, "learning_rate": 0.001, "loss": 2.1133, "step": 8046 }, { "epoch": 0.340426432016245, "grad_norm": 0.27088257670402527, "learning_rate": 0.001, "loss": 2.0518, "step": 8047 }, { "epoch": 0.3404687367797614, "grad_norm": 1.1499382257461548, "learning_rate": 0.001, "loss": 2.057, "step": 8048 }, { "epoch": 0.34051104154327777, "grad_norm": 0.2051892727613449, "learning_rate": 0.001, "loss": 2.125, "step": 8049 }, { "epoch": 0.3405533463067941, "grad_norm": 0.3838144540786743, "learning_rate": 0.001, "loss": 2.1897, "step": 8050 }, { "epoch": 0.34059565107031053, "grad_norm": 0.28759241104125977, "learning_rate": 0.001, "loss": 1.9844, "step": 8051 }, { "epoch": 0.3406379558338269, "grad_norm": 0.2020261138677597, "learning_rate": 0.001, "loss": 2.8377, "step": 8052 }, { "epoch": 0.34068026059734324, "grad_norm": 0.24153375625610352, "learning_rate": 0.001, "loss": 2.1826, "step": 8053 }, { "epoch": 0.34072256536085965, "grad_norm": 0.43648314476013184, "learning_rate": 0.001, "loss": 2.8549, "step": 8054 }, { "epoch": 0.340764870124376, "grad_norm": 0.19116896390914917, "learning_rate": 0.001, "loss": 3.6501, "step": 8055 }, { "epoch": 0.34080717488789236, "grad_norm": 7.185187816619873, "learning_rate": 0.001, "loss": 2.6377, "step": 8056 }, { "epoch": 0.34084947965140877, "grad_norm": 0.18975602090358734, "learning_rate": 0.001, "loss": 2.1213, "step": 8057 }, { "epoch": 0.3408917844149251, "grad_norm": 0.2663729190826416, "learning_rate": 0.001, "loss": 2.6091, "step": 8058 }, { "epoch": 0.3409340891784415, "grad_norm": 0.20250354707241058, "learning_rate": 0.001, "loss": 2.103, "step": 8059 }, { "epoch": 0.3409763939419579, "grad_norm": 0.20965036749839783, "learning_rate": 0.001, "loss": 3.2814, "step": 8060 }, { "epoch": 0.34101869870547424, "grad_norm": 0.2456333339214325, "learning_rate": 0.001, "loss": 3.3578, "step": 8061 }, { "epoch": 0.3410610034689906, "grad_norm": 0.18389590084552765, "learning_rate": 0.001, "loss": 2.5921, "step": 8062 }, { "epoch": 0.341103308232507, "grad_norm": 0.23083733022212982, "learning_rate": 0.001, "loss": 2.6869, "step": 8063 }, { "epoch": 0.34114561299602336, "grad_norm": 0.6850119829177856, "learning_rate": 0.001, "loss": 1.5674, "step": 8064 }, { "epoch": 0.3411879177595397, "grad_norm": 0.2017279416322708, "learning_rate": 0.001, "loss": 2.0718, "step": 8065 }, { "epoch": 0.34123022252305607, "grad_norm": 0.18653610348701477, "learning_rate": 0.001, "loss": 2.4179, "step": 8066 }, { "epoch": 0.3412725272865725, "grad_norm": 0.3980189263820648, "learning_rate": 0.001, "loss": 2.1241, "step": 8067 }, { "epoch": 0.34131483205008883, "grad_norm": 0.7478682398796082, "learning_rate": 0.001, "loss": 1.8917, "step": 8068 }, { "epoch": 0.3413571368136052, "grad_norm": 0.1957026720046997, "learning_rate": 0.001, "loss": 2.3097, "step": 8069 }, { "epoch": 0.3413994415771216, "grad_norm": 0.1947687864303589, "learning_rate": 0.001, "loss": 1.8799, "step": 8070 }, { "epoch": 0.34144174634063795, "grad_norm": 0.216701477766037, "learning_rate": 0.001, "loss": 3.1663, "step": 8071 }, { "epoch": 0.3414840511041543, "grad_norm": 0.2218198925256729, "learning_rate": 0.001, "loss": 2.1391, "step": 8072 }, { "epoch": 0.3415263558676707, "grad_norm": 0.27254411578178406, "learning_rate": 0.001, "loss": 2.6124, "step": 8073 }, { "epoch": 0.34156866063118707, "grad_norm": 0.27166926860809326, "learning_rate": 0.001, "loss": 1.6906, "step": 8074 }, { "epoch": 0.3416109653947034, "grad_norm": 0.2239091545343399, "learning_rate": 0.001, "loss": 1.8461, "step": 8075 }, { "epoch": 0.34165327015821984, "grad_norm": 0.8380588889122009, "learning_rate": 0.001, "loss": 2.1581, "step": 8076 }, { "epoch": 0.3416955749217362, "grad_norm": 0.39979785680770874, "learning_rate": 0.001, "loss": 2.3237, "step": 8077 }, { "epoch": 0.34173787968525254, "grad_norm": 0.25030001997947693, "learning_rate": 0.001, "loss": 1.9951, "step": 8078 }, { "epoch": 0.34178018444876895, "grad_norm": 0.22300481796264648, "learning_rate": 0.001, "loss": 2.2102, "step": 8079 }, { "epoch": 0.3418224892122853, "grad_norm": 0.1912737488746643, "learning_rate": 0.001, "loss": 2.1506, "step": 8080 }, { "epoch": 0.34186479397580166, "grad_norm": 2.661966562271118, "learning_rate": 0.001, "loss": 1.5773, "step": 8081 }, { "epoch": 0.34190709873931807, "grad_norm": 0.2479306310415268, "learning_rate": 0.001, "loss": 2.8477, "step": 8082 }, { "epoch": 0.3419494035028344, "grad_norm": 0.43052566051483154, "learning_rate": 0.001, "loss": 3.5951, "step": 8083 }, { "epoch": 0.3419917082663508, "grad_norm": 0.2106366604566574, "learning_rate": 0.001, "loss": 2.7114, "step": 8084 }, { "epoch": 0.3420340130298672, "grad_norm": 1.2527594566345215, "learning_rate": 0.001, "loss": 2.2984, "step": 8085 }, { "epoch": 0.34207631779338354, "grad_norm": 4.545886993408203, "learning_rate": 0.001, "loss": 1.636, "step": 8086 }, { "epoch": 0.3421186225568999, "grad_norm": 0.3907563090324402, "learning_rate": 0.001, "loss": 3.0252, "step": 8087 }, { "epoch": 0.34216092732041625, "grad_norm": 0.24621452391147614, "learning_rate": 0.001, "loss": 1.6867, "step": 8088 }, { "epoch": 0.34220323208393266, "grad_norm": 0.26306939125061035, "learning_rate": 0.001, "loss": 1.9305, "step": 8089 }, { "epoch": 0.342245536847449, "grad_norm": 0.26752984523773193, "learning_rate": 0.001, "loss": 2.902, "step": 8090 }, { "epoch": 0.34228784161096537, "grad_norm": 0.2583836019039154, "learning_rate": 0.001, "loss": 3.4137, "step": 8091 }, { "epoch": 0.3423301463744818, "grad_norm": 1.7589455842971802, "learning_rate": 0.001, "loss": 2.0823, "step": 8092 }, { "epoch": 0.34237245113799814, "grad_norm": 3.6824162006378174, "learning_rate": 0.001, "loss": 2.4873, "step": 8093 }, { "epoch": 0.3424147559015145, "grad_norm": 0.45065632462501526, "learning_rate": 0.001, "loss": 2.184, "step": 8094 }, { "epoch": 0.3424570606650309, "grad_norm": 0.22970075905323029, "learning_rate": 0.001, "loss": 1.886, "step": 8095 }, { "epoch": 0.34249936542854725, "grad_norm": 0.2680737376213074, "learning_rate": 0.001, "loss": 1.9278, "step": 8096 }, { "epoch": 0.3425416701920636, "grad_norm": 0.39559051394462585, "learning_rate": 0.001, "loss": 2.6823, "step": 8097 }, { "epoch": 0.34258397495558, "grad_norm": 0.23845720291137695, "learning_rate": 0.001, "loss": 2.6721, "step": 8098 }, { "epoch": 0.34262627971909637, "grad_norm": 0.6681344509124756, "learning_rate": 0.001, "loss": 3.0333, "step": 8099 }, { "epoch": 0.3426685844826127, "grad_norm": 0.935767650604248, "learning_rate": 0.001, "loss": 3.4422, "step": 8100 }, { "epoch": 0.34271088924612914, "grad_norm": 0.21342290937900543, "learning_rate": 0.001, "loss": 2.4125, "step": 8101 }, { "epoch": 0.3427531940096455, "grad_norm": 0.7626002430915833, "learning_rate": 0.001, "loss": 3.0488, "step": 8102 }, { "epoch": 0.34279549877316184, "grad_norm": 0.991487979888916, "learning_rate": 0.001, "loss": 2.2147, "step": 8103 }, { "epoch": 0.34283780353667825, "grad_norm": 0.19054241478443146, "learning_rate": 0.001, "loss": 1.7796, "step": 8104 }, { "epoch": 0.3428801083001946, "grad_norm": 2.831486701965332, "learning_rate": 0.001, "loss": 2.9814, "step": 8105 }, { "epoch": 0.34292241306371096, "grad_norm": 0.27352792024612427, "learning_rate": 0.001, "loss": 1.866, "step": 8106 }, { "epoch": 0.34296471782722737, "grad_norm": 0.24455174803733826, "learning_rate": 0.001, "loss": 2.2868, "step": 8107 }, { "epoch": 0.3430070225907437, "grad_norm": 0.21650566160678864, "learning_rate": 0.001, "loss": 2.3255, "step": 8108 }, { "epoch": 0.3430493273542601, "grad_norm": 0.18031521141529083, "learning_rate": 0.001, "loss": 2.1578, "step": 8109 }, { "epoch": 0.34309163211777643, "grad_norm": 0.18996316194534302, "learning_rate": 0.001, "loss": 1.8919, "step": 8110 }, { "epoch": 0.34313393688129284, "grad_norm": 0.18360304832458496, "learning_rate": 0.001, "loss": 1.61, "step": 8111 }, { "epoch": 0.3431762416448092, "grad_norm": 0.44647422432899475, "learning_rate": 0.001, "loss": 2.3315, "step": 8112 }, { "epoch": 0.34321854640832555, "grad_norm": 0.937113344669342, "learning_rate": 0.001, "loss": 3.0726, "step": 8113 }, { "epoch": 0.34326085117184196, "grad_norm": 0.2121163010597229, "learning_rate": 0.001, "loss": 2.0622, "step": 8114 }, { "epoch": 0.3433031559353583, "grad_norm": 0.21113400161266327, "learning_rate": 0.001, "loss": 2.1562, "step": 8115 }, { "epoch": 0.34334546069887467, "grad_norm": 0.22805431485176086, "learning_rate": 0.001, "loss": 1.9647, "step": 8116 }, { "epoch": 0.3433877654623911, "grad_norm": 0.18042448163032532, "learning_rate": 0.001, "loss": 2.1456, "step": 8117 }, { "epoch": 0.34343007022590744, "grad_norm": 0.33136382699012756, "learning_rate": 0.001, "loss": 3.6987, "step": 8118 }, { "epoch": 0.3434723749894238, "grad_norm": 0.865990936756134, "learning_rate": 0.001, "loss": 2.1322, "step": 8119 }, { "epoch": 0.3435146797529402, "grad_norm": 0.2049923986196518, "learning_rate": 0.001, "loss": 2.1151, "step": 8120 }, { "epoch": 0.34355698451645655, "grad_norm": 0.41140925884246826, "learning_rate": 0.001, "loss": 2.1526, "step": 8121 }, { "epoch": 0.3435992892799729, "grad_norm": 0.2289661467075348, "learning_rate": 0.001, "loss": 2.3287, "step": 8122 }, { "epoch": 0.3436415940434893, "grad_norm": 0.8169209957122803, "learning_rate": 0.001, "loss": 3.6942, "step": 8123 }, { "epoch": 0.34368389880700567, "grad_norm": 0.21063531935214996, "learning_rate": 0.001, "loss": 2.1003, "step": 8124 }, { "epoch": 0.343726203570522, "grad_norm": 0.20649611949920654, "learning_rate": 0.001, "loss": 1.8405, "step": 8125 }, { "epoch": 0.34376850833403844, "grad_norm": 0.20465020835399628, "learning_rate": 0.001, "loss": 2.3314, "step": 8126 }, { "epoch": 0.3438108130975548, "grad_norm": 0.20881281793117523, "learning_rate": 0.001, "loss": 2.8126, "step": 8127 }, { "epoch": 0.34385311786107114, "grad_norm": 0.45468512177467346, "learning_rate": 0.001, "loss": 2.2817, "step": 8128 }, { "epoch": 0.34389542262458755, "grad_norm": 0.4071137011051178, "learning_rate": 0.001, "loss": 2.9757, "step": 8129 }, { "epoch": 0.3439377273881039, "grad_norm": 1.8788225650787354, "learning_rate": 0.001, "loss": 3.8213, "step": 8130 }, { "epoch": 0.34398003215162026, "grad_norm": 0.18839234113693237, "learning_rate": 0.001, "loss": 2.2812, "step": 8131 }, { "epoch": 0.3440223369151366, "grad_norm": 2.463320016860962, "learning_rate": 0.001, "loss": 2.9136, "step": 8132 }, { "epoch": 0.344064641678653, "grad_norm": 0.2248656004667282, "learning_rate": 0.001, "loss": 3.0937, "step": 8133 }, { "epoch": 0.3441069464421694, "grad_norm": 1.793042540550232, "learning_rate": 0.001, "loss": 3.3005, "step": 8134 }, { "epoch": 0.34414925120568574, "grad_norm": 0.17570704221725464, "learning_rate": 0.001, "loss": 3.2353, "step": 8135 }, { "epoch": 0.34419155596920215, "grad_norm": 1.5945035219192505, "learning_rate": 0.001, "loss": 1.9676, "step": 8136 }, { "epoch": 0.3442338607327185, "grad_norm": 0.18541212379932404, "learning_rate": 0.001, "loss": 2.5174, "step": 8137 }, { "epoch": 0.34427616549623485, "grad_norm": 0.1724303811788559, "learning_rate": 0.001, "loss": 2.6184, "step": 8138 }, { "epoch": 0.34431847025975126, "grad_norm": 0.9634104371070862, "learning_rate": 0.001, "loss": 2.0628, "step": 8139 }, { "epoch": 0.3443607750232676, "grad_norm": 0.23390017449855804, "learning_rate": 0.001, "loss": 2.6646, "step": 8140 }, { "epoch": 0.34440307978678397, "grad_norm": 0.19771607220172882, "learning_rate": 0.001, "loss": 2.321, "step": 8141 }, { "epoch": 0.3444453845503004, "grad_norm": 0.21361044049263, "learning_rate": 0.001, "loss": 1.8798, "step": 8142 }, { "epoch": 0.34448768931381674, "grad_norm": 0.20966002345085144, "learning_rate": 0.001, "loss": 1.9735, "step": 8143 }, { "epoch": 0.3445299940773331, "grad_norm": 0.24883241951465607, "learning_rate": 0.001, "loss": 2.6098, "step": 8144 }, { "epoch": 0.3445722988408495, "grad_norm": 1.5156461000442505, "learning_rate": 0.001, "loss": 1.8247, "step": 8145 }, { "epoch": 0.34461460360436585, "grad_norm": 0.18546625971794128, "learning_rate": 0.001, "loss": 1.8604, "step": 8146 }, { "epoch": 0.3446569083678822, "grad_norm": 0.18728595972061157, "learning_rate": 0.001, "loss": 2.6729, "step": 8147 }, { "epoch": 0.3446992131313986, "grad_norm": 0.17871029675006866, "learning_rate": 0.001, "loss": 2.6175, "step": 8148 }, { "epoch": 0.344741517894915, "grad_norm": 0.1993844360113144, "learning_rate": 0.001, "loss": 2.3318, "step": 8149 }, { "epoch": 0.3447838226584313, "grad_norm": 0.3129980266094208, "learning_rate": 0.001, "loss": 2.1132, "step": 8150 }, { "epoch": 0.34482612742194774, "grad_norm": 0.2431798130273819, "learning_rate": 0.001, "loss": 1.7761, "step": 8151 }, { "epoch": 0.3448684321854641, "grad_norm": 0.33781230449676514, "learning_rate": 0.001, "loss": 4.0398, "step": 8152 }, { "epoch": 0.34491073694898045, "grad_norm": 1.1244258880615234, "learning_rate": 0.001, "loss": 3.1731, "step": 8153 }, { "epoch": 0.3449530417124968, "grad_norm": 0.1972734034061432, "learning_rate": 0.001, "loss": 3.1912, "step": 8154 }, { "epoch": 0.3449953464760132, "grad_norm": 0.19920934736728668, "learning_rate": 0.001, "loss": 2.0368, "step": 8155 }, { "epoch": 0.34503765123952956, "grad_norm": 0.2313353568315506, "learning_rate": 0.001, "loss": 1.7179, "step": 8156 }, { "epoch": 0.3450799560030459, "grad_norm": 0.3713974356651306, "learning_rate": 0.001, "loss": 2.0866, "step": 8157 }, { "epoch": 0.3451222607665623, "grad_norm": 0.21588067710399628, "learning_rate": 0.001, "loss": 1.7316, "step": 8158 }, { "epoch": 0.3451645655300787, "grad_norm": 0.3257901668548584, "learning_rate": 0.001, "loss": 1.9901, "step": 8159 }, { "epoch": 0.34520687029359504, "grad_norm": 0.21106848120689392, "learning_rate": 0.001, "loss": 3.0461, "step": 8160 }, { "epoch": 0.34524917505711145, "grad_norm": 0.6497496962547302, "learning_rate": 0.001, "loss": 3.1958, "step": 8161 }, { "epoch": 0.3452914798206278, "grad_norm": 0.17610898613929749, "learning_rate": 0.001, "loss": 2.2529, "step": 8162 }, { "epoch": 0.34533378458414415, "grad_norm": 0.19626934826374054, "learning_rate": 0.001, "loss": 1.6347, "step": 8163 }, { "epoch": 0.34537608934766056, "grad_norm": 0.21596650779247284, "learning_rate": 0.001, "loss": 3.0951, "step": 8164 }, { "epoch": 0.3454183941111769, "grad_norm": 0.635787308216095, "learning_rate": 0.001, "loss": 2.1464, "step": 8165 }, { "epoch": 0.3454606988746933, "grad_norm": 0.17580640316009521, "learning_rate": 0.001, "loss": 1.8562, "step": 8166 }, { "epoch": 0.3455030036382097, "grad_norm": 0.35619786381721497, "learning_rate": 0.001, "loss": 2.0581, "step": 8167 }, { "epoch": 0.34554530840172604, "grad_norm": 0.19072014093399048, "learning_rate": 0.001, "loss": 2.9908, "step": 8168 }, { "epoch": 0.3455876131652424, "grad_norm": 0.21359659731388092, "learning_rate": 0.001, "loss": 1.9315, "step": 8169 }, { "epoch": 0.3456299179287588, "grad_norm": 0.197892963886261, "learning_rate": 0.001, "loss": 2.3538, "step": 8170 }, { "epoch": 0.34567222269227516, "grad_norm": 0.18982310593128204, "learning_rate": 0.001, "loss": 2.0689, "step": 8171 }, { "epoch": 0.3457145274557915, "grad_norm": 0.19009721279144287, "learning_rate": 0.001, "loss": 2.123, "step": 8172 }, { "epoch": 0.3457568322193079, "grad_norm": 0.5255012512207031, "learning_rate": 0.001, "loss": 2.5506, "step": 8173 }, { "epoch": 0.3457991369828243, "grad_norm": 1.7404595613479614, "learning_rate": 0.001, "loss": 2.4197, "step": 8174 }, { "epoch": 0.3458414417463406, "grad_norm": 0.2141122967004776, "learning_rate": 0.001, "loss": 2.2982, "step": 8175 }, { "epoch": 0.34588374650985704, "grad_norm": 0.21013054251670837, "learning_rate": 0.001, "loss": 3.148, "step": 8176 }, { "epoch": 0.3459260512733734, "grad_norm": 0.2299475222826004, "learning_rate": 0.001, "loss": 2.7599, "step": 8177 }, { "epoch": 0.34596835603688975, "grad_norm": 0.735609769821167, "learning_rate": 0.001, "loss": 1.6265, "step": 8178 }, { "epoch": 0.3460106608004061, "grad_norm": 0.3593994677066803, "learning_rate": 0.001, "loss": 2.6944, "step": 8179 }, { "epoch": 0.3460529655639225, "grad_norm": 0.22775153815746307, "learning_rate": 0.001, "loss": 2.2529, "step": 8180 }, { "epoch": 0.34609527032743886, "grad_norm": 0.2560899257659912, "learning_rate": 0.001, "loss": 1.9796, "step": 8181 }, { "epoch": 0.3461375750909552, "grad_norm": 0.20973028242588043, "learning_rate": 0.001, "loss": 2.9659, "step": 8182 }, { "epoch": 0.34617987985447163, "grad_norm": 2.2904813289642334, "learning_rate": 0.001, "loss": 1.5693, "step": 8183 }, { "epoch": 0.346222184617988, "grad_norm": 0.2068173885345459, "learning_rate": 0.001, "loss": 2.2342, "step": 8184 }, { "epoch": 0.34626448938150434, "grad_norm": 0.6102269291877747, "learning_rate": 0.001, "loss": 2.1191, "step": 8185 }, { "epoch": 0.34630679414502075, "grad_norm": 0.23791219294071198, "learning_rate": 0.001, "loss": 1.873, "step": 8186 }, { "epoch": 0.3463490989085371, "grad_norm": 0.2418316751718521, "learning_rate": 0.001, "loss": 2.0798, "step": 8187 }, { "epoch": 0.34639140367205346, "grad_norm": 0.944127082824707, "learning_rate": 0.001, "loss": 2.1407, "step": 8188 }, { "epoch": 0.34643370843556986, "grad_norm": 0.20907090604305267, "learning_rate": 0.001, "loss": 2.5714, "step": 8189 }, { "epoch": 0.3464760131990862, "grad_norm": 0.26245564222335815, "learning_rate": 0.001, "loss": 2.214, "step": 8190 }, { "epoch": 0.3465183179626026, "grad_norm": 0.2782506048679352, "learning_rate": 0.001, "loss": 1.9815, "step": 8191 }, { "epoch": 0.346560622726119, "grad_norm": 0.2448754757642746, "learning_rate": 0.001, "loss": 2.5002, "step": 8192 }, { "epoch": 0.34660292748963534, "grad_norm": 0.21422387659549713, "learning_rate": 0.001, "loss": 2.2284, "step": 8193 }, { "epoch": 0.3466452322531517, "grad_norm": 0.19360801577568054, "learning_rate": 0.001, "loss": 1.8096, "step": 8194 }, { "epoch": 0.3466875370166681, "grad_norm": 0.17713648080825806, "learning_rate": 0.001, "loss": 2.179, "step": 8195 }, { "epoch": 0.34672984178018446, "grad_norm": 0.23182488977909088, "learning_rate": 0.001, "loss": 1.9, "step": 8196 }, { "epoch": 0.3467721465437008, "grad_norm": 0.6592618227005005, "learning_rate": 0.001, "loss": 2.6549, "step": 8197 }, { "epoch": 0.3468144513072172, "grad_norm": 0.20647495985031128, "learning_rate": 0.001, "loss": 2.7395, "step": 8198 }, { "epoch": 0.3468567560707336, "grad_norm": 18.073009490966797, "learning_rate": 0.001, "loss": 2.7044, "step": 8199 }, { "epoch": 0.34689906083424993, "grad_norm": 0.1915636509656906, "learning_rate": 0.001, "loss": 2.1236, "step": 8200 }, { "epoch": 0.3469413655977663, "grad_norm": 0.18017853796482086, "learning_rate": 0.001, "loss": 1.8282, "step": 8201 }, { "epoch": 0.3469836703612827, "grad_norm": 1.731067419052124, "learning_rate": 0.001, "loss": 2.718, "step": 8202 }, { "epoch": 0.34702597512479905, "grad_norm": 0.2168511152267456, "learning_rate": 0.001, "loss": 2.0306, "step": 8203 }, { "epoch": 0.3470682798883154, "grad_norm": 0.23651044070720673, "learning_rate": 0.001, "loss": 2.4044, "step": 8204 }, { "epoch": 0.3471105846518318, "grad_norm": 0.2720677852630615, "learning_rate": 0.001, "loss": 1.8696, "step": 8205 }, { "epoch": 0.34715288941534816, "grad_norm": 0.24406278133392334, "learning_rate": 0.001, "loss": 1.9719, "step": 8206 }, { "epoch": 0.3471951941788645, "grad_norm": 0.2830585241317749, "learning_rate": 0.001, "loss": 2.2669, "step": 8207 }, { "epoch": 0.34723749894238093, "grad_norm": 0.9110187292098999, "learning_rate": 0.001, "loss": 2.46, "step": 8208 }, { "epoch": 0.3472798037058973, "grad_norm": 0.23988114297389984, "learning_rate": 0.001, "loss": 2.1952, "step": 8209 }, { "epoch": 0.34732210846941364, "grad_norm": 0.23413024842739105, "learning_rate": 0.001, "loss": 1.7244, "step": 8210 }, { "epoch": 0.34736441323293005, "grad_norm": 0.17257529497146606, "learning_rate": 0.001, "loss": 1.3189, "step": 8211 }, { "epoch": 0.3474067179964464, "grad_norm": 0.3271627128124237, "learning_rate": 0.001, "loss": 2.999, "step": 8212 }, { "epoch": 0.34744902275996276, "grad_norm": 0.4803444445133209, "learning_rate": 0.001, "loss": 3.5374, "step": 8213 }, { "epoch": 0.34749132752347917, "grad_norm": 0.20940083265304565, "learning_rate": 0.001, "loss": 3.9058, "step": 8214 }, { "epoch": 0.3475336322869955, "grad_norm": 2.5996439456939697, "learning_rate": 0.001, "loss": 2.0939, "step": 8215 }, { "epoch": 0.3475759370505119, "grad_norm": 3.1301825046539307, "learning_rate": 0.001, "loss": 3.033, "step": 8216 }, { "epoch": 0.3476182418140283, "grad_norm": 0.2768004834651947, "learning_rate": 0.001, "loss": 2.4367, "step": 8217 }, { "epoch": 0.34766054657754464, "grad_norm": 0.2278338372707367, "learning_rate": 0.001, "loss": 2.7675, "step": 8218 }, { "epoch": 0.347702851341061, "grad_norm": 59.52205276489258, "learning_rate": 0.001, "loss": 1.8877, "step": 8219 }, { "epoch": 0.3477451561045774, "grad_norm": 0.4094898998737335, "learning_rate": 0.001, "loss": 3.3673, "step": 8220 }, { "epoch": 0.34778746086809376, "grad_norm": 0.24076704680919647, "learning_rate": 0.001, "loss": 1.7683, "step": 8221 }, { "epoch": 0.3478297656316101, "grad_norm": 0.2221417874097824, "learning_rate": 0.001, "loss": 2.8931, "step": 8222 }, { "epoch": 0.34787207039512646, "grad_norm": 0.2613305151462555, "learning_rate": 0.001, "loss": 2.6144, "step": 8223 }, { "epoch": 0.3479143751586429, "grad_norm": 0.22772428393363953, "learning_rate": 0.001, "loss": 2.4166, "step": 8224 }, { "epoch": 0.34795667992215923, "grad_norm": 0.21340018510818481, "learning_rate": 0.001, "loss": 1.5321, "step": 8225 }, { "epoch": 0.3479989846856756, "grad_norm": 0.24845480918884277, "learning_rate": 0.001, "loss": 1.9605, "step": 8226 }, { "epoch": 0.348041289449192, "grad_norm": 0.24793021380901337, "learning_rate": 0.001, "loss": 3.3195, "step": 8227 }, { "epoch": 0.34808359421270835, "grad_norm": 3.903813123703003, "learning_rate": 0.001, "loss": 2.8211, "step": 8228 }, { "epoch": 0.3481258989762247, "grad_norm": 0.21152375638484955, "learning_rate": 0.001, "loss": 2.4791, "step": 8229 }, { "epoch": 0.3481682037397411, "grad_norm": 0.8900184631347656, "learning_rate": 0.001, "loss": 2.0313, "step": 8230 }, { "epoch": 0.34821050850325747, "grad_norm": 0.2712019383907318, "learning_rate": 0.001, "loss": 2.3294, "step": 8231 }, { "epoch": 0.3482528132667738, "grad_norm": 5.551436424255371, "learning_rate": 0.001, "loss": 2.4235, "step": 8232 }, { "epoch": 0.34829511803029023, "grad_norm": 0.28290316462516785, "learning_rate": 0.001, "loss": 2.4363, "step": 8233 }, { "epoch": 0.3483374227938066, "grad_norm": 0.7481885552406311, "learning_rate": 0.001, "loss": 1.7503, "step": 8234 }, { "epoch": 0.34837972755732294, "grad_norm": 0.2814173400402069, "learning_rate": 0.001, "loss": 3.2661, "step": 8235 }, { "epoch": 0.34842203232083935, "grad_norm": 1.8952068090438843, "learning_rate": 0.001, "loss": 3.6599, "step": 8236 }, { "epoch": 0.3484643370843557, "grad_norm": 1.625000238418579, "learning_rate": 0.001, "loss": 3.2106, "step": 8237 }, { "epoch": 0.34850664184787206, "grad_norm": 0.29325634241104126, "learning_rate": 0.001, "loss": 1.8171, "step": 8238 }, { "epoch": 0.34854894661138847, "grad_norm": 0.23248402774333954, "learning_rate": 0.001, "loss": 1.9535, "step": 8239 }, { "epoch": 0.3485912513749048, "grad_norm": 0.36317363381385803, "learning_rate": 0.001, "loss": 2.1191, "step": 8240 }, { "epoch": 0.3486335561384212, "grad_norm": 7.670083522796631, "learning_rate": 0.001, "loss": 2.581, "step": 8241 }, { "epoch": 0.3486758609019376, "grad_norm": 0.2423803061246872, "learning_rate": 0.001, "loss": 2.7115, "step": 8242 }, { "epoch": 0.34871816566545394, "grad_norm": 0.21461081504821777, "learning_rate": 0.001, "loss": 2.5597, "step": 8243 }, { "epoch": 0.3487604704289703, "grad_norm": 0.28708726167678833, "learning_rate": 0.001, "loss": 3.2198, "step": 8244 }, { "epoch": 0.34880277519248665, "grad_norm": 6.893510341644287, "learning_rate": 0.001, "loss": 2.521, "step": 8245 }, { "epoch": 0.34884507995600306, "grad_norm": 0.19617153704166412, "learning_rate": 0.001, "loss": 1.9453, "step": 8246 }, { "epoch": 0.3488873847195194, "grad_norm": 0.20679624378681183, "learning_rate": 0.001, "loss": 2.4901, "step": 8247 }, { "epoch": 0.34892968948303577, "grad_norm": 0.28653019666671753, "learning_rate": 0.001, "loss": 2.3008, "step": 8248 }, { "epoch": 0.3489719942465522, "grad_norm": 0.7451533675193787, "learning_rate": 0.001, "loss": 3.0218, "step": 8249 }, { "epoch": 0.34901429901006853, "grad_norm": 0.3160172998905182, "learning_rate": 0.001, "loss": 2.5261, "step": 8250 }, { "epoch": 0.3490566037735849, "grad_norm": 0.20035721361637115, "learning_rate": 0.001, "loss": 2.3146, "step": 8251 }, { "epoch": 0.3490989085371013, "grad_norm": 0.19196628034114838, "learning_rate": 0.001, "loss": 3.0744, "step": 8252 }, { "epoch": 0.34914121330061765, "grad_norm": 0.2677168846130371, "learning_rate": 0.001, "loss": 2.6725, "step": 8253 }, { "epoch": 0.349183518064134, "grad_norm": 0.210031196475029, "learning_rate": 0.001, "loss": 2.6667, "step": 8254 }, { "epoch": 0.3492258228276504, "grad_norm": 0.2728026211261749, "learning_rate": 0.001, "loss": 1.6813, "step": 8255 }, { "epoch": 0.34926812759116677, "grad_norm": 0.2557034492492676, "learning_rate": 0.001, "loss": 2.1864, "step": 8256 }, { "epoch": 0.3493104323546831, "grad_norm": 0.7835190892219543, "learning_rate": 0.001, "loss": 3.1137, "step": 8257 }, { "epoch": 0.34935273711819953, "grad_norm": 0.28121432662010193, "learning_rate": 0.001, "loss": 2.9322, "step": 8258 }, { "epoch": 0.3493950418817159, "grad_norm": 0.207332044839859, "learning_rate": 0.001, "loss": 2.1749, "step": 8259 }, { "epoch": 0.34943734664523224, "grad_norm": 0.5446605682373047, "learning_rate": 0.001, "loss": 2.0835, "step": 8260 }, { "epoch": 0.34947965140874865, "grad_norm": 0.17694032192230225, "learning_rate": 0.001, "loss": 1.9013, "step": 8261 }, { "epoch": 0.349521956172265, "grad_norm": 0.546602725982666, "learning_rate": 0.001, "loss": 2.1906, "step": 8262 }, { "epoch": 0.34956426093578136, "grad_norm": 0.26002398133277893, "learning_rate": 0.001, "loss": 2.5833, "step": 8263 }, { "epoch": 0.34960656569929777, "grad_norm": 0.21473874151706696, "learning_rate": 0.001, "loss": 1.9894, "step": 8264 }, { "epoch": 0.3496488704628141, "grad_norm": 0.22110356390476227, "learning_rate": 0.001, "loss": 1.663, "step": 8265 }, { "epoch": 0.3496911752263305, "grad_norm": 0.8616834878921509, "learning_rate": 0.001, "loss": 2.6967, "step": 8266 }, { "epoch": 0.34973347998984683, "grad_norm": 0.2699930965900421, "learning_rate": 0.001, "loss": 3.416, "step": 8267 }, { "epoch": 0.34977578475336324, "grad_norm": 0.27595359086990356, "learning_rate": 0.001, "loss": 1.9483, "step": 8268 }, { "epoch": 0.3498180895168796, "grad_norm": 0.9030215740203857, "learning_rate": 0.001, "loss": 1.9612, "step": 8269 }, { "epoch": 0.34986039428039595, "grad_norm": 0.21707811951637268, "learning_rate": 0.001, "loss": 1.8743, "step": 8270 }, { "epoch": 0.34990269904391236, "grad_norm": 0.22298790514469147, "learning_rate": 0.001, "loss": 2.299, "step": 8271 }, { "epoch": 0.3499450038074287, "grad_norm": 0.4610617756843567, "learning_rate": 0.001, "loss": 2.5806, "step": 8272 }, { "epoch": 0.34998730857094507, "grad_norm": 0.16975651681423187, "learning_rate": 0.001, "loss": 1.8847, "step": 8273 }, { "epoch": 0.3500296133344615, "grad_norm": 0.1778561770915985, "learning_rate": 0.001, "loss": 1.9668, "step": 8274 }, { "epoch": 0.35007191809797783, "grad_norm": 0.1916327327489853, "learning_rate": 0.001, "loss": 2.3812, "step": 8275 }, { "epoch": 0.3501142228614942, "grad_norm": 0.18574245274066925, "learning_rate": 0.001, "loss": 2.6713, "step": 8276 }, { "epoch": 0.3501565276250106, "grad_norm": 0.17055678367614746, "learning_rate": 0.001, "loss": 2.7099, "step": 8277 }, { "epoch": 0.35019883238852695, "grad_norm": 3.7420198917388916, "learning_rate": 0.001, "loss": 2.2635, "step": 8278 }, { "epoch": 0.3502411371520433, "grad_norm": 0.19405874609947205, "learning_rate": 0.001, "loss": 2.8745, "step": 8279 }, { "epoch": 0.3502834419155597, "grad_norm": 0.43192586302757263, "learning_rate": 0.001, "loss": 2.0631, "step": 8280 }, { "epoch": 0.35032574667907607, "grad_norm": 0.19240808486938477, "learning_rate": 0.001, "loss": 2.2849, "step": 8281 }, { "epoch": 0.3503680514425924, "grad_norm": 0.17617124319076538, "learning_rate": 0.001, "loss": 1.8595, "step": 8282 }, { "epoch": 0.35041035620610883, "grad_norm": 4.70356559753418, "learning_rate": 0.001, "loss": 2.568, "step": 8283 }, { "epoch": 0.3504526609696252, "grad_norm": 0.2642245292663574, "learning_rate": 0.001, "loss": 3.0254, "step": 8284 }, { "epoch": 0.35049496573314154, "grad_norm": 0.1709575653076172, "learning_rate": 0.001, "loss": 2.6415, "step": 8285 }, { "epoch": 0.35053727049665795, "grad_norm": 0.2094145566225052, "learning_rate": 0.001, "loss": 2.4383, "step": 8286 }, { "epoch": 0.3505795752601743, "grad_norm": 0.18870730698108673, "learning_rate": 0.001, "loss": 1.6139, "step": 8287 }, { "epoch": 0.35062188002369066, "grad_norm": 0.23713693022727966, "learning_rate": 0.001, "loss": 1.9205, "step": 8288 }, { "epoch": 0.35066418478720707, "grad_norm": 0.1588599532842636, "learning_rate": 0.001, "loss": 1.5372, "step": 8289 }, { "epoch": 0.3507064895507234, "grad_norm": 7.636360168457031, "learning_rate": 0.001, "loss": 2.0058, "step": 8290 }, { "epoch": 0.3507487943142398, "grad_norm": 0.8418156504631042, "learning_rate": 0.001, "loss": 2.6797, "step": 8291 }, { "epoch": 0.35079109907775613, "grad_norm": 0.2791912853717804, "learning_rate": 0.001, "loss": 3.1777, "step": 8292 }, { "epoch": 0.35083340384127254, "grad_norm": 0.22210825979709625, "learning_rate": 0.001, "loss": 2.0927, "step": 8293 }, { "epoch": 0.3508757086047889, "grad_norm": 2.1510884761810303, "learning_rate": 0.001, "loss": 2.077, "step": 8294 }, { "epoch": 0.35091801336830525, "grad_norm": 0.21856360137462616, "learning_rate": 0.001, "loss": 2.7959, "step": 8295 }, { "epoch": 0.35096031813182166, "grad_norm": 0.2667442560195923, "learning_rate": 0.001, "loss": 2.2033, "step": 8296 }, { "epoch": 0.351002622895338, "grad_norm": 0.24852122366428375, "learning_rate": 0.001, "loss": 2.1141, "step": 8297 }, { "epoch": 0.35104492765885437, "grad_norm": 1.397088885307312, "learning_rate": 0.001, "loss": 1.803, "step": 8298 }, { "epoch": 0.3510872324223708, "grad_norm": 17.205541610717773, "learning_rate": 0.001, "loss": 2.371, "step": 8299 }, { "epoch": 0.35112953718588713, "grad_norm": 0.2123159021139145, "learning_rate": 0.001, "loss": 3.3105, "step": 8300 }, { "epoch": 0.3511718419494035, "grad_norm": 2.099641799926758, "learning_rate": 0.001, "loss": 2.1354, "step": 8301 }, { "epoch": 0.3512141467129199, "grad_norm": 6.61326789855957, "learning_rate": 0.001, "loss": 2.2409, "step": 8302 }, { "epoch": 0.35125645147643625, "grad_norm": 2.718135118484497, "learning_rate": 0.001, "loss": 2.2279, "step": 8303 }, { "epoch": 0.3512987562399526, "grad_norm": 1.8631720542907715, "learning_rate": 0.001, "loss": 3.8234, "step": 8304 }, { "epoch": 0.351341061003469, "grad_norm": 20.797183990478516, "learning_rate": 0.001, "loss": 3.4096, "step": 8305 }, { "epoch": 0.35138336576698537, "grad_norm": 0.22008393704891205, "learning_rate": 0.001, "loss": 1.8032, "step": 8306 }, { "epoch": 0.3514256705305017, "grad_norm": 0.26470401883125305, "learning_rate": 0.001, "loss": 2.6094, "step": 8307 }, { "epoch": 0.35146797529401813, "grad_norm": 0.19725409150123596, "learning_rate": 0.001, "loss": 1.5124, "step": 8308 }, { "epoch": 0.3515102800575345, "grad_norm": 0.3234120011329651, "learning_rate": 0.001, "loss": 2.1466, "step": 8309 }, { "epoch": 0.35155258482105084, "grad_norm": 0.32287687063217163, "learning_rate": 0.001, "loss": 2.0659, "step": 8310 }, { "epoch": 0.35159488958456725, "grad_norm": 0.2961645722389221, "learning_rate": 0.001, "loss": 2.6533, "step": 8311 }, { "epoch": 0.3516371943480836, "grad_norm": 2.627986192703247, "learning_rate": 0.001, "loss": 2.3641, "step": 8312 }, { "epoch": 0.35167949911159996, "grad_norm": 0.374530166387558, "learning_rate": 0.001, "loss": 2.9414, "step": 8313 }, { "epoch": 0.3517218038751163, "grad_norm": 0.19504281878471375, "learning_rate": 0.001, "loss": 1.7939, "step": 8314 }, { "epoch": 0.3517641086386327, "grad_norm": 0.6185072064399719, "learning_rate": 0.001, "loss": 2.1054, "step": 8315 }, { "epoch": 0.3518064134021491, "grad_norm": 0.4495874345302582, "learning_rate": 0.001, "loss": 1.7725, "step": 8316 }, { "epoch": 0.35184871816566543, "grad_norm": 0.2591138780117035, "learning_rate": 0.001, "loss": 2.4438, "step": 8317 }, { "epoch": 0.35189102292918184, "grad_norm": 0.2160710096359253, "learning_rate": 0.001, "loss": 1.9199, "step": 8318 }, { "epoch": 0.3519333276926982, "grad_norm": 0.22822026908397675, "learning_rate": 0.001, "loss": 2.8699, "step": 8319 }, { "epoch": 0.35197563245621455, "grad_norm": 0.23242418467998505, "learning_rate": 0.001, "loss": 2.9319, "step": 8320 }, { "epoch": 0.35201793721973096, "grad_norm": 59.025177001953125, "learning_rate": 0.001, "loss": 2.2009, "step": 8321 }, { "epoch": 0.3520602419832473, "grad_norm": 0.3726635277271271, "learning_rate": 0.001, "loss": 1.9791, "step": 8322 }, { "epoch": 0.35210254674676367, "grad_norm": 0.2542991638183594, "learning_rate": 0.001, "loss": 3.2745, "step": 8323 }, { "epoch": 0.3521448515102801, "grad_norm": 0.25025397539138794, "learning_rate": 0.001, "loss": 1.8405, "step": 8324 }, { "epoch": 0.35218715627379643, "grad_norm": 0.2506003677845001, "learning_rate": 0.001, "loss": 3.0609, "step": 8325 }, { "epoch": 0.3522294610373128, "grad_norm": 0.26360347867012024, "learning_rate": 0.001, "loss": 2.1514, "step": 8326 }, { "epoch": 0.3522717658008292, "grad_norm": 0.23163963854312897, "learning_rate": 0.001, "loss": 2.0993, "step": 8327 }, { "epoch": 0.35231407056434555, "grad_norm": 18.21137046813965, "learning_rate": 0.001, "loss": 2.8873, "step": 8328 }, { "epoch": 0.3523563753278619, "grad_norm": 0.21554243564605713, "learning_rate": 0.001, "loss": 2.7229, "step": 8329 }, { "epoch": 0.3523986800913783, "grad_norm": 0.26712241768836975, "learning_rate": 0.001, "loss": 2.5294, "step": 8330 }, { "epoch": 0.35244098485489467, "grad_norm": 0.2301950454711914, "learning_rate": 0.001, "loss": 2.3371, "step": 8331 }, { "epoch": 0.352483289618411, "grad_norm": 0.19002433121204376, "learning_rate": 0.001, "loss": 2.637, "step": 8332 }, { "epoch": 0.35252559438192743, "grad_norm": 0.25531867146492004, "learning_rate": 0.001, "loss": 2.001, "step": 8333 }, { "epoch": 0.3525678991454438, "grad_norm": 0.9937705397605896, "learning_rate": 0.001, "loss": 1.9568, "step": 8334 }, { "epoch": 0.35261020390896014, "grad_norm": 0.3505503833293915, "learning_rate": 0.001, "loss": 2.2035, "step": 8335 }, { "epoch": 0.3526525086724765, "grad_norm": 0.2179388403892517, "learning_rate": 0.001, "loss": 2.0458, "step": 8336 }, { "epoch": 0.3526948134359929, "grad_norm": 0.17621107399463654, "learning_rate": 0.001, "loss": 1.9847, "step": 8337 }, { "epoch": 0.35273711819950926, "grad_norm": 0.34067684412002563, "learning_rate": 0.001, "loss": 2.064, "step": 8338 }, { "epoch": 0.3527794229630256, "grad_norm": 0.24471262097358704, "learning_rate": 0.001, "loss": 3.5153, "step": 8339 }, { "epoch": 0.352821727726542, "grad_norm": 0.37908583879470825, "learning_rate": 0.001, "loss": 2.4995, "step": 8340 }, { "epoch": 0.3528640324900584, "grad_norm": 0.453393816947937, "learning_rate": 0.001, "loss": 2.0838, "step": 8341 }, { "epoch": 0.35290633725357473, "grad_norm": 0.18824003636837006, "learning_rate": 0.001, "loss": 2.6714, "step": 8342 }, { "epoch": 0.35294864201709114, "grad_norm": 0.16498175263404846, "learning_rate": 0.001, "loss": 1.8878, "step": 8343 }, { "epoch": 0.3529909467806075, "grad_norm": 0.18404828011989594, "learning_rate": 0.001, "loss": 2.5525, "step": 8344 }, { "epoch": 0.35303325154412385, "grad_norm": 0.19402121007442474, "learning_rate": 0.001, "loss": 1.3525, "step": 8345 }, { "epoch": 0.35307555630764026, "grad_norm": 0.3609767258167267, "learning_rate": 0.001, "loss": 2.2392, "step": 8346 }, { "epoch": 0.3531178610711566, "grad_norm": 0.17331457138061523, "learning_rate": 0.001, "loss": 1.9563, "step": 8347 }, { "epoch": 0.35316016583467297, "grad_norm": 0.1629885584115982, "learning_rate": 0.001, "loss": 2.2316, "step": 8348 }, { "epoch": 0.3532024705981894, "grad_norm": 0.4724940061569214, "learning_rate": 0.001, "loss": 2.8375, "step": 8349 }, { "epoch": 0.35324477536170573, "grad_norm": 0.19430530071258545, "learning_rate": 0.001, "loss": 2.5702, "step": 8350 }, { "epoch": 0.3532870801252221, "grad_norm": 0.2884927988052368, "learning_rate": 0.001, "loss": 1.8518, "step": 8351 }, { "epoch": 0.3533293848887385, "grad_norm": 0.30345895886421204, "learning_rate": 0.001, "loss": 2.2031, "step": 8352 }, { "epoch": 0.35337168965225485, "grad_norm": 0.1875777542591095, "learning_rate": 0.001, "loss": 1.9054, "step": 8353 }, { "epoch": 0.3534139944157712, "grad_norm": 0.17914628982543945, "learning_rate": 0.001, "loss": 2.1172, "step": 8354 }, { "epoch": 0.3534562991792876, "grad_norm": 0.223419189453125, "learning_rate": 0.001, "loss": 2.0011, "step": 8355 }, { "epoch": 0.35349860394280397, "grad_norm": 1.009746789932251, "learning_rate": 0.001, "loss": 3.5386, "step": 8356 }, { "epoch": 0.3535409087063203, "grad_norm": 0.17974236607551575, "learning_rate": 0.001, "loss": 2.8207, "step": 8357 }, { "epoch": 0.3535832134698367, "grad_norm": 0.20830687880516052, "learning_rate": 0.001, "loss": 2.6878, "step": 8358 }, { "epoch": 0.3536255182333531, "grad_norm": 0.19284899532794952, "learning_rate": 0.001, "loss": 1.9765, "step": 8359 }, { "epoch": 0.35366782299686944, "grad_norm": 1.829329252243042, "learning_rate": 0.001, "loss": 1.6846, "step": 8360 }, { "epoch": 0.3537101277603858, "grad_norm": 0.21594662964344025, "learning_rate": 0.001, "loss": 1.8569, "step": 8361 }, { "epoch": 0.3537524325239022, "grad_norm": 4.540489673614502, "learning_rate": 0.001, "loss": 2.6528, "step": 8362 }, { "epoch": 0.35379473728741856, "grad_norm": 0.24919438362121582, "learning_rate": 0.001, "loss": 3.6536, "step": 8363 }, { "epoch": 0.3538370420509349, "grad_norm": 0.21543803811073303, "learning_rate": 0.001, "loss": 2.3457, "step": 8364 }, { "epoch": 0.3538793468144513, "grad_norm": 0.18875446915626526, "learning_rate": 0.001, "loss": 1.6829, "step": 8365 }, { "epoch": 0.3539216515779677, "grad_norm": 0.18854385614395142, "learning_rate": 0.001, "loss": 2.7787, "step": 8366 }, { "epoch": 0.35396395634148403, "grad_norm": 0.23087264597415924, "learning_rate": 0.001, "loss": 2.1272, "step": 8367 }, { "epoch": 0.35400626110500044, "grad_norm": 0.16182763874530792, "learning_rate": 0.001, "loss": 1.4934, "step": 8368 }, { "epoch": 0.3540485658685168, "grad_norm": 0.19627897441387177, "learning_rate": 0.001, "loss": 2.9024, "step": 8369 }, { "epoch": 0.35409087063203315, "grad_norm": 0.18524384498596191, "learning_rate": 0.001, "loss": 2.1512, "step": 8370 }, { "epoch": 0.35413317539554956, "grad_norm": 0.1795620322227478, "learning_rate": 0.001, "loss": 2.0742, "step": 8371 }, { "epoch": 0.3541754801590659, "grad_norm": 0.20785008370876312, "learning_rate": 0.001, "loss": 1.6379, "step": 8372 }, { "epoch": 0.35421778492258227, "grad_norm": 0.20255346596240997, "learning_rate": 0.001, "loss": 2.1128, "step": 8373 }, { "epoch": 0.3542600896860987, "grad_norm": 0.22817464172840118, "learning_rate": 0.001, "loss": 2.0911, "step": 8374 }, { "epoch": 0.35430239444961503, "grad_norm": 1.5379173755645752, "learning_rate": 0.001, "loss": 2.2441, "step": 8375 }, { "epoch": 0.3543446992131314, "grad_norm": 1.9879519939422607, "learning_rate": 0.001, "loss": 1.7536, "step": 8376 }, { "epoch": 0.3543870039766478, "grad_norm": 0.8693563938140869, "learning_rate": 0.001, "loss": 2.0423, "step": 8377 }, { "epoch": 0.35442930874016415, "grad_norm": 0.20398443937301636, "learning_rate": 0.001, "loss": 1.926, "step": 8378 }, { "epoch": 0.3544716135036805, "grad_norm": 1.420688509941101, "learning_rate": 0.001, "loss": 2.1442, "step": 8379 }, { "epoch": 0.35451391826719686, "grad_norm": 8.911663055419922, "learning_rate": 0.001, "loss": 1.6632, "step": 8380 }, { "epoch": 0.35455622303071327, "grad_norm": 0.2737337052822113, "learning_rate": 0.001, "loss": 1.9327, "step": 8381 }, { "epoch": 0.3545985277942296, "grad_norm": 0.20059816539287567, "learning_rate": 0.001, "loss": 2.094, "step": 8382 }, { "epoch": 0.354640832557746, "grad_norm": 0.21529340744018555, "learning_rate": 0.001, "loss": 1.811, "step": 8383 }, { "epoch": 0.3546831373212624, "grad_norm": 0.21227684617042542, "learning_rate": 0.001, "loss": 1.9585, "step": 8384 }, { "epoch": 0.35472544208477874, "grad_norm": 0.18570013344287872, "learning_rate": 0.001, "loss": 2.5025, "step": 8385 }, { "epoch": 0.3547677468482951, "grad_norm": 0.21961857378482819, "learning_rate": 0.001, "loss": 1.7425, "step": 8386 }, { "epoch": 0.3548100516118115, "grad_norm": 8.055773735046387, "learning_rate": 0.001, "loss": 2.41, "step": 8387 }, { "epoch": 0.35485235637532786, "grad_norm": 0.33439183235168457, "learning_rate": 0.001, "loss": 2.141, "step": 8388 }, { "epoch": 0.3548946611388442, "grad_norm": 0.2350430190563202, "learning_rate": 0.001, "loss": 2.0489, "step": 8389 }, { "epoch": 0.3549369659023606, "grad_norm": 0.2935107946395874, "learning_rate": 0.001, "loss": 1.439, "step": 8390 }, { "epoch": 0.354979270665877, "grad_norm": 2.349884033203125, "learning_rate": 0.001, "loss": 2.6664, "step": 8391 }, { "epoch": 0.35502157542939333, "grad_norm": 2.4287772178649902, "learning_rate": 0.001, "loss": 2.796, "step": 8392 }, { "epoch": 0.35506388019290974, "grad_norm": 0.24385391175746918, "learning_rate": 0.001, "loss": 1.9699, "step": 8393 }, { "epoch": 0.3551061849564261, "grad_norm": 0.21865463256835938, "learning_rate": 0.001, "loss": 1.4755, "step": 8394 }, { "epoch": 0.35514848971994245, "grad_norm": 0.221740260720253, "learning_rate": 0.001, "loss": 2.6338, "step": 8395 }, { "epoch": 0.35519079448345886, "grad_norm": 0.212009459733963, "learning_rate": 0.001, "loss": 1.8636, "step": 8396 }, { "epoch": 0.3552330992469752, "grad_norm": 0.42118605971336365, "learning_rate": 0.001, "loss": 1.8155, "step": 8397 }, { "epoch": 0.35527540401049157, "grad_norm": 0.19747601449489594, "learning_rate": 0.001, "loss": 2.4585, "step": 8398 }, { "epoch": 0.355317708774008, "grad_norm": 0.21434566378593445, "learning_rate": 0.001, "loss": 3.2472, "step": 8399 }, { "epoch": 0.35536001353752433, "grad_norm": 0.1856713443994522, "learning_rate": 0.001, "loss": 1.8753, "step": 8400 }, { "epoch": 0.3554023183010407, "grad_norm": 0.215153768658638, "learning_rate": 0.001, "loss": 1.7379, "step": 8401 }, { "epoch": 0.35544462306455704, "grad_norm": 0.16858762502670288, "learning_rate": 0.001, "loss": 1.8505, "step": 8402 }, { "epoch": 0.35548692782807345, "grad_norm": 0.23868021368980408, "learning_rate": 0.001, "loss": 3.6215, "step": 8403 }, { "epoch": 0.3555292325915898, "grad_norm": 0.21436399221420288, "learning_rate": 0.001, "loss": 1.5321, "step": 8404 }, { "epoch": 0.35557153735510616, "grad_norm": 0.3087001442909241, "learning_rate": 0.001, "loss": 2.6408, "step": 8405 }, { "epoch": 0.35561384211862257, "grad_norm": 0.20392103493213654, "learning_rate": 0.001, "loss": 3.0366, "step": 8406 }, { "epoch": 0.3556561468821389, "grad_norm": 1.7128150463104248, "learning_rate": 0.001, "loss": 3.0189, "step": 8407 }, { "epoch": 0.3556984516456553, "grad_norm": 0.2017560452222824, "learning_rate": 0.001, "loss": 2.2836, "step": 8408 }, { "epoch": 0.3557407564091717, "grad_norm": 0.2031020075082779, "learning_rate": 0.001, "loss": 2.3527, "step": 8409 }, { "epoch": 0.35578306117268804, "grad_norm": 0.19620314240455627, "learning_rate": 0.001, "loss": 2.9028, "step": 8410 }, { "epoch": 0.3558253659362044, "grad_norm": 0.23667864501476288, "learning_rate": 0.001, "loss": 1.8299, "step": 8411 }, { "epoch": 0.3558676706997208, "grad_norm": 0.18068136274814606, "learning_rate": 0.001, "loss": 2.8935, "step": 8412 }, { "epoch": 0.35590997546323716, "grad_norm": 0.9692279696464539, "learning_rate": 0.001, "loss": 2.2835, "step": 8413 }, { "epoch": 0.3559522802267535, "grad_norm": 0.1930302083492279, "learning_rate": 0.001, "loss": 1.8272, "step": 8414 }, { "epoch": 0.3559945849902699, "grad_norm": 0.49082469940185547, "learning_rate": 0.001, "loss": 3.0818, "step": 8415 }, { "epoch": 0.3560368897537863, "grad_norm": 0.17451566457748413, "learning_rate": 0.001, "loss": 1.9013, "step": 8416 }, { "epoch": 0.35607919451730263, "grad_norm": 0.3995453715324402, "learning_rate": 0.001, "loss": 2.0403, "step": 8417 }, { "epoch": 0.35612149928081904, "grad_norm": 0.18853165209293365, "learning_rate": 0.001, "loss": 1.9157, "step": 8418 }, { "epoch": 0.3561638040443354, "grad_norm": 0.16779901087284088, "learning_rate": 0.001, "loss": 2.425, "step": 8419 }, { "epoch": 0.35620610880785175, "grad_norm": 0.20070673525333405, "learning_rate": 0.001, "loss": 2.0243, "step": 8420 }, { "epoch": 0.35624841357136816, "grad_norm": 0.26167789101600647, "learning_rate": 0.001, "loss": 2.3683, "step": 8421 }, { "epoch": 0.3562907183348845, "grad_norm": 0.39837929606437683, "learning_rate": 0.001, "loss": 3.0087, "step": 8422 }, { "epoch": 0.35633302309840087, "grad_norm": 0.1800961196422577, "learning_rate": 0.001, "loss": 2.4717, "step": 8423 }, { "epoch": 0.3563753278619173, "grad_norm": 0.2057555615901947, "learning_rate": 0.001, "loss": 2.3249, "step": 8424 }, { "epoch": 0.35641763262543363, "grad_norm": 0.17443668842315674, "learning_rate": 0.001, "loss": 1.6971, "step": 8425 }, { "epoch": 0.35645993738895, "grad_norm": 0.21079231798648834, "learning_rate": 0.001, "loss": 2.3975, "step": 8426 }, { "epoch": 0.35650224215246634, "grad_norm": 3.70257830619812, "learning_rate": 0.001, "loss": 2.5433, "step": 8427 }, { "epoch": 0.35654454691598275, "grad_norm": 0.17566591501235962, "learning_rate": 0.001, "loss": 1.8831, "step": 8428 }, { "epoch": 0.3565868516794991, "grad_norm": 0.4361303746700287, "learning_rate": 0.001, "loss": 2.4334, "step": 8429 }, { "epoch": 0.35662915644301546, "grad_norm": 0.27633586525917053, "learning_rate": 0.001, "loss": 3.0307, "step": 8430 }, { "epoch": 0.35667146120653187, "grad_norm": 0.21895642578601837, "learning_rate": 0.001, "loss": 2.015, "step": 8431 }, { "epoch": 0.3567137659700482, "grad_norm": 0.15661796927452087, "learning_rate": 0.001, "loss": 1.8659, "step": 8432 }, { "epoch": 0.3567560707335646, "grad_norm": 0.19141338765621185, "learning_rate": 0.001, "loss": 2.7277, "step": 8433 }, { "epoch": 0.356798375497081, "grad_norm": 0.2092185765504837, "learning_rate": 0.001, "loss": 2.2695, "step": 8434 }, { "epoch": 0.35684068026059734, "grad_norm": 0.19185788929462433, "learning_rate": 0.001, "loss": 2.2118, "step": 8435 }, { "epoch": 0.3568829850241137, "grad_norm": 0.20594419538974762, "learning_rate": 0.001, "loss": 2.4456, "step": 8436 }, { "epoch": 0.3569252897876301, "grad_norm": 0.843341588973999, "learning_rate": 0.001, "loss": 2.6461, "step": 8437 }, { "epoch": 0.35696759455114646, "grad_norm": 0.18174798786640167, "learning_rate": 0.001, "loss": 2.0485, "step": 8438 }, { "epoch": 0.3570098993146628, "grad_norm": 0.6228585839271545, "learning_rate": 0.001, "loss": 2.8919, "step": 8439 }, { "epoch": 0.3570522040781792, "grad_norm": 0.21685943007469177, "learning_rate": 0.001, "loss": 2.1506, "step": 8440 }, { "epoch": 0.3570945088416956, "grad_norm": 0.16078568994998932, "learning_rate": 0.001, "loss": 2.6238, "step": 8441 }, { "epoch": 0.35713681360521193, "grad_norm": 0.2162536233663559, "learning_rate": 0.001, "loss": 2.6207, "step": 8442 }, { "epoch": 0.35717911836872834, "grad_norm": 0.20838353037834167, "learning_rate": 0.001, "loss": 2.4904, "step": 8443 }, { "epoch": 0.3572214231322447, "grad_norm": 0.20165249705314636, "learning_rate": 0.001, "loss": 3.4246, "step": 8444 }, { "epoch": 0.35726372789576105, "grad_norm": 0.15616677701473236, "learning_rate": 0.001, "loss": 2.2008, "step": 8445 }, { "epoch": 0.35730603265927746, "grad_norm": 0.48721373081207275, "learning_rate": 0.001, "loss": 1.8402, "step": 8446 }, { "epoch": 0.3573483374227938, "grad_norm": 0.23544834554195404, "learning_rate": 0.001, "loss": 2.1721, "step": 8447 }, { "epoch": 0.35739064218631017, "grad_norm": 27.898847579956055, "learning_rate": 0.001, "loss": 2.5454, "step": 8448 }, { "epoch": 0.3574329469498265, "grad_norm": 0.23525142669677734, "learning_rate": 0.001, "loss": 3.1336, "step": 8449 }, { "epoch": 0.35747525171334293, "grad_norm": 0.16481630504131317, "learning_rate": 0.001, "loss": 1.871, "step": 8450 }, { "epoch": 0.3575175564768593, "grad_norm": 0.20045652985572815, "learning_rate": 0.001, "loss": 2.5494, "step": 8451 }, { "epoch": 0.35755986124037564, "grad_norm": 0.4051211476325989, "learning_rate": 0.001, "loss": 2.1058, "step": 8452 }, { "epoch": 0.35760216600389205, "grad_norm": 1.801464557647705, "learning_rate": 0.001, "loss": 2.0417, "step": 8453 }, { "epoch": 0.3576444707674084, "grad_norm": 0.22075815498828888, "learning_rate": 0.001, "loss": 2.2037, "step": 8454 }, { "epoch": 0.35768677553092476, "grad_norm": 0.20504413545131683, "learning_rate": 0.001, "loss": 2.048, "step": 8455 }, { "epoch": 0.35772908029444117, "grad_norm": 0.20746447145938873, "learning_rate": 0.001, "loss": 2.2803, "step": 8456 }, { "epoch": 0.3577713850579575, "grad_norm": 0.47997429966926575, "learning_rate": 0.001, "loss": 2.247, "step": 8457 }, { "epoch": 0.3578136898214739, "grad_norm": 0.24447347223758698, "learning_rate": 0.001, "loss": 1.9872, "step": 8458 }, { "epoch": 0.3578559945849903, "grad_norm": 0.2988087832927704, "learning_rate": 0.001, "loss": 3.0362, "step": 8459 }, { "epoch": 0.35789829934850664, "grad_norm": 1.136828064918518, "learning_rate": 0.001, "loss": 2.3112, "step": 8460 }, { "epoch": 0.357940604112023, "grad_norm": 0.4372611343860626, "learning_rate": 0.001, "loss": 1.7607, "step": 8461 }, { "epoch": 0.3579829088755394, "grad_norm": 0.18738792836666107, "learning_rate": 0.001, "loss": 1.9763, "step": 8462 }, { "epoch": 0.35802521363905576, "grad_norm": 0.4911549985408783, "learning_rate": 0.001, "loss": 3.0154, "step": 8463 }, { "epoch": 0.3580675184025721, "grad_norm": 0.21335723996162415, "learning_rate": 0.001, "loss": 3.5891, "step": 8464 }, { "epoch": 0.3581098231660885, "grad_norm": 0.9935727715492249, "learning_rate": 0.001, "loss": 2.0077, "step": 8465 }, { "epoch": 0.3581521279296049, "grad_norm": 0.16692140698432922, "learning_rate": 0.001, "loss": 1.725, "step": 8466 }, { "epoch": 0.35819443269312123, "grad_norm": 3.184319496154785, "learning_rate": 0.001, "loss": 1.9069, "step": 8467 }, { "epoch": 0.35823673745663764, "grad_norm": 0.2190055400133133, "learning_rate": 0.001, "loss": 2.3108, "step": 8468 }, { "epoch": 0.358279042220154, "grad_norm": 2.8575377464294434, "learning_rate": 0.001, "loss": 1.8183, "step": 8469 }, { "epoch": 0.35832134698367035, "grad_norm": 0.27034443616867065, "learning_rate": 0.001, "loss": 1.9055, "step": 8470 }, { "epoch": 0.3583636517471867, "grad_norm": 0.2202761024236679, "learning_rate": 0.001, "loss": 2.4092, "step": 8471 }, { "epoch": 0.3584059565107031, "grad_norm": 1.0408316850662231, "learning_rate": 0.001, "loss": 2.3927, "step": 8472 }, { "epoch": 0.35844826127421947, "grad_norm": 0.26200470328330994, "learning_rate": 0.001, "loss": 2.256, "step": 8473 }, { "epoch": 0.3584905660377358, "grad_norm": 0.23284506797790527, "learning_rate": 0.001, "loss": 2.4034, "step": 8474 }, { "epoch": 0.35853287080125223, "grad_norm": 0.20646698772907257, "learning_rate": 0.001, "loss": 1.829, "step": 8475 }, { "epoch": 0.3585751755647686, "grad_norm": 0.25378456711769104, "learning_rate": 0.001, "loss": 2.9985, "step": 8476 }, { "epoch": 0.35861748032828494, "grad_norm": 0.31622862815856934, "learning_rate": 0.001, "loss": 2.0001, "step": 8477 }, { "epoch": 0.35865978509180135, "grad_norm": 0.21043705940246582, "learning_rate": 0.001, "loss": 2.5147, "step": 8478 }, { "epoch": 0.3587020898553177, "grad_norm": 0.3560662865638733, "learning_rate": 0.001, "loss": 2.2411, "step": 8479 }, { "epoch": 0.35874439461883406, "grad_norm": 0.4322994649410248, "learning_rate": 0.001, "loss": 3.4217, "step": 8480 }, { "epoch": 0.35878669938235047, "grad_norm": 0.7399798631668091, "learning_rate": 0.001, "loss": 2.408, "step": 8481 }, { "epoch": 0.3588290041458668, "grad_norm": 0.22962254285812378, "learning_rate": 0.001, "loss": 2.7897, "step": 8482 }, { "epoch": 0.3588713089093832, "grad_norm": 0.20676597952842712, "learning_rate": 0.001, "loss": 2.0482, "step": 8483 }, { "epoch": 0.3589136136728996, "grad_norm": 0.1842261403799057, "learning_rate": 0.001, "loss": 2.6388, "step": 8484 }, { "epoch": 0.35895591843641594, "grad_norm": 0.28361448645591736, "learning_rate": 0.001, "loss": 2.6798, "step": 8485 }, { "epoch": 0.3589982231999323, "grad_norm": 0.910611093044281, "learning_rate": 0.001, "loss": 2.1683, "step": 8486 }, { "epoch": 0.3590405279634487, "grad_norm": 0.7657113671302795, "learning_rate": 0.001, "loss": 2.3814, "step": 8487 }, { "epoch": 0.35908283272696506, "grad_norm": 0.29642629623413086, "learning_rate": 0.001, "loss": 1.888, "step": 8488 }, { "epoch": 0.3591251374904814, "grad_norm": 0.1811065673828125, "learning_rate": 0.001, "loss": 3.645, "step": 8489 }, { "epoch": 0.3591674422539978, "grad_norm": 0.2280883491039276, "learning_rate": 0.001, "loss": 2.1726, "step": 8490 }, { "epoch": 0.3592097470175142, "grad_norm": 0.19528071582317352, "learning_rate": 0.001, "loss": 2.1867, "step": 8491 }, { "epoch": 0.35925205178103053, "grad_norm": 0.24748535454273224, "learning_rate": 0.001, "loss": 2.0032, "step": 8492 }, { "epoch": 0.3592943565445469, "grad_norm": 0.21031174063682556, "learning_rate": 0.001, "loss": 2.0499, "step": 8493 }, { "epoch": 0.3593366613080633, "grad_norm": 0.22891457378864288, "learning_rate": 0.001, "loss": 3.2536, "step": 8494 }, { "epoch": 0.35937896607157965, "grad_norm": 0.18080003559589386, "learning_rate": 0.001, "loss": 2.1618, "step": 8495 }, { "epoch": 0.359421270835096, "grad_norm": 0.3403954803943634, "learning_rate": 0.001, "loss": 1.625, "step": 8496 }, { "epoch": 0.3594635755986124, "grad_norm": 0.1686491221189499, "learning_rate": 0.001, "loss": 3.029, "step": 8497 }, { "epoch": 0.35950588036212877, "grad_norm": 0.3256143629550934, "learning_rate": 0.001, "loss": 2.1193, "step": 8498 }, { "epoch": 0.3595481851256451, "grad_norm": 7.151230335235596, "learning_rate": 0.001, "loss": 3.4917, "step": 8499 }, { "epoch": 0.35959048988916154, "grad_norm": 0.2594936788082123, "learning_rate": 0.001, "loss": 2.7958, "step": 8500 }, { "epoch": 0.3596327946526779, "grad_norm": 1.2051527500152588, "learning_rate": 0.001, "loss": 2.0397, "step": 8501 }, { "epoch": 0.35967509941619424, "grad_norm": 4.79257345199585, "learning_rate": 0.001, "loss": 2.5444, "step": 8502 }, { "epoch": 0.35971740417971065, "grad_norm": 0.1828813999891281, "learning_rate": 0.001, "loss": 2.5241, "step": 8503 }, { "epoch": 0.359759708943227, "grad_norm": 0.22619538009166718, "learning_rate": 0.001, "loss": 2.5545, "step": 8504 }, { "epoch": 0.35980201370674336, "grad_norm": 0.16107860207557678, "learning_rate": 0.001, "loss": 2.2485, "step": 8505 }, { "epoch": 0.35984431847025977, "grad_norm": 0.22678741812705994, "learning_rate": 0.001, "loss": 2.5525, "step": 8506 }, { "epoch": 0.3598866232337761, "grad_norm": 0.20380182564258575, "learning_rate": 0.001, "loss": 2.495, "step": 8507 }, { "epoch": 0.3599289279972925, "grad_norm": 0.32149720191955566, "learning_rate": 0.001, "loss": 2.2425, "step": 8508 }, { "epoch": 0.3599712327608089, "grad_norm": 0.17898282408714294, "learning_rate": 0.001, "loss": 2.3266, "step": 8509 }, { "epoch": 0.36001353752432524, "grad_norm": 0.742031455039978, "learning_rate": 0.001, "loss": 2.1324, "step": 8510 }, { "epoch": 0.3600558422878416, "grad_norm": 1.0595115423202515, "learning_rate": 0.001, "loss": 2.9696, "step": 8511 }, { "epoch": 0.360098147051358, "grad_norm": 0.22999782860279083, "learning_rate": 0.001, "loss": 3.4851, "step": 8512 }, { "epoch": 0.36014045181487436, "grad_norm": 0.2157551646232605, "learning_rate": 0.001, "loss": 2.5969, "step": 8513 }, { "epoch": 0.3601827565783907, "grad_norm": 0.8261212706565857, "learning_rate": 0.001, "loss": 2.3355, "step": 8514 }, { "epoch": 0.36022506134190707, "grad_norm": 0.40030571818351746, "learning_rate": 0.001, "loss": 2.4083, "step": 8515 }, { "epoch": 0.3602673661054235, "grad_norm": 0.23546592891216278, "learning_rate": 0.001, "loss": 2.1861, "step": 8516 }, { "epoch": 0.36030967086893984, "grad_norm": 0.20725642144680023, "learning_rate": 0.001, "loss": 2.3907, "step": 8517 }, { "epoch": 0.3603519756324562, "grad_norm": 0.28327709436416626, "learning_rate": 0.001, "loss": 3.0746, "step": 8518 }, { "epoch": 0.3603942803959726, "grad_norm": 0.4538307189941406, "learning_rate": 0.001, "loss": 2.6733, "step": 8519 }, { "epoch": 0.36043658515948895, "grad_norm": 0.20344911515712738, "learning_rate": 0.001, "loss": 2.7097, "step": 8520 }, { "epoch": 0.3604788899230053, "grad_norm": 0.2238440215587616, "learning_rate": 0.001, "loss": 2.2248, "step": 8521 }, { "epoch": 0.3605211946865217, "grad_norm": 0.2143414467573166, "learning_rate": 0.001, "loss": 2.6659, "step": 8522 }, { "epoch": 0.36056349945003807, "grad_norm": 0.21883174777030945, "learning_rate": 0.001, "loss": 2.8158, "step": 8523 }, { "epoch": 0.3606058042135544, "grad_norm": 1.42238450050354, "learning_rate": 0.001, "loss": 2.9731, "step": 8524 }, { "epoch": 0.36064810897707084, "grad_norm": 4.848227500915527, "learning_rate": 0.001, "loss": 2.352, "step": 8525 }, { "epoch": 0.3606904137405872, "grad_norm": 0.7891883850097656, "learning_rate": 0.001, "loss": 2.0768, "step": 8526 }, { "epoch": 0.36073271850410354, "grad_norm": 84.44062042236328, "learning_rate": 0.001, "loss": 3.0048, "step": 8527 }, { "epoch": 0.36077502326761995, "grad_norm": 0.2623269855976105, "learning_rate": 0.001, "loss": 2.887, "step": 8528 }, { "epoch": 0.3608173280311363, "grad_norm": 0.2041042447090149, "learning_rate": 0.001, "loss": 1.7496, "step": 8529 }, { "epoch": 0.36085963279465266, "grad_norm": 0.25694215297698975, "learning_rate": 0.001, "loss": 2.0294, "step": 8530 }, { "epoch": 0.3609019375581691, "grad_norm": 0.4767371416091919, "learning_rate": 0.001, "loss": 1.9393, "step": 8531 }, { "epoch": 0.3609442423216854, "grad_norm": 0.5682966113090515, "learning_rate": 0.001, "loss": 2.401, "step": 8532 }, { "epoch": 0.3609865470852018, "grad_norm": 0.754736065864563, "learning_rate": 0.001, "loss": 1.53, "step": 8533 }, { "epoch": 0.3610288518487182, "grad_norm": 0.19843953847885132, "learning_rate": 0.001, "loss": 2.3422, "step": 8534 }, { "epoch": 0.36107115661223455, "grad_norm": 0.6914002299308777, "learning_rate": 0.001, "loss": 2.5332, "step": 8535 }, { "epoch": 0.3611134613757509, "grad_norm": 0.9868711829185486, "learning_rate": 0.001, "loss": 3.0186, "step": 8536 }, { "epoch": 0.3611557661392673, "grad_norm": 0.9456235766410828, "learning_rate": 0.001, "loss": 1.7753, "step": 8537 }, { "epoch": 0.36119807090278366, "grad_norm": 0.3088882565498352, "learning_rate": 0.001, "loss": 2.0363, "step": 8538 }, { "epoch": 0.3612403756663, "grad_norm": 0.8840659856796265, "learning_rate": 0.001, "loss": 3.2643, "step": 8539 }, { "epoch": 0.36128268042981637, "grad_norm": 1.7744220495224, "learning_rate": 0.001, "loss": 1.981, "step": 8540 }, { "epoch": 0.3613249851933328, "grad_norm": 0.8132971525192261, "learning_rate": 0.001, "loss": 3.234, "step": 8541 }, { "epoch": 0.36136728995684914, "grad_norm": 0.26763689517974854, "learning_rate": 0.001, "loss": 2.3408, "step": 8542 }, { "epoch": 0.3614095947203655, "grad_norm": 0.18884502351284027, "learning_rate": 0.001, "loss": 3.1137, "step": 8543 }, { "epoch": 0.3614518994838819, "grad_norm": 4.647204399108887, "learning_rate": 0.001, "loss": 2.6148, "step": 8544 }, { "epoch": 0.36149420424739825, "grad_norm": 3.6507856845855713, "learning_rate": 0.001, "loss": 3.341, "step": 8545 }, { "epoch": 0.3615365090109146, "grad_norm": 0.21972277760505676, "learning_rate": 0.001, "loss": 1.8723, "step": 8546 }, { "epoch": 0.361578813774431, "grad_norm": 0.3465214967727661, "learning_rate": 0.001, "loss": 2.1504, "step": 8547 }, { "epoch": 0.3616211185379474, "grad_norm": 0.40754181146621704, "learning_rate": 0.001, "loss": 2.694, "step": 8548 }, { "epoch": 0.3616634233014637, "grad_norm": 0.20011767745018005, "learning_rate": 0.001, "loss": 2.4816, "step": 8549 }, { "epoch": 0.36170572806498014, "grad_norm": 0.1964236944913864, "learning_rate": 0.001, "loss": 1.6528, "step": 8550 }, { "epoch": 0.3617480328284965, "grad_norm": 0.17661263048648834, "learning_rate": 0.001, "loss": 1.9885, "step": 8551 }, { "epoch": 0.36179033759201285, "grad_norm": 0.7868698835372925, "learning_rate": 0.001, "loss": 2.6136, "step": 8552 }, { "epoch": 0.36183264235552925, "grad_norm": 0.23645655810832977, "learning_rate": 0.001, "loss": 2.408, "step": 8553 }, { "epoch": 0.3618749471190456, "grad_norm": 0.18343326449394226, "learning_rate": 0.001, "loss": 1.5529, "step": 8554 }, { "epoch": 0.36191725188256196, "grad_norm": 0.18939687311649323, "learning_rate": 0.001, "loss": 2.5676, "step": 8555 }, { "epoch": 0.3619595566460784, "grad_norm": 0.7980564832687378, "learning_rate": 0.001, "loss": 2.4449, "step": 8556 }, { "epoch": 0.3620018614095947, "grad_norm": 0.8566358089447021, "learning_rate": 0.001, "loss": 3.1181, "step": 8557 }, { "epoch": 0.3620441661731111, "grad_norm": 0.24437959492206573, "learning_rate": 0.001, "loss": 2.1782, "step": 8558 }, { "epoch": 0.3620864709366275, "grad_norm": 0.22951102256774902, "learning_rate": 0.001, "loss": 2.0025, "step": 8559 }, { "epoch": 0.36212877570014385, "grad_norm": 0.5679842233657837, "learning_rate": 0.001, "loss": 2.8188, "step": 8560 }, { "epoch": 0.3621710804636602, "grad_norm": 0.2738345265388489, "learning_rate": 0.001, "loss": 2.9867, "step": 8561 }, { "epoch": 0.36221338522717655, "grad_norm": 0.6427478790283203, "learning_rate": 0.001, "loss": 2.0955, "step": 8562 }, { "epoch": 0.36225568999069296, "grad_norm": 0.17962263524532318, "learning_rate": 0.001, "loss": 1.8587, "step": 8563 }, { "epoch": 0.3622979947542093, "grad_norm": 1.4514254331588745, "learning_rate": 0.001, "loss": 1.7745, "step": 8564 }, { "epoch": 0.3623402995177257, "grad_norm": 0.2207036018371582, "learning_rate": 0.001, "loss": 1.6446, "step": 8565 }, { "epoch": 0.3623826042812421, "grad_norm": 0.3202604055404663, "learning_rate": 0.001, "loss": 2.5328, "step": 8566 }, { "epoch": 0.36242490904475844, "grad_norm": 0.6351190209388733, "learning_rate": 0.001, "loss": 2.7094, "step": 8567 }, { "epoch": 0.3624672138082748, "grad_norm": 0.21355757117271423, "learning_rate": 0.001, "loss": 2.6323, "step": 8568 }, { "epoch": 0.3625095185717912, "grad_norm": 0.20924630761146545, "learning_rate": 0.001, "loss": 1.6898, "step": 8569 }, { "epoch": 0.36255182333530755, "grad_norm": 0.4818645417690277, "learning_rate": 0.001, "loss": 1.6882, "step": 8570 }, { "epoch": 0.3625941280988239, "grad_norm": 0.2274334728717804, "learning_rate": 0.001, "loss": 3.8046, "step": 8571 }, { "epoch": 0.3626364328623403, "grad_norm": 4.399014949798584, "learning_rate": 0.001, "loss": 2.6037, "step": 8572 }, { "epoch": 0.3626787376258567, "grad_norm": 0.20539069175720215, "learning_rate": 0.001, "loss": 1.8258, "step": 8573 }, { "epoch": 0.362721042389373, "grad_norm": 0.3310551345348358, "learning_rate": 0.001, "loss": 3.1566, "step": 8574 }, { "epoch": 0.36276334715288944, "grad_norm": 0.2262580245733261, "learning_rate": 0.001, "loss": 2.6769, "step": 8575 }, { "epoch": 0.3628056519164058, "grad_norm": 0.20004798471927643, "learning_rate": 0.001, "loss": 1.7463, "step": 8576 }, { "epoch": 0.36284795667992215, "grad_norm": 0.17591601610183716, "learning_rate": 0.001, "loss": 1.5559, "step": 8577 }, { "epoch": 0.36289026144343856, "grad_norm": 0.17528226971626282, "learning_rate": 0.001, "loss": 1.9886, "step": 8578 }, { "epoch": 0.3629325662069549, "grad_norm": 0.3700055181980133, "learning_rate": 0.001, "loss": 2.4776, "step": 8579 }, { "epoch": 0.36297487097047126, "grad_norm": 0.17485986649990082, "learning_rate": 0.001, "loss": 2.0908, "step": 8580 }, { "epoch": 0.3630171757339877, "grad_norm": 9.299296379089355, "learning_rate": 0.001, "loss": 2.3967, "step": 8581 }, { "epoch": 0.36305948049750403, "grad_norm": 0.20761297643184662, "learning_rate": 0.001, "loss": 2.0552, "step": 8582 }, { "epoch": 0.3631017852610204, "grad_norm": 0.19602414965629578, "learning_rate": 0.001, "loss": 2.1057, "step": 8583 }, { "epoch": 0.36314409002453674, "grad_norm": 1.7627781629562378, "learning_rate": 0.001, "loss": 2.5504, "step": 8584 }, { "epoch": 0.36318639478805315, "grad_norm": 1.635079026222229, "learning_rate": 0.001, "loss": 1.8716, "step": 8585 }, { "epoch": 0.3632286995515695, "grad_norm": 2.4154467582702637, "learning_rate": 0.001, "loss": 2.5212, "step": 8586 }, { "epoch": 0.36327100431508585, "grad_norm": 0.1748884916305542, "learning_rate": 0.001, "loss": 2.1198, "step": 8587 }, { "epoch": 0.36331330907860226, "grad_norm": 1.158073902130127, "learning_rate": 0.001, "loss": 2.5192, "step": 8588 }, { "epoch": 0.3633556138421186, "grad_norm": 0.24196279048919678, "learning_rate": 0.001, "loss": 2.1299, "step": 8589 }, { "epoch": 0.363397918605635, "grad_norm": 0.21735741198062897, "learning_rate": 0.001, "loss": 2.2981, "step": 8590 }, { "epoch": 0.3634402233691514, "grad_norm": 0.22882144153118134, "learning_rate": 0.001, "loss": 2.1787, "step": 8591 }, { "epoch": 0.36348252813266774, "grad_norm": 0.22966794669628143, "learning_rate": 0.001, "loss": 2.0843, "step": 8592 }, { "epoch": 0.3635248328961841, "grad_norm": 3.5307583808898926, "learning_rate": 0.001, "loss": 2.6233, "step": 8593 }, { "epoch": 0.3635671376597005, "grad_norm": 0.1808539777994156, "learning_rate": 0.001, "loss": 2.447, "step": 8594 }, { "epoch": 0.36360944242321686, "grad_norm": 0.191551074385643, "learning_rate": 0.001, "loss": 1.9306, "step": 8595 }, { "epoch": 0.3636517471867332, "grad_norm": 0.2742213308811188, "learning_rate": 0.001, "loss": 2.5281, "step": 8596 }, { "epoch": 0.3636940519502496, "grad_norm": 3.3009042739868164, "learning_rate": 0.001, "loss": 3.4417, "step": 8597 }, { "epoch": 0.363736356713766, "grad_norm": 0.22634568810462952, "learning_rate": 0.001, "loss": 1.9962, "step": 8598 }, { "epoch": 0.36377866147728233, "grad_norm": 0.3063672184944153, "learning_rate": 0.001, "loss": 2.1643, "step": 8599 }, { "epoch": 0.36382096624079874, "grad_norm": 0.4341367781162262, "learning_rate": 0.001, "loss": 2.8908, "step": 8600 }, { "epoch": 0.3638632710043151, "grad_norm": 0.2267148345708847, "learning_rate": 0.001, "loss": 3.3518, "step": 8601 }, { "epoch": 0.36390557576783145, "grad_norm": 0.235674649477005, "learning_rate": 0.001, "loss": 2.0174, "step": 8602 }, { "epoch": 0.36394788053134786, "grad_norm": 0.1691252589225769, "learning_rate": 0.001, "loss": 1.7468, "step": 8603 }, { "epoch": 0.3639901852948642, "grad_norm": 0.4646351933479309, "learning_rate": 0.001, "loss": 2.9241, "step": 8604 }, { "epoch": 0.36403249005838056, "grad_norm": 0.18952451646327972, "learning_rate": 0.001, "loss": 1.9208, "step": 8605 }, { "epoch": 0.3640747948218969, "grad_norm": 1.2215399742126465, "learning_rate": 0.001, "loss": 2.0738, "step": 8606 }, { "epoch": 0.36411709958541333, "grad_norm": 0.1823042780160904, "learning_rate": 0.001, "loss": 2.0435, "step": 8607 }, { "epoch": 0.3641594043489297, "grad_norm": 0.18429160118103027, "learning_rate": 0.001, "loss": 2.1176, "step": 8608 }, { "epoch": 0.36420170911244604, "grad_norm": 0.30278459191322327, "learning_rate": 0.001, "loss": 2.0551, "step": 8609 }, { "epoch": 0.36424401387596245, "grad_norm": 0.27430135011672974, "learning_rate": 0.001, "loss": 2.7956, "step": 8610 }, { "epoch": 0.3642863186394788, "grad_norm": 0.7540306448936462, "learning_rate": 0.001, "loss": 2.312, "step": 8611 }, { "epoch": 0.36432862340299516, "grad_norm": 0.24535398185253143, "learning_rate": 0.001, "loss": 2.2725, "step": 8612 }, { "epoch": 0.36437092816651157, "grad_norm": 0.39230287075042725, "learning_rate": 0.001, "loss": 2.2182, "step": 8613 }, { "epoch": 0.3644132329300279, "grad_norm": 0.19538423418998718, "learning_rate": 0.001, "loss": 2.1788, "step": 8614 }, { "epoch": 0.3644555376935443, "grad_norm": 0.40475207567214966, "learning_rate": 0.001, "loss": 2.1257, "step": 8615 }, { "epoch": 0.3644978424570607, "grad_norm": 0.27652233839035034, "learning_rate": 0.001, "loss": 3.0038, "step": 8616 }, { "epoch": 0.36454014722057704, "grad_norm": 0.19666795432567596, "learning_rate": 0.001, "loss": 1.4796, "step": 8617 }, { "epoch": 0.3645824519840934, "grad_norm": 0.8510257601737976, "learning_rate": 0.001, "loss": 2.5566, "step": 8618 }, { "epoch": 0.3646247567476098, "grad_norm": 0.2172936201095581, "learning_rate": 0.001, "loss": 2.1356, "step": 8619 }, { "epoch": 0.36466706151112616, "grad_norm": 0.20282861590385437, "learning_rate": 0.001, "loss": 1.5411, "step": 8620 }, { "epoch": 0.3647093662746425, "grad_norm": 0.1585301011800766, "learning_rate": 0.001, "loss": 1.3001, "step": 8621 }, { "epoch": 0.3647516710381589, "grad_norm": 0.20633943378925323, "learning_rate": 0.001, "loss": 1.869, "step": 8622 }, { "epoch": 0.3647939758016753, "grad_norm": 0.24952706694602966, "learning_rate": 0.001, "loss": 2.2658, "step": 8623 }, { "epoch": 0.36483628056519163, "grad_norm": 0.24051597714424133, "learning_rate": 0.001, "loss": 2.0523, "step": 8624 }, { "epoch": 0.36487858532870804, "grad_norm": 1.4806382656097412, "learning_rate": 0.001, "loss": 3.0184, "step": 8625 }, { "epoch": 0.3649208900922244, "grad_norm": 0.2159508466720581, "learning_rate": 0.001, "loss": 1.995, "step": 8626 }, { "epoch": 0.36496319485574075, "grad_norm": 0.5792630314826965, "learning_rate": 0.001, "loss": 3.4955, "step": 8627 }, { "epoch": 0.3650054996192571, "grad_norm": 0.18533584475517273, "learning_rate": 0.001, "loss": 1.815, "step": 8628 }, { "epoch": 0.3650478043827735, "grad_norm": 0.2772461175918579, "learning_rate": 0.001, "loss": 2.1131, "step": 8629 }, { "epoch": 0.36509010914628987, "grad_norm": 0.27100545167922974, "learning_rate": 0.001, "loss": 2.2957, "step": 8630 }, { "epoch": 0.3651324139098062, "grad_norm": 0.2463526576757431, "learning_rate": 0.001, "loss": 3.0297, "step": 8631 }, { "epoch": 0.36517471867332263, "grad_norm": 1.035469889640808, "learning_rate": 0.001, "loss": 1.6165, "step": 8632 }, { "epoch": 0.365217023436839, "grad_norm": 0.23822209239006042, "learning_rate": 0.001, "loss": 2.0529, "step": 8633 }, { "epoch": 0.36525932820035534, "grad_norm": 0.20233659446239471, "learning_rate": 0.001, "loss": 1.6715, "step": 8634 }, { "epoch": 0.36530163296387175, "grad_norm": 0.48247143626213074, "learning_rate": 0.001, "loss": 2.6857, "step": 8635 }, { "epoch": 0.3653439377273881, "grad_norm": 0.19231975078582764, "learning_rate": 0.001, "loss": 1.9427, "step": 8636 }, { "epoch": 0.36538624249090446, "grad_norm": 0.7569523453712463, "learning_rate": 0.001, "loss": 2.3201, "step": 8637 }, { "epoch": 0.36542854725442087, "grad_norm": 0.27033358812332153, "learning_rate": 0.001, "loss": 2.4324, "step": 8638 }, { "epoch": 0.3654708520179372, "grad_norm": 0.18519775569438934, "learning_rate": 0.001, "loss": 1.9692, "step": 8639 }, { "epoch": 0.3655131567814536, "grad_norm": 0.25175315141677856, "learning_rate": 0.001, "loss": 3.5475, "step": 8640 }, { "epoch": 0.36555546154497, "grad_norm": 0.18655212223529816, "learning_rate": 0.001, "loss": 1.7191, "step": 8641 }, { "epoch": 0.36559776630848634, "grad_norm": 0.21611282229423523, "learning_rate": 0.001, "loss": 2.2513, "step": 8642 }, { "epoch": 0.3656400710720027, "grad_norm": 0.21771200001239777, "learning_rate": 0.001, "loss": 1.6885, "step": 8643 }, { "epoch": 0.3656823758355191, "grad_norm": 0.19719097018241882, "learning_rate": 0.001, "loss": 2.629, "step": 8644 }, { "epoch": 0.36572468059903546, "grad_norm": 11.389663696289062, "learning_rate": 0.001, "loss": 2.8977, "step": 8645 }, { "epoch": 0.3657669853625518, "grad_norm": 0.15473631024360657, "learning_rate": 0.001, "loss": 1.5634, "step": 8646 }, { "epoch": 0.3658092901260682, "grad_norm": 0.4173217713832855, "learning_rate": 0.001, "loss": 2.0252, "step": 8647 }, { "epoch": 0.3658515948895846, "grad_norm": 1.9161365032196045, "learning_rate": 0.001, "loss": 3.082, "step": 8648 }, { "epoch": 0.36589389965310093, "grad_norm": 0.1819329559803009, "learning_rate": 0.001, "loss": 2.5534, "step": 8649 }, { "epoch": 0.3659362044166173, "grad_norm": 0.2493087202310562, "learning_rate": 0.001, "loss": 2.2305, "step": 8650 }, { "epoch": 0.3659785091801337, "grad_norm": 0.20480546355247498, "learning_rate": 0.001, "loss": 2.044, "step": 8651 }, { "epoch": 0.36602081394365005, "grad_norm": 0.22163717448711395, "learning_rate": 0.001, "loss": 1.9366, "step": 8652 }, { "epoch": 0.3660631187071664, "grad_norm": 0.187017560005188, "learning_rate": 0.001, "loss": 1.992, "step": 8653 }, { "epoch": 0.3661054234706828, "grad_norm": 0.21020790934562683, "learning_rate": 0.001, "loss": 2.3122, "step": 8654 }, { "epoch": 0.36614772823419917, "grad_norm": 0.19258715212345123, "learning_rate": 0.001, "loss": 3.0791, "step": 8655 }, { "epoch": 0.3661900329977155, "grad_norm": 0.18852315843105316, "learning_rate": 0.001, "loss": 2.7449, "step": 8656 }, { "epoch": 0.36623233776123193, "grad_norm": 0.16643263399600983, "learning_rate": 0.001, "loss": 1.8381, "step": 8657 }, { "epoch": 0.3662746425247483, "grad_norm": 0.17222201824188232, "learning_rate": 0.001, "loss": 1.7572, "step": 8658 }, { "epoch": 0.36631694728826464, "grad_norm": 0.1717625856399536, "learning_rate": 0.001, "loss": 2.1745, "step": 8659 }, { "epoch": 0.36635925205178105, "grad_norm": 0.22495992481708527, "learning_rate": 0.001, "loss": 2.179, "step": 8660 }, { "epoch": 0.3664015568152974, "grad_norm": 2.0256707668304443, "learning_rate": 0.001, "loss": 3.8565, "step": 8661 }, { "epoch": 0.36644386157881376, "grad_norm": 0.20122158527374268, "learning_rate": 0.001, "loss": 2.0525, "step": 8662 }, { "epoch": 0.36648616634233017, "grad_norm": 0.2111087143421173, "learning_rate": 0.001, "loss": 2.4875, "step": 8663 }, { "epoch": 0.3665284711058465, "grad_norm": 0.6038394570350647, "learning_rate": 0.001, "loss": 2.5007, "step": 8664 }, { "epoch": 0.3665707758693629, "grad_norm": 0.18492920696735382, "learning_rate": 0.001, "loss": 1.9992, "step": 8665 }, { "epoch": 0.3666130806328793, "grad_norm": 0.39475682377815247, "learning_rate": 0.001, "loss": 3.2232, "step": 8666 }, { "epoch": 0.36665538539639564, "grad_norm": 0.2003537118434906, "learning_rate": 0.001, "loss": 1.8457, "step": 8667 }, { "epoch": 0.366697690159912, "grad_norm": 0.20579084753990173, "learning_rate": 0.001, "loss": 2.3453, "step": 8668 }, { "epoch": 0.3667399949234284, "grad_norm": 0.3103180527687073, "learning_rate": 0.001, "loss": 2.771, "step": 8669 }, { "epoch": 0.36678229968694476, "grad_norm": 0.18858268857002258, "learning_rate": 0.001, "loss": 1.9894, "step": 8670 }, { "epoch": 0.3668246044504611, "grad_norm": 0.2262255698442459, "learning_rate": 0.001, "loss": 1.9428, "step": 8671 }, { "epoch": 0.3668669092139775, "grad_norm": 0.4273422062397003, "learning_rate": 0.001, "loss": 3.7103, "step": 8672 }, { "epoch": 0.3669092139774939, "grad_norm": 0.22894766926765442, "learning_rate": 0.001, "loss": 2.0784, "step": 8673 }, { "epoch": 0.36695151874101023, "grad_norm": 0.248250812292099, "learning_rate": 0.001, "loss": 1.7906, "step": 8674 }, { "epoch": 0.3669938235045266, "grad_norm": 0.307745099067688, "learning_rate": 0.001, "loss": 3.2343, "step": 8675 }, { "epoch": 0.367036128268043, "grad_norm": 0.21414296329021454, "learning_rate": 0.001, "loss": 3.1725, "step": 8676 }, { "epoch": 0.36707843303155935, "grad_norm": 1.1623830795288086, "learning_rate": 0.001, "loss": 1.9302, "step": 8677 }, { "epoch": 0.3671207377950757, "grad_norm": 0.218984916806221, "learning_rate": 0.001, "loss": 2.4448, "step": 8678 }, { "epoch": 0.3671630425585921, "grad_norm": 0.24998073279857635, "learning_rate": 0.001, "loss": 2.1159, "step": 8679 }, { "epoch": 0.36720534732210847, "grad_norm": 0.19620297849178314, "learning_rate": 0.001, "loss": 2.264, "step": 8680 }, { "epoch": 0.3672476520856248, "grad_norm": 0.1893426477909088, "learning_rate": 0.001, "loss": 1.677, "step": 8681 }, { "epoch": 0.36728995684914123, "grad_norm": 0.22591541707515717, "learning_rate": 0.001, "loss": 3.0901, "step": 8682 }, { "epoch": 0.3673322616126576, "grad_norm": 0.3786305785179138, "learning_rate": 0.001, "loss": 3.0062, "step": 8683 }, { "epoch": 0.36737456637617394, "grad_norm": 5.867971897125244, "learning_rate": 0.001, "loss": 2.209, "step": 8684 }, { "epoch": 0.36741687113969035, "grad_norm": 44.805763244628906, "learning_rate": 0.001, "loss": 2.1827, "step": 8685 }, { "epoch": 0.3674591759032067, "grad_norm": 0.18080832064151764, "learning_rate": 0.001, "loss": 1.9619, "step": 8686 }, { "epoch": 0.36750148066672306, "grad_norm": 0.22980724275112152, "learning_rate": 0.001, "loss": 2.2155, "step": 8687 }, { "epoch": 0.36754378543023947, "grad_norm": 0.2389446198940277, "learning_rate": 0.001, "loss": 2.045, "step": 8688 }, { "epoch": 0.3675860901937558, "grad_norm": 0.22380129992961884, "learning_rate": 0.001, "loss": 2.716, "step": 8689 }, { "epoch": 0.3676283949572722, "grad_norm": 0.3434034585952759, "learning_rate": 0.001, "loss": 3.1835, "step": 8690 }, { "epoch": 0.3676706997207886, "grad_norm": 0.237090066075325, "learning_rate": 0.001, "loss": 1.9271, "step": 8691 }, { "epoch": 0.36771300448430494, "grad_norm": 0.22618965804576874, "learning_rate": 0.001, "loss": 2.1982, "step": 8692 }, { "epoch": 0.3677553092478213, "grad_norm": 0.1993749737739563, "learning_rate": 0.001, "loss": 4.0091, "step": 8693 }, { "epoch": 0.3677976140113377, "grad_norm": 2.4615116119384766, "learning_rate": 0.001, "loss": 2.3653, "step": 8694 }, { "epoch": 0.36783991877485406, "grad_norm": 0.5453082323074341, "learning_rate": 0.001, "loss": 3.2241, "step": 8695 }, { "epoch": 0.3678822235383704, "grad_norm": 0.22832508385181427, "learning_rate": 0.001, "loss": 2.1284, "step": 8696 }, { "epoch": 0.36792452830188677, "grad_norm": 0.22793933749198914, "learning_rate": 0.001, "loss": 2.7905, "step": 8697 }, { "epoch": 0.3679668330654032, "grad_norm": 0.2116621732711792, "learning_rate": 0.001, "loss": 3.0399, "step": 8698 }, { "epoch": 0.36800913782891953, "grad_norm": 0.26181337237358093, "learning_rate": 0.001, "loss": 1.625, "step": 8699 }, { "epoch": 0.3680514425924359, "grad_norm": 0.228001207113266, "learning_rate": 0.001, "loss": 3.5122, "step": 8700 }, { "epoch": 0.3680937473559523, "grad_norm": 0.1740439236164093, "learning_rate": 0.001, "loss": 2.094, "step": 8701 }, { "epoch": 0.36813605211946865, "grad_norm": 0.19469280540943146, "learning_rate": 0.001, "loss": 2.2975, "step": 8702 }, { "epoch": 0.368178356882985, "grad_norm": 0.1703725904226303, "learning_rate": 0.001, "loss": 3.327, "step": 8703 }, { "epoch": 0.3682206616465014, "grad_norm": 0.20927943289279938, "learning_rate": 0.001, "loss": 1.7857, "step": 8704 }, { "epoch": 0.36826296641001777, "grad_norm": 0.15202440321445465, "learning_rate": 0.001, "loss": 2.1899, "step": 8705 }, { "epoch": 0.3683052711735341, "grad_norm": 0.17597658932209015, "learning_rate": 0.001, "loss": 1.7741, "step": 8706 }, { "epoch": 0.36834757593705053, "grad_norm": 0.16804081201553345, "learning_rate": 0.001, "loss": 2.0558, "step": 8707 }, { "epoch": 0.3683898807005669, "grad_norm": 0.18276292085647583, "learning_rate": 0.001, "loss": 2.2216, "step": 8708 }, { "epoch": 0.36843218546408324, "grad_norm": 0.2148251235485077, "learning_rate": 0.001, "loss": 1.9215, "step": 8709 }, { "epoch": 0.36847449022759965, "grad_norm": 0.9029409289360046, "learning_rate": 0.001, "loss": 1.6041, "step": 8710 }, { "epoch": 0.368516794991116, "grad_norm": 0.19321629405021667, "learning_rate": 0.001, "loss": 1.8104, "step": 8711 }, { "epoch": 0.36855909975463236, "grad_norm": 0.177422434091568, "learning_rate": 0.001, "loss": 1.8768, "step": 8712 }, { "epoch": 0.36860140451814877, "grad_norm": 0.2311866581439972, "learning_rate": 0.001, "loss": 2.567, "step": 8713 }, { "epoch": 0.3686437092816651, "grad_norm": 0.2138138860464096, "learning_rate": 0.001, "loss": 2.0323, "step": 8714 }, { "epoch": 0.3686860140451815, "grad_norm": 0.17242108285427094, "learning_rate": 0.001, "loss": 2.3389, "step": 8715 }, { "epoch": 0.3687283188086979, "grad_norm": 3.463766098022461, "learning_rate": 0.001, "loss": 1.9939, "step": 8716 }, { "epoch": 0.36877062357221424, "grad_norm": 0.29998794198036194, "learning_rate": 0.001, "loss": 2.0793, "step": 8717 }, { "epoch": 0.3688129283357306, "grad_norm": 0.22262133657932281, "learning_rate": 0.001, "loss": 2.7188, "step": 8718 }, { "epoch": 0.36885523309924695, "grad_norm": 0.28944888710975647, "learning_rate": 0.001, "loss": 1.7407, "step": 8719 }, { "epoch": 0.36889753786276336, "grad_norm": 0.20619124174118042, "learning_rate": 0.001, "loss": 1.8345, "step": 8720 }, { "epoch": 0.3689398426262797, "grad_norm": 0.1661635786294937, "learning_rate": 0.001, "loss": 1.586, "step": 8721 }, { "epoch": 0.36898214738979607, "grad_norm": 0.222077414393425, "learning_rate": 0.001, "loss": 3.3912, "step": 8722 }, { "epoch": 0.3690244521533125, "grad_norm": 0.19446569681167603, "learning_rate": 0.001, "loss": 2.499, "step": 8723 }, { "epoch": 0.36906675691682883, "grad_norm": 1.546207070350647, "learning_rate": 0.001, "loss": 1.9862, "step": 8724 }, { "epoch": 0.3691090616803452, "grad_norm": 0.22588209807872772, "learning_rate": 0.001, "loss": 3.0398, "step": 8725 }, { "epoch": 0.3691513664438616, "grad_norm": 0.22056500613689423, "learning_rate": 0.001, "loss": 2.7724, "step": 8726 }, { "epoch": 0.36919367120737795, "grad_norm": 10.859556198120117, "learning_rate": 0.001, "loss": 1.9629, "step": 8727 }, { "epoch": 0.3692359759708943, "grad_norm": 0.22937330603599548, "learning_rate": 0.001, "loss": 2.0512, "step": 8728 }, { "epoch": 0.3692782807344107, "grad_norm": 2.563246965408325, "learning_rate": 0.001, "loss": 2.3327, "step": 8729 }, { "epoch": 0.36932058549792707, "grad_norm": 0.31085848808288574, "learning_rate": 0.001, "loss": 2.0146, "step": 8730 }, { "epoch": 0.3693628902614434, "grad_norm": 0.22053919732570648, "learning_rate": 0.001, "loss": 1.9071, "step": 8731 }, { "epoch": 0.36940519502495983, "grad_norm": 0.3156310021877289, "learning_rate": 0.001, "loss": 1.7969, "step": 8732 }, { "epoch": 0.3694474997884762, "grad_norm": 0.24050398170948029, "learning_rate": 0.001, "loss": 2.6647, "step": 8733 }, { "epoch": 0.36948980455199254, "grad_norm": 0.18906262516975403, "learning_rate": 0.001, "loss": 1.7419, "step": 8734 }, { "epoch": 0.36953210931550895, "grad_norm": 0.17383328080177307, "learning_rate": 0.001, "loss": 2.0027, "step": 8735 }, { "epoch": 0.3695744140790253, "grad_norm": 0.19866390526294708, "learning_rate": 0.001, "loss": 2.1751, "step": 8736 }, { "epoch": 0.36961671884254166, "grad_norm": 0.2835475504398346, "learning_rate": 0.001, "loss": 1.9432, "step": 8737 }, { "epoch": 0.36965902360605807, "grad_norm": 0.9217908978462219, "learning_rate": 0.001, "loss": 1.6859, "step": 8738 }, { "epoch": 0.3697013283695744, "grad_norm": 0.2083401083946228, "learning_rate": 0.001, "loss": 2.3959, "step": 8739 }, { "epoch": 0.3697436331330908, "grad_norm": 0.19344541430473328, "learning_rate": 0.001, "loss": 1.929, "step": 8740 }, { "epoch": 0.36978593789660713, "grad_norm": 0.2091798186302185, "learning_rate": 0.001, "loss": 1.8886, "step": 8741 }, { "epoch": 0.36982824266012354, "grad_norm": 0.2184278666973114, "learning_rate": 0.001, "loss": 2.0926, "step": 8742 }, { "epoch": 0.3698705474236399, "grad_norm": 0.2625698149204254, "learning_rate": 0.001, "loss": 2.3432, "step": 8743 }, { "epoch": 0.36991285218715625, "grad_norm": 0.2139061689376831, "learning_rate": 0.001, "loss": 2.6096, "step": 8744 }, { "epoch": 0.36995515695067266, "grad_norm": 0.18271711468696594, "learning_rate": 0.001, "loss": 2.6203, "step": 8745 }, { "epoch": 0.369997461714189, "grad_norm": 0.16942210495471954, "learning_rate": 0.001, "loss": 2.8279, "step": 8746 }, { "epoch": 0.37003976647770537, "grad_norm": 0.19270247220993042, "learning_rate": 0.001, "loss": 1.8951, "step": 8747 }, { "epoch": 0.3700820712412218, "grad_norm": 0.19946153461933136, "learning_rate": 0.001, "loss": 2.6455, "step": 8748 }, { "epoch": 0.37012437600473813, "grad_norm": 0.21664415299892426, "learning_rate": 0.001, "loss": 2.2819, "step": 8749 }, { "epoch": 0.3701666807682545, "grad_norm": 0.19284196197986603, "learning_rate": 0.001, "loss": 2.4914, "step": 8750 }, { "epoch": 0.3702089855317709, "grad_norm": 0.18668252229690552, "learning_rate": 0.001, "loss": 2.0608, "step": 8751 }, { "epoch": 0.37025129029528725, "grad_norm": 1.1074740886688232, "learning_rate": 0.001, "loss": 2.6925, "step": 8752 }, { "epoch": 0.3702935950588036, "grad_norm": 0.20908206701278687, "learning_rate": 0.001, "loss": 2.147, "step": 8753 }, { "epoch": 0.37033589982232, "grad_norm": 0.19691090285778046, "learning_rate": 0.001, "loss": 2.0874, "step": 8754 }, { "epoch": 0.37037820458583637, "grad_norm": 0.25301340222358704, "learning_rate": 0.001, "loss": 2.1522, "step": 8755 }, { "epoch": 0.3704205093493527, "grad_norm": 0.1993921846151352, "learning_rate": 0.001, "loss": 2.3689, "step": 8756 }, { "epoch": 0.37046281411286913, "grad_norm": 0.5521764755249023, "learning_rate": 0.001, "loss": 2.1797, "step": 8757 }, { "epoch": 0.3705051188763855, "grad_norm": 0.19061268866062164, "learning_rate": 0.001, "loss": 2.8067, "step": 8758 }, { "epoch": 0.37054742363990184, "grad_norm": 0.2150365263223648, "learning_rate": 0.001, "loss": 1.9172, "step": 8759 }, { "epoch": 0.37058972840341825, "grad_norm": 0.18955853581428528, "learning_rate": 0.001, "loss": 2.3987, "step": 8760 }, { "epoch": 0.3706320331669346, "grad_norm": 0.18300773203372955, "learning_rate": 0.001, "loss": 2.123, "step": 8761 }, { "epoch": 0.37067433793045096, "grad_norm": 0.1935848891735077, "learning_rate": 0.001, "loss": 2.1818, "step": 8762 }, { "epoch": 0.3707166426939673, "grad_norm": 0.20884737372398376, "learning_rate": 0.001, "loss": 2.2408, "step": 8763 }, { "epoch": 0.3707589474574837, "grad_norm": 0.915891170501709, "learning_rate": 0.001, "loss": 2.0213, "step": 8764 }, { "epoch": 0.3708012522210001, "grad_norm": 18.33778190612793, "learning_rate": 0.001, "loss": 2.0037, "step": 8765 }, { "epoch": 0.37084355698451643, "grad_norm": 1.2620832920074463, "learning_rate": 0.001, "loss": 2.4491, "step": 8766 }, { "epoch": 0.37088586174803284, "grad_norm": 0.18721136450767517, "learning_rate": 0.001, "loss": 1.9352, "step": 8767 }, { "epoch": 0.3709281665115492, "grad_norm": 0.1763981133699417, "learning_rate": 0.001, "loss": 2.1145, "step": 8768 }, { "epoch": 0.37097047127506555, "grad_norm": 0.20833328366279602, "learning_rate": 0.001, "loss": 2.5553, "step": 8769 }, { "epoch": 0.37101277603858196, "grad_norm": 0.37538349628448486, "learning_rate": 0.001, "loss": 1.9705, "step": 8770 }, { "epoch": 0.3710550808020983, "grad_norm": 0.1779465526342392, "learning_rate": 0.001, "loss": 1.8611, "step": 8771 }, { "epoch": 0.37109738556561467, "grad_norm": 0.18236777186393738, "learning_rate": 0.001, "loss": 1.5463, "step": 8772 }, { "epoch": 0.3711396903291311, "grad_norm": 0.3060573935508728, "learning_rate": 0.001, "loss": 2.9194, "step": 8773 }, { "epoch": 0.37118199509264743, "grad_norm": 0.20852862298488617, "learning_rate": 0.001, "loss": 2.5415, "step": 8774 }, { "epoch": 0.3712242998561638, "grad_norm": 0.48245781660079956, "learning_rate": 0.001, "loss": 1.5005, "step": 8775 }, { "epoch": 0.3712666046196802, "grad_norm": 0.31515905261039734, "learning_rate": 0.001, "loss": 3.4687, "step": 8776 }, { "epoch": 0.37130890938319655, "grad_norm": 0.2978445887565613, "learning_rate": 0.001, "loss": 2.7744, "step": 8777 }, { "epoch": 0.3713512141467129, "grad_norm": 0.18172970414161682, "learning_rate": 0.001, "loss": 1.9334, "step": 8778 }, { "epoch": 0.3713935189102293, "grad_norm": 0.18180030584335327, "learning_rate": 0.001, "loss": 1.6701, "step": 8779 }, { "epoch": 0.37143582367374567, "grad_norm": 0.19042405486106873, "learning_rate": 0.001, "loss": 2.0225, "step": 8780 }, { "epoch": 0.371478128437262, "grad_norm": 0.1896408349275589, "learning_rate": 0.001, "loss": 1.7211, "step": 8781 }, { "epoch": 0.37152043320077843, "grad_norm": 0.21694940328598022, "learning_rate": 0.001, "loss": 2.1461, "step": 8782 }, { "epoch": 0.3715627379642948, "grad_norm": 0.2233157753944397, "learning_rate": 0.001, "loss": 2.1678, "step": 8783 }, { "epoch": 0.37160504272781114, "grad_norm": 0.2073362171649933, "learning_rate": 0.001, "loss": 1.9724, "step": 8784 }, { "epoch": 0.37164734749132755, "grad_norm": 0.3852141201496124, "learning_rate": 0.001, "loss": 2.5631, "step": 8785 }, { "epoch": 0.3716896522548439, "grad_norm": 0.18070602416992188, "learning_rate": 0.001, "loss": 1.6071, "step": 8786 }, { "epoch": 0.37173195701836026, "grad_norm": 0.16367939114570618, "learning_rate": 0.001, "loss": 2.2925, "step": 8787 }, { "epoch": 0.3717742617818766, "grad_norm": 0.1724989265203476, "learning_rate": 0.001, "loss": 3.1093, "step": 8788 }, { "epoch": 0.371816566545393, "grad_norm": 0.16434898972511292, "learning_rate": 0.001, "loss": 2.4629, "step": 8789 }, { "epoch": 0.3718588713089094, "grad_norm": 0.2104768455028534, "learning_rate": 0.001, "loss": 2.8752, "step": 8790 }, { "epoch": 0.37190117607242573, "grad_norm": 0.20089322328567505, "learning_rate": 0.001, "loss": 2.4023, "step": 8791 }, { "epoch": 0.37194348083594214, "grad_norm": 0.2389354258775711, "learning_rate": 0.001, "loss": 2.2687, "step": 8792 }, { "epoch": 0.3719857855994585, "grad_norm": 0.47742322087287903, "learning_rate": 0.001, "loss": 1.9946, "step": 8793 }, { "epoch": 0.37202809036297485, "grad_norm": 0.1540321409702301, "learning_rate": 0.001, "loss": 1.7913, "step": 8794 }, { "epoch": 0.37207039512649126, "grad_norm": 0.17690719664096832, "learning_rate": 0.001, "loss": 1.9047, "step": 8795 }, { "epoch": 0.3721126998900076, "grad_norm": 0.23479756712913513, "learning_rate": 0.001, "loss": 3.3659, "step": 8796 }, { "epoch": 0.37215500465352397, "grad_norm": 0.89059978723526, "learning_rate": 0.001, "loss": 3.5161, "step": 8797 }, { "epoch": 0.3721973094170404, "grad_norm": 5.015807628631592, "learning_rate": 0.001, "loss": 2.5261, "step": 8798 }, { "epoch": 0.37223961418055673, "grad_norm": 0.21967126429080963, "learning_rate": 0.001, "loss": 2.07, "step": 8799 }, { "epoch": 0.3722819189440731, "grad_norm": 0.24731267988681793, "learning_rate": 0.001, "loss": 1.9614, "step": 8800 }, { "epoch": 0.3723242237075895, "grad_norm": 0.20447765290737152, "learning_rate": 0.001, "loss": 1.8957, "step": 8801 }, { "epoch": 0.37236652847110585, "grad_norm": 0.2456209510564804, "learning_rate": 0.001, "loss": 1.9937, "step": 8802 }, { "epoch": 0.3724088332346222, "grad_norm": 0.22434556484222412, "learning_rate": 0.001, "loss": 1.5603, "step": 8803 }, { "epoch": 0.3724511379981386, "grad_norm": 0.45701247453689575, "learning_rate": 0.001, "loss": 2.2196, "step": 8804 }, { "epoch": 0.37249344276165497, "grad_norm": 0.2302723377943039, "learning_rate": 0.001, "loss": 1.9974, "step": 8805 }, { "epoch": 0.3725357475251713, "grad_norm": 0.28021976351737976, "learning_rate": 0.001, "loss": 3.7377, "step": 8806 }, { "epoch": 0.37257805228868773, "grad_norm": 0.8244222402572632, "learning_rate": 0.001, "loss": 2.1969, "step": 8807 }, { "epoch": 0.3726203570522041, "grad_norm": 0.20261800289154053, "learning_rate": 0.001, "loss": 3.6426, "step": 8808 }, { "epoch": 0.37266266181572044, "grad_norm": 0.19825433194637299, "learning_rate": 0.001, "loss": 1.9425, "step": 8809 }, { "epoch": 0.3727049665792368, "grad_norm": 1.1609710454940796, "learning_rate": 0.001, "loss": 2.1458, "step": 8810 }, { "epoch": 0.3727472713427532, "grad_norm": 0.5567569136619568, "learning_rate": 0.001, "loss": 1.7775, "step": 8811 }, { "epoch": 0.37278957610626956, "grad_norm": 0.2688750922679901, "learning_rate": 0.001, "loss": 2.358, "step": 8812 }, { "epoch": 0.3728318808697859, "grad_norm": 0.33358681201934814, "learning_rate": 0.001, "loss": 2.8446, "step": 8813 }, { "epoch": 0.3728741856333023, "grad_norm": 0.20433993637561798, "learning_rate": 0.001, "loss": 2.3362, "step": 8814 }, { "epoch": 0.3729164903968187, "grad_norm": 0.19342532753944397, "learning_rate": 0.001, "loss": 2.8176, "step": 8815 }, { "epoch": 0.37295879516033503, "grad_norm": 0.23268815875053406, "learning_rate": 0.001, "loss": 1.9077, "step": 8816 }, { "epoch": 0.37300109992385144, "grad_norm": 0.23792216181755066, "learning_rate": 0.001, "loss": 2.4612, "step": 8817 }, { "epoch": 0.3730434046873678, "grad_norm": 179.4039764404297, "learning_rate": 0.001, "loss": 3.1672, "step": 8818 }, { "epoch": 0.37308570945088415, "grad_norm": 0.29416170716285706, "learning_rate": 0.001, "loss": 2.0936, "step": 8819 }, { "epoch": 0.37312801421440056, "grad_norm": 0.2061353474855423, "learning_rate": 0.001, "loss": 3.3697, "step": 8820 }, { "epoch": 0.3731703189779169, "grad_norm": 0.22392816841602325, "learning_rate": 0.001, "loss": 1.8806, "step": 8821 }, { "epoch": 0.37321262374143327, "grad_norm": 0.5261951088905334, "learning_rate": 0.001, "loss": 2.6985, "step": 8822 }, { "epoch": 0.3732549285049497, "grad_norm": 0.9621092081069946, "learning_rate": 0.001, "loss": 2.2439, "step": 8823 }, { "epoch": 0.37329723326846603, "grad_norm": 0.20049139857292175, "learning_rate": 0.001, "loss": 2.2026, "step": 8824 }, { "epoch": 0.3733395380319824, "grad_norm": 1.3945945501327515, "learning_rate": 0.001, "loss": 2.6688, "step": 8825 }, { "epoch": 0.3733818427954988, "grad_norm": 0.28076982498168945, "learning_rate": 0.001, "loss": 2.9429, "step": 8826 }, { "epoch": 0.37342414755901515, "grad_norm": 0.45473167300224304, "learning_rate": 0.001, "loss": 2.7468, "step": 8827 }, { "epoch": 0.3734664523225315, "grad_norm": 1.4571759700775146, "learning_rate": 0.001, "loss": 1.6316, "step": 8828 }, { "epoch": 0.3735087570860479, "grad_norm": 1.2502529621124268, "learning_rate": 0.001, "loss": 3.0181, "step": 8829 }, { "epoch": 0.37355106184956427, "grad_norm": 0.26605066657066345, "learning_rate": 0.001, "loss": 2.5819, "step": 8830 }, { "epoch": 0.3735933666130806, "grad_norm": 0.4681839644908905, "learning_rate": 0.001, "loss": 2.5359, "step": 8831 }, { "epoch": 0.373635671376597, "grad_norm": 0.24733616411685944, "learning_rate": 0.001, "loss": 3.0378, "step": 8832 }, { "epoch": 0.3736779761401134, "grad_norm": 0.20918242633342743, "learning_rate": 0.001, "loss": 2.6718, "step": 8833 }, { "epoch": 0.37372028090362974, "grad_norm": 0.7425049543380737, "learning_rate": 0.001, "loss": 1.8763, "step": 8834 }, { "epoch": 0.3737625856671461, "grad_norm": 0.1861983686685562, "learning_rate": 0.001, "loss": 2.6386, "step": 8835 }, { "epoch": 0.3738048904306625, "grad_norm": 0.22254261374473572, "learning_rate": 0.001, "loss": 2.9136, "step": 8836 }, { "epoch": 0.37384719519417886, "grad_norm": 0.22637084126472473, "learning_rate": 0.001, "loss": 2.5875, "step": 8837 }, { "epoch": 0.3738894999576952, "grad_norm": 0.2065073400735855, "learning_rate": 0.001, "loss": 2.6558, "step": 8838 }, { "epoch": 0.3739318047212116, "grad_norm": 0.18488679826259613, "learning_rate": 0.001, "loss": 2.429, "step": 8839 }, { "epoch": 0.373974109484728, "grad_norm": 0.22156591713428497, "learning_rate": 0.001, "loss": 1.8497, "step": 8840 }, { "epoch": 0.37401641424824433, "grad_norm": 0.1818188726902008, "learning_rate": 0.001, "loss": 2.1918, "step": 8841 }, { "epoch": 0.37405871901176074, "grad_norm": 0.1845444142818451, "learning_rate": 0.001, "loss": 1.7402, "step": 8842 }, { "epoch": 0.3741010237752771, "grad_norm": 0.17082853615283966, "learning_rate": 0.001, "loss": 1.9857, "step": 8843 }, { "epoch": 0.37414332853879345, "grad_norm": 0.1782820224761963, "learning_rate": 0.001, "loss": 2.0385, "step": 8844 }, { "epoch": 0.37418563330230986, "grad_norm": 1.1810798645019531, "learning_rate": 0.001, "loss": 2.1752, "step": 8845 }, { "epoch": 0.3742279380658262, "grad_norm": 0.19883759319782257, "learning_rate": 0.001, "loss": 2.543, "step": 8846 }, { "epoch": 0.37427024282934257, "grad_norm": 0.47685497999191284, "learning_rate": 0.001, "loss": 2.2209, "step": 8847 }, { "epoch": 0.374312547592859, "grad_norm": 0.6518121957778931, "learning_rate": 0.001, "loss": 2.8904, "step": 8848 }, { "epoch": 0.37435485235637533, "grad_norm": 0.1763034462928772, "learning_rate": 0.001, "loss": 2.5656, "step": 8849 }, { "epoch": 0.3743971571198917, "grad_norm": 13.006291389465332, "learning_rate": 0.001, "loss": 1.7881, "step": 8850 }, { "epoch": 0.3744394618834081, "grad_norm": 0.25351783633232117, "learning_rate": 0.001, "loss": 2.2294, "step": 8851 }, { "epoch": 0.37448176664692445, "grad_norm": 10.508666038513184, "learning_rate": 0.001, "loss": 2.6075, "step": 8852 }, { "epoch": 0.3745240714104408, "grad_norm": 0.23762452602386475, "learning_rate": 0.001, "loss": 2.0531, "step": 8853 }, { "epoch": 0.37456637617395716, "grad_norm": 0.21458260715007782, "learning_rate": 0.001, "loss": 1.6651, "step": 8854 }, { "epoch": 0.37460868093747357, "grad_norm": 0.19314922392368317, "learning_rate": 0.001, "loss": 2.2319, "step": 8855 }, { "epoch": 0.3746509857009899, "grad_norm": 0.21474038064479828, "learning_rate": 0.001, "loss": 1.684, "step": 8856 }, { "epoch": 0.3746932904645063, "grad_norm": 0.9535976648330688, "learning_rate": 0.001, "loss": 2.1219, "step": 8857 }, { "epoch": 0.3747355952280227, "grad_norm": 0.3602615296840668, "learning_rate": 0.001, "loss": 2.2903, "step": 8858 }, { "epoch": 0.37477789999153904, "grad_norm": 0.8784622550010681, "learning_rate": 0.001, "loss": 2.7722, "step": 8859 }, { "epoch": 0.3748202047550554, "grad_norm": 0.21422924101352692, "learning_rate": 0.001, "loss": 2.2356, "step": 8860 }, { "epoch": 0.3748625095185718, "grad_norm": 0.3022599220275879, "learning_rate": 0.001, "loss": 2.8531, "step": 8861 }, { "epoch": 0.37490481428208816, "grad_norm": 0.2399420291185379, "learning_rate": 0.001, "loss": 2.1652, "step": 8862 }, { "epoch": 0.3749471190456045, "grad_norm": 0.24375419318675995, "learning_rate": 0.001, "loss": 1.7758, "step": 8863 }, { "epoch": 0.3749894238091209, "grad_norm": 0.2895803451538086, "learning_rate": 0.001, "loss": 1.9264, "step": 8864 }, { "epoch": 0.3750317285726373, "grad_norm": 0.1909046471118927, "learning_rate": 0.001, "loss": 2.2194, "step": 8865 }, { "epoch": 0.37507403333615363, "grad_norm": 0.2433093786239624, "learning_rate": 0.001, "loss": 2.556, "step": 8866 }, { "epoch": 0.37511633809967004, "grad_norm": 0.20016717910766602, "learning_rate": 0.001, "loss": 1.7043, "step": 8867 }, { "epoch": 0.3751586428631864, "grad_norm": 0.25574326515197754, "learning_rate": 0.001, "loss": 2.0388, "step": 8868 }, { "epoch": 0.37520094762670275, "grad_norm": 0.2795771062374115, "learning_rate": 0.001, "loss": 1.9324, "step": 8869 }, { "epoch": 0.37524325239021916, "grad_norm": 0.26534348726272583, "learning_rate": 0.001, "loss": 2.8957, "step": 8870 }, { "epoch": 0.3752855571537355, "grad_norm": 0.2329631894826889, "learning_rate": 0.001, "loss": 2.7308, "step": 8871 }, { "epoch": 0.37532786191725187, "grad_norm": 0.21924613416194916, "learning_rate": 0.001, "loss": 2.1649, "step": 8872 }, { "epoch": 0.3753701666807683, "grad_norm": 1.5583733320236206, "learning_rate": 0.001, "loss": 2.8169, "step": 8873 }, { "epoch": 0.37541247144428463, "grad_norm": 0.2518419027328491, "learning_rate": 0.001, "loss": 2.3901, "step": 8874 }, { "epoch": 0.375454776207801, "grad_norm": 0.21858522295951843, "learning_rate": 0.001, "loss": 2.339, "step": 8875 }, { "epoch": 0.37549708097131734, "grad_norm": 0.4082587957382202, "learning_rate": 0.001, "loss": 2.3889, "step": 8876 }, { "epoch": 0.37553938573483375, "grad_norm": 0.20391131937503815, "learning_rate": 0.001, "loss": 3.0828, "step": 8877 }, { "epoch": 0.3755816904983501, "grad_norm": 0.21462643146514893, "learning_rate": 0.001, "loss": 2.1877, "step": 8878 }, { "epoch": 0.37562399526186646, "grad_norm": 0.2579302489757538, "learning_rate": 0.001, "loss": 2.0629, "step": 8879 }, { "epoch": 0.37566630002538287, "grad_norm": 0.17081435024738312, "learning_rate": 0.001, "loss": 2.2482, "step": 8880 }, { "epoch": 0.3757086047888992, "grad_norm": 0.21092277765274048, "learning_rate": 0.001, "loss": 2.7822, "step": 8881 }, { "epoch": 0.3757509095524156, "grad_norm": 0.5651504397392273, "learning_rate": 0.001, "loss": 2.5021, "step": 8882 }, { "epoch": 0.375793214315932, "grad_norm": 0.23301394283771515, "learning_rate": 0.001, "loss": 2.0126, "step": 8883 }, { "epoch": 0.37583551907944834, "grad_norm": 2.354231595993042, "learning_rate": 0.001, "loss": 2.4646, "step": 8884 }, { "epoch": 0.3758778238429647, "grad_norm": 2.867743492126465, "learning_rate": 0.001, "loss": 2.1439, "step": 8885 }, { "epoch": 0.3759201286064811, "grad_norm": 1.451896071434021, "learning_rate": 0.001, "loss": 2.6435, "step": 8886 }, { "epoch": 0.37596243336999746, "grad_norm": 0.2039012610912323, "learning_rate": 0.001, "loss": 2.3304, "step": 8887 }, { "epoch": 0.3760047381335138, "grad_norm": 0.4442058801651001, "learning_rate": 0.001, "loss": 2.9109, "step": 8888 }, { "epoch": 0.3760470428970302, "grad_norm": 0.2005094289779663, "learning_rate": 0.001, "loss": 3.5619, "step": 8889 }, { "epoch": 0.3760893476605466, "grad_norm": 3.216552495956421, "learning_rate": 0.001, "loss": 2.8794, "step": 8890 }, { "epoch": 0.37613165242406293, "grad_norm": 0.23342199623584747, "learning_rate": 0.001, "loss": 2.6378, "step": 8891 }, { "epoch": 0.37617395718757934, "grad_norm": 0.23157887160778046, "learning_rate": 0.001, "loss": 2.9445, "step": 8892 }, { "epoch": 0.3762162619510957, "grad_norm": 0.2705920338630676, "learning_rate": 0.001, "loss": 2.2957, "step": 8893 }, { "epoch": 0.37625856671461205, "grad_norm": 0.28149649500846863, "learning_rate": 0.001, "loss": 1.7105, "step": 8894 }, { "epoch": 0.37630087147812846, "grad_norm": 1.1188766956329346, "learning_rate": 0.001, "loss": 2.2677, "step": 8895 }, { "epoch": 0.3763431762416448, "grad_norm": 0.8402219414710999, "learning_rate": 0.001, "loss": 3.4824, "step": 8896 }, { "epoch": 0.37638548100516117, "grad_norm": 0.2254517376422882, "learning_rate": 0.001, "loss": 2.4484, "step": 8897 }, { "epoch": 0.3764277857686776, "grad_norm": 0.18616388738155365, "learning_rate": 0.001, "loss": 1.8755, "step": 8898 }, { "epoch": 0.37647009053219394, "grad_norm": 0.16204172372817993, "learning_rate": 0.001, "loss": 2.3035, "step": 8899 }, { "epoch": 0.3765123952957103, "grad_norm": 0.655873715877533, "learning_rate": 0.001, "loss": 2.7063, "step": 8900 }, { "epoch": 0.37655470005922664, "grad_norm": 0.31336209177970886, "learning_rate": 0.001, "loss": 2.2015, "step": 8901 }, { "epoch": 0.37659700482274305, "grad_norm": 0.21138517558574677, "learning_rate": 0.001, "loss": 2.0808, "step": 8902 }, { "epoch": 0.3766393095862594, "grad_norm": 0.32678109407424927, "learning_rate": 0.001, "loss": 3.5257, "step": 8903 }, { "epoch": 0.37668161434977576, "grad_norm": 0.17282311618328094, "learning_rate": 0.001, "loss": 1.6933, "step": 8904 }, { "epoch": 0.37672391911329217, "grad_norm": 0.19820040464401245, "learning_rate": 0.001, "loss": 1.8809, "step": 8905 }, { "epoch": 0.3767662238768085, "grad_norm": 0.23431384563446045, "learning_rate": 0.001, "loss": 1.9761, "step": 8906 }, { "epoch": 0.3768085286403249, "grad_norm": 0.275423139333725, "learning_rate": 0.001, "loss": 2.9449, "step": 8907 }, { "epoch": 0.3768508334038413, "grad_norm": 0.4699144661426544, "learning_rate": 0.001, "loss": 1.8988, "step": 8908 }, { "epoch": 0.37689313816735764, "grad_norm": 0.17784275114536285, "learning_rate": 0.001, "loss": 2.1766, "step": 8909 }, { "epoch": 0.376935442930874, "grad_norm": 0.25600793957710266, "learning_rate": 0.001, "loss": 2.2169, "step": 8910 }, { "epoch": 0.3769777476943904, "grad_norm": 0.19872885942459106, "learning_rate": 0.001, "loss": 1.8248, "step": 8911 }, { "epoch": 0.37702005245790676, "grad_norm": 0.6820826530456543, "learning_rate": 0.001, "loss": 2.1578, "step": 8912 }, { "epoch": 0.3770623572214231, "grad_norm": 0.17017528414726257, "learning_rate": 0.001, "loss": 3.678, "step": 8913 }, { "epoch": 0.3771046619849395, "grad_norm": 0.1723102331161499, "learning_rate": 0.001, "loss": 3.2953, "step": 8914 }, { "epoch": 0.3771469667484559, "grad_norm": 0.17692267894744873, "learning_rate": 0.001, "loss": 2.2835, "step": 8915 }, { "epoch": 0.37718927151197224, "grad_norm": 0.18386520445346832, "learning_rate": 0.001, "loss": 1.7949, "step": 8916 }, { "epoch": 0.37723157627548864, "grad_norm": 0.17489704489707947, "learning_rate": 0.001, "loss": 2.8251, "step": 8917 }, { "epoch": 0.377273881039005, "grad_norm": 0.18046604096889496, "learning_rate": 0.001, "loss": 1.9294, "step": 8918 }, { "epoch": 0.37731618580252135, "grad_norm": 0.19288641214370728, "learning_rate": 0.001, "loss": 2.1967, "step": 8919 }, { "epoch": 0.37735849056603776, "grad_norm": 20.963592529296875, "learning_rate": 0.001, "loss": 1.7886, "step": 8920 }, { "epoch": 0.3774007953295541, "grad_norm": 0.20476023852825165, "learning_rate": 0.001, "loss": 2.0257, "step": 8921 }, { "epoch": 0.37744310009307047, "grad_norm": 0.17102450132369995, "learning_rate": 0.001, "loss": 3.2415, "step": 8922 }, { "epoch": 0.3774854048565868, "grad_norm": 0.24836905300617218, "learning_rate": 0.001, "loss": 1.6505, "step": 8923 }, { "epoch": 0.37752770962010324, "grad_norm": 0.2269943505525589, "learning_rate": 0.001, "loss": 1.9437, "step": 8924 }, { "epoch": 0.3775700143836196, "grad_norm": 0.18366478383541107, "learning_rate": 0.001, "loss": 1.9654, "step": 8925 }, { "epoch": 0.37761231914713594, "grad_norm": 0.19033034145832062, "learning_rate": 0.001, "loss": 1.7305, "step": 8926 }, { "epoch": 0.37765462391065235, "grad_norm": 0.16039331257343292, "learning_rate": 0.001, "loss": 2.4644, "step": 8927 }, { "epoch": 0.3776969286741687, "grad_norm": 0.15948708355426788, "learning_rate": 0.001, "loss": 3.5417, "step": 8928 }, { "epoch": 0.37773923343768506, "grad_norm": 0.16082708537578583, "learning_rate": 0.001, "loss": 2.1491, "step": 8929 }, { "epoch": 0.37778153820120147, "grad_norm": 0.21579056978225708, "learning_rate": 0.001, "loss": 2.8686, "step": 8930 }, { "epoch": 0.3778238429647178, "grad_norm": 0.18891681730747223, "learning_rate": 0.001, "loss": 2.1063, "step": 8931 }, { "epoch": 0.3778661477282342, "grad_norm": 0.2987871468067169, "learning_rate": 0.001, "loss": 2.1846, "step": 8932 }, { "epoch": 0.3779084524917506, "grad_norm": 0.19955849647521973, "learning_rate": 0.001, "loss": 3.7915, "step": 8933 }, { "epoch": 0.37795075725526694, "grad_norm": 0.4888962507247925, "learning_rate": 0.001, "loss": 1.5937, "step": 8934 }, { "epoch": 0.3779930620187833, "grad_norm": 0.23087970912456512, "learning_rate": 0.001, "loss": 2.2461, "step": 8935 }, { "epoch": 0.3780353667822997, "grad_norm": 0.48626619577407837, "learning_rate": 0.001, "loss": 2.6076, "step": 8936 }, { "epoch": 0.37807767154581606, "grad_norm": 0.21073201298713684, "learning_rate": 0.001, "loss": 1.6726, "step": 8937 }, { "epoch": 0.3781199763093324, "grad_norm": 0.166831374168396, "learning_rate": 0.001, "loss": 1.709, "step": 8938 }, { "epoch": 0.3781622810728488, "grad_norm": 0.19128407537937164, "learning_rate": 0.001, "loss": 2.1098, "step": 8939 }, { "epoch": 0.3782045858363652, "grad_norm": 0.19305849075317383, "learning_rate": 0.001, "loss": 1.8852, "step": 8940 }, { "epoch": 0.37824689059988154, "grad_norm": 0.18565382063388824, "learning_rate": 0.001, "loss": 2.1476, "step": 8941 }, { "epoch": 0.37828919536339795, "grad_norm": 0.25521063804626465, "learning_rate": 0.001, "loss": 2.7811, "step": 8942 }, { "epoch": 0.3783315001269143, "grad_norm": 0.49443572759628296, "learning_rate": 0.001, "loss": 1.8314, "step": 8943 }, { "epoch": 0.37837380489043065, "grad_norm": 0.24259395897388458, "learning_rate": 0.001, "loss": 1.8752, "step": 8944 }, { "epoch": 0.378416109653947, "grad_norm": 0.21471655368804932, "learning_rate": 0.001, "loss": 1.8431, "step": 8945 }, { "epoch": 0.3784584144174634, "grad_norm": 0.34845396876335144, "learning_rate": 0.001, "loss": 2.63, "step": 8946 }, { "epoch": 0.37850071918097977, "grad_norm": 0.4037695825099945, "learning_rate": 0.001, "loss": 2.641, "step": 8947 }, { "epoch": 0.3785430239444961, "grad_norm": 0.1841561496257782, "learning_rate": 0.001, "loss": 2.0631, "step": 8948 }, { "epoch": 0.37858532870801254, "grad_norm": 0.5983163714408875, "learning_rate": 0.001, "loss": 2.3617, "step": 8949 }, { "epoch": 0.3786276334715289, "grad_norm": 0.1743028163909912, "learning_rate": 0.001, "loss": 2.2375, "step": 8950 }, { "epoch": 0.37866993823504524, "grad_norm": 0.24144525825977325, "learning_rate": 0.001, "loss": 1.9207, "step": 8951 }, { "epoch": 0.37871224299856165, "grad_norm": 0.19788867235183716, "learning_rate": 0.001, "loss": 2.8912, "step": 8952 }, { "epoch": 0.378754547762078, "grad_norm": 0.2542145550251007, "learning_rate": 0.001, "loss": 1.792, "step": 8953 }, { "epoch": 0.37879685252559436, "grad_norm": 0.19751232862472534, "learning_rate": 0.001, "loss": 2.1323, "step": 8954 }, { "epoch": 0.3788391572891108, "grad_norm": 0.3706015944480896, "learning_rate": 0.001, "loss": 2.3522, "step": 8955 }, { "epoch": 0.3788814620526271, "grad_norm": 0.20275190472602844, "learning_rate": 0.001, "loss": 1.8339, "step": 8956 }, { "epoch": 0.3789237668161435, "grad_norm": 0.18066799640655518, "learning_rate": 0.001, "loss": 1.6604, "step": 8957 }, { "epoch": 0.3789660715796599, "grad_norm": 0.20664532482624054, "learning_rate": 0.001, "loss": 2.1049, "step": 8958 }, { "epoch": 0.37900837634317625, "grad_norm": 0.15460434556007385, "learning_rate": 0.001, "loss": 2.0694, "step": 8959 }, { "epoch": 0.3790506811066926, "grad_norm": 0.177540585398674, "learning_rate": 0.001, "loss": 2.5301, "step": 8960 }, { "epoch": 0.379092985870209, "grad_norm": 0.21939094364643097, "learning_rate": 0.001, "loss": 2.1656, "step": 8961 }, { "epoch": 0.37913529063372536, "grad_norm": 0.17154373228549957, "learning_rate": 0.001, "loss": 3.5702, "step": 8962 }, { "epoch": 0.3791775953972417, "grad_norm": 1.2005940675735474, "learning_rate": 0.001, "loss": 3.8547, "step": 8963 }, { "epoch": 0.3792199001607581, "grad_norm": 0.20012064278125763, "learning_rate": 0.001, "loss": 2.1547, "step": 8964 }, { "epoch": 0.3792622049242745, "grad_norm": 0.2005445957183838, "learning_rate": 0.001, "loss": 1.9539, "step": 8965 }, { "epoch": 0.37930450968779084, "grad_norm": 0.17973119020462036, "learning_rate": 0.001, "loss": 2.0098, "step": 8966 }, { "epoch": 0.3793468144513072, "grad_norm": 0.16720591485500336, "learning_rate": 0.001, "loss": 1.3429, "step": 8967 }, { "epoch": 0.3793891192148236, "grad_norm": 1.0415222644805908, "learning_rate": 0.001, "loss": 2.3825, "step": 8968 }, { "epoch": 0.37943142397833995, "grad_norm": 0.19271378219127655, "learning_rate": 0.001, "loss": 1.7486, "step": 8969 }, { "epoch": 0.3794737287418563, "grad_norm": 0.26010555028915405, "learning_rate": 0.001, "loss": 2.988, "step": 8970 }, { "epoch": 0.3795160335053727, "grad_norm": 1.41874361038208, "learning_rate": 0.001, "loss": 2.4324, "step": 8971 }, { "epoch": 0.3795583382688891, "grad_norm": 0.18716134130954742, "learning_rate": 0.001, "loss": 2.2786, "step": 8972 }, { "epoch": 0.3796006430324054, "grad_norm": 0.21621747314929962, "learning_rate": 0.001, "loss": 2.0158, "step": 8973 }, { "epoch": 0.37964294779592184, "grad_norm": 0.21100404858589172, "learning_rate": 0.001, "loss": 2.6913, "step": 8974 }, { "epoch": 0.3796852525594382, "grad_norm": 0.23763734102249146, "learning_rate": 0.001, "loss": 2.5987, "step": 8975 }, { "epoch": 0.37972755732295455, "grad_norm": 0.24332080781459808, "learning_rate": 0.001, "loss": 2.6919, "step": 8976 }, { "epoch": 0.37976986208647096, "grad_norm": 0.27417856454849243, "learning_rate": 0.001, "loss": 2.2423, "step": 8977 }, { "epoch": 0.3798121668499873, "grad_norm": 0.19818872213363647, "learning_rate": 0.001, "loss": 2.0502, "step": 8978 }, { "epoch": 0.37985447161350366, "grad_norm": 0.16945703327655792, "learning_rate": 0.001, "loss": 2.848, "step": 8979 }, { "epoch": 0.3798967763770201, "grad_norm": 0.15949320793151855, "learning_rate": 0.001, "loss": 1.7612, "step": 8980 }, { "epoch": 0.3799390811405364, "grad_norm": 0.17047828435897827, "learning_rate": 0.001, "loss": 1.9685, "step": 8981 }, { "epoch": 0.3799813859040528, "grad_norm": 0.19749274849891663, "learning_rate": 0.001, "loss": 2.5775, "step": 8982 }, { "epoch": 0.3800236906675692, "grad_norm": 0.19193464517593384, "learning_rate": 0.001, "loss": 2.1396, "step": 8983 }, { "epoch": 0.38006599543108555, "grad_norm": 2.228489398956299, "learning_rate": 0.001, "loss": 2.439, "step": 8984 }, { "epoch": 0.3801083001946019, "grad_norm": 0.18555498123168945, "learning_rate": 0.001, "loss": 2.0835, "step": 8985 }, { "epoch": 0.3801506049581183, "grad_norm": 0.4127380847930908, "learning_rate": 0.001, "loss": 1.7705, "step": 8986 }, { "epoch": 0.38019290972163466, "grad_norm": 0.2189057320356369, "learning_rate": 0.001, "loss": 2.3658, "step": 8987 }, { "epoch": 0.380235214485151, "grad_norm": 0.8270202279090881, "learning_rate": 0.001, "loss": 2.0476, "step": 8988 }, { "epoch": 0.3802775192486674, "grad_norm": 0.2435636818408966, "learning_rate": 0.001, "loss": 2.236, "step": 8989 }, { "epoch": 0.3803198240121838, "grad_norm": 0.40932199358940125, "learning_rate": 0.001, "loss": 2.5558, "step": 8990 }, { "epoch": 0.38036212877570014, "grad_norm": 0.5411979556083679, "learning_rate": 0.001, "loss": 2.7722, "step": 8991 }, { "epoch": 0.3804044335392165, "grad_norm": 0.28621330857276917, "learning_rate": 0.001, "loss": 1.9874, "step": 8992 }, { "epoch": 0.3804467383027329, "grad_norm": 0.20250999927520752, "learning_rate": 0.001, "loss": 2.4956, "step": 8993 }, { "epoch": 0.38048904306624926, "grad_norm": 0.7544026970863342, "learning_rate": 0.001, "loss": 2.3332, "step": 8994 }, { "epoch": 0.3805313478297656, "grad_norm": 0.20567587018013, "learning_rate": 0.001, "loss": 1.698, "step": 8995 }, { "epoch": 0.380573652593282, "grad_norm": 0.20252707600593567, "learning_rate": 0.001, "loss": 1.5407, "step": 8996 }, { "epoch": 0.3806159573567984, "grad_norm": 0.3609828054904938, "learning_rate": 0.001, "loss": 3.2147, "step": 8997 }, { "epoch": 0.3806582621203147, "grad_norm": 0.18369510769844055, "learning_rate": 0.001, "loss": 1.6158, "step": 8998 }, { "epoch": 0.38070056688383114, "grad_norm": 3.380438804626465, "learning_rate": 0.001, "loss": 2.0616, "step": 8999 }, { "epoch": 0.3807428716473475, "grad_norm": 0.17703545093536377, "learning_rate": 0.001, "loss": 2.9286, "step": 9000 }, { "epoch": 0.38078517641086385, "grad_norm": 0.601148247718811, "learning_rate": 0.001, "loss": 2.1041, "step": 9001 }, { "epoch": 0.38082748117438026, "grad_norm": 0.1904536634683609, "learning_rate": 0.001, "loss": 2.2669, "step": 9002 }, { "epoch": 0.3808697859378966, "grad_norm": 0.5615403056144714, "learning_rate": 0.001, "loss": 2.1864, "step": 9003 }, { "epoch": 0.38091209070141296, "grad_norm": 0.18499788641929626, "learning_rate": 0.001, "loss": 1.82, "step": 9004 }, { "epoch": 0.3809543954649294, "grad_norm": 0.22329087555408478, "learning_rate": 0.001, "loss": 1.9953, "step": 9005 }, { "epoch": 0.38099670022844573, "grad_norm": 0.21240036189556122, "learning_rate": 0.001, "loss": 2.3098, "step": 9006 }, { "epoch": 0.3810390049919621, "grad_norm": 0.171017587184906, "learning_rate": 0.001, "loss": 2.0382, "step": 9007 }, { "epoch": 0.3810813097554785, "grad_norm": 0.22148646414279938, "learning_rate": 0.001, "loss": 2.1392, "step": 9008 }, { "epoch": 0.38112361451899485, "grad_norm": 0.18616536259651184, "learning_rate": 0.001, "loss": 1.8662, "step": 9009 }, { "epoch": 0.3811659192825112, "grad_norm": 10.153366088867188, "learning_rate": 0.001, "loss": 2.767, "step": 9010 }, { "epoch": 0.38120822404602756, "grad_norm": 0.19627103209495544, "learning_rate": 0.001, "loss": 1.7014, "step": 9011 }, { "epoch": 0.38125052880954396, "grad_norm": 0.2045442759990692, "learning_rate": 0.001, "loss": 1.4853, "step": 9012 }, { "epoch": 0.3812928335730603, "grad_norm": 0.8567882180213928, "learning_rate": 0.001, "loss": 2.0106, "step": 9013 }, { "epoch": 0.3813351383365767, "grad_norm": 0.21957813203334808, "learning_rate": 0.001, "loss": 2.1993, "step": 9014 }, { "epoch": 0.3813774431000931, "grad_norm": 0.20184127986431122, "learning_rate": 0.001, "loss": 2.0607, "step": 9015 }, { "epoch": 0.38141974786360944, "grad_norm": 1.3871818780899048, "learning_rate": 0.001, "loss": 1.851, "step": 9016 }, { "epoch": 0.3814620526271258, "grad_norm": 0.26643404364585876, "learning_rate": 0.001, "loss": 2.1513, "step": 9017 }, { "epoch": 0.3815043573906422, "grad_norm": 0.3258124887943268, "learning_rate": 0.001, "loss": 2.6778, "step": 9018 }, { "epoch": 0.38154666215415856, "grad_norm": 0.2190716564655304, "learning_rate": 0.001, "loss": 2.1382, "step": 9019 }, { "epoch": 0.3815889669176749, "grad_norm": 0.21376149356365204, "learning_rate": 0.001, "loss": 1.9718, "step": 9020 }, { "epoch": 0.3816312716811913, "grad_norm": 0.22805331647396088, "learning_rate": 0.001, "loss": 2.3052, "step": 9021 }, { "epoch": 0.3816735764447077, "grad_norm": 0.18218421936035156, "learning_rate": 0.001, "loss": 2.7028, "step": 9022 }, { "epoch": 0.38171588120822403, "grad_norm": 0.19406476616859436, "learning_rate": 0.001, "loss": 2.589, "step": 9023 }, { "epoch": 0.38175818597174044, "grad_norm": 5.224491119384766, "learning_rate": 0.001, "loss": 2.0287, "step": 9024 }, { "epoch": 0.3818004907352568, "grad_norm": 0.2605280578136444, "learning_rate": 0.001, "loss": 2.3784, "step": 9025 }, { "epoch": 0.38184279549877315, "grad_norm": 2.6531484127044678, "learning_rate": 0.001, "loss": 2.6587, "step": 9026 }, { "epoch": 0.38188510026228956, "grad_norm": 0.19211426377296448, "learning_rate": 0.001, "loss": 2.5609, "step": 9027 }, { "epoch": 0.3819274050258059, "grad_norm": 0.31936317682266235, "learning_rate": 0.001, "loss": 2.2436, "step": 9028 }, { "epoch": 0.38196970978932226, "grad_norm": 0.27096110582351685, "learning_rate": 0.001, "loss": 2.763, "step": 9029 }, { "epoch": 0.3820120145528387, "grad_norm": 0.27728742361068726, "learning_rate": 0.001, "loss": 2.2142, "step": 9030 }, { "epoch": 0.38205431931635503, "grad_norm": 0.23881641030311584, "learning_rate": 0.001, "loss": 2.6463, "step": 9031 }, { "epoch": 0.3820966240798714, "grad_norm": 1.6963469982147217, "learning_rate": 0.001, "loss": 1.7634, "step": 9032 }, { "epoch": 0.3821389288433878, "grad_norm": 0.9256619215011597, "learning_rate": 0.001, "loss": 2.8096, "step": 9033 }, { "epoch": 0.38218123360690415, "grad_norm": 0.23679040372371674, "learning_rate": 0.001, "loss": 2.5447, "step": 9034 }, { "epoch": 0.3822235383704205, "grad_norm": 0.266876220703125, "learning_rate": 0.001, "loss": 2.1042, "step": 9035 }, { "epoch": 0.38226584313393686, "grad_norm": 0.47037845849990845, "learning_rate": 0.001, "loss": 4.1207, "step": 9036 }, { "epoch": 0.38230814789745327, "grad_norm": 0.3769329786300659, "learning_rate": 0.001, "loss": 3.3571, "step": 9037 }, { "epoch": 0.3823504526609696, "grad_norm": 1.5549075603485107, "learning_rate": 0.001, "loss": 3.0967, "step": 9038 }, { "epoch": 0.382392757424486, "grad_norm": 0.48421478271484375, "learning_rate": 0.001, "loss": 2.0338, "step": 9039 }, { "epoch": 0.3824350621880024, "grad_norm": 0.22943010926246643, "learning_rate": 0.001, "loss": 2.5297, "step": 9040 }, { "epoch": 0.38247736695151874, "grad_norm": 0.2692186236381531, "learning_rate": 0.001, "loss": 2.6332, "step": 9041 }, { "epoch": 0.3825196717150351, "grad_norm": 0.31615620851516724, "learning_rate": 0.001, "loss": 2.1124, "step": 9042 }, { "epoch": 0.3825619764785515, "grad_norm": 0.2741483449935913, "learning_rate": 0.001, "loss": 1.9714, "step": 9043 }, { "epoch": 0.38260428124206786, "grad_norm": 0.21158187091350555, "learning_rate": 0.001, "loss": 1.8752, "step": 9044 }, { "epoch": 0.3826465860055842, "grad_norm": 9.120562553405762, "learning_rate": 0.001, "loss": 3.4247, "step": 9045 }, { "epoch": 0.3826888907691006, "grad_norm": 0.3110532760620117, "learning_rate": 0.001, "loss": 2.6979, "step": 9046 }, { "epoch": 0.382731195532617, "grad_norm": 0.4028329849243164, "learning_rate": 0.001, "loss": 3.1487, "step": 9047 }, { "epoch": 0.38277350029613333, "grad_norm": 13.722456932067871, "learning_rate": 0.001, "loss": 3.2165, "step": 9048 }, { "epoch": 0.38281580505964974, "grad_norm": 0.2979806661605835, "learning_rate": 0.001, "loss": 1.7009, "step": 9049 }, { "epoch": 0.3828581098231661, "grad_norm": 0.2089688926935196, "learning_rate": 0.001, "loss": 1.9347, "step": 9050 }, { "epoch": 0.38290041458668245, "grad_norm": 0.19975943863391876, "learning_rate": 0.001, "loss": 2.757, "step": 9051 }, { "epoch": 0.38294271935019886, "grad_norm": 0.1705322414636612, "learning_rate": 0.001, "loss": 1.8747, "step": 9052 }, { "epoch": 0.3829850241137152, "grad_norm": 0.2010183334350586, "learning_rate": 0.001, "loss": 1.6273, "step": 9053 }, { "epoch": 0.38302732887723157, "grad_norm": 9.024285316467285, "learning_rate": 0.001, "loss": 1.99, "step": 9054 }, { "epoch": 0.383069633640748, "grad_norm": 0.34857797622680664, "learning_rate": 0.001, "loss": 2.5817, "step": 9055 }, { "epoch": 0.38311193840426433, "grad_norm": 0.5103548169136047, "learning_rate": 0.001, "loss": 2.021, "step": 9056 }, { "epoch": 0.3831542431677807, "grad_norm": 2.387510299682617, "learning_rate": 0.001, "loss": 2.2303, "step": 9057 }, { "epoch": 0.38319654793129704, "grad_norm": 0.2850176990032196, "learning_rate": 0.001, "loss": 2.4001, "step": 9058 }, { "epoch": 0.38323885269481345, "grad_norm": 0.6285756826400757, "learning_rate": 0.001, "loss": 1.8074, "step": 9059 }, { "epoch": 0.3832811574583298, "grad_norm": 0.5569390654563904, "learning_rate": 0.001, "loss": 2.4452, "step": 9060 }, { "epoch": 0.38332346222184616, "grad_norm": 5.331487655639648, "learning_rate": 0.001, "loss": 2.7975, "step": 9061 }, { "epoch": 0.38336576698536257, "grad_norm": 0.30871254205703735, "learning_rate": 0.001, "loss": 2.3949, "step": 9062 }, { "epoch": 0.3834080717488789, "grad_norm": 0.3623650074005127, "learning_rate": 0.001, "loss": 2.5391, "step": 9063 }, { "epoch": 0.3834503765123953, "grad_norm": 0.22876878082752228, "learning_rate": 0.001, "loss": 2.3655, "step": 9064 }, { "epoch": 0.3834926812759117, "grad_norm": 0.1963501125574112, "learning_rate": 0.001, "loss": 2.2311, "step": 9065 }, { "epoch": 0.38353498603942804, "grad_norm": 0.8837391138076782, "learning_rate": 0.001, "loss": 2.8312, "step": 9066 }, { "epoch": 0.3835772908029444, "grad_norm": 0.24529825150966644, "learning_rate": 0.001, "loss": 2.4636, "step": 9067 }, { "epoch": 0.3836195955664608, "grad_norm": 0.21412113308906555, "learning_rate": 0.001, "loss": 2.0917, "step": 9068 }, { "epoch": 0.38366190032997716, "grad_norm": 0.23029856383800507, "learning_rate": 0.001, "loss": 1.8907, "step": 9069 }, { "epoch": 0.3837042050934935, "grad_norm": 1.920670509338379, "learning_rate": 0.001, "loss": 2.3119, "step": 9070 }, { "epoch": 0.3837465098570099, "grad_norm": 0.18355774879455566, "learning_rate": 0.001, "loss": 2.456, "step": 9071 }, { "epoch": 0.3837888146205263, "grad_norm": 3.794325351715088, "learning_rate": 0.001, "loss": 2.9978, "step": 9072 }, { "epoch": 0.38383111938404263, "grad_norm": 0.41664114594459534, "learning_rate": 0.001, "loss": 3.2798, "step": 9073 }, { "epoch": 0.38387342414755904, "grad_norm": 5.11715030670166, "learning_rate": 0.001, "loss": 1.6814, "step": 9074 }, { "epoch": 0.3839157289110754, "grad_norm": 0.2116970419883728, "learning_rate": 0.001, "loss": 2.3122, "step": 9075 }, { "epoch": 0.38395803367459175, "grad_norm": 0.6905900239944458, "learning_rate": 0.001, "loss": 2.5548, "step": 9076 }, { "epoch": 0.38400033843810816, "grad_norm": 0.18607500195503235, "learning_rate": 0.001, "loss": 2.4136, "step": 9077 }, { "epoch": 0.3840426432016245, "grad_norm": 0.18247579038143158, "learning_rate": 0.001, "loss": 1.8635, "step": 9078 }, { "epoch": 0.38408494796514087, "grad_norm": 0.17803490161895752, "learning_rate": 0.001, "loss": 1.9766, "step": 9079 }, { "epoch": 0.3841272527286572, "grad_norm": 0.898413896560669, "learning_rate": 0.001, "loss": 2.7229, "step": 9080 }, { "epoch": 0.38416955749217363, "grad_norm": 0.25650283694267273, "learning_rate": 0.001, "loss": 2.6571, "step": 9081 }, { "epoch": 0.38421186225569, "grad_norm": 0.18835589289665222, "learning_rate": 0.001, "loss": 2.0706, "step": 9082 }, { "epoch": 0.38425416701920634, "grad_norm": 0.2816076874732971, "learning_rate": 0.001, "loss": 3.3339, "step": 9083 }, { "epoch": 0.38429647178272275, "grad_norm": 0.22398167848587036, "learning_rate": 0.001, "loss": 2.4135, "step": 9084 }, { "epoch": 0.3843387765462391, "grad_norm": 0.906223714351654, "learning_rate": 0.001, "loss": 2.5593, "step": 9085 }, { "epoch": 0.38438108130975546, "grad_norm": 0.3350967764854431, "learning_rate": 0.001, "loss": 2.6067, "step": 9086 }, { "epoch": 0.38442338607327187, "grad_norm": 0.21072322130203247, "learning_rate": 0.001, "loss": 2.4582, "step": 9087 }, { "epoch": 0.3844656908367882, "grad_norm": 0.26963114738464355, "learning_rate": 0.001, "loss": 2.1451, "step": 9088 }, { "epoch": 0.3845079956003046, "grad_norm": 0.1972511261701584, "learning_rate": 0.001, "loss": 2.2775, "step": 9089 }, { "epoch": 0.384550300363821, "grad_norm": 0.19861550629138947, "learning_rate": 0.001, "loss": 2.1255, "step": 9090 }, { "epoch": 0.38459260512733734, "grad_norm": 0.22689582407474518, "learning_rate": 0.001, "loss": 1.9905, "step": 9091 }, { "epoch": 0.3846349098908537, "grad_norm": 0.5211851596832275, "learning_rate": 0.001, "loss": 1.9154, "step": 9092 }, { "epoch": 0.3846772146543701, "grad_norm": 0.20935285091400146, "learning_rate": 0.001, "loss": 2.0418, "step": 9093 }, { "epoch": 0.38471951941788646, "grad_norm": 0.20221330225467682, "learning_rate": 0.001, "loss": 1.9986, "step": 9094 }, { "epoch": 0.3847618241814028, "grad_norm": 0.22481901943683624, "learning_rate": 0.001, "loss": 2.7436, "step": 9095 }, { "epoch": 0.3848041289449192, "grad_norm": 0.20071500539779663, "learning_rate": 0.001, "loss": 2.0842, "step": 9096 }, { "epoch": 0.3848464337084356, "grad_norm": 1.3057656288146973, "learning_rate": 0.001, "loss": 2.072, "step": 9097 }, { "epoch": 0.38488873847195193, "grad_norm": 0.1865609586238861, "learning_rate": 0.001, "loss": 1.8298, "step": 9098 }, { "epoch": 0.38493104323546834, "grad_norm": 0.3449401557445526, "learning_rate": 0.001, "loss": 3.089, "step": 9099 }, { "epoch": 0.3849733479989847, "grad_norm": 2.8522613048553467, "learning_rate": 0.001, "loss": 2.6214, "step": 9100 }, { "epoch": 0.38501565276250105, "grad_norm": 4.267495632171631, "learning_rate": 0.001, "loss": 2.3361, "step": 9101 }, { "epoch": 0.3850579575260174, "grad_norm": 0.6872550249099731, "learning_rate": 0.001, "loss": 2.6345, "step": 9102 }, { "epoch": 0.3851002622895338, "grad_norm": 0.21168558299541473, "learning_rate": 0.001, "loss": 2.5256, "step": 9103 }, { "epoch": 0.38514256705305017, "grad_norm": 0.2972700297832489, "learning_rate": 0.001, "loss": 3.0594, "step": 9104 }, { "epoch": 0.3851848718165665, "grad_norm": 0.42669767141342163, "learning_rate": 0.001, "loss": 2.4855, "step": 9105 }, { "epoch": 0.38522717658008293, "grad_norm": 4.5662126541137695, "learning_rate": 0.001, "loss": 3.0319, "step": 9106 }, { "epoch": 0.3852694813435993, "grad_norm": 3.4174654483795166, "learning_rate": 0.001, "loss": 1.9545, "step": 9107 }, { "epoch": 0.38531178610711564, "grad_norm": 0.5192535519599915, "learning_rate": 0.001, "loss": 2.3336, "step": 9108 }, { "epoch": 0.38535409087063205, "grad_norm": 1.6883811950683594, "learning_rate": 0.001, "loss": 2.6045, "step": 9109 }, { "epoch": 0.3853963956341484, "grad_norm": 0.24284732341766357, "learning_rate": 0.001, "loss": 2.2217, "step": 9110 }, { "epoch": 0.38543870039766476, "grad_norm": 0.2053961157798767, "learning_rate": 0.001, "loss": 2.6769, "step": 9111 }, { "epoch": 0.38548100516118117, "grad_norm": 0.32398176193237305, "learning_rate": 0.001, "loss": 2.1218, "step": 9112 }, { "epoch": 0.3855233099246975, "grad_norm": 0.1831725686788559, "learning_rate": 0.001, "loss": 1.6788, "step": 9113 }, { "epoch": 0.3855656146882139, "grad_norm": 0.4239247739315033, "learning_rate": 0.001, "loss": 3.2968, "step": 9114 }, { "epoch": 0.3856079194517303, "grad_norm": 0.20434336364269257, "learning_rate": 0.001, "loss": 1.5334, "step": 9115 }, { "epoch": 0.38565022421524664, "grad_norm": 0.18700295686721802, "learning_rate": 0.001, "loss": 2.1835, "step": 9116 }, { "epoch": 0.385692528978763, "grad_norm": 0.33566123247146606, "learning_rate": 0.001, "loss": 2.4534, "step": 9117 }, { "epoch": 0.3857348337422794, "grad_norm": 0.4300113022327423, "learning_rate": 0.001, "loss": 2.7559, "step": 9118 }, { "epoch": 0.38577713850579576, "grad_norm": 0.21322444081306458, "learning_rate": 0.001, "loss": 1.8912, "step": 9119 }, { "epoch": 0.3858194432693121, "grad_norm": 0.25216782093048096, "learning_rate": 0.001, "loss": 2.3039, "step": 9120 }, { "epoch": 0.3858617480328285, "grad_norm": 0.3035780191421509, "learning_rate": 0.001, "loss": 2.1976, "step": 9121 }, { "epoch": 0.3859040527963449, "grad_norm": 0.23805847764015198, "learning_rate": 0.001, "loss": 2.2747, "step": 9122 }, { "epoch": 0.38594635755986123, "grad_norm": 4.429523944854736, "learning_rate": 0.001, "loss": 3.2502, "step": 9123 }, { "epoch": 0.3859886623233776, "grad_norm": 0.2643257677555084, "learning_rate": 0.001, "loss": 3.0433, "step": 9124 }, { "epoch": 0.386030967086894, "grad_norm": 0.1765228807926178, "learning_rate": 0.001, "loss": 1.8067, "step": 9125 }, { "epoch": 0.38607327185041035, "grad_norm": 2.101315975189209, "learning_rate": 0.001, "loss": 2.885, "step": 9126 }, { "epoch": 0.3861155766139267, "grad_norm": 0.20153525471687317, "learning_rate": 0.001, "loss": 1.8812, "step": 9127 }, { "epoch": 0.3861578813774431, "grad_norm": 0.2016594409942627, "learning_rate": 0.001, "loss": 1.6079, "step": 9128 }, { "epoch": 0.38620018614095947, "grad_norm": 0.23051926493644714, "learning_rate": 0.001, "loss": 2.3102, "step": 9129 }, { "epoch": 0.3862424909044758, "grad_norm": 0.20287853479385376, "learning_rate": 0.001, "loss": 2.5548, "step": 9130 }, { "epoch": 0.38628479566799223, "grad_norm": 0.2215650975704193, "learning_rate": 0.001, "loss": 3.1022, "step": 9131 }, { "epoch": 0.3863271004315086, "grad_norm": 14.681644439697266, "learning_rate": 0.001, "loss": 2.5631, "step": 9132 }, { "epoch": 0.38636940519502494, "grad_norm": 0.21271252632141113, "learning_rate": 0.001, "loss": 1.8983, "step": 9133 }, { "epoch": 0.38641170995854135, "grad_norm": 0.5988287329673767, "learning_rate": 0.001, "loss": 2.0374, "step": 9134 }, { "epoch": 0.3864540147220577, "grad_norm": 0.5086121559143066, "learning_rate": 0.001, "loss": 2.1648, "step": 9135 }, { "epoch": 0.38649631948557406, "grad_norm": 0.28043192625045776, "learning_rate": 0.001, "loss": 2.6912, "step": 9136 }, { "epoch": 0.38653862424909047, "grad_norm": 1.1043363809585571, "learning_rate": 0.001, "loss": 2.9766, "step": 9137 }, { "epoch": 0.3865809290126068, "grad_norm": 0.42361801862716675, "learning_rate": 0.001, "loss": 2.2055, "step": 9138 }, { "epoch": 0.3866232337761232, "grad_norm": 0.7368301749229431, "learning_rate": 0.001, "loss": 3.197, "step": 9139 }, { "epoch": 0.3866655385396396, "grad_norm": 0.4298476278781891, "learning_rate": 0.001, "loss": 2.3723, "step": 9140 }, { "epoch": 0.38670784330315594, "grad_norm": 0.20373845100402832, "learning_rate": 0.001, "loss": 2.2679, "step": 9141 }, { "epoch": 0.3867501480666723, "grad_norm": 0.18816867470741272, "learning_rate": 0.001, "loss": 2.195, "step": 9142 }, { "epoch": 0.3867924528301887, "grad_norm": 0.24059052765369415, "learning_rate": 0.001, "loss": 2.0657, "step": 9143 }, { "epoch": 0.38683475759370506, "grad_norm": 0.19671255350112915, "learning_rate": 0.001, "loss": 3.1763, "step": 9144 }, { "epoch": 0.3868770623572214, "grad_norm": 0.5776639580726624, "learning_rate": 0.001, "loss": 1.9861, "step": 9145 }, { "epoch": 0.3869193671207378, "grad_norm": 0.8698708415031433, "learning_rate": 0.001, "loss": 2.8534, "step": 9146 }, { "epoch": 0.3869616718842542, "grad_norm": 4.098544597625732, "learning_rate": 0.001, "loss": 1.9432, "step": 9147 }, { "epoch": 0.38700397664777053, "grad_norm": 0.20377150177955627, "learning_rate": 0.001, "loss": 2.981, "step": 9148 }, { "epoch": 0.3870462814112869, "grad_norm": 0.17152920365333557, "learning_rate": 0.001, "loss": 1.976, "step": 9149 }, { "epoch": 0.3870885861748033, "grad_norm": 0.19490063190460205, "learning_rate": 0.001, "loss": 2.7471, "step": 9150 }, { "epoch": 0.38713089093831965, "grad_norm": 0.21407170593738556, "learning_rate": 0.001, "loss": 2.155, "step": 9151 }, { "epoch": 0.387173195701836, "grad_norm": 2.212458610534668, "learning_rate": 0.001, "loss": 1.773, "step": 9152 }, { "epoch": 0.3872155004653524, "grad_norm": 0.15391570329666138, "learning_rate": 0.001, "loss": 2.4875, "step": 9153 }, { "epoch": 0.38725780522886877, "grad_norm": 0.15746274590492249, "learning_rate": 0.001, "loss": 2.0196, "step": 9154 }, { "epoch": 0.3873001099923851, "grad_norm": 0.2026894986629486, "learning_rate": 0.001, "loss": 2.7989, "step": 9155 }, { "epoch": 0.38734241475590153, "grad_norm": 0.1813724935054779, "learning_rate": 0.001, "loss": 2.718, "step": 9156 }, { "epoch": 0.3873847195194179, "grad_norm": 0.17108535766601562, "learning_rate": 0.001, "loss": 1.7214, "step": 9157 }, { "epoch": 0.38742702428293424, "grad_norm": 0.1782073825597763, "learning_rate": 0.001, "loss": 2.4506, "step": 9158 }, { "epoch": 0.38746932904645065, "grad_norm": 0.46579709649086, "learning_rate": 0.001, "loss": 1.7336, "step": 9159 }, { "epoch": 0.387511633809967, "grad_norm": 0.19454286992549896, "learning_rate": 0.001, "loss": 1.7805, "step": 9160 }, { "epoch": 0.38755393857348336, "grad_norm": 0.18374571204185486, "learning_rate": 0.001, "loss": 1.6501, "step": 9161 }, { "epoch": 0.38759624333699977, "grad_norm": 0.23796077072620392, "learning_rate": 0.001, "loss": 2.6949, "step": 9162 }, { "epoch": 0.3876385481005161, "grad_norm": 0.3394874036312103, "learning_rate": 0.001, "loss": 3.0172, "step": 9163 }, { "epoch": 0.3876808528640325, "grad_norm": 0.20546482503414154, "learning_rate": 0.001, "loss": 1.9047, "step": 9164 }, { "epoch": 0.3877231576275489, "grad_norm": 0.23391731083393097, "learning_rate": 0.001, "loss": 2.7176, "step": 9165 }, { "epoch": 0.38776546239106524, "grad_norm": 0.2764662206172943, "learning_rate": 0.001, "loss": 2.2979, "step": 9166 }, { "epoch": 0.3878077671545816, "grad_norm": 0.17653588950634003, "learning_rate": 0.001, "loss": 2.3402, "step": 9167 }, { "epoch": 0.387850071918098, "grad_norm": 0.4383090138435364, "learning_rate": 0.001, "loss": 2.0904, "step": 9168 }, { "epoch": 0.38789237668161436, "grad_norm": 0.20846538245677948, "learning_rate": 0.001, "loss": 3.2569, "step": 9169 }, { "epoch": 0.3879346814451307, "grad_norm": 0.6603441834449768, "learning_rate": 0.001, "loss": 2.309, "step": 9170 }, { "epoch": 0.38797698620864707, "grad_norm": 0.18814191222190857, "learning_rate": 0.001, "loss": 1.4408, "step": 9171 }, { "epoch": 0.3880192909721635, "grad_norm": 3.1111462116241455, "learning_rate": 0.001, "loss": 2.6072, "step": 9172 }, { "epoch": 0.38806159573567983, "grad_norm": 0.2014855295419693, "learning_rate": 0.001, "loss": 1.6812, "step": 9173 }, { "epoch": 0.3881039004991962, "grad_norm": 0.24328027665615082, "learning_rate": 0.001, "loss": 2.088, "step": 9174 }, { "epoch": 0.3881462052627126, "grad_norm": 0.2333800196647644, "learning_rate": 0.001, "loss": 2.3285, "step": 9175 }, { "epoch": 0.38818851002622895, "grad_norm": 0.21624669432640076, "learning_rate": 0.001, "loss": 2.7369, "step": 9176 }, { "epoch": 0.3882308147897453, "grad_norm": 0.21028754115104675, "learning_rate": 0.001, "loss": 2.5539, "step": 9177 }, { "epoch": 0.3882731195532617, "grad_norm": 0.26059690117836, "learning_rate": 0.001, "loss": 2.5894, "step": 9178 }, { "epoch": 0.38831542431677807, "grad_norm": 0.20622827112674713, "learning_rate": 0.001, "loss": 2.2577, "step": 9179 }, { "epoch": 0.3883577290802944, "grad_norm": 0.2566535472869873, "learning_rate": 0.001, "loss": 2.6067, "step": 9180 }, { "epoch": 0.38840003384381083, "grad_norm": 0.1649394929409027, "learning_rate": 0.001, "loss": 1.9004, "step": 9181 }, { "epoch": 0.3884423386073272, "grad_norm": 0.211260125041008, "learning_rate": 0.001, "loss": 2.304, "step": 9182 }, { "epoch": 0.38848464337084354, "grad_norm": 5.116180896759033, "learning_rate": 0.001, "loss": 2.774, "step": 9183 }, { "epoch": 0.38852694813435995, "grad_norm": 0.3661218583583832, "learning_rate": 0.001, "loss": 1.7813, "step": 9184 }, { "epoch": 0.3885692528978763, "grad_norm": 0.2053448110818863, "learning_rate": 0.001, "loss": 2.1452, "step": 9185 }, { "epoch": 0.38861155766139266, "grad_norm": 0.16580262780189514, "learning_rate": 0.001, "loss": 1.8883, "step": 9186 }, { "epoch": 0.38865386242490907, "grad_norm": 0.1831020563840866, "learning_rate": 0.001, "loss": 2.1391, "step": 9187 }, { "epoch": 0.3886961671884254, "grad_norm": 1.015125036239624, "learning_rate": 0.001, "loss": 1.8072, "step": 9188 }, { "epoch": 0.3887384719519418, "grad_norm": 0.22336459159851074, "learning_rate": 0.001, "loss": 2.1674, "step": 9189 }, { "epoch": 0.3887807767154582, "grad_norm": 0.20443737506866455, "learning_rate": 0.001, "loss": 2.1295, "step": 9190 }, { "epoch": 0.38882308147897454, "grad_norm": 0.2641700208187103, "learning_rate": 0.001, "loss": 1.5808, "step": 9191 }, { "epoch": 0.3888653862424909, "grad_norm": 0.5966519117355347, "learning_rate": 0.001, "loss": 1.7853, "step": 9192 }, { "epoch": 0.38890769100600725, "grad_norm": 0.6837995648384094, "learning_rate": 0.001, "loss": 2.0421, "step": 9193 }, { "epoch": 0.38894999576952366, "grad_norm": 0.20334653556346893, "learning_rate": 0.001, "loss": 1.995, "step": 9194 }, { "epoch": 0.38899230053304, "grad_norm": 1.7629520893096924, "learning_rate": 0.001, "loss": 1.6059, "step": 9195 }, { "epoch": 0.38903460529655637, "grad_norm": 0.4265093505382538, "learning_rate": 0.001, "loss": 2.4376, "step": 9196 }, { "epoch": 0.3890769100600728, "grad_norm": 0.47653642296791077, "learning_rate": 0.001, "loss": 2.4451, "step": 9197 }, { "epoch": 0.38911921482358913, "grad_norm": 0.43369871377944946, "learning_rate": 0.001, "loss": 1.8053, "step": 9198 }, { "epoch": 0.3891615195871055, "grad_norm": 0.19080866873264313, "learning_rate": 0.001, "loss": 1.9405, "step": 9199 }, { "epoch": 0.3892038243506219, "grad_norm": 0.22995217144489288, "learning_rate": 0.001, "loss": 2.9559, "step": 9200 }, { "epoch": 0.38924612911413825, "grad_norm": 0.21692658960819244, "learning_rate": 0.001, "loss": 3.0938, "step": 9201 }, { "epoch": 0.3892884338776546, "grad_norm": 0.6913968920707703, "learning_rate": 0.001, "loss": 2.2379, "step": 9202 }, { "epoch": 0.389330738641171, "grad_norm": 0.34403377771377563, "learning_rate": 0.001, "loss": 2.1844, "step": 9203 }, { "epoch": 0.38937304340468737, "grad_norm": 1.5377286672592163, "learning_rate": 0.001, "loss": 3.3703, "step": 9204 }, { "epoch": 0.3894153481682037, "grad_norm": 0.212476909160614, "learning_rate": 0.001, "loss": 2.0148, "step": 9205 }, { "epoch": 0.38945765293172013, "grad_norm": 0.8234764933586121, "learning_rate": 0.001, "loss": 2.8544, "step": 9206 }, { "epoch": 0.3894999576952365, "grad_norm": 0.1971426010131836, "learning_rate": 0.001, "loss": 2.0184, "step": 9207 }, { "epoch": 0.38954226245875284, "grad_norm": 0.23782867193222046, "learning_rate": 0.001, "loss": 2.1397, "step": 9208 }, { "epoch": 0.38958456722226925, "grad_norm": 0.7499661445617676, "learning_rate": 0.001, "loss": 1.8995, "step": 9209 }, { "epoch": 0.3896268719857856, "grad_norm": 15.050933837890625, "learning_rate": 0.001, "loss": 1.6647, "step": 9210 }, { "epoch": 0.38966917674930196, "grad_norm": 0.3999849259853363, "learning_rate": 0.001, "loss": 2.5121, "step": 9211 }, { "epoch": 0.38971148151281837, "grad_norm": 0.35282769799232483, "learning_rate": 0.001, "loss": 1.9899, "step": 9212 }, { "epoch": 0.3897537862763347, "grad_norm": 0.534419059753418, "learning_rate": 0.001, "loss": 2.1861, "step": 9213 }, { "epoch": 0.3897960910398511, "grad_norm": 0.2646177411079407, "learning_rate": 0.001, "loss": 2.0317, "step": 9214 }, { "epoch": 0.38983839580336743, "grad_norm": 0.20612642168998718, "learning_rate": 0.001, "loss": 1.835, "step": 9215 }, { "epoch": 0.38988070056688384, "grad_norm": 0.25167348980903625, "learning_rate": 0.001, "loss": 2.4175, "step": 9216 }, { "epoch": 0.3899230053304002, "grad_norm": 0.21403273940086365, "learning_rate": 0.001, "loss": 2.954, "step": 9217 }, { "epoch": 0.38996531009391655, "grad_norm": 0.17985409498214722, "learning_rate": 0.001, "loss": 1.8319, "step": 9218 }, { "epoch": 0.39000761485743296, "grad_norm": 2.4308388233184814, "learning_rate": 0.001, "loss": 2.0903, "step": 9219 }, { "epoch": 0.3900499196209493, "grad_norm": 0.2107587307691574, "learning_rate": 0.001, "loss": 2.7956, "step": 9220 }, { "epoch": 0.39009222438446567, "grad_norm": 1.1186227798461914, "learning_rate": 0.001, "loss": 2.1013, "step": 9221 }, { "epoch": 0.3901345291479821, "grad_norm": 0.2170724719762802, "learning_rate": 0.001, "loss": 2.1852, "step": 9222 }, { "epoch": 0.39017683391149843, "grad_norm": 0.1864577829837799, "learning_rate": 0.001, "loss": 1.4238, "step": 9223 }, { "epoch": 0.3902191386750148, "grad_norm": 0.23575836420059204, "learning_rate": 0.001, "loss": 2.6753, "step": 9224 }, { "epoch": 0.3902614434385312, "grad_norm": 0.21837228536605835, "learning_rate": 0.001, "loss": 2.4262, "step": 9225 }, { "epoch": 0.39030374820204755, "grad_norm": 4.643385410308838, "learning_rate": 0.001, "loss": 2.1914, "step": 9226 }, { "epoch": 0.3903460529655639, "grad_norm": 0.25850534439086914, "learning_rate": 0.001, "loss": 2.254, "step": 9227 }, { "epoch": 0.3903883577290803, "grad_norm": 0.4413652718067169, "learning_rate": 0.001, "loss": 4.012, "step": 9228 }, { "epoch": 0.39043066249259667, "grad_norm": 0.20437230169773102, "learning_rate": 0.001, "loss": 2.7935, "step": 9229 }, { "epoch": 0.390472967256113, "grad_norm": 0.2475084662437439, "learning_rate": 0.001, "loss": 2.2118, "step": 9230 }, { "epoch": 0.39051527201962943, "grad_norm": 0.7936202883720398, "learning_rate": 0.001, "loss": 2.0084, "step": 9231 }, { "epoch": 0.3905575767831458, "grad_norm": 0.21864911913871765, "learning_rate": 0.001, "loss": 2.0875, "step": 9232 }, { "epoch": 0.39059988154666214, "grad_norm": 0.25647038221359253, "learning_rate": 0.001, "loss": 2.1307, "step": 9233 }, { "epoch": 0.39064218631017855, "grad_norm": 1.2627050876617432, "learning_rate": 0.001, "loss": 1.862, "step": 9234 }, { "epoch": 0.3906844910736949, "grad_norm": 0.18225641548633575, "learning_rate": 0.001, "loss": 2.1382, "step": 9235 }, { "epoch": 0.39072679583721126, "grad_norm": 0.1784055531024933, "learning_rate": 0.001, "loss": 2.4409, "step": 9236 }, { "epoch": 0.3907691006007276, "grad_norm": 1.4898735284805298, "learning_rate": 0.001, "loss": 2.1492, "step": 9237 }, { "epoch": 0.390811405364244, "grad_norm": 5.816760540008545, "learning_rate": 0.001, "loss": 2.0967, "step": 9238 }, { "epoch": 0.3908537101277604, "grad_norm": 0.18301312625408173, "learning_rate": 0.001, "loss": 1.8928, "step": 9239 }, { "epoch": 0.39089601489127673, "grad_norm": 0.21848469972610474, "learning_rate": 0.001, "loss": 2.2617, "step": 9240 }, { "epoch": 0.39093831965479314, "grad_norm": 0.23523452877998352, "learning_rate": 0.001, "loss": 1.8523, "step": 9241 }, { "epoch": 0.3909806244183095, "grad_norm": 0.28411930799484253, "learning_rate": 0.001, "loss": 2.5036, "step": 9242 }, { "epoch": 0.39102292918182585, "grad_norm": 0.39887842535972595, "learning_rate": 0.001, "loss": 3.1872, "step": 9243 }, { "epoch": 0.39106523394534226, "grad_norm": 0.26545318961143494, "learning_rate": 0.001, "loss": 2.1511, "step": 9244 }, { "epoch": 0.3911075387088586, "grad_norm": 0.3136458992958069, "learning_rate": 0.001, "loss": 2.8136, "step": 9245 }, { "epoch": 0.39114984347237497, "grad_norm": 0.2994268238544464, "learning_rate": 0.001, "loss": 2.0385, "step": 9246 }, { "epoch": 0.3911921482358914, "grad_norm": 0.21520791947841644, "learning_rate": 0.001, "loss": 3.8089, "step": 9247 }, { "epoch": 0.39123445299940773, "grad_norm": 0.18887737393379211, "learning_rate": 0.001, "loss": 1.6818, "step": 9248 }, { "epoch": 0.3912767577629241, "grad_norm": 0.23042213916778564, "learning_rate": 0.001, "loss": 2.0132, "step": 9249 }, { "epoch": 0.3913190625264405, "grad_norm": 8.303719520568848, "learning_rate": 0.001, "loss": 1.782, "step": 9250 }, { "epoch": 0.39136136728995685, "grad_norm": 0.25407400727272034, "learning_rate": 0.001, "loss": 3.071, "step": 9251 }, { "epoch": 0.3914036720534732, "grad_norm": 0.19759856164455414, "learning_rate": 0.001, "loss": 1.8951, "step": 9252 }, { "epoch": 0.3914459768169896, "grad_norm": 0.3458765745162964, "learning_rate": 0.001, "loss": 3.0209, "step": 9253 }, { "epoch": 0.39148828158050597, "grad_norm": 1.5416405200958252, "learning_rate": 0.001, "loss": 2.7139, "step": 9254 }, { "epoch": 0.3915305863440223, "grad_norm": 0.21925224363803864, "learning_rate": 0.001, "loss": 2.1197, "step": 9255 }, { "epoch": 0.39157289110753873, "grad_norm": 0.6360104084014893, "learning_rate": 0.001, "loss": 1.9278, "step": 9256 }, { "epoch": 0.3916151958710551, "grad_norm": 0.17912402749061584, "learning_rate": 0.001, "loss": 2.8046, "step": 9257 }, { "epoch": 0.39165750063457144, "grad_norm": 0.22755807638168335, "learning_rate": 0.001, "loss": 2.0692, "step": 9258 }, { "epoch": 0.3916998053980878, "grad_norm": 0.3106396794319153, "learning_rate": 0.001, "loss": 2.5425, "step": 9259 }, { "epoch": 0.3917421101616042, "grad_norm": 1.9510068893432617, "learning_rate": 0.001, "loss": 1.7647, "step": 9260 }, { "epoch": 0.39178441492512056, "grad_norm": 0.20490862429141998, "learning_rate": 0.001, "loss": 2.5582, "step": 9261 }, { "epoch": 0.3918267196886369, "grad_norm": 0.18178889155387878, "learning_rate": 0.001, "loss": 2.8619, "step": 9262 }, { "epoch": 0.3918690244521533, "grad_norm": 0.22644934058189392, "learning_rate": 0.001, "loss": 2.4013, "step": 9263 }, { "epoch": 0.3919113292156697, "grad_norm": 0.2479361891746521, "learning_rate": 0.001, "loss": 2.4039, "step": 9264 }, { "epoch": 0.39195363397918603, "grad_norm": 0.20470456779003143, "learning_rate": 0.001, "loss": 2.086, "step": 9265 }, { "epoch": 0.39199593874270244, "grad_norm": 0.3624879717826843, "learning_rate": 0.001, "loss": 1.9208, "step": 9266 }, { "epoch": 0.3920382435062188, "grad_norm": 0.3792955279350281, "learning_rate": 0.001, "loss": 2.912, "step": 9267 }, { "epoch": 0.39208054826973515, "grad_norm": 0.3835373818874359, "learning_rate": 0.001, "loss": 3.1838, "step": 9268 }, { "epoch": 0.39212285303325156, "grad_norm": 0.25581711530685425, "learning_rate": 0.001, "loss": 2.0758, "step": 9269 }, { "epoch": 0.3921651577967679, "grad_norm": 0.20404990017414093, "learning_rate": 0.001, "loss": 1.8675, "step": 9270 }, { "epoch": 0.39220746256028427, "grad_norm": 0.22453588247299194, "learning_rate": 0.001, "loss": 2.3676, "step": 9271 }, { "epoch": 0.3922497673238007, "grad_norm": 0.2473563551902771, "learning_rate": 0.001, "loss": 2.179, "step": 9272 }, { "epoch": 0.39229207208731703, "grad_norm": 0.24559181928634644, "learning_rate": 0.001, "loss": 3.4248, "step": 9273 }, { "epoch": 0.3923343768508334, "grad_norm": 0.2277313470840454, "learning_rate": 0.001, "loss": 2.0933, "step": 9274 }, { "epoch": 0.3923766816143498, "grad_norm": 0.19083429872989655, "learning_rate": 0.001, "loss": 2.4522, "step": 9275 }, { "epoch": 0.39241898637786615, "grad_norm": 0.2212873101234436, "learning_rate": 0.001, "loss": 2.3474, "step": 9276 }, { "epoch": 0.3924612911413825, "grad_norm": 0.1985638290643692, "learning_rate": 0.001, "loss": 2.283, "step": 9277 }, { "epoch": 0.3925035959048989, "grad_norm": 1.7838554382324219, "learning_rate": 0.001, "loss": 2.1613, "step": 9278 }, { "epoch": 0.39254590066841527, "grad_norm": 0.4087802469730377, "learning_rate": 0.001, "loss": 1.7939, "step": 9279 }, { "epoch": 0.3925882054319316, "grad_norm": 0.17536307871341705, "learning_rate": 0.001, "loss": 1.9135, "step": 9280 }, { "epoch": 0.39263051019544803, "grad_norm": 0.6189838647842407, "learning_rate": 0.001, "loss": 3.2527, "step": 9281 }, { "epoch": 0.3926728149589644, "grad_norm": 0.2154485583305359, "learning_rate": 0.001, "loss": 1.8292, "step": 9282 }, { "epoch": 0.39271511972248074, "grad_norm": 0.3119647204875946, "learning_rate": 0.001, "loss": 3.5083, "step": 9283 }, { "epoch": 0.3927574244859971, "grad_norm": 0.28993380069732666, "learning_rate": 0.001, "loss": 3.3037, "step": 9284 }, { "epoch": 0.3927997292495135, "grad_norm": 0.2555803954601288, "learning_rate": 0.001, "loss": 3.6532, "step": 9285 }, { "epoch": 0.39284203401302986, "grad_norm": 0.22608381509780884, "learning_rate": 0.001, "loss": 2.7748, "step": 9286 }, { "epoch": 0.3928843387765462, "grad_norm": 0.2289711982011795, "learning_rate": 0.001, "loss": 2.0334, "step": 9287 }, { "epoch": 0.3929266435400626, "grad_norm": 0.7672654390335083, "learning_rate": 0.001, "loss": 1.7421, "step": 9288 }, { "epoch": 0.392968948303579, "grad_norm": 0.210458442568779, "learning_rate": 0.001, "loss": 2.2449, "step": 9289 }, { "epoch": 0.39301125306709533, "grad_norm": 0.4783534109592438, "learning_rate": 0.001, "loss": 1.7884, "step": 9290 }, { "epoch": 0.39305355783061174, "grad_norm": 0.16665317118167877, "learning_rate": 0.001, "loss": 1.8335, "step": 9291 }, { "epoch": 0.3930958625941281, "grad_norm": 0.6642040610313416, "learning_rate": 0.001, "loss": 2.3598, "step": 9292 }, { "epoch": 0.39313816735764445, "grad_norm": 0.16919787228107452, "learning_rate": 0.001, "loss": 2.0728, "step": 9293 }, { "epoch": 0.39318047212116086, "grad_norm": 0.17005786299705505, "learning_rate": 0.001, "loss": 1.8163, "step": 9294 }, { "epoch": 0.3932227768846772, "grad_norm": 0.29996180534362793, "learning_rate": 0.001, "loss": 2.3236, "step": 9295 }, { "epoch": 0.39326508164819357, "grad_norm": 0.19820648431777954, "learning_rate": 0.001, "loss": 2.0907, "step": 9296 }, { "epoch": 0.39330738641171, "grad_norm": 1.3484530448913574, "learning_rate": 0.001, "loss": 1.6248, "step": 9297 }, { "epoch": 0.39334969117522633, "grad_norm": 0.38515207171440125, "learning_rate": 0.001, "loss": 1.6151, "step": 9298 }, { "epoch": 0.3933919959387427, "grad_norm": 0.2751673758029938, "learning_rate": 0.001, "loss": 1.8327, "step": 9299 }, { "epoch": 0.3934343007022591, "grad_norm": 0.1867271363735199, "learning_rate": 0.001, "loss": 2.3518, "step": 9300 }, { "epoch": 0.39347660546577545, "grad_norm": 1.881629467010498, "learning_rate": 0.001, "loss": 2.6431, "step": 9301 }, { "epoch": 0.3935189102292918, "grad_norm": 0.1862645000219345, "learning_rate": 0.001, "loss": 2.3873, "step": 9302 }, { "epoch": 0.3935612149928082, "grad_norm": 4.848597526550293, "learning_rate": 0.001, "loss": 2.0754, "step": 9303 }, { "epoch": 0.39360351975632457, "grad_norm": 0.8090160489082336, "learning_rate": 0.001, "loss": 2.0008, "step": 9304 }, { "epoch": 0.3936458245198409, "grad_norm": 60.91786575317383, "learning_rate": 0.001, "loss": 2.1857, "step": 9305 }, { "epoch": 0.3936881292833573, "grad_norm": 0.5015795826911926, "learning_rate": 0.001, "loss": 2.5673, "step": 9306 }, { "epoch": 0.3937304340468737, "grad_norm": 0.21446509659290314, "learning_rate": 0.001, "loss": 2.0362, "step": 9307 }, { "epoch": 0.39377273881039004, "grad_norm": 1.1446090936660767, "learning_rate": 0.001, "loss": 1.639, "step": 9308 }, { "epoch": 0.3938150435739064, "grad_norm": 0.2021520733833313, "learning_rate": 0.001, "loss": 3.2664, "step": 9309 }, { "epoch": 0.3938573483374228, "grad_norm": 0.2120133638381958, "learning_rate": 0.001, "loss": 2.2566, "step": 9310 }, { "epoch": 0.39389965310093916, "grad_norm": 0.226138174533844, "learning_rate": 0.001, "loss": 1.8226, "step": 9311 }, { "epoch": 0.3939419578644555, "grad_norm": 4.0954694747924805, "learning_rate": 0.001, "loss": 2.0812, "step": 9312 }, { "epoch": 0.3939842626279719, "grad_norm": 0.37310054898262024, "learning_rate": 0.001, "loss": 3.0503, "step": 9313 }, { "epoch": 0.3940265673914883, "grad_norm": 0.6909289360046387, "learning_rate": 0.001, "loss": 2.0039, "step": 9314 }, { "epoch": 0.39406887215500463, "grad_norm": 0.23392127454280853, "learning_rate": 0.001, "loss": 1.9528, "step": 9315 }, { "epoch": 0.39411117691852104, "grad_norm": 1.0609430074691772, "learning_rate": 0.001, "loss": 2.6997, "step": 9316 }, { "epoch": 0.3941534816820374, "grad_norm": 0.2690190374851227, "learning_rate": 0.001, "loss": 2.5265, "step": 9317 }, { "epoch": 0.39419578644555375, "grad_norm": 0.19583603739738464, "learning_rate": 0.001, "loss": 2.9415, "step": 9318 }, { "epoch": 0.39423809120907016, "grad_norm": 0.2094552218914032, "learning_rate": 0.001, "loss": 2.0034, "step": 9319 }, { "epoch": 0.3942803959725865, "grad_norm": 0.33723655343055725, "learning_rate": 0.001, "loss": 1.8156, "step": 9320 }, { "epoch": 0.39432270073610287, "grad_norm": 0.2463519275188446, "learning_rate": 0.001, "loss": 2.1634, "step": 9321 }, { "epoch": 0.3943650054996193, "grad_norm": 2.194249153137207, "learning_rate": 0.001, "loss": 1.8768, "step": 9322 }, { "epoch": 0.39440731026313564, "grad_norm": 0.2177824079990387, "learning_rate": 0.001, "loss": 1.7509, "step": 9323 }, { "epoch": 0.394449615026652, "grad_norm": 0.2521611452102661, "learning_rate": 0.001, "loss": 2.2049, "step": 9324 }, { "epoch": 0.3944919197901684, "grad_norm": 0.5440561175346375, "learning_rate": 0.001, "loss": 2.5381, "step": 9325 }, { "epoch": 0.39453422455368475, "grad_norm": 0.22166214883327484, "learning_rate": 0.001, "loss": 1.8115, "step": 9326 }, { "epoch": 0.3945765293172011, "grad_norm": 0.8539856672286987, "learning_rate": 0.001, "loss": 3.4899, "step": 9327 }, { "epoch": 0.39461883408071746, "grad_norm": 0.34720534086227417, "learning_rate": 0.001, "loss": 1.8842, "step": 9328 }, { "epoch": 0.39466113884423387, "grad_norm": 0.2507597804069519, "learning_rate": 0.001, "loss": 2.0993, "step": 9329 }, { "epoch": 0.3947034436077502, "grad_norm": 17.17608642578125, "learning_rate": 0.001, "loss": 2.5217, "step": 9330 }, { "epoch": 0.3947457483712666, "grad_norm": 0.21708784997463226, "learning_rate": 0.001, "loss": 3.1434, "step": 9331 }, { "epoch": 0.394788053134783, "grad_norm": 8.15025520324707, "learning_rate": 0.001, "loss": 1.6658, "step": 9332 }, { "epoch": 0.39483035789829934, "grad_norm": 0.1978793740272522, "learning_rate": 0.001, "loss": 1.6641, "step": 9333 }, { "epoch": 0.3948726626618157, "grad_norm": 0.22561314702033997, "learning_rate": 0.001, "loss": 1.9712, "step": 9334 }, { "epoch": 0.3949149674253321, "grad_norm": 8.708436012268066, "learning_rate": 0.001, "loss": 1.9502, "step": 9335 }, { "epoch": 0.39495727218884846, "grad_norm": 0.2591575086116791, "learning_rate": 0.001, "loss": 2.4213, "step": 9336 }, { "epoch": 0.3949995769523648, "grad_norm": 0.22696593403816223, "learning_rate": 0.001, "loss": 1.8231, "step": 9337 }, { "epoch": 0.3950418817158812, "grad_norm": 0.4304518401622772, "learning_rate": 0.001, "loss": 1.6622, "step": 9338 }, { "epoch": 0.3950841864793976, "grad_norm": 0.27557769417762756, "learning_rate": 0.001, "loss": 2.2719, "step": 9339 }, { "epoch": 0.39512649124291394, "grad_norm": 0.2860799729824066, "learning_rate": 0.001, "loss": 1.9818, "step": 9340 }, { "epoch": 0.39516879600643035, "grad_norm": 119.59538269042969, "learning_rate": 0.001, "loss": 1.7143, "step": 9341 }, { "epoch": 0.3952111007699467, "grad_norm": 0.20414665341377258, "learning_rate": 0.001, "loss": 3.6217, "step": 9342 }, { "epoch": 0.39525340553346305, "grad_norm": 0.2606525123119354, "learning_rate": 0.001, "loss": 2.0212, "step": 9343 }, { "epoch": 0.39529571029697946, "grad_norm": 0.2075478881597519, "learning_rate": 0.001, "loss": 2.1644, "step": 9344 }, { "epoch": 0.3953380150604958, "grad_norm": 0.24295449256896973, "learning_rate": 0.001, "loss": 2.2285, "step": 9345 }, { "epoch": 0.39538031982401217, "grad_norm": 0.22193071246147156, "learning_rate": 0.001, "loss": 1.8047, "step": 9346 }, { "epoch": 0.3954226245875286, "grad_norm": 0.21409432590007782, "learning_rate": 0.001, "loss": 2.1791, "step": 9347 }, { "epoch": 0.39546492935104494, "grad_norm": 2.0014984607696533, "learning_rate": 0.001, "loss": 2.5374, "step": 9348 }, { "epoch": 0.3955072341145613, "grad_norm": 5.059160232543945, "learning_rate": 0.001, "loss": 2.2516, "step": 9349 }, { "epoch": 0.39554953887807764, "grad_norm": 0.1997808814048767, "learning_rate": 0.001, "loss": 2.2374, "step": 9350 }, { "epoch": 0.39559184364159405, "grad_norm": 0.19376425445079803, "learning_rate": 0.001, "loss": 1.8633, "step": 9351 }, { "epoch": 0.3956341484051104, "grad_norm": 0.8050264716148376, "learning_rate": 0.001, "loss": 2.5749, "step": 9352 }, { "epoch": 0.39567645316862676, "grad_norm": 0.3786727786064148, "learning_rate": 0.001, "loss": 3.1153, "step": 9353 }, { "epoch": 0.3957187579321432, "grad_norm": 0.2291092574596405, "learning_rate": 0.001, "loss": 1.8893, "step": 9354 }, { "epoch": 0.3957610626956595, "grad_norm": 0.49110499024391174, "learning_rate": 0.001, "loss": 2.2142, "step": 9355 }, { "epoch": 0.3958033674591759, "grad_norm": 0.18595391511917114, "learning_rate": 0.001, "loss": 1.9032, "step": 9356 }, { "epoch": 0.3958456722226923, "grad_norm": 2.1453158855438232, "learning_rate": 0.001, "loss": 2.6647, "step": 9357 }, { "epoch": 0.39588797698620865, "grad_norm": 0.33591386675834656, "learning_rate": 0.001, "loss": 2.0517, "step": 9358 }, { "epoch": 0.395930281749725, "grad_norm": 1.8636958599090576, "learning_rate": 0.001, "loss": 2.7847, "step": 9359 }, { "epoch": 0.3959725865132414, "grad_norm": 0.20229916274547577, "learning_rate": 0.001, "loss": 2.2288, "step": 9360 }, { "epoch": 0.39601489127675776, "grad_norm": 0.2586257755756378, "learning_rate": 0.001, "loss": 1.8981, "step": 9361 }, { "epoch": 0.3960571960402741, "grad_norm": 0.340988427400589, "learning_rate": 0.001, "loss": 2.7096, "step": 9362 }, { "epoch": 0.3960995008037905, "grad_norm": 0.21785105764865875, "learning_rate": 0.001, "loss": 1.8415, "step": 9363 }, { "epoch": 0.3961418055673069, "grad_norm": 0.7364193201065063, "learning_rate": 0.001, "loss": 2.9645, "step": 9364 }, { "epoch": 0.39618411033082324, "grad_norm": 0.32688039541244507, "learning_rate": 0.001, "loss": 2.9784, "step": 9365 }, { "epoch": 0.39622641509433965, "grad_norm": 0.2986684739589691, "learning_rate": 0.001, "loss": 3.2292, "step": 9366 }, { "epoch": 0.396268719857856, "grad_norm": 2.457265853881836, "learning_rate": 0.001, "loss": 1.9641, "step": 9367 }, { "epoch": 0.39631102462137235, "grad_norm": 0.28799575567245483, "learning_rate": 0.001, "loss": 2.2059, "step": 9368 }, { "epoch": 0.39635332938488876, "grad_norm": 0.7766205668449402, "learning_rate": 0.001, "loss": 3.7114, "step": 9369 }, { "epoch": 0.3963956341484051, "grad_norm": 0.7524096369743347, "learning_rate": 0.001, "loss": 2.6579, "step": 9370 }, { "epoch": 0.3964379389119215, "grad_norm": 1.014512062072754, "learning_rate": 0.001, "loss": 2.8398, "step": 9371 }, { "epoch": 0.3964802436754378, "grad_norm": 0.22580230236053467, "learning_rate": 0.001, "loss": 1.9793, "step": 9372 }, { "epoch": 0.39652254843895424, "grad_norm": 0.24192282557487488, "learning_rate": 0.001, "loss": 2.111, "step": 9373 }, { "epoch": 0.3965648532024706, "grad_norm": 1.9858895540237427, "learning_rate": 0.001, "loss": 2.8711, "step": 9374 }, { "epoch": 0.39660715796598695, "grad_norm": 0.21744923293590546, "learning_rate": 0.001, "loss": 1.9478, "step": 9375 }, { "epoch": 0.39664946272950335, "grad_norm": 1.180682897567749, "learning_rate": 0.001, "loss": 1.7087, "step": 9376 }, { "epoch": 0.3966917674930197, "grad_norm": 0.7958380579948425, "learning_rate": 0.001, "loss": 3.7644, "step": 9377 }, { "epoch": 0.39673407225653606, "grad_norm": 1.155543565750122, "learning_rate": 0.001, "loss": 2.2873, "step": 9378 }, { "epoch": 0.3967763770200525, "grad_norm": 0.2220691293478012, "learning_rate": 0.001, "loss": 2.2881, "step": 9379 }, { "epoch": 0.3968186817835688, "grad_norm": 2.1947951316833496, "learning_rate": 0.001, "loss": 2.9223, "step": 9380 }, { "epoch": 0.3968609865470852, "grad_norm": 0.404043048620224, "learning_rate": 0.001, "loss": 2.0489, "step": 9381 }, { "epoch": 0.3969032913106016, "grad_norm": 0.22369450330734253, "learning_rate": 0.001, "loss": 2.169, "step": 9382 }, { "epoch": 0.39694559607411795, "grad_norm": 0.2130047082901001, "learning_rate": 0.001, "loss": 2.0125, "step": 9383 }, { "epoch": 0.3969879008376343, "grad_norm": 0.42711809277534485, "learning_rate": 0.001, "loss": 2.6084, "step": 9384 }, { "epoch": 0.3970302056011507, "grad_norm": 0.31725093722343445, "learning_rate": 0.001, "loss": 2.2298, "step": 9385 }, { "epoch": 0.39707251036466706, "grad_norm": 0.2912001311779022, "learning_rate": 0.001, "loss": 2.0143, "step": 9386 }, { "epoch": 0.3971148151281834, "grad_norm": 0.27698057889938354, "learning_rate": 0.001, "loss": 2.3999, "step": 9387 }, { "epoch": 0.39715711989169983, "grad_norm": 0.7280207276344299, "learning_rate": 0.001, "loss": 2.1915, "step": 9388 }, { "epoch": 0.3971994246552162, "grad_norm": 1.9393442869186401, "learning_rate": 0.001, "loss": 2.6316, "step": 9389 }, { "epoch": 0.39724172941873254, "grad_norm": 0.20909947156906128, "learning_rate": 0.001, "loss": 2.5462, "step": 9390 }, { "epoch": 0.39728403418224895, "grad_norm": 2.941432237625122, "learning_rate": 0.001, "loss": 1.9705, "step": 9391 }, { "epoch": 0.3973263389457653, "grad_norm": 0.18532417714595795, "learning_rate": 0.001, "loss": 2.9728, "step": 9392 }, { "epoch": 0.39736864370928165, "grad_norm": 0.1898314207792282, "learning_rate": 0.001, "loss": 2.2554, "step": 9393 }, { "epoch": 0.39741094847279806, "grad_norm": 2.149635076522827, "learning_rate": 0.001, "loss": 2.0937, "step": 9394 }, { "epoch": 0.3974532532363144, "grad_norm": 0.22059142589569092, "learning_rate": 0.001, "loss": 2.1177, "step": 9395 }, { "epoch": 0.3974955579998308, "grad_norm": 0.38471415638923645, "learning_rate": 0.001, "loss": 1.9792, "step": 9396 }, { "epoch": 0.3975378627633471, "grad_norm": 0.6079134941101074, "learning_rate": 0.001, "loss": 2.0545, "step": 9397 }, { "epoch": 0.39758016752686354, "grad_norm": 0.5451988577842712, "learning_rate": 0.001, "loss": 2.1774, "step": 9398 }, { "epoch": 0.3976224722903799, "grad_norm": 0.20046019554138184, "learning_rate": 0.001, "loss": 1.909, "step": 9399 }, { "epoch": 0.39766477705389625, "grad_norm": 0.7716697454452515, "learning_rate": 0.001, "loss": 2.4313, "step": 9400 }, { "epoch": 0.39770708181741266, "grad_norm": 0.3297424018383026, "learning_rate": 0.001, "loss": 2.0079, "step": 9401 }, { "epoch": 0.397749386580929, "grad_norm": 0.1952991932630539, "learning_rate": 0.001, "loss": 2.1102, "step": 9402 }, { "epoch": 0.39779169134444536, "grad_norm": 0.19148588180541992, "learning_rate": 0.001, "loss": 1.9686, "step": 9403 }, { "epoch": 0.3978339961079618, "grad_norm": 0.2454746812582016, "learning_rate": 0.001, "loss": 2.5814, "step": 9404 }, { "epoch": 0.39787630087147813, "grad_norm": 1.7941993474960327, "learning_rate": 0.001, "loss": 2.0305, "step": 9405 }, { "epoch": 0.3979186056349945, "grad_norm": 15.885468482971191, "learning_rate": 0.001, "loss": 2.1969, "step": 9406 }, { "epoch": 0.3979609103985109, "grad_norm": 0.22626948356628418, "learning_rate": 0.001, "loss": 2.3471, "step": 9407 }, { "epoch": 0.39800321516202725, "grad_norm": 0.3605894446372986, "learning_rate": 0.001, "loss": 2.5423, "step": 9408 }, { "epoch": 0.3980455199255436, "grad_norm": 0.19169077277183533, "learning_rate": 0.001, "loss": 1.6805, "step": 9409 }, { "epoch": 0.39808782468906, "grad_norm": 0.32516393065452576, "learning_rate": 0.001, "loss": 2.1722, "step": 9410 }, { "epoch": 0.39813012945257636, "grad_norm": 0.7686219215393066, "learning_rate": 0.001, "loss": 2.8895, "step": 9411 }, { "epoch": 0.3981724342160927, "grad_norm": 0.2653716802597046, "learning_rate": 0.001, "loss": 1.8654, "step": 9412 }, { "epoch": 0.39821473897960913, "grad_norm": 3.2883455753326416, "learning_rate": 0.001, "loss": 2.0769, "step": 9413 }, { "epoch": 0.3982570437431255, "grad_norm": 10.234563827514648, "learning_rate": 0.001, "loss": 2.4105, "step": 9414 }, { "epoch": 0.39829934850664184, "grad_norm": 0.2494221329689026, "learning_rate": 0.001, "loss": 1.6076, "step": 9415 }, { "epoch": 0.39834165327015825, "grad_norm": 0.28424543142318726, "learning_rate": 0.001, "loss": 2.3292, "step": 9416 }, { "epoch": 0.3983839580336746, "grad_norm": 0.23830546438694, "learning_rate": 0.001, "loss": 2.6482, "step": 9417 }, { "epoch": 0.39842626279719096, "grad_norm": 3.2902987003326416, "learning_rate": 0.001, "loss": 2.8442, "step": 9418 }, { "epoch": 0.3984685675607073, "grad_norm": 1.0926035642623901, "learning_rate": 0.001, "loss": 1.8991, "step": 9419 }, { "epoch": 0.3985108723242237, "grad_norm": 0.6159302592277527, "learning_rate": 0.001, "loss": 3.9763, "step": 9420 }, { "epoch": 0.3985531770877401, "grad_norm": 0.40966638922691345, "learning_rate": 0.001, "loss": 3.1978, "step": 9421 }, { "epoch": 0.39859548185125643, "grad_norm": 1.2507386207580566, "learning_rate": 0.001, "loss": 2.0906, "step": 9422 }, { "epoch": 0.39863778661477284, "grad_norm": 0.210091695189476, "learning_rate": 0.001, "loss": 2.0291, "step": 9423 }, { "epoch": 0.3986800913782892, "grad_norm": 0.6132796406745911, "learning_rate": 0.001, "loss": 1.9906, "step": 9424 }, { "epoch": 0.39872239614180555, "grad_norm": 0.20599402487277985, "learning_rate": 0.001, "loss": 1.9661, "step": 9425 }, { "epoch": 0.39876470090532196, "grad_norm": 75.12084197998047, "learning_rate": 0.001, "loss": 2.2098, "step": 9426 }, { "epoch": 0.3988070056688383, "grad_norm": 0.19137638807296753, "learning_rate": 0.001, "loss": 2.4808, "step": 9427 }, { "epoch": 0.39884931043235466, "grad_norm": 0.4978329539299011, "learning_rate": 0.001, "loss": 2.0099, "step": 9428 }, { "epoch": 0.3988916151958711, "grad_norm": 1.2876667976379395, "learning_rate": 0.001, "loss": 2.6079, "step": 9429 }, { "epoch": 0.39893391995938743, "grad_norm": 8.302372932434082, "learning_rate": 0.001, "loss": 2.3667, "step": 9430 }, { "epoch": 0.3989762247229038, "grad_norm": 0.2350839525461197, "learning_rate": 0.001, "loss": 2.1824, "step": 9431 }, { "epoch": 0.3990185294864202, "grad_norm": 7.219992160797119, "learning_rate": 0.001, "loss": 2.0594, "step": 9432 }, { "epoch": 0.39906083424993655, "grad_norm": 0.8567195534706116, "learning_rate": 0.001, "loss": 2.2847, "step": 9433 }, { "epoch": 0.3991031390134529, "grad_norm": 0.2659953236579895, "learning_rate": 0.001, "loss": 2.0949, "step": 9434 }, { "epoch": 0.3991454437769693, "grad_norm": 0.27466630935668945, "learning_rate": 0.001, "loss": 2.9983, "step": 9435 }, { "epoch": 0.39918774854048567, "grad_norm": 0.22787296772003174, "learning_rate": 0.001, "loss": 3.1284, "step": 9436 }, { "epoch": 0.399230053304002, "grad_norm": 0.6802979111671448, "learning_rate": 0.001, "loss": 2.2057, "step": 9437 }, { "epoch": 0.39927235806751843, "grad_norm": 0.22264288365840912, "learning_rate": 0.001, "loss": 3.2098, "step": 9438 }, { "epoch": 0.3993146628310348, "grad_norm": 0.20934641361236572, "learning_rate": 0.001, "loss": 2.5188, "step": 9439 }, { "epoch": 0.39935696759455114, "grad_norm": 0.3321964144706726, "learning_rate": 0.001, "loss": 2.8836, "step": 9440 }, { "epoch": 0.3993992723580675, "grad_norm": 0.7503458857536316, "learning_rate": 0.001, "loss": 2.3137, "step": 9441 }, { "epoch": 0.3994415771215839, "grad_norm": 0.20252926647663116, "learning_rate": 0.001, "loss": 3.1602, "step": 9442 }, { "epoch": 0.39948388188510026, "grad_norm": 0.2206176519393921, "learning_rate": 0.001, "loss": 4.0381, "step": 9443 }, { "epoch": 0.3995261866486166, "grad_norm": 0.16592377424240112, "learning_rate": 0.001, "loss": 2.0148, "step": 9444 }, { "epoch": 0.399568491412133, "grad_norm": 0.19829799234867096, "learning_rate": 0.001, "loss": 1.8411, "step": 9445 }, { "epoch": 0.3996107961756494, "grad_norm": 0.18881183862686157, "learning_rate": 0.001, "loss": 1.9147, "step": 9446 }, { "epoch": 0.39965310093916573, "grad_norm": 0.24857249855995178, "learning_rate": 0.001, "loss": 2.3787, "step": 9447 }, { "epoch": 0.39969540570268214, "grad_norm": 0.2752115726470947, "learning_rate": 0.001, "loss": 1.9453, "step": 9448 }, { "epoch": 0.3997377104661985, "grad_norm": 0.1992688775062561, "learning_rate": 0.001, "loss": 2.4652, "step": 9449 }, { "epoch": 0.39978001522971485, "grad_norm": 0.20093099772930145, "learning_rate": 0.001, "loss": 1.8111, "step": 9450 }, { "epoch": 0.39982231999323126, "grad_norm": 0.1518988311290741, "learning_rate": 0.001, "loss": 1.7827, "step": 9451 }, { "epoch": 0.3998646247567476, "grad_norm": 0.20735451579093933, "learning_rate": 0.001, "loss": 2.1059, "step": 9452 }, { "epoch": 0.39990692952026397, "grad_norm": 0.2454904466867447, "learning_rate": 0.001, "loss": 2.5582, "step": 9453 }, { "epoch": 0.3999492342837804, "grad_norm": 0.8346079587936401, "learning_rate": 0.001, "loss": 3.135, "step": 9454 }, { "epoch": 0.39999153904729673, "grad_norm": 0.1993485987186432, "learning_rate": 0.001, "loss": 2.1025, "step": 9455 }, { "epoch": 0.4000338438108131, "grad_norm": 0.4179655909538269, "learning_rate": 0.001, "loss": 2.0312, "step": 9456 }, { "epoch": 0.4000761485743295, "grad_norm": 0.17672595381736755, "learning_rate": 0.001, "loss": 2.5397, "step": 9457 }, { "epoch": 0.40011845333784585, "grad_norm": 0.24849601089954376, "learning_rate": 0.001, "loss": 2.4992, "step": 9458 }, { "epoch": 0.4001607581013622, "grad_norm": 0.2164924442768097, "learning_rate": 0.001, "loss": 2.0831, "step": 9459 }, { "epoch": 0.4002030628648786, "grad_norm": 3.8473236560821533, "learning_rate": 0.001, "loss": 2.1948, "step": 9460 }, { "epoch": 0.40024536762839497, "grad_norm": 0.24513396620750427, "learning_rate": 0.001, "loss": 1.9716, "step": 9461 }, { "epoch": 0.4002876723919113, "grad_norm": 0.6947419047355652, "learning_rate": 0.001, "loss": 2.8307, "step": 9462 }, { "epoch": 0.4003299771554277, "grad_norm": 0.20410794019699097, "learning_rate": 0.001, "loss": 3.0127, "step": 9463 }, { "epoch": 0.4003722819189441, "grad_norm": 0.18092742562294006, "learning_rate": 0.001, "loss": 1.9074, "step": 9464 }, { "epoch": 0.40041458668246044, "grad_norm": 0.17439773678779602, "learning_rate": 0.001, "loss": 1.8441, "step": 9465 }, { "epoch": 0.4004568914459768, "grad_norm": 0.5184692144393921, "learning_rate": 0.001, "loss": 2.8331, "step": 9466 }, { "epoch": 0.4004991962094932, "grad_norm": 0.19530414044857025, "learning_rate": 0.001, "loss": 2.3759, "step": 9467 }, { "epoch": 0.40054150097300956, "grad_norm": 0.5446052551269531, "learning_rate": 0.001, "loss": 2.5317, "step": 9468 }, { "epoch": 0.4005838057365259, "grad_norm": 1.0706979036331177, "learning_rate": 0.001, "loss": 3.0557, "step": 9469 }, { "epoch": 0.4006261105000423, "grad_norm": 0.25208720564842224, "learning_rate": 0.001, "loss": 2.2757, "step": 9470 }, { "epoch": 0.4006684152635587, "grad_norm": 0.1785879284143448, "learning_rate": 0.001, "loss": 1.8306, "step": 9471 }, { "epoch": 0.40071072002707503, "grad_norm": 0.20661844313144684, "learning_rate": 0.001, "loss": 2.9331, "step": 9472 }, { "epoch": 0.40075302479059144, "grad_norm": 0.24064034223556519, "learning_rate": 0.001, "loss": 2.2004, "step": 9473 }, { "epoch": 0.4007953295541078, "grad_norm": 28.610883712768555, "learning_rate": 0.001, "loss": 2.2863, "step": 9474 }, { "epoch": 0.40083763431762415, "grad_norm": 0.6039111614227295, "learning_rate": 0.001, "loss": 2.4287, "step": 9475 }, { "epoch": 0.40087993908114056, "grad_norm": 0.18513540923595428, "learning_rate": 0.001, "loss": 2.2021, "step": 9476 }, { "epoch": 0.4009222438446569, "grad_norm": 1.6514339447021484, "learning_rate": 0.001, "loss": 1.9221, "step": 9477 }, { "epoch": 0.40096454860817327, "grad_norm": 10.18622875213623, "learning_rate": 0.001, "loss": 2.3673, "step": 9478 }, { "epoch": 0.4010068533716897, "grad_norm": 1.0164726972579956, "learning_rate": 0.001, "loss": 1.8288, "step": 9479 }, { "epoch": 0.40104915813520603, "grad_norm": 0.5969464778900146, "learning_rate": 0.001, "loss": 2.2311, "step": 9480 }, { "epoch": 0.4010914628987224, "grad_norm": 0.22844240069389343, "learning_rate": 0.001, "loss": 2.826, "step": 9481 }, { "epoch": 0.4011337676622388, "grad_norm": 0.3800431191921234, "learning_rate": 0.001, "loss": 2.7672, "step": 9482 }, { "epoch": 0.40117607242575515, "grad_norm": 0.4591025114059448, "learning_rate": 0.001, "loss": 2.5813, "step": 9483 }, { "epoch": 0.4012183771892715, "grad_norm": 0.21790550649166107, "learning_rate": 0.001, "loss": 1.8184, "step": 9484 }, { "epoch": 0.40126068195278786, "grad_norm": 0.7299678325653076, "learning_rate": 0.001, "loss": 1.9012, "step": 9485 }, { "epoch": 0.40130298671630427, "grad_norm": 1.0232609510421753, "learning_rate": 0.001, "loss": 3.1905, "step": 9486 }, { "epoch": 0.4013452914798206, "grad_norm": 0.1979575753211975, "learning_rate": 0.001, "loss": 1.8858, "step": 9487 }, { "epoch": 0.401387596243337, "grad_norm": 1.3063571453094482, "learning_rate": 0.001, "loss": 1.5222, "step": 9488 }, { "epoch": 0.4014299010068534, "grad_norm": 0.2541360855102539, "learning_rate": 0.001, "loss": 2.1482, "step": 9489 }, { "epoch": 0.40147220577036974, "grad_norm": 2.719597816467285, "learning_rate": 0.001, "loss": 2.9177, "step": 9490 }, { "epoch": 0.4015145105338861, "grad_norm": 0.18410396575927734, "learning_rate": 0.001, "loss": 1.3939, "step": 9491 }, { "epoch": 0.4015568152974025, "grad_norm": 0.30777132511138916, "learning_rate": 0.001, "loss": 2.4803, "step": 9492 }, { "epoch": 0.40159912006091886, "grad_norm": 0.5091328024864197, "learning_rate": 0.001, "loss": 2.1258, "step": 9493 }, { "epoch": 0.4016414248244352, "grad_norm": 0.5316731929779053, "learning_rate": 0.001, "loss": 3.1333, "step": 9494 }, { "epoch": 0.4016837295879516, "grad_norm": 0.5768591165542603, "learning_rate": 0.001, "loss": 2.1632, "step": 9495 }, { "epoch": 0.401726034351468, "grad_norm": 0.2193114310503006, "learning_rate": 0.001, "loss": 2.4664, "step": 9496 }, { "epoch": 0.40176833911498433, "grad_norm": 0.37915632128715515, "learning_rate": 0.001, "loss": 4.2133, "step": 9497 }, { "epoch": 0.40181064387850074, "grad_norm": 0.18758665025234222, "learning_rate": 0.001, "loss": 1.8454, "step": 9498 }, { "epoch": 0.4018529486420171, "grad_norm": 0.24519726634025574, "learning_rate": 0.001, "loss": 2.1569, "step": 9499 }, { "epoch": 0.40189525340553345, "grad_norm": 0.2464457005262375, "learning_rate": 0.001, "loss": 2.0278, "step": 9500 }, { "epoch": 0.40193755816904986, "grad_norm": 0.21936391294002533, "learning_rate": 0.001, "loss": 2.2305, "step": 9501 }, { "epoch": 0.4019798629325662, "grad_norm": 0.248738095164299, "learning_rate": 0.001, "loss": 2.2684, "step": 9502 }, { "epoch": 0.40202216769608257, "grad_norm": 0.25540632009506226, "learning_rate": 0.001, "loss": 3.4357, "step": 9503 }, { "epoch": 0.402064472459599, "grad_norm": 1.870626449584961, "learning_rate": 0.001, "loss": 2.1011, "step": 9504 }, { "epoch": 0.40210677722311533, "grad_norm": 0.23823359608650208, "learning_rate": 0.001, "loss": 1.7739, "step": 9505 }, { "epoch": 0.4021490819866317, "grad_norm": 0.678466260433197, "learning_rate": 0.001, "loss": 1.9031, "step": 9506 }, { "epoch": 0.4021913867501481, "grad_norm": 0.4503398537635803, "learning_rate": 0.001, "loss": 1.8608, "step": 9507 }, { "epoch": 0.40223369151366445, "grad_norm": 0.528942346572876, "learning_rate": 0.001, "loss": 1.7426, "step": 9508 }, { "epoch": 0.4022759962771808, "grad_norm": 0.2618485391139984, "learning_rate": 0.001, "loss": 2.144, "step": 9509 }, { "epoch": 0.40231830104069716, "grad_norm": 4.79293155670166, "learning_rate": 0.001, "loss": 2.7917, "step": 9510 }, { "epoch": 0.40236060580421357, "grad_norm": 0.18239739537239075, "learning_rate": 0.001, "loss": 2.5635, "step": 9511 }, { "epoch": 0.4024029105677299, "grad_norm": 0.7269040942192078, "learning_rate": 0.001, "loss": 2.3232, "step": 9512 }, { "epoch": 0.4024452153312463, "grad_norm": 0.18123169243335724, "learning_rate": 0.001, "loss": 1.7372, "step": 9513 }, { "epoch": 0.4024875200947627, "grad_norm": 0.19387836754322052, "learning_rate": 0.001, "loss": 2.0364, "step": 9514 }, { "epoch": 0.40252982485827904, "grad_norm": 0.18925026059150696, "learning_rate": 0.001, "loss": 1.7467, "step": 9515 }, { "epoch": 0.4025721296217954, "grad_norm": 0.3890759348869324, "learning_rate": 0.001, "loss": 2.7014, "step": 9516 }, { "epoch": 0.4026144343853118, "grad_norm": 0.3346877098083496, "learning_rate": 0.001, "loss": 3.0175, "step": 9517 }, { "epoch": 0.40265673914882816, "grad_norm": 0.3895154893398285, "learning_rate": 0.001, "loss": 2.8014, "step": 9518 }, { "epoch": 0.4026990439123445, "grad_norm": 3.064667224884033, "learning_rate": 0.001, "loss": 2.5548, "step": 9519 }, { "epoch": 0.4027413486758609, "grad_norm": 0.25531595945358276, "learning_rate": 0.001, "loss": 1.9813, "step": 9520 }, { "epoch": 0.4027836534393773, "grad_norm": 0.23096615076065063, "learning_rate": 0.001, "loss": 2.8028, "step": 9521 }, { "epoch": 0.40282595820289363, "grad_norm": 0.22149285674095154, "learning_rate": 0.001, "loss": 2.2017, "step": 9522 }, { "epoch": 0.40286826296641004, "grad_norm": 0.32118022441864014, "learning_rate": 0.001, "loss": 3.6867, "step": 9523 }, { "epoch": 0.4029105677299264, "grad_norm": 0.2108854353427887, "learning_rate": 0.001, "loss": 1.8694, "step": 9524 }, { "epoch": 0.40295287249344275, "grad_norm": 0.9841258525848389, "learning_rate": 0.001, "loss": 3.0174, "step": 9525 }, { "epoch": 0.40299517725695916, "grad_norm": 0.5012634992599487, "learning_rate": 0.001, "loss": 2.83, "step": 9526 }, { "epoch": 0.4030374820204755, "grad_norm": 0.27034950256347656, "learning_rate": 0.001, "loss": 2.4939, "step": 9527 }, { "epoch": 0.40307978678399187, "grad_norm": 0.18938782811164856, "learning_rate": 0.001, "loss": 2.5085, "step": 9528 }, { "epoch": 0.4031220915475083, "grad_norm": 0.1947273164987564, "learning_rate": 0.001, "loss": 2.1869, "step": 9529 }, { "epoch": 0.40316439631102463, "grad_norm": 0.26790064573287964, "learning_rate": 0.001, "loss": 2.4795, "step": 9530 }, { "epoch": 0.403206701074541, "grad_norm": 0.19423073530197144, "learning_rate": 0.001, "loss": 2.0744, "step": 9531 }, { "epoch": 0.40324900583805734, "grad_norm": 0.5601986646652222, "learning_rate": 0.001, "loss": 2.2896, "step": 9532 }, { "epoch": 0.40329131060157375, "grad_norm": 1.604595422744751, "learning_rate": 0.001, "loss": 2.3243, "step": 9533 }, { "epoch": 0.4033336153650901, "grad_norm": 0.31608372926712036, "learning_rate": 0.001, "loss": 2.3288, "step": 9534 }, { "epoch": 0.40337592012860646, "grad_norm": 0.28224605321884155, "learning_rate": 0.001, "loss": 3.0434, "step": 9535 }, { "epoch": 0.40341822489212287, "grad_norm": 0.21669940650463104, "learning_rate": 0.001, "loss": 2.5078, "step": 9536 }, { "epoch": 0.4034605296556392, "grad_norm": 0.34247350692749023, "learning_rate": 0.001, "loss": 2.1365, "step": 9537 }, { "epoch": 0.4035028344191556, "grad_norm": 0.3081550598144531, "learning_rate": 0.001, "loss": 1.9531, "step": 9538 }, { "epoch": 0.403545139182672, "grad_norm": 1.4701812267303467, "learning_rate": 0.001, "loss": 2.1148, "step": 9539 }, { "epoch": 0.40358744394618834, "grad_norm": 0.21030528843402863, "learning_rate": 0.001, "loss": 2.2116, "step": 9540 }, { "epoch": 0.4036297487097047, "grad_norm": 0.22468726336956024, "learning_rate": 0.001, "loss": 2.0276, "step": 9541 }, { "epoch": 0.4036720534732211, "grad_norm": 0.2096341848373413, "learning_rate": 0.001, "loss": 2.1614, "step": 9542 }, { "epoch": 0.40371435823673746, "grad_norm": 0.17588932812213898, "learning_rate": 0.001, "loss": 1.7986, "step": 9543 }, { "epoch": 0.4037566630002538, "grad_norm": 0.21680901944637299, "learning_rate": 0.001, "loss": 2.815, "step": 9544 }, { "epoch": 0.4037989677637702, "grad_norm": 0.16149887442588806, "learning_rate": 0.001, "loss": 1.4867, "step": 9545 }, { "epoch": 0.4038412725272866, "grad_norm": 0.25393468141555786, "learning_rate": 0.001, "loss": 2.2976, "step": 9546 }, { "epoch": 0.40388357729080293, "grad_norm": 0.26233553886413574, "learning_rate": 0.001, "loss": 2.9283, "step": 9547 }, { "epoch": 0.40392588205431934, "grad_norm": 0.21052797138690948, "learning_rate": 0.001, "loss": 2.1164, "step": 9548 }, { "epoch": 0.4039681868178357, "grad_norm": 0.220393568277359, "learning_rate": 0.001, "loss": 2.5153, "step": 9549 }, { "epoch": 0.40401049158135205, "grad_norm": 0.25661540031433105, "learning_rate": 0.001, "loss": 1.9917, "step": 9550 }, { "epoch": 0.40405279634486846, "grad_norm": 2.5997157096862793, "learning_rate": 0.001, "loss": 2.2229, "step": 9551 }, { "epoch": 0.4040951011083848, "grad_norm": 0.198783740401268, "learning_rate": 0.001, "loss": 2.0679, "step": 9552 }, { "epoch": 0.40413740587190117, "grad_norm": 0.18976983428001404, "learning_rate": 0.001, "loss": 2.141, "step": 9553 }, { "epoch": 0.4041797106354175, "grad_norm": 0.3730868101119995, "learning_rate": 0.001, "loss": 2.0335, "step": 9554 }, { "epoch": 0.40422201539893393, "grad_norm": 0.1948307752609253, "learning_rate": 0.001, "loss": 1.7714, "step": 9555 }, { "epoch": 0.4042643201624503, "grad_norm": 0.2243911772966385, "learning_rate": 0.001, "loss": 2.2354, "step": 9556 }, { "epoch": 0.40430662492596664, "grad_norm": 0.24743397533893585, "learning_rate": 0.001, "loss": 2.2232, "step": 9557 }, { "epoch": 0.40434892968948305, "grad_norm": 0.3496690094470978, "learning_rate": 0.001, "loss": 1.7223, "step": 9558 }, { "epoch": 0.4043912344529994, "grad_norm": 0.3667478561401367, "learning_rate": 0.001, "loss": 2.9957, "step": 9559 }, { "epoch": 0.40443353921651576, "grad_norm": 0.32227084040641785, "learning_rate": 0.001, "loss": 2.0967, "step": 9560 }, { "epoch": 0.40447584398003217, "grad_norm": 0.18110904097557068, "learning_rate": 0.001, "loss": 1.6972, "step": 9561 }, { "epoch": 0.4045181487435485, "grad_norm": 0.19031678140163422, "learning_rate": 0.001, "loss": 2.3908, "step": 9562 }, { "epoch": 0.4045604535070649, "grad_norm": 0.17075388133525848, "learning_rate": 0.001, "loss": 1.9596, "step": 9563 }, { "epoch": 0.4046027582705813, "grad_norm": 0.20088161528110504, "learning_rate": 0.001, "loss": 2.8288, "step": 9564 }, { "epoch": 0.40464506303409764, "grad_norm": 0.19151663780212402, "learning_rate": 0.001, "loss": 1.8171, "step": 9565 }, { "epoch": 0.404687367797614, "grad_norm": 0.19099347293376923, "learning_rate": 0.001, "loss": 2.3548, "step": 9566 }, { "epoch": 0.4047296725611304, "grad_norm": 0.2712372839450836, "learning_rate": 0.001, "loss": 2.8372, "step": 9567 }, { "epoch": 0.40477197732464676, "grad_norm": 0.17886675894260406, "learning_rate": 0.001, "loss": 1.9507, "step": 9568 }, { "epoch": 0.4048142820881631, "grad_norm": 46.942535400390625, "learning_rate": 0.001, "loss": 2.8675, "step": 9569 }, { "epoch": 0.4048565868516795, "grad_norm": 0.2914670407772064, "learning_rate": 0.001, "loss": 1.7127, "step": 9570 }, { "epoch": 0.4048988916151959, "grad_norm": 0.1782679408788681, "learning_rate": 0.001, "loss": 1.7707, "step": 9571 }, { "epoch": 0.40494119637871223, "grad_norm": 0.446181982755661, "learning_rate": 0.001, "loss": 2.3157, "step": 9572 }, { "epoch": 0.40498350114222864, "grad_norm": 1.071265697479248, "learning_rate": 0.001, "loss": 2.153, "step": 9573 }, { "epoch": 0.405025805905745, "grad_norm": 0.2291143536567688, "learning_rate": 0.001, "loss": 1.8002, "step": 9574 }, { "epoch": 0.40506811066926135, "grad_norm": 0.21641968190670013, "learning_rate": 0.001, "loss": 2.2813, "step": 9575 }, { "epoch": 0.4051104154327777, "grad_norm": 3.91137957572937, "learning_rate": 0.001, "loss": 2.7253, "step": 9576 }, { "epoch": 0.4051527201962941, "grad_norm": 0.15815329551696777, "learning_rate": 0.001, "loss": 1.5105, "step": 9577 }, { "epoch": 0.40519502495981047, "grad_norm": 0.43796873092651367, "learning_rate": 0.001, "loss": 2.2207, "step": 9578 }, { "epoch": 0.4052373297233268, "grad_norm": 0.17552462220191956, "learning_rate": 0.001, "loss": 2.1559, "step": 9579 }, { "epoch": 0.40527963448684323, "grad_norm": 0.2455400824546814, "learning_rate": 0.001, "loss": 1.8954, "step": 9580 }, { "epoch": 0.4053219392503596, "grad_norm": 0.9045276045799255, "learning_rate": 0.001, "loss": 2.028, "step": 9581 }, { "epoch": 0.40536424401387594, "grad_norm": 0.16627678275108337, "learning_rate": 0.001, "loss": 1.7033, "step": 9582 }, { "epoch": 0.40540654877739235, "grad_norm": 0.23107825219631195, "learning_rate": 0.001, "loss": 3.9189, "step": 9583 }, { "epoch": 0.4054488535409087, "grad_norm": 0.5262654423713684, "learning_rate": 0.001, "loss": 2.7847, "step": 9584 }, { "epoch": 0.40549115830442506, "grad_norm": 0.3403107225894928, "learning_rate": 0.001, "loss": 2.8312, "step": 9585 }, { "epoch": 0.40553346306794147, "grad_norm": 0.19533272087574005, "learning_rate": 0.001, "loss": 2.0224, "step": 9586 }, { "epoch": 0.4055757678314578, "grad_norm": 0.2007928043603897, "learning_rate": 0.001, "loss": 1.8877, "step": 9587 }, { "epoch": 0.4056180725949742, "grad_norm": 0.2459113597869873, "learning_rate": 0.001, "loss": 2.3136, "step": 9588 }, { "epoch": 0.4056603773584906, "grad_norm": 0.17115989327430725, "learning_rate": 0.001, "loss": 2.79, "step": 9589 }, { "epoch": 0.40570268212200694, "grad_norm": 1.789944052696228, "learning_rate": 0.001, "loss": 2.5636, "step": 9590 }, { "epoch": 0.4057449868855233, "grad_norm": 0.5894603133201599, "learning_rate": 0.001, "loss": 2.3501, "step": 9591 }, { "epoch": 0.4057872916490397, "grad_norm": 0.24456271529197693, "learning_rate": 0.001, "loss": 2.8854, "step": 9592 }, { "epoch": 0.40582959641255606, "grad_norm": 0.15908919274806976, "learning_rate": 0.001, "loss": 1.5415, "step": 9593 }, { "epoch": 0.4058719011760724, "grad_norm": 0.17225715517997742, "learning_rate": 0.001, "loss": 2.4391, "step": 9594 }, { "epoch": 0.4059142059395888, "grad_norm": 0.17913804948329926, "learning_rate": 0.001, "loss": 1.4686, "step": 9595 }, { "epoch": 0.4059565107031052, "grad_norm": 0.4248640239238739, "learning_rate": 0.001, "loss": 2.7928, "step": 9596 }, { "epoch": 0.40599881546662153, "grad_norm": 1.1703771352767944, "learning_rate": 0.001, "loss": 2.104, "step": 9597 }, { "epoch": 0.4060411202301379, "grad_norm": 0.18261665105819702, "learning_rate": 0.001, "loss": 1.6789, "step": 9598 }, { "epoch": 0.4060834249936543, "grad_norm": 1.6488425731658936, "learning_rate": 0.001, "loss": 2.1651, "step": 9599 }, { "epoch": 0.40612572975717065, "grad_norm": 1.800002098083496, "learning_rate": 0.001, "loss": 2.8898, "step": 9600 }, { "epoch": 0.406168034520687, "grad_norm": 2.6963353157043457, "learning_rate": 0.001, "loss": 2.6452, "step": 9601 }, { "epoch": 0.4062103392842034, "grad_norm": 0.2065376192331314, "learning_rate": 0.001, "loss": 1.9318, "step": 9602 }, { "epoch": 0.40625264404771977, "grad_norm": 0.9833797812461853, "learning_rate": 0.001, "loss": 2.7088, "step": 9603 }, { "epoch": 0.4062949488112361, "grad_norm": 0.332534521818161, "learning_rate": 0.001, "loss": 1.9894, "step": 9604 }, { "epoch": 0.40633725357475253, "grad_norm": 0.2684883177280426, "learning_rate": 0.001, "loss": 2.1797, "step": 9605 }, { "epoch": 0.4063795583382689, "grad_norm": 1.313679814338684, "learning_rate": 0.001, "loss": 3.2259, "step": 9606 }, { "epoch": 0.40642186310178524, "grad_norm": 0.4700590968132019, "learning_rate": 0.001, "loss": 1.8352, "step": 9607 }, { "epoch": 0.40646416786530165, "grad_norm": 0.29726094007492065, "learning_rate": 0.001, "loss": 2.2151, "step": 9608 }, { "epoch": 0.406506472628818, "grad_norm": 0.20610110461711884, "learning_rate": 0.001, "loss": 1.5279, "step": 9609 }, { "epoch": 0.40654877739233436, "grad_norm": 0.19683901965618134, "learning_rate": 0.001, "loss": 2.1613, "step": 9610 }, { "epoch": 0.40659108215585077, "grad_norm": 0.37825819849967957, "learning_rate": 0.001, "loss": 3.1917, "step": 9611 }, { "epoch": 0.4066333869193671, "grad_norm": 0.18482539057731628, "learning_rate": 0.001, "loss": 2.0728, "step": 9612 }, { "epoch": 0.4066756916828835, "grad_norm": 0.2475501000881195, "learning_rate": 0.001, "loss": 2.9863, "step": 9613 }, { "epoch": 0.4067179964463999, "grad_norm": 0.14848104119300842, "learning_rate": 0.001, "loss": 2.2224, "step": 9614 }, { "epoch": 0.40676030120991624, "grad_norm": 0.4939437806606293, "learning_rate": 0.001, "loss": 2.6319, "step": 9615 }, { "epoch": 0.4068026059734326, "grad_norm": 1.2136601209640503, "learning_rate": 0.001, "loss": 2.591, "step": 9616 }, { "epoch": 0.406844910736949, "grad_norm": 1.4348440170288086, "learning_rate": 0.001, "loss": 2.2057, "step": 9617 }, { "epoch": 0.40688721550046536, "grad_norm": 0.2134755551815033, "learning_rate": 0.001, "loss": 1.9658, "step": 9618 }, { "epoch": 0.4069295202639817, "grad_norm": 1.9099276065826416, "learning_rate": 0.001, "loss": 2.0009, "step": 9619 }, { "epoch": 0.40697182502749807, "grad_norm": 0.2304713875055313, "learning_rate": 0.001, "loss": 2.0757, "step": 9620 }, { "epoch": 0.4070141297910145, "grad_norm": 0.2379099726676941, "learning_rate": 0.001, "loss": 2.3855, "step": 9621 }, { "epoch": 0.40705643455453083, "grad_norm": 0.2802619934082031, "learning_rate": 0.001, "loss": 2.562, "step": 9622 }, { "epoch": 0.4070987393180472, "grad_norm": 0.2311367690563202, "learning_rate": 0.001, "loss": 3.7875, "step": 9623 }, { "epoch": 0.4071410440815636, "grad_norm": 0.4283418655395508, "learning_rate": 0.001, "loss": 1.7183, "step": 9624 }, { "epoch": 0.40718334884507995, "grad_norm": 0.31401526927948, "learning_rate": 0.001, "loss": 2.5655, "step": 9625 }, { "epoch": 0.4072256536085963, "grad_norm": 0.22191263735294342, "learning_rate": 0.001, "loss": 2.251, "step": 9626 }, { "epoch": 0.4072679583721127, "grad_norm": 0.23123349249362946, "learning_rate": 0.001, "loss": 2.5949, "step": 9627 }, { "epoch": 0.40731026313562907, "grad_norm": 0.20630641281604767, "learning_rate": 0.001, "loss": 2.8717, "step": 9628 }, { "epoch": 0.4073525678991454, "grad_norm": 1.072459101676941, "learning_rate": 0.001, "loss": 2.4395, "step": 9629 }, { "epoch": 0.40739487266266183, "grad_norm": 0.22625629603862762, "learning_rate": 0.001, "loss": 2.2453, "step": 9630 }, { "epoch": 0.4074371774261782, "grad_norm": 1.8554446697235107, "learning_rate": 0.001, "loss": 2.1412, "step": 9631 }, { "epoch": 0.40747948218969454, "grad_norm": 0.2199331372976303, "learning_rate": 0.001, "loss": 1.5662, "step": 9632 }, { "epoch": 0.40752178695321095, "grad_norm": 1.69846773147583, "learning_rate": 0.001, "loss": 2.1245, "step": 9633 }, { "epoch": 0.4075640917167273, "grad_norm": 0.22622716426849365, "learning_rate": 0.001, "loss": 2.1381, "step": 9634 }, { "epoch": 0.40760639648024366, "grad_norm": 0.21170629560947418, "learning_rate": 0.001, "loss": 2.0326, "step": 9635 }, { "epoch": 0.40764870124376007, "grad_norm": 0.26057133078575134, "learning_rate": 0.001, "loss": 1.9696, "step": 9636 }, { "epoch": 0.4076910060072764, "grad_norm": 5.649323463439941, "learning_rate": 0.001, "loss": 2.8521, "step": 9637 }, { "epoch": 0.4077333107707928, "grad_norm": 0.32760143280029297, "learning_rate": 0.001, "loss": 3.0351, "step": 9638 }, { "epoch": 0.4077756155343092, "grad_norm": 1.6153209209442139, "learning_rate": 0.001, "loss": 1.9322, "step": 9639 }, { "epoch": 0.40781792029782554, "grad_norm": 1.4140678644180298, "learning_rate": 0.001, "loss": 2.2613, "step": 9640 }, { "epoch": 0.4078602250613419, "grad_norm": 0.33353015780448914, "learning_rate": 0.001, "loss": 3.0011, "step": 9641 }, { "epoch": 0.4079025298248583, "grad_norm": 27.971038818359375, "learning_rate": 0.001, "loss": 2.6894, "step": 9642 }, { "epoch": 0.40794483458837466, "grad_norm": 0.9342984557151794, "learning_rate": 0.001, "loss": 2.7896, "step": 9643 }, { "epoch": 0.407987139351891, "grad_norm": 2.035511016845703, "learning_rate": 0.001, "loss": 2.9743, "step": 9644 }, { "epoch": 0.40802944411540737, "grad_norm": 0.682000994682312, "learning_rate": 0.001, "loss": 2.2909, "step": 9645 }, { "epoch": 0.4080717488789238, "grad_norm": 6.955329418182373, "learning_rate": 0.001, "loss": 1.8326, "step": 9646 }, { "epoch": 0.40811405364244013, "grad_norm": 0.23917068541049957, "learning_rate": 0.001, "loss": 2.0451, "step": 9647 }, { "epoch": 0.4081563584059565, "grad_norm": 0.3635198473930359, "learning_rate": 0.001, "loss": 3.2204, "step": 9648 }, { "epoch": 0.4081986631694729, "grad_norm": 0.5178560614585876, "learning_rate": 0.001, "loss": 3.7043, "step": 9649 }, { "epoch": 0.40824096793298925, "grad_norm": 0.21071864664554596, "learning_rate": 0.001, "loss": 2.7752, "step": 9650 }, { "epoch": 0.4082832726965056, "grad_norm": 0.22183898091316223, "learning_rate": 0.001, "loss": 2.269, "step": 9651 }, { "epoch": 0.408325577460022, "grad_norm": 0.19146625697612762, "learning_rate": 0.001, "loss": 2.5297, "step": 9652 }, { "epoch": 0.40836788222353837, "grad_norm": 0.2734749913215637, "learning_rate": 0.001, "loss": 2.3894, "step": 9653 }, { "epoch": 0.4084101869870547, "grad_norm": 0.24852819740772247, "learning_rate": 0.001, "loss": 2.2241, "step": 9654 }, { "epoch": 0.40845249175057113, "grad_norm": 0.2272268384695053, "learning_rate": 0.001, "loss": 2.39, "step": 9655 }, { "epoch": 0.4084947965140875, "grad_norm": 0.5135742425918579, "learning_rate": 0.001, "loss": 3.2057, "step": 9656 }, { "epoch": 0.40853710127760384, "grad_norm": 0.4296305477619171, "learning_rate": 0.001, "loss": 2.3507, "step": 9657 }, { "epoch": 0.40857940604112025, "grad_norm": 0.17050939798355103, "learning_rate": 0.001, "loss": 1.5197, "step": 9658 }, { "epoch": 0.4086217108046366, "grad_norm": 4.443729877471924, "learning_rate": 0.001, "loss": 2.8856, "step": 9659 }, { "epoch": 0.40866401556815296, "grad_norm": 0.18589919805526733, "learning_rate": 0.001, "loss": 1.711, "step": 9660 }, { "epoch": 0.40870632033166937, "grad_norm": 0.9820543527603149, "learning_rate": 0.001, "loss": 2.7134, "step": 9661 }, { "epoch": 0.4087486250951857, "grad_norm": 3.852842092514038, "learning_rate": 0.001, "loss": 1.894, "step": 9662 }, { "epoch": 0.4087909298587021, "grad_norm": 0.6740947365760803, "learning_rate": 0.001, "loss": 1.9484, "step": 9663 }, { "epoch": 0.4088332346222185, "grad_norm": 0.22364242374897003, "learning_rate": 0.001, "loss": 2.7015, "step": 9664 }, { "epoch": 0.40887553938573484, "grad_norm": 0.283769428730011, "learning_rate": 0.001, "loss": 2.3642, "step": 9665 }, { "epoch": 0.4089178441492512, "grad_norm": 0.9470567107200623, "learning_rate": 0.001, "loss": 2.4213, "step": 9666 }, { "epoch": 0.40896014891276755, "grad_norm": 0.29617589712142944, "learning_rate": 0.001, "loss": 2.9135, "step": 9667 }, { "epoch": 0.40900245367628396, "grad_norm": 0.20563291013240814, "learning_rate": 0.001, "loss": 2.1679, "step": 9668 }, { "epoch": 0.4090447584398003, "grad_norm": 0.18204866349697113, "learning_rate": 0.001, "loss": 2.6882, "step": 9669 }, { "epoch": 0.40908706320331667, "grad_norm": 0.2885321080684662, "learning_rate": 0.001, "loss": 2.095, "step": 9670 }, { "epoch": 0.4091293679668331, "grad_norm": 2.8684329986572266, "learning_rate": 0.001, "loss": 2.4832, "step": 9671 }, { "epoch": 0.40917167273034943, "grad_norm": 0.32784703373908997, "learning_rate": 0.001, "loss": 2.5857, "step": 9672 }, { "epoch": 0.4092139774938658, "grad_norm": 0.7078139185905457, "learning_rate": 0.001, "loss": 1.8139, "step": 9673 }, { "epoch": 0.4092562822573822, "grad_norm": 0.25039342045783997, "learning_rate": 0.001, "loss": 3.3178, "step": 9674 }, { "epoch": 0.40929858702089855, "grad_norm": 0.17222987115383148, "learning_rate": 0.001, "loss": 2.2217, "step": 9675 }, { "epoch": 0.4093408917844149, "grad_norm": 0.15359899401664734, "learning_rate": 0.001, "loss": 2.2061, "step": 9676 }, { "epoch": 0.4093831965479313, "grad_norm": 0.2080516666173935, "learning_rate": 0.001, "loss": 2.7983, "step": 9677 }, { "epoch": 0.40942550131144767, "grad_norm": 0.17450571060180664, "learning_rate": 0.001, "loss": 1.8784, "step": 9678 }, { "epoch": 0.409467806074964, "grad_norm": 0.19042570888996124, "learning_rate": 0.001, "loss": 1.8635, "step": 9679 }, { "epoch": 0.40951011083848043, "grad_norm": 0.17510387301445007, "learning_rate": 0.001, "loss": 2.8996, "step": 9680 }, { "epoch": 0.4095524156019968, "grad_norm": 0.7525598406791687, "learning_rate": 0.001, "loss": 2.7079, "step": 9681 }, { "epoch": 0.40959472036551314, "grad_norm": 1.063751459121704, "learning_rate": 0.001, "loss": 2.0613, "step": 9682 }, { "epoch": 0.40963702512902955, "grad_norm": 0.35080280900001526, "learning_rate": 0.001, "loss": 4.928, "step": 9683 }, { "epoch": 0.4096793298925459, "grad_norm": 0.2547611594200134, "learning_rate": 0.001, "loss": 2.8568, "step": 9684 }, { "epoch": 0.40972163465606226, "grad_norm": 0.23505434393882751, "learning_rate": 0.001, "loss": 2.4675, "step": 9685 }, { "epoch": 0.40976393941957867, "grad_norm": 0.2235456109046936, "learning_rate": 0.001, "loss": 2.3584, "step": 9686 }, { "epoch": 0.409806244183095, "grad_norm": 0.26593148708343506, "learning_rate": 0.001, "loss": 2.3399, "step": 9687 }, { "epoch": 0.4098485489466114, "grad_norm": 0.38054850697517395, "learning_rate": 0.001, "loss": 1.9254, "step": 9688 }, { "epoch": 0.40989085371012773, "grad_norm": 0.18785469233989716, "learning_rate": 0.001, "loss": 2.3267, "step": 9689 }, { "epoch": 0.40993315847364414, "grad_norm": 0.19680415093898773, "learning_rate": 0.001, "loss": 2.1284, "step": 9690 }, { "epoch": 0.4099754632371605, "grad_norm": 0.8819651007652283, "learning_rate": 0.001, "loss": 2.2144, "step": 9691 }, { "epoch": 0.41001776800067685, "grad_norm": 0.36599695682525635, "learning_rate": 0.001, "loss": 2.5738, "step": 9692 }, { "epoch": 0.41006007276419326, "grad_norm": 1.350098967552185, "learning_rate": 0.001, "loss": 3.2411, "step": 9693 }, { "epoch": 0.4101023775277096, "grad_norm": 0.252407968044281, "learning_rate": 0.001, "loss": 3.065, "step": 9694 }, { "epoch": 0.41014468229122597, "grad_norm": 5.906357765197754, "learning_rate": 0.001, "loss": 1.996, "step": 9695 }, { "epoch": 0.4101869870547424, "grad_norm": 0.30286088585853577, "learning_rate": 0.001, "loss": 2.4888, "step": 9696 }, { "epoch": 0.41022929181825873, "grad_norm": 11.067898750305176, "learning_rate": 0.001, "loss": 2.2548, "step": 9697 }, { "epoch": 0.4102715965817751, "grad_norm": 0.21052215993404388, "learning_rate": 0.001, "loss": 3.1103, "step": 9698 }, { "epoch": 0.4103139013452915, "grad_norm": 0.24739009141921997, "learning_rate": 0.001, "loss": 3.126, "step": 9699 }, { "epoch": 0.41035620610880785, "grad_norm": 0.22047722339630127, "learning_rate": 0.001, "loss": 2.3277, "step": 9700 }, { "epoch": 0.4103985108723242, "grad_norm": 0.7261704802513123, "learning_rate": 0.001, "loss": 2.8432, "step": 9701 }, { "epoch": 0.4104408156358406, "grad_norm": 0.2856643795967102, "learning_rate": 0.001, "loss": 1.9512, "step": 9702 }, { "epoch": 0.41048312039935697, "grad_norm": 0.49634721875190735, "learning_rate": 0.001, "loss": 2.1401, "step": 9703 }, { "epoch": 0.4105254251628733, "grad_norm": 0.24824899435043335, "learning_rate": 0.001, "loss": 3.2889, "step": 9704 }, { "epoch": 0.41056772992638974, "grad_norm": 0.20967476069927216, "learning_rate": 0.001, "loss": 2.091, "step": 9705 }, { "epoch": 0.4106100346899061, "grad_norm": 0.1925218403339386, "learning_rate": 0.001, "loss": 1.8878, "step": 9706 }, { "epoch": 0.41065233945342244, "grad_norm": 1.0082390308380127, "learning_rate": 0.001, "loss": 1.9122, "step": 9707 }, { "epoch": 0.41069464421693885, "grad_norm": 6.59636926651001, "learning_rate": 0.001, "loss": 1.9186, "step": 9708 }, { "epoch": 0.4107369489804552, "grad_norm": 0.2302708476781845, "learning_rate": 0.001, "loss": 2.3975, "step": 9709 }, { "epoch": 0.41077925374397156, "grad_norm": 5.976405143737793, "learning_rate": 0.001, "loss": 2.8853, "step": 9710 }, { "epoch": 0.4108215585074879, "grad_norm": 5.888969421386719, "learning_rate": 0.001, "loss": 1.6746, "step": 9711 }, { "epoch": 0.4108638632710043, "grad_norm": 2.457515001296997, "learning_rate": 0.001, "loss": 3.2117, "step": 9712 }, { "epoch": 0.4109061680345207, "grad_norm": 0.266607403755188, "learning_rate": 0.001, "loss": 1.9498, "step": 9713 }, { "epoch": 0.41094847279803703, "grad_norm": 1.2756965160369873, "learning_rate": 0.001, "loss": 2.7778, "step": 9714 }, { "epoch": 0.41099077756155344, "grad_norm": 0.4476830065250397, "learning_rate": 0.001, "loss": 3.3292, "step": 9715 }, { "epoch": 0.4110330823250698, "grad_norm": 0.2495070993900299, "learning_rate": 0.001, "loss": 2.3151, "step": 9716 }, { "epoch": 0.41107538708858615, "grad_norm": 0.30039656162261963, "learning_rate": 0.001, "loss": 1.9914, "step": 9717 }, { "epoch": 0.41111769185210256, "grad_norm": 0.47619497776031494, "learning_rate": 0.001, "loss": 3.4457, "step": 9718 }, { "epoch": 0.4111599966156189, "grad_norm": 0.21117328107357025, "learning_rate": 0.001, "loss": 1.7526, "step": 9719 }, { "epoch": 0.41120230137913527, "grad_norm": 0.4838405251502991, "learning_rate": 0.001, "loss": 2.325, "step": 9720 }, { "epoch": 0.4112446061426517, "grad_norm": 6.137927532196045, "learning_rate": 0.001, "loss": 3.0532, "step": 9721 }, { "epoch": 0.41128691090616804, "grad_norm": 0.5445790886878967, "learning_rate": 0.001, "loss": 2.6303, "step": 9722 }, { "epoch": 0.4113292156696844, "grad_norm": 0.24159128963947296, "learning_rate": 0.001, "loss": 2.4226, "step": 9723 }, { "epoch": 0.4113715204332008, "grad_norm": 0.29332977533340454, "learning_rate": 0.001, "loss": 2.8964, "step": 9724 }, { "epoch": 0.41141382519671715, "grad_norm": 0.6242585182189941, "learning_rate": 0.001, "loss": 2.5372, "step": 9725 }, { "epoch": 0.4114561299602335, "grad_norm": 0.2993077039718628, "learning_rate": 0.001, "loss": 2.4038, "step": 9726 }, { "epoch": 0.4114984347237499, "grad_norm": 0.4130201041698456, "learning_rate": 0.001, "loss": 2.5595, "step": 9727 }, { "epoch": 0.41154073948726627, "grad_norm": 0.3834516406059265, "learning_rate": 0.001, "loss": 3.7084, "step": 9728 }, { "epoch": 0.4115830442507826, "grad_norm": 0.29505565762519836, "learning_rate": 0.001, "loss": 2.1188, "step": 9729 }, { "epoch": 0.41162534901429904, "grad_norm": 0.4178231954574585, "learning_rate": 0.001, "loss": 2.9802, "step": 9730 }, { "epoch": 0.4116676537778154, "grad_norm": 0.2813502252101898, "learning_rate": 0.001, "loss": 1.898, "step": 9731 }, { "epoch": 0.41170995854133174, "grad_norm": 0.24282923340797424, "learning_rate": 0.001, "loss": 3.137, "step": 9732 }, { "epoch": 0.4117522633048481, "grad_norm": 0.21911819279193878, "learning_rate": 0.001, "loss": 2.452, "step": 9733 }, { "epoch": 0.4117945680683645, "grad_norm": 0.6716076731681824, "learning_rate": 0.001, "loss": 2.1814, "step": 9734 }, { "epoch": 0.41183687283188086, "grad_norm": 0.9324064254760742, "learning_rate": 0.001, "loss": 3.5659, "step": 9735 }, { "epoch": 0.4118791775953972, "grad_norm": 0.3203246593475342, "learning_rate": 0.001, "loss": 2.3815, "step": 9736 }, { "epoch": 0.4119214823589136, "grad_norm": 1.0174567699432373, "learning_rate": 0.001, "loss": 2.8275, "step": 9737 }, { "epoch": 0.41196378712243, "grad_norm": 5.53692626953125, "learning_rate": 0.001, "loss": 1.5753, "step": 9738 }, { "epoch": 0.41200609188594634, "grad_norm": 0.3196747303009033, "learning_rate": 0.001, "loss": 2.2303, "step": 9739 }, { "epoch": 0.41204839664946274, "grad_norm": 7.535372257232666, "learning_rate": 0.001, "loss": 2.7096, "step": 9740 }, { "epoch": 0.4120907014129791, "grad_norm": 0.5855947136878967, "learning_rate": 0.001, "loss": 2.4305, "step": 9741 }, { "epoch": 0.41213300617649545, "grad_norm": 0.4567291736602783, "learning_rate": 0.001, "loss": 2.9843, "step": 9742 }, { "epoch": 0.41217531094001186, "grad_norm": 0.276134729385376, "learning_rate": 0.001, "loss": 2.6929, "step": 9743 }, { "epoch": 0.4122176157035282, "grad_norm": 2.817307472229004, "learning_rate": 0.001, "loss": 2.2315, "step": 9744 }, { "epoch": 0.41225992046704457, "grad_norm": 1.8684515953063965, "learning_rate": 0.001, "loss": 1.7073, "step": 9745 }, { "epoch": 0.412302225230561, "grad_norm": 0.2631063759326935, "learning_rate": 0.001, "loss": 2.4196, "step": 9746 }, { "epoch": 0.41234452999407734, "grad_norm": 0.9968166947364807, "learning_rate": 0.001, "loss": 2.3738, "step": 9747 }, { "epoch": 0.4123868347575937, "grad_norm": 0.2650030851364136, "learning_rate": 0.001, "loss": 2.0034, "step": 9748 }, { "epoch": 0.4124291395211101, "grad_norm": 0.18941166996955872, "learning_rate": 0.001, "loss": 2.4076, "step": 9749 }, { "epoch": 0.41247144428462645, "grad_norm": 16.938201904296875, "learning_rate": 0.001, "loss": 2.0145, "step": 9750 }, { "epoch": 0.4125137490481428, "grad_norm": 2.8237738609313965, "learning_rate": 0.001, "loss": 2.1931, "step": 9751 }, { "epoch": 0.4125560538116592, "grad_norm": 0.2568257749080658, "learning_rate": 0.001, "loss": 2.2738, "step": 9752 }, { "epoch": 0.41259835857517557, "grad_norm": 0.2610342502593994, "learning_rate": 0.001, "loss": 2.0419, "step": 9753 }, { "epoch": 0.4126406633386919, "grad_norm": 0.22543591260910034, "learning_rate": 0.001, "loss": 2.1172, "step": 9754 }, { "epoch": 0.41268296810220834, "grad_norm": 1.7136520147323608, "learning_rate": 0.001, "loss": 2.7651, "step": 9755 }, { "epoch": 0.4127252728657247, "grad_norm": 1.0864278078079224, "learning_rate": 0.001, "loss": 2.6813, "step": 9756 }, { "epoch": 0.41276757762924104, "grad_norm": 0.16335022449493408, "learning_rate": 0.001, "loss": 2.7791, "step": 9757 }, { "epoch": 0.4128098823927574, "grad_norm": 0.2040264904499054, "learning_rate": 0.001, "loss": 2.0147, "step": 9758 }, { "epoch": 0.4128521871562738, "grad_norm": 0.23535273969173431, "learning_rate": 0.001, "loss": 2.2457, "step": 9759 }, { "epoch": 0.41289449191979016, "grad_norm": 0.2834819257259369, "learning_rate": 0.001, "loss": 2.5383, "step": 9760 }, { "epoch": 0.4129367966833065, "grad_norm": 0.44039419293403625, "learning_rate": 0.001, "loss": 2.2677, "step": 9761 }, { "epoch": 0.4129791014468229, "grad_norm": 0.19605425000190735, "learning_rate": 0.001, "loss": 2.0914, "step": 9762 }, { "epoch": 0.4130214062103393, "grad_norm": 0.3128882050514221, "learning_rate": 0.001, "loss": 2.4405, "step": 9763 }, { "epoch": 0.41306371097385564, "grad_norm": 0.19780918955802917, "learning_rate": 0.001, "loss": 2.9869, "step": 9764 }, { "epoch": 0.41310601573737205, "grad_norm": 0.7669022679328918, "learning_rate": 0.001, "loss": 2.0915, "step": 9765 }, { "epoch": 0.4131483205008884, "grad_norm": 1.3960890769958496, "learning_rate": 0.001, "loss": 2.018, "step": 9766 }, { "epoch": 0.41319062526440475, "grad_norm": 0.4077872931957245, "learning_rate": 0.001, "loss": 2.4418, "step": 9767 }, { "epoch": 0.41323293002792116, "grad_norm": 0.22895626723766327, "learning_rate": 0.001, "loss": 3.1219, "step": 9768 }, { "epoch": 0.4132752347914375, "grad_norm": 0.3246360719203949, "learning_rate": 0.001, "loss": 2.5731, "step": 9769 }, { "epoch": 0.41331753955495387, "grad_norm": 0.1943444311618805, "learning_rate": 0.001, "loss": 2.6833, "step": 9770 }, { "epoch": 0.4133598443184703, "grad_norm": 0.23944945633411407, "learning_rate": 0.001, "loss": 2.1968, "step": 9771 }, { "epoch": 0.41340214908198664, "grad_norm": 0.2529198229312897, "learning_rate": 0.001, "loss": 2.5286, "step": 9772 }, { "epoch": 0.413444453845503, "grad_norm": 1.6711100339889526, "learning_rate": 0.001, "loss": 2.4631, "step": 9773 }, { "epoch": 0.4134867586090194, "grad_norm": 0.16065281629562378, "learning_rate": 0.001, "loss": 2.8067, "step": 9774 }, { "epoch": 0.41352906337253575, "grad_norm": 0.1823013722896576, "learning_rate": 0.001, "loss": 2.4533, "step": 9775 }, { "epoch": 0.4135713681360521, "grad_norm": 0.5769549012184143, "learning_rate": 0.001, "loss": 1.9118, "step": 9776 }, { "epoch": 0.4136136728995685, "grad_norm": 0.3120274841785431, "learning_rate": 0.001, "loss": 2.6537, "step": 9777 }, { "epoch": 0.4136559776630849, "grad_norm": 2.704646110534668, "learning_rate": 0.001, "loss": 2.6022, "step": 9778 }, { "epoch": 0.4136982824266012, "grad_norm": 3.760073661804199, "learning_rate": 0.001, "loss": 2.4984, "step": 9779 }, { "epoch": 0.4137405871901176, "grad_norm": 0.3028760254383087, "learning_rate": 0.001, "loss": 2.7438, "step": 9780 }, { "epoch": 0.413782891953634, "grad_norm": 0.19027100503444672, "learning_rate": 0.001, "loss": 2.5773, "step": 9781 }, { "epoch": 0.41382519671715035, "grad_norm": 0.703472375869751, "learning_rate": 0.001, "loss": 1.811, "step": 9782 }, { "epoch": 0.4138675014806667, "grad_norm": 0.26506584882736206, "learning_rate": 0.001, "loss": 2.8884, "step": 9783 }, { "epoch": 0.4139098062441831, "grad_norm": 0.2612300515174866, "learning_rate": 0.001, "loss": 2.7629, "step": 9784 }, { "epoch": 0.41395211100769946, "grad_norm": 0.2701547145843506, "learning_rate": 0.001, "loss": 2.6545, "step": 9785 }, { "epoch": 0.4139944157712158, "grad_norm": 0.5775189995765686, "learning_rate": 0.001, "loss": 2.2237, "step": 9786 }, { "epoch": 0.4140367205347322, "grad_norm": 0.29414102435112, "learning_rate": 0.001, "loss": 2.5268, "step": 9787 }, { "epoch": 0.4140790252982486, "grad_norm": 0.3888061046600342, "learning_rate": 0.001, "loss": 2.466, "step": 9788 }, { "epoch": 0.41412133006176494, "grad_norm": 0.42892327904701233, "learning_rate": 0.001, "loss": 2.6945, "step": 9789 }, { "epoch": 0.41416363482528135, "grad_norm": 0.3391939103603363, "learning_rate": 0.001, "loss": 2.2967, "step": 9790 }, { "epoch": 0.4142059395887977, "grad_norm": 0.1967991441488266, "learning_rate": 0.001, "loss": 2.2695, "step": 9791 }, { "epoch": 0.41424824435231405, "grad_norm": 0.18619079887866974, "learning_rate": 0.001, "loss": 2.6969, "step": 9792 }, { "epoch": 0.41429054911583046, "grad_norm": 0.21667589247226715, "learning_rate": 0.001, "loss": 2.346, "step": 9793 }, { "epoch": 0.4143328538793468, "grad_norm": 0.1584491729736328, "learning_rate": 0.001, "loss": 1.6244, "step": 9794 }, { "epoch": 0.4143751586428632, "grad_norm": 0.16956403851509094, "learning_rate": 0.001, "loss": 2.5346, "step": 9795 }, { "epoch": 0.4144174634063796, "grad_norm": 0.2773437798023224, "learning_rate": 0.001, "loss": 3.2338, "step": 9796 }, { "epoch": 0.41445976816989594, "grad_norm": 0.2252512127161026, "learning_rate": 0.001, "loss": 2.1501, "step": 9797 }, { "epoch": 0.4145020729334123, "grad_norm": 0.9727465510368347, "learning_rate": 0.001, "loss": 2.7049, "step": 9798 }, { "epoch": 0.4145443776969287, "grad_norm": 0.23791512846946716, "learning_rate": 0.001, "loss": 1.7446, "step": 9799 }, { "epoch": 0.41458668246044506, "grad_norm": 14.202518463134766, "learning_rate": 0.001, "loss": 3.8168, "step": 9800 }, { "epoch": 0.4146289872239614, "grad_norm": 0.44175243377685547, "learning_rate": 0.001, "loss": 2.1732, "step": 9801 }, { "epoch": 0.41467129198747776, "grad_norm": 0.17590700089931488, "learning_rate": 0.001, "loss": 1.9734, "step": 9802 }, { "epoch": 0.4147135967509942, "grad_norm": 0.18787901103496552, "learning_rate": 0.001, "loss": 2.4456, "step": 9803 }, { "epoch": 0.4147559015145105, "grad_norm": 0.17629052698612213, "learning_rate": 0.001, "loss": 1.8945, "step": 9804 }, { "epoch": 0.4147982062780269, "grad_norm": 0.17695337533950806, "learning_rate": 0.001, "loss": 2.3594, "step": 9805 }, { "epoch": 0.4148405110415433, "grad_norm": 0.24011565744876862, "learning_rate": 0.001, "loss": 1.9115, "step": 9806 }, { "epoch": 0.41488281580505965, "grad_norm": 3.5047898292541504, "learning_rate": 0.001, "loss": 2.0496, "step": 9807 }, { "epoch": 0.414925120568576, "grad_norm": 3.9481778144836426, "learning_rate": 0.001, "loss": 3.1042, "step": 9808 }, { "epoch": 0.4149674253320924, "grad_norm": 0.44774043560028076, "learning_rate": 0.001, "loss": 2.4119, "step": 9809 }, { "epoch": 0.41500973009560876, "grad_norm": 0.9812766313552856, "learning_rate": 0.001, "loss": 2.6236, "step": 9810 }, { "epoch": 0.4150520348591251, "grad_norm": 0.47764018177986145, "learning_rate": 0.001, "loss": 2.3742, "step": 9811 }, { "epoch": 0.41509433962264153, "grad_norm": 0.18392595648765564, "learning_rate": 0.001, "loss": 2.4516, "step": 9812 }, { "epoch": 0.4151366443861579, "grad_norm": 1.547833800315857, "learning_rate": 0.001, "loss": 1.7756, "step": 9813 }, { "epoch": 0.41517894914967424, "grad_norm": 0.18931736052036285, "learning_rate": 0.001, "loss": 1.7584, "step": 9814 }, { "epoch": 0.41522125391319065, "grad_norm": 0.1544259935617447, "learning_rate": 0.001, "loss": 1.4929, "step": 9815 }, { "epoch": 0.415263558676707, "grad_norm": 0.2300853431224823, "learning_rate": 0.001, "loss": 2.7883, "step": 9816 }, { "epoch": 0.41530586344022336, "grad_norm": 1.0780434608459473, "learning_rate": 0.001, "loss": 2.5039, "step": 9817 }, { "epoch": 0.41534816820373976, "grad_norm": 0.5089141130447388, "learning_rate": 0.001, "loss": 1.7734, "step": 9818 }, { "epoch": 0.4153904729672561, "grad_norm": 3.429790496826172, "learning_rate": 0.001, "loss": 2.3779, "step": 9819 }, { "epoch": 0.4154327777307725, "grad_norm": 6.695871353149414, "learning_rate": 0.001, "loss": 2.3627, "step": 9820 }, { "epoch": 0.4154750824942889, "grad_norm": 0.1914941370487213, "learning_rate": 0.001, "loss": 2.655, "step": 9821 }, { "epoch": 0.41551738725780524, "grad_norm": 0.6268720030784607, "learning_rate": 0.001, "loss": 2.0422, "step": 9822 }, { "epoch": 0.4155596920213216, "grad_norm": 0.47744235396385193, "learning_rate": 0.001, "loss": 2.074, "step": 9823 }, { "epoch": 0.41560199678483795, "grad_norm": 0.15372218191623688, "learning_rate": 0.001, "loss": 2.065, "step": 9824 }, { "epoch": 0.41564430154835436, "grad_norm": 0.4260820746421814, "learning_rate": 0.001, "loss": 2.9168, "step": 9825 }, { "epoch": 0.4156866063118707, "grad_norm": 0.19325514137744904, "learning_rate": 0.001, "loss": 1.8882, "step": 9826 }, { "epoch": 0.41572891107538706, "grad_norm": 0.1592322140932083, "learning_rate": 0.001, "loss": 2.4256, "step": 9827 }, { "epoch": 0.4157712158389035, "grad_norm": 0.17341256141662598, "learning_rate": 0.001, "loss": 2.3686, "step": 9828 }, { "epoch": 0.41581352060241983, "grad_norm": 0.20238442718982697, "learning_rate": 0.001, "loss": 2.2101, "step": 9829 }, { "epoch": 0.4158558253659362, "grad_norm": 0.3207574784755707, "learning_rate": 0.001, "loss": 2.9256, "step": 9830 }, { "epoch": 0.4158981301294526, "grad_norm": 0.1748972088098526, "learning_rate": 0.001, "loss": 1.8409, "step": 9831 }, { "epoch": 0.41594043489296895, "grad_norm": 0.24550606310367584, "learning_rate": 0.001, "loss": 3.3066, "step": 9832 }, { "epoch": 0.4159827396564853, "grad_norm": 0.9889479279518127, "learning_rate": 0.001, "loss": 2.1071, "step": 9833 }, { "epoch": 0.4160250444200017, "grad_norm": 0.20585237443447113, "learning_rate": 0.001, "loss": 2.5545, "step": 9834 }, { "epoch": 0.41606734918351806, "grad_norm": 0.13945859670639038, "learning_rate": 0.001, "loss": 1.4628, "step": 9835 }, { "epoch": 0.4161096539470344, "grad_norm": 27.057586669921875, "learning_rate": 0.001, "loss": 2.8699, "step": 9836 }, { "epoch": 0.41615195871055083, "grad_norm": 0.18365734815597534, "learning_rate": 0.001, "loss": 1.5084, "step": 9837 }, { "epoch": 0.4161942634740672, "grad_norm": 0.43779316544532776, "learning_rate": 0.001, "loss": 2.1716, "step": 9838 }, { "epoch": 0.41623656823758354, "grad_norm": 0.7131558060646057, "learning_rate": 0.001, "loss": 2.9858, "step": 9839 }, { "epoch": 0.41627887300109995, "grad_norm": 3.8856091499328613, "learning_rate": 0.001, "loss": 3.9777, "step": 9840 }, { "epoch": 0.4163211777646163, "grad_norm": 0.17805153131484985, "learning_rate": 0.001, "loss": 1.8416, "step": 9841 }, { "epoch": 0.41636348252813266, "grad_norm": 0.5579624772071838, "learning_rate": 0.001, "loss": 3.2418, "step": 9842 }, { "epoch": 0.41640578729164907, "grad_norm": 0.2010784149169922, "learning_rate": 0.001, "loss": 2.5104, "step": 9843 }, { "epoch": 0.4164480920551654, "grad_norm": 0.1619592159986496, "learning_rate": 0.001, "loss": 1.9748, "step": 9844 }, { "epoch": 0.4164903968186818, "grad_norm": 0.40515822172164917, "learning_rate": 0.001, "loss": 2.7918, "step": 9845 }, { "epoch": 0.41653270158219813, "grad_norm": 0.22229692339897156, "learning_rate": 0.001, "loss": 2.0756, "step": 9846 }, { "epoch": 0.41657500634571454, "grad_norm": 0.21411965787410736, "learning_rate": 0.001, "loss": 2.1632, "step": 9847 }, { "epoch": 0.4166173111092309, "grad_norm": 0.21786244213581085, "learning_rate": 0.001, "loss": 3.3767, "step": 9848 }, { "epoch": 0.41665961587274725, "grad_norm": 0.20544399321079254, "learning_rate": 0.001, "loss": 2.1629, "step": 9849 }, { "epoch": 0.41670192063626366, "grad_norm": 0.17390744388103485, "learning_rate": 0.001, "loss": 2.2281, "step": 9850 }, { "epoch": 0.41674422539978, "grad_norm": 0.5404853820800781, "learning_rate": 0.001, "loss": 2.8778, "step": 9851 }, { "epoch": 0.41678653016329636, "grad_norm": 0.18228615820407867, "learning_rate": 0.001, "loss": 2.2249, "step": 9852 }, { "epoch": 0.4168288349268128, "grad_norm": 1.0131566524505615, "learning_rate": 0.001, "loss": 2.8682, "step": 9853 }, { "epoch": 0.41687113969032913, "grad_norm": 0.1611110270023346, "learning_rate": 0.001, "loss": 2.1683, "step": 9854 }, { "epoch": 0.4169134444538455, "grad_norm": 0.7783699631690979, "learning_rate": 0.001, "loss": 2.6472, "step": 9855 }, { "epoch": 0.4169557492173619, "grad_norm": 0.15802177786827087, "learning_rate": 0.001, "loss": 2.6647, "step": 9856 }, { "epoch": 0.41699805398087825, "grad_norm": 0.2226634919643402, "learning_rate": 0.001, "loss": 1.9478, "step": 9857 }, { "epoch": 0.4170403587443946, "grad_norm": 0.4912348687648773, "learning_rate": 0.001, "loss": 1.7005, "step": 9858 }, { "epoch": 0.417082663507911, "grad_norm": 0.1934366077184677, "learning_rate": 0.001, "loss": 2.5896, "step": 9859 }, { "epoch": 0.41712496827142737, "grad_norm": 0.20569314062595367, "learning_rate": 0.001, "loss": 2.9554, "step": 9860 }, { "epoch": 0.4171672730349437, "grad_norm": 0.18698929250240326, "learning_rate": 0.001, "loss": 2.1069, "step": 9861 }, { "epoch": 0.41720957779846013, "grad_norm": 0.2676628530025482, "learning_rate": 0.001, "loss": 2.5049, "step": 9862 }, { "epoch": 0.4172518825619765, "grad_norm": 0.21008531749248505, "learning_rate": 0.001, "loss": 2.1362, "step": 9863 }, { "epoch": 0.41729418732549284, "grad_norm": 0.19284501671791077, "learning_rate": 0.001, "loss": 3.301, "step": 9864 }, { "epoch": 0.41733649208900925, "grad_norm": 0.19138003885746002, "learning_rate": 0.001, "loss": 2.2343, "step": 9865 }, { "epoch": 0.4173787968525256, "grad_norm": 3.3157238960266113, "learning_rate": 0.001, "loss": 1.9117, "step": 9866 }, { "epoch": 0.41742110161604196, "grad_norm": 0.1964617371559143, "learning_rate": 0.001, "loss": 2.8301, "step": 9867 }, { "epoch": 0.4174634063795583, "grad_norm": 0.27521830797195435, "learning_rate": 0.001, "loss": 1.853, "step": 9868 }, { "epoch": 0.4175057111430747, "grad_norm": 0.26007047295570374, "learning_rate": 0.001, "loss": 2.6994, "step": 9869 }, { "epoch": 0.4175480159065911, "grad_norm": 0.2345419079065323, "learning_rate": 0.001, "loss": 3.8451, "step": 9870 }, { "epoch": 0.41759032067010743, "grad_norm": 0.5388814210891724, "learning_rate": 0.001, "loss": 2.7057, "step": 9871 }, { "epoch": 0.41763262543362384, "grad_norm": 0.1674017757177353, "learning_rate": 0.001, "loss": 1.9514, "step": 9872 }, { "epoch": 0.4176749301971402, "grad_norm": 0.2036234587430954, "learning_rate": 0.001, "loss": 2.4295, "step": 9873 }, { "epoch": 0.41771723496065655, "grad_norm": 5.006783485412598, "learning_rate": 0.001, "loss": 2.2285, "step": 9874 }, { "epoch": 0.41775953972417296, "grad_norm": 0.3043411374092102, "learning_rate": 0.001, "loss": 2.1786, "step": 9875 }, { "epoch": 0.4178018444876893, "grad_norm": 0.18863193690776825, "learning_rate": 0.001, "loss": 2.0902, "step": 9876 }, { "epoch": 0.41784414925120567, "grad_norm": 0.21312756836414337, "learning_rate": 0.001, "loss": 2.418, "step": 9877 }, { "epoch": 0.4178864540147221, "grad_norm": 0.25817111134529114, "learning_rate": 0.001, "loss": 3.3019, "step": 9878 }, { "epoch": 0.41792875877823843, "grad_norm": 0.17878010869026184, "learning_rate": 0.001, "loss": 2.1354, "step": 9879 }, { "epoch": 0.4179710635417548, "grad_norm": 0.2418394237756729, "learning_rate": 0.001, "loss": 2.7489, "step": 9880 }, { "epoch": 0.4180133683052712, "grad_norm": 5.557830333709717, "learning_rate": 0.001, "loss": 1.8045, "step": 9881 }, { "epoch": 0.41805567306878755, "grad_norm": 0.17561772465705872, "learning_rate": 0.001, "loss": 2.6087, "step": 9882 }, { "epoch": 0.4180979778323039, "grad_norm": 0.1871245950460434, "learning_rate": 0.001, "loss": 2.7775, "step": 9883 }, { "epoch": 0.4181402825958203, "grad_norm": 0.17754124104976654, "learning_rate": 0.001, "loss": 1.7533, "step": 9884 }, { "epoch": 0.41818258735933667, "grad_norm": 0.406012624502182, "learning_rate": 0.001, "loss": 2.6556, "step": 9885 }, { "epoch": 0.418224892122853, "grad_norm": 0.2209833264350891, "learning_rate": 0.001, "loss": 2.6001, "step": 9886 }, { "epoch": 0.41826719688636943, "grad_norm": 5.273458480834961, "learning_rate": 0.001, "loss": 2.443, "step": 9887 }, { "epoch": 0.4183095016498858, "grad_norm": 0.18473027646541595, "learning_rate": 0.001, "loss": 1.6151, "step": 9888 }, { "epoch": 0.41835180641340214, "grad_norm": 3.541088342666626, "learning_rate": 0.001, "loss": 2.5512, "step": 9889 }, { "epoch": 0.41839411117691855, "grad_norm": 0.17209482192993164, "learning_rate": 0.001, "loss": 2.3301, "step": 9890 }, { "epoch": 0.4184364159404349, "grad_norm": 0.2227262258529663, "learning_rate": 0.001, "loss": 2.3766, "step": 9891 }, { "epoch": 0.41847872070395126, "grad_norm": 0.17431975901126862, "learning_rate": 0.001, "loss": 1.6998, "step": 9892 }, { "epoch": 0.4185210254674676, "grad_norm": 0.3029531240463257, "learning_rate": 0.001, "loss": 2.4269, "step": 9893 }, { "epoch": 0.418563330230984, "grad_norm": 0.22647684812545776, "learning_rate": 0.001, "loss": 1.8381, "step": 9894 }, { "epoch": 0.4186056349945004, "grad_norm": 0.21035243570804596, "learning_rate": 0.001, "loss": 3.3303, "step": 9895 }, { "epoch": 0.41864793975801673, "grad_norm": 1.0283381938934326, "learning_rate": 0.001, "loss": 2.258, "step": 9896 }, { "epoch": 0.41869024452153314, "grad_norm": 1.2793562412261963, "learning_rate": 0.001, "loss": 3.1089, "step": 9897 }, { "epoch": 0.4187325492850495, "grad_norm": 0.21786046028137207, "learning_rate": 0.001, "loss": 1.7205, "step": 9898 }, { "epoch": 0.41877485404856585, "grad_norm": 0.16256938874721527, "learning_rate": 0.001, "loss": 2.8039, "step": 9899 }, { "epoch": 0.41881715881208226, "grad_norm": 0.4168480634689331, "learning_rate": 0.001, "loss": 2.3668, "step": 9900 }, { "epoch": 0.4188594635755986, "grad_norm": 0.1868831068277359, "learning_rate": 0.001, "loss": 2.6872, "step": 9901 }, { "epoch": 0.41890176833911497, "grad_norm": 2.076476573944092, "learning_rate": 0.001, "loss": 2.4575, "step": 9902 }, { "epoch": 0.4189440731026314, "grad_norm": 0.22399549186229706, "learning_rate": 0.001, "loss": 2.7196, "step": 9903 }, { "epoch": 0.41898637786614773, "grad_norm": 0.24914605915546417, "learning_rate": 0.001, "loss": 1.9792, "step": 9904 }, { "epoch": 0.4190286826296641, "grad_norm": 0.265962690114975, "learning_rate": 0.001, "loss": 1.5826, "step": 9905 }, { "epoch": 0.4190709873931805, "grad_norm": 0.20687235891819, "learning_rate": 0.001, "loss": 2.7606, "step": 9906 }, { "epoch": 0.41911329215669685, "grad_norm": 0.18030163645744324, "learning_rate": 0.001, "loss": 1.7922, "step": 9907 }, { "epoch": 0.4191555969202132, "grad_norm": 0.17712494730949402, "learning_rate": 0.001, "loss": 1.9416, "step": 9908 }, { "epoch": 0.4191979016837296, "grad_norm": 0.2647468149662018, "learning_rate": 0.001, "loss": 1.6112, "step": 9909 }, { "epoch": 0.41924020644724597, "grad_norm": 10.648910522460938, "learning_rate": 0.001, "loss": 2.2037, "step": 9910 }, { "epoch": 0.4192825112107623, "grad_norm": 0.22634510695934296, "learning_rate": 0.001, "loss": 1.8658, "step": 9911 }, { "epoch": 0.41932481597427873, "grad_norm": 0.19005584716796875, "learning_rate": 0.001, "loss": 1.784, "step": 9912 }, { "epoch": 0.4193671207377951, "grad_norm": 11.0584077835083, "learning_rate": 0.001, "loss": 2.403, "step": 9913 }, { "epoch": 0.41940942550131144, "grad_norm": 0.18908476829528809, "learning_rate": 0.001, "loss": 2.2306, "step": 9914 }, { "epoch": 0.4194517302648278, "grad_norm": 0.196416437625885, "learning_rate": 0.001, "loss": 2.8057, "step": 9915 }, { "epoch": 0.4194940350283442, "grad_norm": 0.37092915177345276, "learning_rate": 0.001, "loss": 3.7798, "step": 9916 }, { "epoch": 0.41953633979186056, "grad_norm": 0.1975582391023636, "learning_rate": 0.001, "loss": 2.7689, "step": 9917 }, { "epoch": 0.4195786445553769, "grad_norm": 16.982938766479492, "learning_rate": 0.001, "loss": 3.2579, "step": 9918 }, { "epoch": 0.4196209493188933, "grad_norm": 0.22323065996170044, "learning_rate": 0.001, "loss": 2.1371, "step": 9919 }, { "epoch": 0.4196632540824097, "grad_norm": 0.3615620732307434, "learning_rate": 0.001, "loss": 1.9409, "step": 9920 }, { "epoch": 0.41970555884592603, "grad_norm": 0.7461709976196289, "learning_rate": 0.001, "loss": 2.4982, "step": 9921 }, { "epoch": 0.41974786360944244, "grad_norm": 0.20795656740665436, "learning_rate": 0.001, "loss": 2.3513, "step": 9922 }, { "epoch": 0.4197901683729588, "grad_norm": 0.19558289647102356, "learning_rate": 0.001, "loss": 2.7092, "step": 9923 }, { "epoch": 0.41983247313647515, "grad_norm": 0.2396133542060852, "learning_rate": 0.001, "loss": 2.6774, "step": 9924 }, { "epoch": 0.41987477789999156, "grad_norm": 0.2055635303258896, "learning_rate": 0.001, "loss": 3.1715, "step": 9925 }, { "epoch": 0.4199170826635079, "grad_norm": 0.28740987181663513, "learning_rate": 0.001, "loss": 2.0135, "step": 9926 }, { "epoch": 0.41995938742702427, "grad_norm": 3.5585386753082275, "learning_rate": 0.001, "loss": 2.0134, "step": 9927 }, { "epoch": 0.4200016921905407, "grad_norm": 0.19014716148376465, "learning_rate": 0.001, "loss": 3.6427, "step": 9928 }, { "epoch": 0.42004399695405703, "grad_norm": 0.32993394136428833, "learning_rate": 0.001, "loss": 1.8829, "step": 9929 }, { "epoch": 0.4200863017175734, "grad_norm": 0.22422336041927338, "learning_rate": 0.001, "loss": 2.4582, "step": 9930 }, { "epoch": 0.4201286064810898, "grad_norm": 1.323824167251587, "learning_rate": 0.001, "loss": 3.1731, "step": 9931 }, { "epoch": 0.42017091124460615, "grad_norm": 0.17324510216712952, "learning_rate": 0.001, "loss": 1.8609, "step": 9932 }, { "epoch": 0.4202132160081225, "grad_norm": 1.34519624710083, "learning_rate": 0.001, "loss": 2.1091, "step": 9933 }, { "epoch": 0.4202555207716389, "grad_norm": 2.9261980056762695, "learning_rate": 0.001, "loss": 2.0931, "step": 9934 }, { "epoch": 0.42029782553515527, "grad_norm": 0.26872578263282776, "learning_rate": 0.001, "loss": 2.4059, "step": 9935 }, { "epoch": 0.4203401302986716, "grad_norm": 10.175525665283203, "learning_rate": 0.001, "loss": 2.2073, "step": 9936 }, { "epoch": 0.420382435062188, "grad_norm": 0.20974968373775482, "learning_rate": 0.001, "loss": 2.474, "step": 9937 }, { "epoch": 0.4204247398257044, "grad_norm": 2.2708218097686768, "learning_rate": 0.001, "loss": 2.441, "step": 9938 }, { "epoch": 0.42046704458922074, "grad_norm": 0.1942100077867508, "learning_rate": 0.001, "loss": 2.4447, "step": 9939 }, { "epoch": 0.4205093493527371, "grad_norm": 0.24804472923278809, "learning_rate": 0.001, "loss": 2.365, "step": 9940 }, { "epoch": 0.4205516541162535, "grad_norm": 0.21028558909893036, "learning_rate": 0.001, "loss": 1.9267, "step": 9941 }, { "epoch": 0.42059395887976986, "grad_norm": 0.25556495785713196, "learning_rate": 0.001, "loss": 2.5246, "step": 9942 }, { "epoch": 0.4206362636432862, "grad_norm": 0.6381620764732361, "learning_rate": 0.001, "loss": 2.455, "step": 9943 }, { "epoch": 0.4206785684068026, "grad_norm": 0.20870067179203033, "learning_rate": 0.001, "loss": 1.8444, "step": 9944 }, { "epoch": 0.420720873170319, "grad_norm": 0.4335130751132965, "learning_rate": 0.001, "loss": 1.7678, "step": 9945 }, { "epoch": 0.42076317793383533, "grad_norm": 0.20234449207782745, "learning_rate": 0.001, "loss": 1.6857, "step": 9946 }, { "epoch": 0.42080548269735174, "grad_norm": 0.21633018553256989, "learning_rate": 0.001, "loss": 2.6976, "step": 9947 }, { "epoch": 0.4208477874608681, "grad_norm": 0.20859219133853912, "learning_rate": 0.001, "loss": 1.9807, "step": 9948 }, { "epoch": 0.42089009222438445, "grad_norm": 0.19663840532302856, "learning_rate": 0.001, "loss": 2.1092, "step": 9949 }, { "epoch": 0.42093239698790086, "grad_norm": 0.24110959470272064, "learning_rate": 0.001, "loss": 2.0922, "step": 9950 }, { "epoch": 0.4209747017514172, "grad_norm": 0.18923258781433105, "learning_rate": 0.001, "loss": 1.8416, "step": 9951 }, { "epoch": 0.42101700651493357, "grad_norm": 0.16037270426750183, "learning_rate": 0.001, "loss": 2.9326, "step": 9952 }, { "epoch": 0.42105931127845, "grad_norm": 0.15264320373535156, "learning_rate": 0.001, "loss": 1.6577, "step": 9953 }, { "epoch": 0.42110161604196633, "grad_norm": 4.530941486358643, "learning_rate": 0.001, "loss": 2.5381, "step": 9954 }, { "epoch": 0.4211439208054827, "grad_norm": 0.17232206463813782, "learning_rate": 0.001, "loss": 3.0177, "step": 9955 }, { "epoch": 0.4211862255689991, "grad_norm": 0.18628808856010437, "learning_rate": 0.001, "loss": 2.0065, "step": 9956 }, { "epoch": 0.42122853033251545, "grad_norm": 0.1941269338130951, "learning_rate": 0.001, "loss": 3.6867, "step": 9957 }, { "epoch": 0.4212708350960318, "grad_norm": 0.18407343327999115, "learning_rate": 0.001, "loss": 2.8404, "step": 9958 }, { "epoch": 0.42131313985954816, "grad_norm": 0.27422475814819336, "learning_rate": 0.001, "loss": 1.9193, "step": 9959 }, { "epoch": 0.42135544462306457, "grad_norm": 0.3834380805492401, "learning_rate": 0.001, "loss": 1.587, "step": 9960 }, { "epoch": 0.4213977493865809, "grad_norm": 0.28580230474472046, "learning_rate": 0.001, "loss": 2.604, "step": 9961 }, { "epoch": 0.4214400541500973, "grad_norm": 0.7395582795143127, "learning_rate": 0.001, "loss": 2.5038, "step": 9962 }, { "epoch": 0.4214823589136137, "grad_norm": 0.19765476882457733, "learning_rate": 0.001, "loss": 2.0077, "step": 9963 }, { "epoch": 0.42152466367713004, "grad_norm": 0.2048027217388153, "learning_rate": 0.001, "loss": 2.1928, "step": 9964 }, { "epoch": 0.4215669684406464, "grad_norm": 0.1922098696231842, "learning_rate": 0.001, "loss": 1.5701, "step": 9965 }, { "epoch": 0.4216092732041628, "grad_norm": 0.4606640040874481, "learning_rate": 0.001, "loss": 1.8854, "step": 9966 }, { "epoch": 0.42165157796767916, "grad_norm": 0.17350491881370544, "learning_rate": 0.001, "loss": 3.4361, "step": 9967 }, { "epoch": 0.4216938827311955, "grad_norm": 0.17392922937870026, "learning_rate": 0.001, "loss": 1.8771, "step": 9968 }, { "epoch": 0.4217361874947119, "grad_norm": 0.25780975818634033, "learning_rate": 0.001, "loss": 2.3047, "step": 9969 }, { "epoch": 0.4217784922582283, "grad_norm": 0.22652190923690796, "learning_rate": 0.001, "loss": 3.8245, "step": 9970 }, { "epoch": 0.42182079702174463, "grad_norm": 0.1851012408733368, "learning_rate": 0.001, "loss": 2.5209, "step": 9971 }, { "epoch": 0.42186310178526104, "grad_norm": 0.6701632738113403, "learning_rate": 0.001, "loss": 2.6308, "step": 9972 }, { "epoch": 0.4219054065487774, "grad_norm": 0.19841192662715912, "learning_rate": 0.001, "loss": 3.4615, "step": 9973 }, { "epoch": 0.42194771131229375, "grad_norm": 0.18321669101715088, "learning_rate": 0.001, "loss": 2.0243, "step": 9974 }, { "epoch": 0.42199001607581016, "grad_norm": 0.1863401234149933, "learning_rate": 0.001, "loss": 2.2838, "step": 9975 }, { "epoch": 0.4220323208393265, "grad_norm": 0.5115291476249695, "learning_rate": 0.001, "loss": 2.6378, "step": 9976 }, { "epoch": 0.42207462560284287, "grad_norm": 0.20143838226795197, "learning_rate": 0.001, "loss": 2.8185, "step": 9977 }, { "epoch": 0.4221169303663593, "grad_norm": 0.7708411812782288, "learning_rate": 0.001, "loss": 2.9274, "step": 9978 }, { "epoch": 0.42215923512987563, "grad_norm": 0.18255001306533813, "learning_rate": 0.001, "loss": 2.3446, "step": 9979 }, { "epoch": 0.422201539893392, "grad_norm": 2.8448588848114014, "learning_rate": 0.001, "loss": 2.0094, "step": 9980 }, { "epoch": 0.42224384465690834, "grad_norm": 0.2241508811712265, "learning_rate": 0.001, "loss": 2.4382, "step": 9981 }, { "epoch": 0.42228614942042475, "grad_norm": 4.693768501281738, "learning_rate": 0.001, "loss": 2.416, "step": 9982 }, { "epoch": 0.4223284541839411, "grad_norm": 0.18450792133808136, "learning_rate": 0.001, "loss": 1.5582, "step": 9983 }, { "epoch": 0.42237075894745746, "grad_norm": 0.21260473132133484, "learning_rate": 0.001, "loss": 1.7709, "step": 9984 }, { "epoch": 0.42241306371097387, "grad_norm": 0.23331928253173828, "learning_rate": 0.001, "loss": 2.4449, "step": 9985 }, { "epoch": 0.4224553684744902, "grad_norm": 0.38122308254241943, "learning_rate": 0.001, "loss": 2.4113, "step": 9986 }, { "epoch": 0.4224976732380066, "grad_norm": 0.16446325182914734, "learning_rate": 0.001, "loss": 2.0169, "step": 9987 }, { "epoch": 0.422539978001523, "grad_norm": 0.1864539235830307, "learning_rate": 0.001, "loss": 2.2757, "step": 9988 }, { "epoch": 0.42258228276503934, "grad_norm": 0.208041250705719, "learning_rate": 0.001, "loss": 1.9249, "step": 9989 }, { "epoch": 0.4226245875285557, "grad_norm": 0.6653062105178833, "learning_rate": 0.001, "loss": 2.7106, "step": 9990 }, { "epoch": 0.4226668922920721, "grad_norm": 0.2309500128030777, "learning_rate": 0.001, "loss": 2.502, "step": 9991 }, { "epoch": 0.42270919705558846, "grad_norm": 8.24671745300293, "learning_rate": 0.001, "loss": 1.6391, "step": 9992 }, { "epoch": 0.4227515018191048, "grad_norm": 0.389308363199234, "learning_rate": 0.001, "loss": 1.9749, "step": 9993 }, { "epoch": 0.4227938065826212, "grad_norm": 0.2275291532278061, "learning_rate": 0.001, "loss": 3.4856, "step": 9994 }, { "epoch": 0.4228361113461376, "grad_norm": 0.8399461507797241, "learning_rate": 0.001, "loss": 3.256, "step": 9995 }, { "epoch": 0.42287841610965393, "grad_norm": 0.9205453395843506, "learning_rate": 0.001, "loss": 2.8834, "step": 9996 }, { "epoch": 0.42292072087317034, "grad_norm": 20.110586166381836, "learning_rate": 0.001, "loss": 3.0527, "step": 9997 }, { "epoch": 0.4229630256366867, "grad_norm": 0.2495739907026291, "learning_rate": 0.001, "loss": 2.4493, "step": 9998 }, { "epoch": 0.42300533040020305, "grad_norm": 16.760589599609375, "learning_rate": 0.001, "loss": 3.6418, "step": 9999 }, { "epoch": 0.42304763516371946, "grad_norm": 0.20367243885993958, "learning_rate": 0.001, "loss": 2.063, "step": 10000 }, { "epoch": 0.4230899399272358, "grad_norm": 0.2168806493282318, "learning_rate": 0.001, "loss": 2.816, "step": 10001 }, { "epoch": 0.42313224469075217, "grad_norm": 1.293717384338379, "learning_rate": 0.001, "loss": 2.3445, "step": 10002 }, { "epoch": 0.4231745494542686, "grad_norm": 139.1713409423828, "learning_rate": 0.001, "loss": 3.2346, "step": 10003 }, { "epoch": 0.42321685421778493, "grad_norm": 0.39124611020088196, "learning_rate": 0.001, "loss": 2.108, "step": 10004 }, { "epoch": 0.4232591589813013, "grad_norm": 0.9725698828697205, "learning_rate": 0.001, "loss": 2.4943, "step": 10005 }, { "epoch": 0.42330146374481764, "grad_norm": 2.8331100940704346, "learning_rate": 0.001, "loss": 2.6655, "step": 10006 }, { "epoch": 0.42334376850833405, "grad_norm": 0.20734193921089172, "learning_rate": 0.001, "loss": 2.3108, "step": 10007 }, { "epoch": 0.4233860732718504, "grad_norm": 1.9373948574066162, "learning_rate": 0.001, "loss": 1.867, "step": 10008 }, { "epoch": 0.42342837803536676, "grad_norm": 1.3012683391571045, "learning_rate": 0.001, "loss": 2.6328, "step": 10009 }, { "epoch": 0.42347068279888317, "grad_norm": 0.838748037815094, "learning_rate": 0.001, "loss": 2.1099, "step": 10010 }, { "epoch": 0.4235129875623995, "grad_norm": 0.27665624022483826, "learning_rate": 0.001, "loss": 2.0056, "step": 10011 }, { "epoch": 0.4235552923259159, "grad_norm": 0.2420514076948166, "learning_rate": 0.001, "loss": 1.8592, "step": 10012 }, { "epoch": 0.4235975970894323, "grad_norm": 38.479976654052734, "learning_rate": 0.001, "loss": 1.7052, "step": 10013 }, { "epoch": 0.42363990185294864, "grad_norm": 0.297911673784256, "learning_rate": 0.001, "loss": 2.694, "step": 10014 }, { "epoch": 0.423682206616465, "grad_norm": 0.2878136932849884, "learning_rate": 0.001, "loss": 2.3265, "step": 10015 }, { "epoch": 0.4237245113799814, "grad_norm": 0.3122667968273163, "learning_rate": 0.001, "loss": 3.1903, "step": 10016 }, { "epoch": 0.42376681614349776, "grad_norm": 6.599530220031738, "learning_rate": 0.001, "loss": 3.0154, "step": 10017 }, { "epoch": 0.4238091209070141, "grad_norm": 0.2963060438632965, "learning_rate": 0.001, "loss": 2.9263, "step": 10018 }, { "epoch": 0.4238514256705305, "grad_norm": 0.3066433370113373, "learning_rate": 0.001, "loss": 2.8319, "step": 10019 }, { "epoch": 0.4238937304340469, "grad_norm": 0.5115453004837036, "learning_rate": 0.001, "loss": 2.9406, "step": 10020 }, { "epoch": 0.42393603519756323, "grad_norm": 0.24337489902973175, "learning_rate": 0.001, "loss": 2.2676, "step": 10021 }, { "epoch": 0.42397833996107964, "grad_norm": 0.20824651420116425, "learning_rate": 0.001, "loss": 3.0662, "step": 10022 }, { "epoch": 0.424020644724596, "grad_norm": 0.22388121485710144, "learning_rate": 0.001, "loss": 2.5818, "step": 10023 }, { "epoch": 0.42406294948811235, "grad_norm": 0.29800713062286377, "learning_rate": 0.001, "loss": 3.4957, "step": 10024 }, { "epoch": 0.42410525425162876, "grad_norm": 0.23895491659641266, "learning_rate": 0.001, "loss": 2.0719, "step": 10025 }, { "epoch": 0.4241475590151451, "grad_norm": 0.21432499587535858, "learning_rate": 0.001, "loss": 2.6525, "step": 10026 }, { "epoch": 0.42418986377866147, "grad_norm": 0.22099368274211884, "learning_rate": 0.001, "loss": 2.1917, "step": 10027 }, { "epoch": 0.4242321685421778, "grad_norm": 0.18679919838905334, "learning_rate": 0.001, "loss": 2.411, "step": 10028 }, { "epoch": 0.42427447330569423, "grad_norm": 3.992774486541748, "learning_rate": 0.001, "loss": 2.1948, "step": 10029 }, { "epoch": 0.4243167780692106, "grad_norm": 0.29614201188087463, "learning_rate": 0.001, "loss": 2.3378, "step": 10030 }, { "epoch": 0.42435908283272694, "grad_norm": 0.2120267003774643, "learning_rate": 0.001, "loss": 1.9312, "step": 10031 }, { "epoch": 0.42440138759624335, "grad_norm": 0.39450037479400635, "learning_rate": 0.001, "loss": 2.7093, "step": 10032 }, { "epoch": 0.4244436923597597, "grad_norm": 0.28339096903800964, "learning_rate": 0.001, "loss": 2.5644, "step": 10033 }, { "epoch": 0.42448599712327606, "grad_norm": 0.24640879034996033, "learning_rate": 0.001, "loss": 2.3985, "step": 10034 }, { "epoch": 0.42452830188679247, "grad_norm": 4.143669128417969, "learning_rate": 0.001, "loss": 3.2367, "step": 10035 }, { "epoch": 0.4245706066503088, "grad_norm": 0.21093952655792236, "learning_rate": 0.001, "loss": 2.113, "step": 10036 }, { "epoch": 0.4246129114138252, "grad_norm": 0.17772096395492554, "learning_rate": 0.001, "loss": 1.6009, "step": 10037 }, { "epoch": 0.4246552161773416, "grad_norm": 2.9753193855285645, "learning_rate": 0.001, "loss": 2.4802, "step": 10038 }, { "epoch": 0.42469752094085794, "grad_norm": 0.23098500072956085, "learning_rate": 0.001, "loss": 2.2026, "step": 10039 }, { "epoch": 0.4247398257043743, "grad_norm": 4.376840114593506, "learning_rate": 0.001, "loss": 2.04, "step": 10040 }, { "epoch": 0.4247821304678907, "grad_norm": 1.3142906427383423, "learning_rate": 0.001, "loss": 2.0858, "step": 10041 }, { "epoch": 0.42482443523140706, "grad_norm": 0.25867971777915955, "learning_rate": 0.001, "loss": 2.6186, "step": 10042 }, { "epoch": 0.4248667399949234, "grad_norm": 0.8242712616920471, "learning_rate": 0.001, "loss": 2.2378, "step": 10043 }, { "epoch": 0.4249090447584398, "grad_norm": 0.21803343296051025, "learning_rate": 0.001, "loss": 3.163, "step": 10044 }, { "epoch": 0.4249513495219562, "grad_norm": 2.3811676502227783, "learning_rate": 0.001, "loss": 3.5696, "step": 10045 }, { "epoch": 0.42499365428547253, "grad_norm": 0.5477252006530762, "learning_rate": 0.001, "loss": 2.5047, "step": 10046 }, { "epoch": 0.42503595904898894, "grad_norm": 0.2212408185005188, "learning_rate": 0.001, "loss": 2.2622, "step": 10047 }, { "epoch": 0.4250782638125053, "grad_norm": 0.40021783113479614, "learning_rate": 0.001, "loss": 2.4889, "step": 10048 }, { "epoch": 0.42512056857602165, "grad_norm": 0.24213816225528717, "learning_rate": 0.001, "loss": 2.1517, "step": 10049 }, { "epoch": 0.425162873339538, "grad_norm": 0.3857748210430145, "learning_rate": 0.001, "loss": 3.2273, "step": 10050 }, { "epoch": 0.4252051781030544, "grad_norm": 4.682729721069336, "learning_rate": 0.001, "loss": 2.5041, "step": 10051 }, { "epoch": 0.42524748286657077, "grad_norm": 0.7291063070297241, "learning_rate": 0.001, "loss": 2.3562, "step": 10052 }, { "epoch": 0.4252897876300871, "grad_norm": 1.8503894805908203, "learning_rate": 0.001, "loss": 2.678, "step": 10053 }, { "epoch": 0.42533209239360353, "grad_norm": 0.2773977220058441, "learning_rate": 0.001, "loss": 2.6154, "step": 10054 }, { "epoch": 0.4253743971571199, "grad_norm": 0.2241569608449936, "learning_rate": 0.001, "loss": 2.2345, "step": 10055 }, { "epoch": 0.42541670192063624, "grad_norm": 0.5478442311286926, "learning_rate": 0.001, "loss": 3.3277, "step": 10056 }, { "epoch": 0.42545900668415265, "grad_norm": 2.7203447818756104, "learning_rate": 0.001, "loss": 3.8307, "step": 10057 }, { "epoch": 0.425501311447669, "grad_norm": 0.9797614216804504, "learning_rate": 0.001, "loss": 2.2508, "step": 10058 }, { "epoch": 0.42554361621118536, "grad_norm": 0.4571799039840698, "learning_rate": 0.001, "loss": 2.9476, "step": 10059 }, { "epoch": 0.42558592097470177, "grad_norm": 0.34742364287376404, "learning_rate": 0.001, "loss": 2.4439, "step": 10060 }, { "epoch": 0.4256282257382181, "grad_norm": 0.3077751100063324, "learning_rate": 0.001, "loss": 2.7386, "step": 10061 }, { "epoch": 0.4256705305017345, "grad_norm": 0.34680673480033875, "learning_rate": 0.001, "loss": 2.7423, "step": 10062 }, { "epoch": 0.4257128352652509, "grad_norm": 0.5962140560150146, "learning_rate": 0.001, "loss": 3.1924, "step": 10063 }, { "epoch": 0.42575514002876724, "grad_norm": 0.4237496554851532, "learning_rate": 0.001, "loss": 2.7427, "step": 10064 }, { "epoch": 0.4257974447922836, "grad_norm": 0.3010278046131134, "learning_rate": 0.001, "loss": 1.9587, "step": 10065 }, { "epoch": 0.4258397495558, "grad_norm": 0.9290482997894287, "learning_rate": 0.001, "loss": 2.3115, "step": 10066 }, { "epoch": 0.42588205431931636, "grad_norm": 0.18896250426769257, "learning_rate": 0.001, "loss": 1.8756, "step": 10067 }, { "epoch": 0.4259243590828327, "grad_norm": 0.7285879254341125, "learning_rate": 0.001, "loss": 2.1316, "step": 10068 }, { "epoch": 0.4259666638463491, "grad_norm": 0.1963866502046585, "learning_rate": 0.001, "loss": 2.351, "step": 10069 }, { "epoch": 0.4260089686098655, "grad_norm": 5.0258283615112305, "learning_rate": 0.001, "loss": 2.4576, "step": 10070 }, { "epoch": 0.42605127337338183, "grad_norm": 0.7055137157440186, "learning_rate": 0.001, "loss": 2.9171, "step": 10071 }, { "epoch": 0.4260935781368982, "grad_norm": 0.2101747691631317, "learning_rate": 0.001, "loss": 1.9525, "step": 10072 }, { "epoch": 0.4261358829004146, "grad_norm": 0.2830001711845398, "learning_rate": 0.001, "loss": 2.4924, "step": 10073 }, { "epoch": 0.42617818766393095, "grad_norm": 0.2799762189388275, "learning_rate": 0.001, "loss": 3.067, "step": 10074 }, { "epoch": 0.4262204924274473, "grad_norm": 0.2741358280181885, "learning_rate": 0.001, "loss": 2.3429, "step": 10075 }, { "epoch": 0.4262627971909637, "grad_norm": 0.5189446210861206, "learning_rate": 0.001, "loss": 3.3767, "step": 10076 }, { "epoch": 0.42630510195448007, "grad_norm": 0.2622857391834259, "learning_rate": 0.001, "loss": 2.2139, "step": 10077 }, { "epoch": 0.4263474067179964, "grad_norm": 0.25797808170318604, "learning_rate": 0.001, "loss": 2.4526, "step": 10078 }, { "epoch": 0.42638971148151283, "grad_norm": 0.20280535519123077, "learning_rate": 0.001, "loss": 2.2921, "step": 10079 }, { "epoch": 0.4264320162450292, "grad_norm": 0.18795685470104218, "learning_rate": 0.001, "loss": 2.6622, "step": 10080 }, { "epoch": 0.42647432100854554, "grad_norm": 1.9836920499801636, "learning_rate": 0.001, "loss": 3.0886, "step": 10081 }, { "epoch": 0.42651662577206195, "grad_norm": 0.41229698061943054, "learning_rate": 0.001, "loss": 2.5115, "step": 10082 }, { "epoch": 0.4265589305355783, "grad_norm": 0.17537228763103485, "learning_rate": 0.001, "loss": 2.1755, "step": 10083 }, { "epoch": 0.42660123529909466, "grad_norm": 0.2055114507675171, "learning_rate": 0.001, "loss": 2.0749, "step": 10084 }, { "epoch": 0.42664354006261107, "grad_norm": 0.7603585720062256, "learning_rate": 0.001, "loss": 3.0483, "step": 10085 }, { "epoch": 0.4266858448261274, "grad_norm": 0.15949027240276337, "learning_rate": 0.001, "loss": 2.3814, "step": 10086 }, { "epoch": 0.4267281495896438, "grad_norm": 0.18160079419612885, "learning_rate": 0.001, "loss": 2.764, "step": 10087 }, { "epoch": 0.4267704543531602, "grad_norm": 1.1477245092391968, "learning_rate": 0.001, "loss": 2.4743, "step": 10088 }, { "epoch": 0.42681275911667654, "grad_norm": 0.2228129804134369, "learning_rate": 0.001, "loss": 3.238, "step": 10089 }, { "epoch": 0.4268550638801929, "grad_norm": 0.7572936415672302, "learning_rate": 0.001, "loss": 2.4797, "step": 10090 }, { "epoch": 0.4268973686437093, "grad_norm": 0.1875317394733429, "learning_rate": 0.001, "loss": 2.2772, "step": 10091 }, { "epoch": 0.42693967340722566, "grad_norm": 0.16233602166175842, "learning_rate": 0.001, "loss": 1.489, "step": 10092 }, { "epoch": 0.426981978170742, "grad_norm": 0.15060947835445404, "learning_rate": 0.001, "loss": 1.9052, "step": 10093 }, { "epoch": 0.42702428293425837, "grad_norm": 0.28614911437034607, "learning_rate": 0.001, "loss": 2.401, "step": 10094 }, { "epoch": 0.4270665876977748, "grad_norm": 0.1590694785118103, "learning_rate": 0.001, "loss": 2.4475, "step": 10095 }, { "epoch": 0.42710889246129113, "grad_norm": 0.2464892566204071, "learning_rate": 0.001, "loss": 2.7287, "step": 10096 }, { "epoch": 0.4271511972248075, "grad_norm": 0.25172311067581177, "learning_rate": 0.001, "loss": 2.6355, "step": 10097 }, { "epoch": 0.4271935019883239, "grad_norm": 0.19006308913230896, "learning_rate": 0.001, "loss": 1.8972, "step": 10098 }, { "epoch": 0.42723580675184025, "grad_norm": 0.17476560175418854, "learning_rate": 0.001, "loss": 2.0524, "step": 10099 }, { "epoch": 0.4272781115153566, "grad_norm": 0.1643456220626831, "learning_rate": 0.001, "loss": 1.7454, "step": 10100 }, { "epoch": 0.427320416278873, "grad_norm": 0.1704128235578537, "learning_rate": 0.001, "loss": 1.8189, "step": 10101 }, { "epoch": 0.42736272104238937, "grad_norm": 0.16339196264743805, "learning_rate": 0.001, "loss": 2.1309, "step": 10102 }, { "epoch": 0.4274050258059057, "grad_norm": 0.27944785356521606, "learning_rate": 0.001, "loss": 2.072, "step": 10103 }, { "epoch": 0.42744733056942213, "grad_norm": 0.8265528082847595, "learning_rate": 0.001, "loss": 2.8271, "step": 10104 }, { "epoch": 0.4274896353329385, "grad_norm": 0.16367796063423157, "learning_rate": 0.001, "loss": 2.0271, "step": 10105 }, { "epoch": 0.42753194009645484, "grad_norm": 0.17860738933086395, "learning_rate": 0.001, "loss": 1.4789, "step": 10106 }, { "epoch": 0.42757424485997125, "grad_norm": 0.2383193075656891, "learning_rate": 0.001, "loss": 2.6022, "step": 10107 }, { "epoch": 0.4276165496234876, "grad_norm": 0.9572615027427673, "learning_rate": 0.001, "loss": 3.0684, "step": 10108 }, { "epoch": 0.42765885438700396, "grad_norm": 0.31203147768974304, "learning_rate": 0.001, "loss": 2.1978, "step": 10109 }, { "epoch": 0.42770115915052037, "grad_norm": 0.2197791486978531, "learning_rate": 0.001, "loss": 2.5708, "step": 10110 }, { "epoch": 0.4277434639140367, "grad_norm": 0.5677201151847839, "learning_rate": 0.001, "loss": 1.6285, "step": 10111 }, { "epoch": 0.4277857686775531, "grad_norm": 0.18387927114963531, "learning_rate": 0.001, "loss": 1.9223, "step": 10112 }, { "epoch": 0.4278280734410695, "grad_norm": 0.16864502429962158, "learning_rate": 0.001, "loss": 1.8276, "step": 10113 }, { "epoch": 0.42787037820458584, "grad_norm": 0.23291771113872528, "learning_rate": 0.001, "loss": 3.0319, "step": 10114 }, { "epoch": 0.4279126829681022, "grad_norm": 0.18459783494472504, "learning_rate": 0.001, "loss": 1.8791, "step": 10115 }, { "epoch": 0.4279549877316186, "grad_norm": 1.1795158386230469, "learning_rate": 0.001, "loss": 2.6287, "step": 10116 }, { "epoch": 0.42799729249513496, "grad_norm": 0.20951585471630096, "learning_rate": 0.001, "loss": 1.9405, "step": 10117 }, { "epoch": 0.4280395972586513, "grad_norm": 0.31406983733177185, "learning_rate": 0.001, "loss": 3.4603, "step": 10118 }, { "epoch": 0.42808190202216767, "grad_norm": 0.267755925655365, "learning_rate": 0.001, "loss": 3.2455, "step": 10119 }, { "epoch": 0.4281242067856841, "grad_norm": 0.21892082691192627, "learning_rate": 0.001, "loss": 2.5571, "step": 10120 }, { "epoch": 0.42816651154920043, "grad_norm": 0.17393141984939575, "learning_rate": 0.001, "loss": 1.6598, "step": 10121 }, { "epoch": 0.4282088163127168, "grad_norm": 0.17320716381072998, "learning_rate": 0.001, "loss": 2.0091, "step": 10122 }, { "epoch": 0.4282511210762332, "grad_norm": 0.1930369734764099, "learning_rate": 0.001, "loss": 1.7533, "step": 10123 }, { "epoch": 0.42829342583974955, "grad_norm": 0.18624812364578247, "learning_rate": 0.001, "loss": 2.0146, "step": 10124 }, { "epoch": 0.4283357306032659, "grad_norm": 0.7041241526603699, "learning_rate": 0.001, "loss": 2.8163, "step": 10125 }, { "epoch": 0.4283780353667823, "grad_norm": 0.2105880081653595, "learning_rate": 0.001, "loss": 1.9341, "step": 10126 }, { "epoch": 0.42842034013029867, "grad_norm": 0.1799003928899765, "learning_rate": 0.001, "loss": 2.2052, "step": 10127 }, { "epoch": 0.428462644893815, "grad_norm": 1.5456318855285645, "learning_rate": 0.001, "loss": 1.8783, "step": 10128 }, { "epoch": 0.42850494965733144, "grad_norm": 0.17844410240650177, "learning_rate": 0.001, "loss": 2.3216, "step": 10129 }, { "epoch": 0.4285472544208478, "grad_norm": 0.16258619725704193, "learning_rate": 0.001, "loss": 2.56, "step": 10130 }, { "epoch": 0.42858955918436414, "grad_norm": 0.18964774906635284, "learning_rate": 0.001, "loss": 1.9728, "step": 10131 }, { "epoch": 0.42863186394788055, "grad_norm": 0.18737366795539856, "learning_rate": 0.001, "loss": 2.6866, "step": 10132 }, { "epoch": 0.4286741687113969, "grad_norm": 0.19086334109306335, "learning_rate": 0.001, "loss": 2.0617, "step": 10133 }, { "epoch": 0.42871647347491326, "grad_norm": 0.2464657872915268, "learning_rate": 0.001, "loss": 2.9955, "step": 10134 }, { "epoch": 0.42875877823842967, "grad_norm": 0.3504067063331604, "learning_rate": 0.001, "loss": 2.5686, "step": 10135 }, { "epoch": 0.428801083001946, "grad_norm": 0.17571739852428436, "learning_rate": 0.001, "loss": 2.1223, "step": 10136 }, { "epoch": 0.4288433877654624, "grad_norm": 0.381346195936203, "learning_rate": 0.001, "loss": 1.5416, "step": 10137 }, { "epoch": 0.4288856925289788, "grad_norm": 0.2737821936607361, "learning_rate": 0.001, "loss": 1.7345, "step": 10138 }, { "epoch": 0.42892799729249514, "grad_norm": 0.17216061055660248, "learning_rate": 0.001, "loss": 1.6129, "step": 10139 }, { "epoch": 0.4289703020560115, "grad_norm": 0.20674671232700348, "learning_rate": 0.001, "loss": 3.2899, "step": 10140 }, { "epoch": 0.42901260681952785, "grad_norm": 1.2608942985534668, "learning_rate": 0.001, "loss": 1.8988, "step": 10141 }, { "epoch": 0.42905491158304426, "grad_norm": 0.18615379929542542, "learning_rate": 0.001, "loss": 1.873, "step": 10142 }, { "epoch": 0.4290972163465606, "grad_norm": 4.064700603485107, "learning_rate": 0.001, "loss": 2.7955, "step": 10143 }, { "epoch": 0.42913952111007697, "grad_norm": 0.2223389595746994, "learning_rate": 0.001, "loss": 2.7764, "step": 10144 }, { "epoch": 0.4291818258735934, "grad_norm": 0.4159873127937317, "learning_rate": 0.001, "loss": 3.8135, "step": 10145 }, { "epoch": 0.42922413063710974, "grad_norm": 0.23633842170238495, "learning_rate": 0.001, "loss": 3.5594, "step": 10146 }, { "epoch": 0.4292664354006261, "grad_norm": 1.0280625820159912, "learning_rate": 0.001, "loss": 1.7139, "step": 10147 }, { "epoch": 0.4293087401641425, "grad_norm": 0.1797591596841812, "learning_rate": 0.001, "loss": 3.0655, "step": 10148 }, { "epoch": 0.42935104492765885, "grad_norm": 0.4903676509857178, "learning_rate": 0.001, "loss": 1.8549, "step": 10149 }, { "epoch": 0.4293933496911752, "grad_norm": 0.15862198173999786, "learning_rate": 0.001, "loss": 2.6685, "step": 10150 }, { "epoch": 0.4294356544546916, "grad_norm": 0.22118091583251953, "learning_rate": 0.001, "loss": 2.2547, "step": 10151 }, { "epoch": 0.42947795921820797, "grad_norm": 0.18271955847740173, "learning_rate": 0.001, "loss": 2.2298, "step": 10152 }, { "epoch": 0.4295202639817243, "grad_norm": 0.170980766415596, "learning_rate": 0.001, "loss": 2.8904, "step": 10153 }, { "epoch": 0.42956256874524074, "grad_norm": 0.15399718284606934, "learning_rate": 0.001, "loss": 2.2953, "step": 10154 }, { "epoch": 0.4296048735087571, "grad_norm": 0.17953842878341675, "learning_rate": 0.001, "loss": 1.9147, "step": 10155 }, { "epoch": 0.42964717827227344, "grad_norm": 0.1702369898557663, "learning_rate": 0.001, "loss": 2.2872, "step": 10156 }, { "epoch": 0.42968948303578985, "grad_norm": 0.19081392884254456, "learning_rate": 0.001, "loss": 2.6647, "step": 10157 }, { "epoch": 0.4297317877993062, "grad_norm": 0.1783342808485031, "learning_rate": 0.001, "loss": 1.9582, "step": 10158 }, { "epoch": 0.42977409256282256, "grad_norm": 0.20043319463729858, "learning_rate": 0.001, "loss": 2.3066, "step": 10159 }, { "epoch": 0.429816397326339, "grad_norm": 0.17219308018684387, "learning_rate": 0.001, "loss": 2.5435, "step": 10160 }, { "epoch": 0.4298587020898553, "grad_norm": 0.29747068881988525, "learning_rate": 0.001, "loss": 2.9951, "step": 10161 }, { "epoch": 0.4299010068533717, "grad_norm": 4.550970077514648, "learning_rate": 0.001, "loss": 2.5154, "step": 10162 }, { "epoch": 0.42994331161688804, "grad_norm": 0.18527233600616455, "learning_rate": 0.001, "loss": 2.0328, "step": 10163 }, { "epoch": 0.42998561638040445, "grad_norm": 0.18644128739833832, "learning_rate": 0.001, "loss": 2.3224, "step": 10164 }, { "epoch": 0.4300279211439208, "grad_norm": 0.2518557906150818, "learning_rate": 0.001, "loss": 2.1388, "step": 10165 }, { "epoch": 0.43007022590743715, "grad_norm": 0.1625964343547821, "learning_rate": 0.001, "loss": 1.8495, "step": 10166 }, { "epoch": 0.43011253067095356, "grad_norm": 0.2331293672323227, "learning_rate": 0.001, "loss": 2.216, "step": 10167 }, { "epoch": 0.4301548354344699, "grad_norm": 0.2668088376522064, "learning_rate": 0.001, "loss": 2.3291, "step": 10168 }, { "epoch": 0.43019714019798627, "grad_norm": 0.17521047592163086, "learning_rate": 0.001, "loss": 1.9921, "step": 10169 }, { "epoch": 0.4302394449615027, "grad_norm": 0.19229163229465485, "learning_rate": 0.001, "loss": 2.0076, "step": 10170 }, { "epoch": 0.43028174972501904, "grad_norm": 0.2229296714067459, "learning_rate": 0.001, "loss": 2.5076, "step": 10171 }, { "epoch": 0.4303240544885354, "grad_norm": 0.47033241391181946, "learning_rate": 0.001, "loss": 2.2927, "step": 10172 }, { "epoch": 0.4303663592520518, "grad_norm": 0.1660117357969284, "learning_rate": 0.001, "loss": 2.2447, "step": 10173 }, { "epoch": 0.43040866401556815, "grad_norm": 2.12497615814209, "learning_rate": 0.001, "loss": 2.0389, "step": 10174 }, { "epoch": 0.4304509687790845, "grad_norm": 0.18803949654102325, "learning_rate": 0.001, "loss": 3.1254, "step": 10175 }, { "epoch": 0.4304932735426009, "grad_norm": 0.2218303233385086, "learning_rate": 0.001, "loss": 1.7379, "step": 10176 }, { "epoch": 0.4305355783061173, "grad_norm": 0.14620518684387207, "learning_rate": 0.001, "loss": 1.6227, "step": 10177 }, { "epoch": 0.4305778830696336, "grad_norm": 0.17080405354499817, "learning_rate": 0.001, "loss": 1.9532, "step": 10178 }, { "epoch": 0.43062018783315004, "grad_norm": 2.5071756839752197, "learning_rate": 0.001, "loss": 3.2385, "step": 10179 }, { "epoch": 0.4306624925966664, "grad_norm": 0.17002911865711212, "learning_rate": 0.001, "loss": 3.0736, "step": 10180 }, { "epoch": 0.43070479736018275, "grad_norm": 0.1898619830608368, "learning_rate": 0.001, "loss": 1.6932, "step": 10181 }, { "epoch": 0.43074710212369915, "grad_norm": 0.6082344651222229, "learning_rate": 0.001, "loss": 3.6207, "step": 10182 }, { "epoch": 0.4307894068872155, "grad_norm": 0.6188779473304749, "learning_rate": 0.001, "loss": 1.6398, "step": 10183 }, { "epoch": 0.43083171165073186, "grad_norm": 0.7821673154830933, "learning_rate": 0.001, "loss": 2.1569, "step": 10184 }, { "epoch": 0.4308740164142482, "grad_norm": 0.543019711971283, "learning_rate": 0.001, "loss": 1.6907, "step": 10185 }, { "epoch": 0.4309163211777646, "grad_norm": 0.5596624612808228, "learning_rate": 0.001, "loss": 3.4859, "step": 10186 }, { "epoch": 0.430958625941281, "grad_norm": 0.22357861697673798, "learning_rate": 0.001, "loss": 1.896, "step": 10187 }, { "epoch": 0.43100093070479734, "grad_norm": 0.16170749068260193, "learning_rate": 0.001, "loss": 1.8071, "step": 10188 }, { "epoch": 0.43104323546831375, "grad_norm": 0.15653282403945923, "learning_rate": 0.001, "loss": 1.6408, "step": 10189 }, { "epoch": 0.4310855402318301, "grad_norm": 0.22891934216022491, "learning_rate": 0.001, "loss": 1.2615, "step": 10190 }, { "epoch": 0.43112784499534645, "grad_norm": 0.9978185892105103, "learning_rate": 0.001, "loss": 2.0704, "step": 10191 }, { "epoch": 0.43117014975886286, "grad_norm": 0.1806502640247345, "learning_rate": 0.001, "loss": 2.1097, "step": 10192 }, { "epoch": 0.4312124545223792, "grad_norm": 0.16584782302379608, "learning_rate": 0.001, "loss": 2.528, "step": 10193 }, { "epoch": 0.4312547592858956, "grad_norm": 0.39718106389045715, "learning_rate": 0.001, "loss": 2.2111, "step": 10194 }, { "epoch": 0.431297064049412, "grad_norm": 0.37005481123924255, "learning_rate": 0.001, "loss": 2.8491, "step": 10195 }, { "epoch": 0.43133936881292834, "grad_norm": 0.18187114596366882, "learning_rate": 0.001, "loss": 2.0542, "step": 10196 }, { "epoch": 0.4313816735764447, "grad_norm": 0.21417534351348877, "learning_rate": 0.001, "loss": 1.842, "step": 10197 }, { "epoch": 0.4314239783399611, "grad_norm": 0.2430461347103119, "learning_rate": 0.001, "loss": 3.2218, "step": 10198 }, { "epoch": 0.43146628310347745, "grad_norm": 0.1812175065279007, "learning_rate": 0.001, "loss": 2.5477, "step": 10199 }, { "epoch": 0.4315085878669938, "grad_norm": 0.5557557940483093, "learning_rate": 0.001, "loss": 2.0329, "step": 10200 }, { "epoch": 0.4315508926305102, "grad_norm": 0.16666504740715027, "learning_rate": 0.001, "loss": 1.8955, "step": 10201 }, { "epoch": 0.4315931973940266, "grad_norm": 8.631728172302246, "learning_rate": 0.001, "loss": 2.2758, "step": 10202 }, { "epoch": 0.4316355021575429, "grad_norm": 0.28974688053131104, "learning_rate": 0.001, "loss": 1.8011, "step": 10203 }, { "epoch": 0.43167780692105934, "grad_norm": 0.17004406452178955, "learning_rate": 0.001, "loss": 1.7401, "step": 10204 }, { "epoch": 0.4317201116845757, "grad_norm": 0.3253629803657532, "learning_rate": 0.001, "loss": 2.3806, "step": 10205 }, { "epoch": 0.43176241644809205, "grad_norm": 0.18079756200313568, "learning_rate": 0.001, "loss": 1.6866, "step": 10206 }, { "epoch": 0.4318047212116084, "grad_norm": 0.16561074554920197, "learning_rate": 0.001, "loss": 2.1134, "step": 10207 }, { "epoch": 0.4318470259751248, "grad_norm": 0.16736382246017456, "learning_rate": 0.001, "loss": 2.5646, "step": 10208 }, { "epoch": 0.43188933073864116, "grad_norm": 0.16732652485370636, "learning_rate": 0.001, "loss": 1.7875, "step": 10209 }, { "epoch": 0.4319316355021575, "grad_norm": 1.6806914806365967, "learning_rate": 0.001, "loss": 1.8155, "step": 10210 }, { "epoch": 0.43197394026567393, "grad_norm": 0.902766764163971, "learning_rate": 0.001, "loss": 2.1906, "step": 10211 }, { "epoch": 0.4320162450291903, "grad_norm": 0.22574925422668457, "learning_rate": 0.001, "loss": 2.4012, "step": 10212 }, { "epoch": 0.43205854979270664, "grad_norm": 0.20272192358970642, "learning_rate": 0.001, "loss": 2.7397, "step": 10213 }, { "epoch": 0.43210085455622305, "grad_norm": 0.18217982351779938, "learning_rate": 0.001, "loss": 1.8363, "step": 10214 }, { "epoch": 0.4321431593197394, "grad_norm": 0.32555845379829407, "learning_rate": 0.001, "loss": 3.1817, "step": 10215 }, { "epoch": 0.43218546408325575, "grad_norm": 4.589909076690674, "learning_rate": 0.001, "loss": 2.9862, "step": 10216 }, { "epoch": 0.43222776884677216, "grad_norm": 0.24728332459926605, "learning_rate": 0.001, "loss": 1.834, "step": 10217 }, { "epoch": 0.4322700736102885, "grad_norm": 0.331990510225296, "learning_rate": 0.001, "loss": 3.6178, "step": 10218 }, { "epoch": 0.4323123783738049, "grad_norm": 0.31489211320877075, "learning_rate": 0.001, "loss": 1.8217, "step": 10219 }, { "epoch": 0.4323546831373213, "grad_norm": 1.1140625476837158, "learning_rate": 0.001, "loss": 2.5692, "step": 10220 }, { "epoch": 0.43239698790083764, "grad_norm": 0.1725742071866989, "learning_rate": 0.001, "loss": 1.7017, "step": 10221 }, { "epoch": 0.432439292664354, "grad_norm": 0.19503003358840942, "learning_rate": 0.001, "loss": 2.5324, "step": 10222 }, { "epoch": 0.4324815974278704, "grad_norm": 0.26874589920043945, "learning_rate": 0.001, "loss": 3.1633, "step": 10223 }, { "epoch": 0.43252390219138676, "grad_norm": 0.24442531168460846, "learning_rate": 0.001, "loss": 2.4106, "step": 10224 }, { "epoch": 0.4325662069549031, "grad_norm": 0.32704654335975647, "learning_rate": 0.001, "loss": 3.401, "step": 10225 }, { "epoch": 0.4326085117184195, "grad_norm": 0.2951880693435669, "learning_rate": 0.001, "loss": 2.9172, "step": 10226 }, { "epoch": 0.4326508164819359, "grad_norm": 0.16954240202903748, "learning_rate": 0.001, "loss": 1.8404, "step": 10227 }, { "epoch": 0.43269312124545223, "grad_norm": 0.1641477644443512, "learning_rate": 0.001, "loss": 3.1311, "step": 10228 }, { "epoch": 0.4327354260089686, "grad_norm": 0.7109168171882629, "learning_rate": 0.001, "loss": 1.9146, "step": 10229 }, { "epoch": 0.432777730772485, "grad_norm": 0.21353894472122192, "learning_rate": 0.001, "loss": 2.6354, "step": 10230 }, { "epoch": 0.43282003553600135, "grad_norm": 0.32388636469841003, "learning_rate": 0.001, "loss": 1.8109, "step": 10231 }, { "epoch": 0.4328623402995177, "grad_norm": 0.46089649200439453, "learning_rate": 0.001, "loss": 3.1302, "step": 10232 }, { "epoch": 0.4329046450630341, "grad_norm": 0.28092247247695923, "learning_rate": 0.001, "loss": 2.9537, "step": 10233 }, { "epoch": 0.43294694982655046, "grad_norm": 0.1795322597026825, "learning_rate": 0.001, "loss": 2.0469, "step": 10234 }, { "epoch": 0.4329892545900668, "grad_norm": 1.1064038276672363, "learning_rate": 0.001, "loss": 2.0708, "step": 10235 }, { "epoch": 0.43303155935358323, "grad_norm": 0.1590038537979126, "learning_rate": 0.001, "loss": 3.0817, "step": 10236 }, { "epoch": 0.4330738641170996, "grad_norm": 0.1741926223039627, "learning_rate": 0.001, "loss": 2.1068, "step": 10237 }, { "epoch": 0.43311616888061594, "grad_norm": 0.15864573419094086, "learning_rate": 0.001, "loss": 2.225, "step": 10238 }, { "epoch": 0.43315847364413235, "grad_norm": 0.1976540982723236, "learning_rate": 0.001, "loss": 2.0396, "step": 10239 }, { "epoch": 0.4332007784076487, "grad_norm": 0.1970503330230713, "learning_rate": 0.001, "loss": 2.3251, "step": 10240 }, { "epoch": 0.43324308317116506, "grad_norm": 0.19585905969142914, "learning_rate": 0.001, "loss": 2.9537, "step": 10241 }, { "epoch": 0.43328538793468147, "grad_norm": 0.5313712358474731, "learning_rate": 0.001, "loss": 2.066, "step": 10242 }, { "epoch": 0.4333276926981978, "grad_norm": 0.3845883905887604, "learning_rate": 0.001, "loss": 3.0735, "step": 10243 }, { "epoch": 0.4333699974617142, "grad_norm": 4.409493446350098, "learning_rate": 0.001, "loss": 2.7471, "step": 10244 }, { "epoch": 0.4334123022252306, "grad_norm": 0.15902769565582275, "learning_rate": 0.001, "loss": 2.0389, "step": 10245 }, { "epoch": 0.43345460698874694, "grad_norm": 0.19496986269950867, "learning_rate": 0.001, "loss": 2.2368, "step": 10246 }, { "epoch": 0.4334969117522633, "grad_norm": 3.496062755584717, "learning_rate": 0.001, "loss": 2.2234, "step": 10247 }, { "epoch": 0.4335392165157797, "grad_norm": 0.28134244680404663, "learning_rate": 0.001, "loss": 1.4976, "step": 10248 }, { "epoch": 0.43358152127929606, "grad_norm": 0.2013697773218155, "learning_rate": 0.001, "loss": 1.9873, "step": 10249 }, { "epoch": 0.4336238260428124, "grad_norm": 0.42853641510009766, "learning_rate": 0.001, "loss": 3.4437, "step": 10250 }, { "epoch": 0.4336661308063288, "grad_norm": 2.750063896179199, "learning_rate": 0.001, "loss": 2.0177, "step": 10251 }, { "epoch": 0.4337084355698452, "grad_norm": 0.21362249553203583, "learning_rate": 0.001, "loss": 2.6798, "step": 10252 }, { "epoch": 0.43375074033336153, "grad_norm": 0.2875850200653076, "learning_rate": 0.001, "loss": 2.2333, "step": 10253 }, { "epoch": 0.4337930450968779, "grad_norm": 0.2688775360584259, "learning_rate": 0.001, "loss": 2.2072, "step": 10254 }, { "epoch": 0.4338353498603943, "grad_norm": 0.3297450840473175, "learning_rate": 0.001, "loss": 2.5938, "step": 10255 }, { "epoch": 0.43387765462391065, "grad_norm": 0.4499950706958771, "learning_rate": 0.001, "loss": 2.221, "step": 10256 }, { "epoch": 0.433919959387427, "grad_norm": 0.19243115186691284, "learning_rate": 0.001, "loss": 3.2843, "step": 10257 }, { "epoch": 0.4339622641509434, "grad_norm": 0.18401968479156494, "learning_rate": 0.001, "loss": 1.8309, "step": 10258 }, { "epoch": 0.43400456891445977, "grad_norm": 0.18404164910316467, "learning_rate": 0.001, "loss": 2.295, "step": 10259 }, { "epoch": 0.4340468736779761, "grad_norm": 0.1946217566728592, "learning_rate": 0.001, "loss": 2.0939, "step": 10260 }, { "epoch": 0.43408917844149253, "grad_norm": 0.1470000147819519, "learning_rate": 0.001, "loss": 1.7336, "step": 10261 }, { "epoch": 0.4341314832050089, "grad_norm": 0.32551971077919006, "learning_rate": 0.001, "loss": 2.4746, "step": 10262 }, { "epoch": 0.43417378796852524, "grad_norm": 2.652297019958496, "learning_rate": 0.001, "loss": 3.1451, "step": 10263 }, { "epoch": 0.43421609273204165, "grad_norm": 0.1553354561328888, "learning_rate": 0.001, "loss": 1.976, "step": 10264 }, { "epoch": 0.434258397495558, "grad_norm": 0.23101647198200226, "learning_rate": 0.001, "loss": 2.6715, "step": 10265 }, { "epoch": 0.43430070225907436, "grad_norm": 0.20663489401340485, "learning_rate": 0.001, "loss": 1.561, "step": 10266 }, { "epoch": 0.43434300702259077, "grad_norm": 0.19089651107788086, "learning_rate": 0.001, "loss": 2.8731, "step": 10267 }, { "epoch": 0.4343853117861071, "grad_norm": 0.14528456330299377, "learning_rate": 0.001, "loss": 2.2012, "step": 10268 }, { "epoch": 0.4344276165496235, "grad_norm": 0.1953110247850418, "learning_rate": 0.001, "loss": 2.5863, "step": 10269 }, { "epoch": 0.4344699213131399, "grad_norm": 0.19322249293327332, "learning_rate": 0.001, "loss": 2.5648, "step": 10270 }, { "epoch": 0.43451222607665624, "grad_norm": 0.15629512071609497, "learning_rate": 0.001, "loss": 2.6854, "step": 10271 }, { "epoch": 0.4345545308401726, "grad_norm": 0.17981792986392975, "learning_rate": 0.001, "loss": 2.2489, "step": 10272 }, { "epoch": 0.434596835603689, "grad_norm": 0.18551938235759735, "learning_rate": 0.001, "loss": 2.6762, "step": 10273 }, { "epoch": 0.43463914036720536, "grad_norm": 2.313119649887085, "learning_rate": 0.001, "loss": 3.5626, "step": 10274 }, { "epoch": 0.4346814451307217, "grad_norm": 0.34999650716781616, "learning_rate": 0.001, "loss": 2.6788, "step": 10275 }, { "epoch": 0.43472374989423807, "grad_norm": 0.36393770575523376, "learning_rate": 0.001, "loss": 2.2926, "step": 10276 }, { "epoch": 0.4347660546577545, "grad_norm": 0.4789801239967346, "learning_rate": 0.001, "loss": 1.7946, "step": 10277 }, { "epoch": 0.43480835942127083, "grad_norm": 2.8181819915771484, "learning_rate": 0.001, "loss": 2.2287, "step": 10278 }, { "epoch": 0.4348506641847872, "grad_norm": 0.5375362038612366, "learning_rate": 0.001, "loss": 2.2571, "step": 10279 }, { "epoch": 0.4348929689483036, "grad_norm": 0.1631775200366974, "learning_rate": 0.001, "loss": 2.6874, "step": 10280 }, { "epoch": 0.43493527371181995, "grad_norm": 0.1752665489912033, "learning_rate": 0.001, "loss": 2.6751, "step": 10281 }, { "epoch": 0.4349775784753363, "grad_norm": 0.28829485177993774, "learning_rate": 0.001, "loss": 1.4542, "step": 10282 }, { "epoch": 0.4350198832388527, "grad_norm": 0.6851754188537598, "learning_rate": 0.001, "loss": 2.094, "step": 10283 }, { "epoch": 0.43506218800236907, "grad_norm": 0.1630009263753891, "learning_rate": 0.001, "loss": 2.3703, "step": 10284 }, { "epoch": 0.4351044927658854, "grad_norm": 0.7183140516281128, "learning_rate": 0.001, "loss": 3.3134, "step": 10285 }, { "epoch": 0.43514679752940183, "grad_norm": 0.17298482358455658, "learning_rate": 0.001, "loss": 2.7842, "step": 10286 }, { "epoch": 0.4351891022929182, "grad_norm": 0.16329266130924225, "learning_rate": 0.001, "loss": 1.5573, "step": 10287 }, { "epoch": 0.43523140705643454, "grad_norm": 0.15913952887058258, "learning_rate": 0.001, "loss": 2.8684, "step": 10288 }, { "epoch": 0.43527371181995095, "grad_norm": 0.16433869302272797, "learning_rate": 0.001, "loss": 1.6984, "step": 10289 }, { "epoch": 0.4353160165834673, "grad_norm": 0.1634620726108551, "learning_rate": 0.001, "loss": 2.2076, "step": 10290 }, { "epoch": 0.43535832134698366, "grad_norm": 0.2353067547082901, "learning_rate": 0.001, "loss": 2.8474, "step": 10291 }, { "epoch": 0.43540062611050007, "grad_norm": 0.1973012238740921, "learning_rate": 0.001, "loss": 2.5835, "step": 10292 }, { "epoch": 0.4354429308740164, "grad_norm": 0.16465625166893005, "learning_rate": 0.001, "loss": 1.8952, "step": 10293 }, { "epoch": 0.4354852356375328, "grad_norm": 1.9926971197128296, "learning_rate": 0.001, "loss": 1.7796, "step": 10294 }, { "epoch": 0.4355275404010492, "grad_norm": 1.1302298307418823, "learning_rate": 0.001, "loss": 1.9359, "step": 10295 }, { "epoch": 0.43556984516456554, "grad_norm": 0.2590234577655792, "learning_rate": 0.001, "loss": 1.8019, "step": 10296 }, { "epoch": 0.4356121499280819, "grad_norm": 0.14725469052791595, "learning_rate": 0.001, "loss": 2.6869, "step": 10297 }, { "epoch": 0.43565445469159825, "grad_norm": 3.516937255859375, "learning_rate": 0.001, "loss": 1.8395, "step": 10298 }, { "epoch": 0.43569675945511466, "grad_norm": 0.1837252527475357, "learning_rate": 0.001, "loss": 2.3293, "step": 10299 }, { "epoch": 0.435739064218631, "grad_norm": 0.15408559143543243, "learning_rate": 0.001, "loss": 2.7421, "step": 10300 }, { "epoch": 0.43578136898214737, "grad_norm": 2.384270191192627, "learning_rate": 0.001, "loss": 2.6062, "step": 10301 }, { "epoch": 0.4358236737456638, "grad_norm": 0.22565071284770966, "learning_rate": 0.001, "loss": 2.2583, "step": 10302 }, { "epoch": 0.43586597850918013, "grad_norm": 0.19346702098846436, "learning_rate": 0.001, "loss": 2.36, "step": 10303 }, { "epoch": 0.4359082832726965, "grad_norm": 0.15725278854370117, "learning_rate": 0.001, "loss": 1.6664, "step": 10304 }, { "epoch": 0.4359505880362129, "grad_norm": 0.19711074233055115, "learning_rate": 0.001, "loss": 1.736, "step": 10305 }, { "epoch": 0.43599289279972925, "grad_norm": 0.2101636379957199, "learning_rate": 0.001, "loss": 2.4285, "step": 10306 }, { "epoch": 0.4360351975632456, "grad_norm": 0.1750393956899643, "learning_rate": 0.001, "loss": 1.6158, "step": 10307 }, { "epoch": 0.436077502326762, "grad_norm": 0.1906735897064209, "learning_rate": 0.001, "loss": 2.4902, "step": 10308 }, { "epoch": 0.43611980709027837, "grad_norm": 1.6329114437103271, "learning_rate": 0.001, "loss": 2.1277, "step": 10309 }, { "epoch": 0.4361621118537947, "grad_norm": 0.9235885739326477, "learning_rate": 0.001, "loss": 1.4367, "step": 10310 }, { "epoch": 0.43620441661731113, "grad_norm": 0.14719220995903015, "learning_rate": 0.001, "loss": 2.4318, "step": 10311 }, { "epoch": 0.4362467213808275, "grad_norm": 0.2758025825023651, "learning_rate": 0.001, "loss": 1.9603, "step": 10312 }, { "epoch": 0.43628902614434384, "grad_norm": 0.25476765632629395, "learning_rate": 0.001, "loss": 4.3574, "step": 10313 }, { "epoch": 0.43633133090786025, "grad_norm": 0.180466890335083, "learning_rate": 0.001, "loss": 2.5561, "step": 10314 }, { "epoch": 0.4363736356713766, "grad_norm": 0.19408752024173737, "learning_rate": 0.001, "loss": 2.5026, "step": 10315 }, { "epoch": 0.43641594043489296, "grad_norm": 0.25457096099853516, "learning_rate": 0.001, "loss": 2.2554, "step": 10316 }, { "epoch": 0.43645824519840937, "grad_norm": 0.3525673747062683, "learning_rate": 0.001, "loss": 2.2683, "step": 10317 }, { "epoch": 0.4365005499619257, "grad_norm": 0.1872408390045166, "learning_rate": 0.001, "loss": 2.0554, "step": 10318 }, { "epoch": 0.4365428547254421, "grad_norm": 0.31182733178138733, "learning_rate": 0.001, "loss": 2.6635, "step": 10319 }, { "epoch": 0.43658515948895843, "grad_norm": 0.18361353874206543, "learning_rate": 0.001, "loss": 3.2744, "step": 10320 }, { "epoch": 0.43662746425247484, "grad_norm": 0.20506177842617035, "learning_rate": 0.001, "loss": 2.086, "step": 10321 }, { "epoch": 0.4366697690159912, "grad_norm": 0.1834511160850525, "learning_rate": 0.001, "loss": 2.1743, "step": 10322 }, { "epoch": 0.43671207377950755, "grad_norm": 1.0289106369018555, "learning_rate": 0.001, "loss": 2.5491, "step": 10323 }, { "epoch": 0.43675437854302396, "grad_norm": 1.9735060930252075, "learning_rate": 0.001, "loss": 2.4487, "step": 10324 }, { "epoch": 0.4367966833065403, "grad_norm": 0.17474709451198578, "learning_rate": 0.001, "loss": 1.7286, "step": 10325 }, { "epoch": 0.43683898807005667, "grad_norm": 0.17368879914283752, "learning_rate": 0.001, "loss": 2.0555, "step": 10326 }, { "epoch": 0.4368812928335731, "grad_norm": 0.2503129839897156, "learning_rate": 0.001, "loss": 2.6008, "step": 10327 }, { "epoch": 0.43692359759708943, "grad_norm": 0.16766273975372314, "learning_rate": 0.001, "loss": 2.033, "step": 10328 }, { "epoch": 0.4369659023606058, "grad_norm": 0.19881922006607056, "learning_rate": 0.001, "loss": 1.9835, "step": 10329 }, { "epoch": 0.4370082071241222, "grad_norm": 2.48319149017334, "learning_rate": 0.001, "loss": 2.4763, "step": 10330 }, { "epoch": 0.43705051188763855, "grad_norm": 0.19117386639118195, "learning_rate": 0.001, "loss": 1.7097, "step": 10331 }, { "epoch": 0.4370928166511549, "grad_norm": 0.2317335158586502, "learning_rate": 0.001, "loss": 3.0248, "step": 10332 }, { "epoch": 0.4371351214146713, "grad_norm": 3.407411813735962, "learning_rate": 0.001, "loss": 2.5108, "step": 10333 }, { "epoch": 0.43717742617818767, "grad_norm": 0.32746651768684387, "learning_rate": 0.001, "loss": 2.6375, "step": 10334 }, { "epoch": 0.437219730941704, "grad_norm": 0.4482516348361969, "learning_rate": 0.001, "loss": 1.3975, "step": 10335 }, { "epoch": 0.43726203570522043, "grad_norm": 0.20397043228149414, "learning_rate": 0.001, "loss": 1.8973, "step": 10336 }, { "epoch": 0.4373043404687368, "grad_norm": 0.20856736600399017, "learning_rate": 0.001, "loss": 2.7255, "step": 10337 }, { "epoch": 0.43734664523225314, "grad_norm": 0.4428465664386749, "learning_rate": 0.001, "loss": 2.4357, "step": 10338 }, { "epoch": 0.43738894999576955, "grad_norm": 0.2584536075592041, "learning_rate": 0.001, "loss": 3.332, "step": 10339 }, { "epoch": 0.4374312547592859, "grad_norm": 0.2311519831418991, "learning_rate": 0.001, "loss": 3.5032, "step": 10340 }, { "epoch": 0.43747355952280226, "grad_norm": 0.25852352380752563, "learning_rate": 0.001, "loss": 2.6795, "step": 10341 }, { "epoch": 0.4375158642863186, "grad_norm": 0.1977977156639099, "learning_rate": 0.001, "loss": 2.0136, "step": 10342 }, { "epoch": 0.437558169049835, "grad_norm": 0.20671911537647247, "learning_rate": 0.001, "loss": 2.0685, "step": 10343 }, { "epoch": 0.4376004738133514, "grad_norm": 0.22240790724754333, "learning_rate": 0.001, "loss": 2.3391, "step": 10344 }, { "epoch": 0.43764277857686773, "grad_norm": 0.9896284937858582, "learning_rate": 0.001, "loss": 2.4624, "step": 10345 }, { "epoch": 0.43768508334038414, "grad_norm": 0.18324188888072968, "learning_rate": 0.001, "loss": 2.2438, "step": 10346 }, { "epoch": 0.4377273881039005, "grad_norm": 0.20104867219924927, "learning_rate": 0.001, "loss": 2.8267, "step": 10347 }, { "epoch": 0.43776969286741685, "grad_norm": 0.26405319571495056, "learning_rate": 0.001, "loss": 2.1973, "step": 10348 }, { "epoch": 0.43781199763093326, "grad_norm": 0.5061947107315063, "learning_rate": 0.001, "loss": 2.5415, "step": 10349 }, { "epoch": 0.4378543023944496, "grad_norm": 0.46758270263671875, "learning_rate": 0.001, "loss": 3.0251, "step": 10350 }, { "epoch": 0.43789660715796597, "grad_norm": 1.8289541006088257, "learning_rate": 0.001, "loss": 2.4671, "step": 10351 }, { "epoch": 0.4379389119214824, "grad_norm": 2.7915992736816406, "learning_rate": 0.001, "loss": 2.0793, "step": 10352 }, { "epoch": 0.43798121668499873, "grad_norm": 0.1846030205488205, "learning_rate": 0.001, "loss": 1.7337, "step": 10353 }, { "epoch": 0.4380235214485151, "grad_norm": 0.8970298171043396, "learning_rate": 0.001, "loss": 2.6869, "step": 10354 }, { "epoch": 0.4380658262120315, "grad_norm": 0.6228232979774475, "learning_rate": 0.001, "loss": 2.245, "step": 10355 }, { "epoch": 0.43810813097554785, "grad_norm": 0.18022607266902924, "learning_rate": 0.001, "loss": 2.4163, "step": 10356 }, { "epoch": 0.4381504357390642, "grad_norm": 0.3440026640892029, "learning_rate": 0.001, "loss": 2.0781, "step": 10357 }, { "epoch": 0.4381927405025806, "grad_norm": 0.26871904730796814, "learning_rate": 0.001, "loss": 3.507, "step": 10358 }, { "epoch": 0.43823504526609697, "grad_norm": 0.1928730607032776, "learning_rate": 0.001, "loss": 1.6934, "step": 10359 }, { "epoch": 0.4382773500296133, "grad_norm": 0.23119626939296722, "learning_rate": 0.001, "loss": 1.9447, "step": 10360 }, { "epoch": 0.43831965479312973, "grad_norm": 0.20869280397891998, "learning_rate": 0.001, "loss": 2.3586, "step": 10361 }, { "epoch": 0.4383619595566461, "grad_norm": 0.26448631286621094, "learning_rate": 0.001, "loss": 2.1414, "step": 10362 }, { "epoch": 0.43840426432016244, "grad_norm": 0.2913392186164856, "learning_rate": 0.001, "loss": 2.2654, "step": 10363 }, { "epoch": 0.43844656908367885, "grad_norm": 6.033486843109131, "learning_rate": 0.001, "loss": 2.6725, "step": 10364 }, { "epoch": 0.4384888738471952, "grad_norm": 0.8384542465209961, "learning_rate": 0.001, "loss": 1.926, "step": 10365 }, { "epoch": 0.43853117861071156, "grad_norm": 0.20631474256515503, "learning_rate": 0.001, "loss": 1.8508, "step": 10366 }, { "epoch": 0.4385734833742279, "grad_norm": 0.3741599917411804, "learning_rate": 0.001, "loss": 2.8569, "step": 10367 }, { "epoch": 0.4386157881377443, "grad_norm": 0.19838982820510864, "learning_rate": 0.001, "loss": 2.0593, "step": 10368 }, { "epoch": 0.4386580929012607, "grad_norm": 0.16171355545520782, "learning_rate": 0.001, "loss": 1.7758, "step": 10369 }, { "epoch": 0.43870039766477703, "grad_norm": 2.109577178955078, "learning_rate": 0.001, "loss": 2.4595, "step": 10370 }, { "epoch": 0.43874270242829344, "grad_norm": 0.23121996223926544, "learning_rate": 0.001, "loss": 2.1405, "step": 10371 }, { "epoch": 0.4387850071918098, "grad_norm": 0.19020533561706543, "learning_rate": 0.001, "loss": 2.0821, "step": 10372 }, { "epoch": 0.43882731195532615, "grad_norm": 0.3466572165489197, "learning_rate": 0.001, "loss": 2.352, "step": 10373 }, { "epoch": 0.43886961671884256, "grad_norm": 0.22979247570037842, "learning_rate": 0.001, "loss": 1.8425, "step": 10374 }, { "epoch": 0.4389119214823589, "grad_norm": 0.2191963642835617, "learning_rate": 0.001, "loss": 1.9217, "step": 10375 }, { "epoch": 0.43895422624587527, "grad_norm": 0.29709628224372864, "learning_rate": 0.001, "loss": 2.0016, "step": 10376 }, { "epoch": 0.4389965310093917, "grad_norm": 1.1428967714309692, "learning_rate": 0.001, "loss": 2.3209, "step": 10377 }, { "epoch": 0.43903883577290803, "grad_norm": 0.21593493223190308, "learning_rate": 0.001, "loss": 2.2734, "step": 10378 }, { "epoch": 0.4390811405364244, "grad_norm": 0.2102469801902771, "learning_rate": 0.001, "loss": 2.5221, "step": 10379 }, { "epoch": 0.4391234452999408, "grad_norm": 0.19701038300991058, "learning_rate": 0.001, "loss": 2.7192, "step": 10380 }, { "epoch": 0.43916575006345715, "grad_norm": 0.6864662170410156, "learning_rate": 0.001, "loss": 2.4796, "step": 10381 }, { "epoch": 0.4392080548269735, "grad_norm": 0.5476522445678711, "learning_rate": 0.001, "loss": 1.725, "step": 10382 }, { "epoch": 0.4392503595904899, "grad_norm": 0.19268248975276947, "learning_rate": 0.001, "loss": 1.8153, "step": 10383 }, { "epoch": 0.43929266435400627, "grad_norm": 0.40893223881721497, "learning_rate": 0.001, "loss": 2.7319, "step": 10384 }, { "epoch": 0.4393349691175226, "grad_norm": 2.664677143096924, "learning_rate": 0.001, "loss": 2.4025, "step": 10385 }, { "epoch": 0.43937727388103903, "grad_norm": 0.18634748458862305, "learning_rate": 0.001, "loss": 1.9047, "step": 10386 }, { "epoch": 0.4394195786445554, "grad_norm": 0.1849360466003418, "learning_rate": 0.001, "loss": 2.2008, "step": 10387 }, { "epoch": 0.43946188340807174, "grad_norm": 0.15529468655586243, "learning_rate": 0.001, "loss": 2.3172, "step": 10388 }, { "epoch": 0.4395041881715881, "grad_norm": 0.15574440360069275, "learning_rate": 0.001, "loss": 1.7872, "step": 10389 }, { "epoch": 0.4395464929351045, "grad_norm": 0.24909093976020813, "learning_rate": 0.001, "loss": 2.3089, "step": 10390 }, { "epoch": 0.43958879769862086, "grad_norm": 0.27436402440071106, "learning_rate": 0.001, "loss": 2.7429, "step": 10391 }, { "epoch": 0.4396311024621372, "grad_norm": 0.6394477486610413, "learning_rate": 0.001, "loss": 3.1377, "step": 10392 }, { "epoch": 0.4396734072256536, "grad_norm": 0.1768225133419037, "learning_rate": 0.001, "loss": 1.7332, "step": 10393 }, { "epoch": 0.43971571198917, "grad_norm": 0.18364182114601135, "learning_rate": 0.001, "loss": 3.3755, "step": 10394 }, { "epoch": 0.43975801675268633, "grad_norm": 0.1930709332227707, "learning_rate": 0.001, "loss": 2.2478, "step": 10395 }, { "epoch": 0.43980032151620274, "grad_norm": 4.500080585479736, "learning_rate": 0.001, "loss": 1.6433, "step": 10396 }, { "epoch": 0.4398426262797191, "grad_norm": 0.20036698877811432, "learning_rate": 0.001, "loss": 2.2089, "step": 10397 }, { "epoch": 0.43988493104323545, "grad_norm": 0.19765986502170563, "learning_rate": 0.001, "loss": 2.012, "step": 10398 }, { "epoch": 0.43992723580675186, "grad_norm": 0.1463608741760254, "learning_rate": 0.001, "loss": 1.4149, "step": 10399 }, { "epoch": 0.4399695405702682, "grad_norm": 6.610461235046387, "learning_rate": 0.001, "loss": 2.9959, "step": 10400 }, { "epoch": 0.44001184533378457, "grad_norm": 0.23446570336818695, "learning_rate": 0.001, "loss": 2.249, "step": 10401 }, { "epoch": 0.440054150097301, "grad_norm": 1.4213615655899048, "learning_rate": 0.001, "loss": 2.6993, "step": 10402 }, { "epoch": 0.44009645486081733, "grad_norm": 0.6602863073348999, "learning_rate": 0.001, "loss": 1.8553, "step": 10403 }, { "epoch": 0.4401387596243337, "grad_norm": 0.2995677590370178, "learning_rate": 0.001, "loss": 3.3502, "step": 10404 }, { "epoch": 0.4401810643878501, "grad_norm": 0.1976313292980194, "learning_rate": 0.001, "loss": 2.0083, "step": 10405 }, { "epoch": 0.44022336915136645, "grad_norm": 0.2297874391078949, "learning_rate": 0.001, "loss": 3.5225, "step": 10406 }, { "epoch": 0.4402656739148828, "grad_norm": 0.24849973618984222, "learning_rate": 0.001, "loss": 2.3344, "step": 10407 }, { "epoch": 0.4403079786783992, "grad_norm": 0.15684276819229126, "learning_rate": 0.001, "loss": 3.1674, "step": 10408 }, { "epoch": 0.44035028344191557, "grad_norm": 0.24899160861968994, "learning_rate": 0.001, "loss": 1.7431, "step": 10409 }, { "epoch": 0.4403925882054319, "grad_norm": 0.16840027272701263, "learning_rate": 0.001, "loss": 2.1913, "step": 10410 }, { "epoch": 0.4404348929689483, "grad_norm": 0.18736068904399872, "learning_rate": 0.001, "loss": 2.1678, "step": 10411 }, { "epoch": 0.4404771977324647, "grad_norm": 0.19807054102420807, "learning_rate": 0.001, "loss": 2.3152, "step": 10412 }, { "epoch": 0.44051950249598104, "grad_norm": 0.7695265412330627, "learning_rate": 0.001, "loss": 1.9602, "step": 10413 }, { "epoch": 0.4405618072594974, "grad_norm": 0.19554787874221802, "learning_rate": 0.001, "loss": 1.9298, "step": 10414 }, { "epoch": 0.4406041120230138, "grad_norm": 0.22768321633338928, "learning_rate": 0.001, "loss": 1.8096, "step": 10415 }, { "epoch": 0.44064641678653016, "grad_norm": 0.16684657335281372, "learning_rate": 0.001, "loss": 2.6149, "step": 10416 }, { "epoch": 0.4406887215500465, "grad_norm": 6.711855888366699, "learning_rate": 0.001, "loss": 2.506, "step": 10417 }, { "epoch": 0.4407310263135629, "grad_norm": 0.199007049202919, "learning_rate": 0.001, "loss": 2.6235, "step": 10418 }, { "epoch": 0.4407733310770793, "grad_norm": 4.696216106414795, "learning_rate": 0.001, "loss": 2.0736, "step": 10419 }, { "epoch": 0.44081563584059563, "grad_norm": 0.18525268137454987, "learning_rate": 0.001, "loss": 1.6978, "step": 10420 }, { "epoch": 0.44085794060411204, "grad_norm": 0.22784662246704102, "learning_rate": 0.001, "loss": 2.0242, "step": 10421 }, { "epoch": 0.4409002453676284, "grad_norm": 0.49683111906051636, "learning_rate": 0.001, "loss": 3.07, "step": 10422 }, { "epoch": 0.44094255013114475, "grad_norm": 0.29818785190582275, "learning_rate": 0.001, "loss": 2.0421, "step": 10423 }, { "epoch": 0.44098485489466116, "grad_norm": 0.30528533458709717, "learning_rate": 0.001, "loss": 2.3055, "step": 10424 }, { "epoch": 0.4410271596581775, "grad_norm": 0.17285211384296417, "learning_rate": 0.001, "loss": 2.8575, "step": 10425 }, { "epoch": 0.44106946442169387, "grad_norm": 0.2074311375617981, "learning_rate": 0.001, "loss": 2.9005, "step": 10426 }, { "epoch": 0.4411117691852103, "grad_norm": 13.308959007263184, "learning_rate": 0.001, "loss": 2.1316, "step": 10427 }, { "epoch": 0.44115407394872663, "grad_norm": 8.290491104125977, "learning_rate": 0.001, "loss": 3.0784, "step": 10428 }, { "epoch": 0.441196378712243, "grad_norm": 6.2142252922058105, "learning_rate": 0.001, "loss": 2.8017, "step": 10429 }, { "epoch": 0.4412386834757594, "grad_norm": 0.41313034296035767, "learning_rate": 0.001, "loss": 2.1858, "step": 10430 }, { "epoch": 0.44128098823927575, "grad_norm": 13.561888694763184, "learning_rate": 0.001, "loss": 2.6311, "step": 10431 }, { "epoch": 0.4413232930027921, "grad_norm": 0.19502195715904236, "learning_rate": 0.001, "loss": 2.5145, "step": 10432 }, { "epoch": 0.44136559776630846, "grad_norm": 0.2449101358652115, "learning_rate": 0.001, "loss": 2.9577, "step": 10433 }, { "epoch": 0.44140790252982487, "grad_norm": 0.4394688606262207, "learning_rate": 0.001, "loss": 2.156, "step": 10434 }, { "epoch": 0.4414502072933412, "grad_norm": 0.9801822900772095, "learning_rate": 0.001, "loss": 2.5387, "step": 10435 }, { "epoch": 0.4414925120568576, "grad_norm": 0.2750179171562195, "learning_rate": 0.001, "loss": 2.1277, "step": 10436 }, { "epoch": 0.441534816820374, "grad_norm": 0.17588399350643158, "learning_rate": 0.001, "loss": 3.0843, "step": 10437 }, { "epoch": 0.44157712158389034, "grad_norm": 0.18611468374729156, "learning_rate": 0.001, "loss": 2.0977, "step": 10438 }, { "epoch": 0.4416194263474067, "grad_norm": 0.4915761649608612, "learning_rate": 0.001, "loss": 2.4029, "step": 10439 }, { "epoch": 0.4416617311109231, "grad_norm": 0.2895006835460663, "learning_rate": 0.001, "loss": 2.6397, "step": 10440 }, { "epoch": 0.44170403587443946, "grad_norm": 0.19709734618663788, "learning_rate": 0.001, "loss": 2.1713, "step": 10441 }, { "epoch": 0.4417463406379558, "grad_norm": 0.7510543465614319, "learning_rate": 0.001, "loss": 2.5878, "step": 10442 }, { "epoch": 0.4417886454014722, "grad_norm": 0.18231795728206635, "learning_rate": 0.001, "loss": 1.8023, "step": 10443 }, { "epoch": 0.4418309501649886, "grad_norm": 0.18479089438915253, "learning_rate": 0.001, "loss": 2.1124, "step": 10444 }, { "epoch": 0.44187325492850493, "grad_norm": 0.35781130194664, "learning_rate": 0.001, "loss": 2.4331, "step": 10445 }, { "epoch": 0.44191555969202134, "grad_norm": 0.23527033627033234, "learning_rate": 0.001, "loss": 1.5157, "step": 10446 }, { "epoch": 0.4419578644555377, "grad_norm": 0.21979889273643494, "learning_rate": 0.001, "loss": 2.0873, "step": 10447 }, { "epoch": 0.44200016921905405, "grad_norm": 0.2087160348892212, "learning_rate": 0.001, "loss": 2.237, "step": 10448 }, { "epoch": 0.44204247398257046, "grad_norm": 0.30770057439804077, "learning_rate": 0.001, "loss": 2.6998, "step": 10449 }, { "epoch": 0.4420847787460868, "grad_norm": 0.27149367332458496, "learning_rate": 0.001, "loss": 2.438, "step": 10450 }, { "epoch": 0.44212708350960317, "grad_norm": 0.18169645965099335, "learning_rate": 0.001, "loss": 1.4572, "step": 10451 }, { "epoch": 0.4421693882731196, "grad_norm": 0.1930762231349945, "learning_rate": 0.001, "loss": 1.9931, "step": 10452 }, { "epoch": 0.44221169303663593, "grad_norm": 1.637730598449707, "learning_rate": 0.001, "loss": 3.6527, "step": 10453 }, { "epoch": 0.4422539978001523, "grad_norm": 0.19526685774326324, "learning_rate": 0.001, "loss": 2.715, "step": 10454 }, { "epoch": 0.44229630256366864, "grad_norm": 0.25344404578208923, "learning_rate": 0.001, "loss": 2.2599, "step": 10455 }, { "epoch": 0.44233860732718505, "grad_norm": 0.4373885989189148, "learning_rate": 0.001, "loss": 2.1229, "step": 10456 }, { "epoch": 0.4423809120907014, "grad_norm": 0.49542349576950073, "learning_rate": 0.001, "loss": 2.0016, "step": 10457 }, { "epoch": 0.44242321685421776, "grad_norm": 0.21734249591827393, "learning_rate": 0.001, "loss": 2.5501, "step": 10458 }, { "epoch": 0.44246552161773417, "grad_norm": 0.37125974893569946, "learning_rate": 0.001, "loss": 2.2032, "step": 10459 }, { "epoch": 0.4425078263812505, "grad_norm": 1.202412724494934, "learning_rate": 0.001, "loss": 2.6156, "step": 10460 }, { "epoch": 0.4425501311447669, "grad_norm": 0.2056283950805664, "learning_rate": 0.001, "loss": 2.3052, "step": 10461 }, { "epoch": 0.4425924359082833, "grad_norm": 0.24419522285461426, "learning_rate": 0.001, "loss": 1.9584, "step": 10462 }, { "epoch": 0.44263474067179964, "grad_norm": 3.809471845626831, "learning_rate": 0.001, "loss": 2.0806, "step": 10463 }, { "epoch": 0.442677045435316, "grad_norm": 0.1795853078365326, "learning_rate": 0.001, "loss": 1.8472, "step": 10464 }, { "epoch": 0.4427193501988324, "grad_norm": 0.21403010189533234, "learning_rate": 0.001, "loss": 2.5621, "step": 10465 }, { "epoch": 0.44276165496234876, "grad_norm": 0.23772773146629333, "learning_rate": 0.001, "loss": 4.0021, "step": 10466 }, { "epoch": 0.4428039597258651, "grad_norm": 0.1654462069272995, "learning_rate": 0.001, "loss": 2.3355, "step": 10467 }, { "epoch": 0.4428462644893815, "grad_norm": 0.16145402193069458, "learning_rate": 0.001, "loss": 3.1889, "step": 10468 }, { "epoch": 0.4428885692528979, "grad_norm": 0.27223458886146545, "learning_rate": 0.001, "loss": 1.7634, "step": 10469 }, { "epoch": 0.44293087401641423, "grad_norm": 0.1729728877544403, "learning_rate": 0.001, "loss": 1.2275, "step": 10470 }, { "epoch": 0.44297317877993064, "grad_norm": 0.17519982159137726, "learning_rate": 0.001, "loss": 2.0601, "step": 10471 }, { "epoch": 0.443015483543447, "grad_norm": 0.2915792167186737, "learning_rate": 0.001, "loss": 3.0095, "step": 10472 }, { "epoch": 0.44305778830696335, "grad_norm": 0.212400883436203, "learning_rate": 0.001, "loss": 2.7761, "step": 10473 }, { "epoch": 0.44310009307047976, "grad_norm": 1.2644349336624146, "learning_rate": 0.001, "loss": 2.631, "step": 10474 }, { "epoch": 0.4431423978339961, "grad_norm": 0.2543438673019409, "learning_rate": 0.001, "loss": 2.9616, "step": 10475 }, { "epoch": 0.44318470259751247, "grad_norm": 0.5095729827880859, "learning_rate": 0.001, "loss": 2.9212, "step": 10476 }, { "epoch": 0.4432270073610288, "grad_norm": 0.22304564714431763, "learning_rate": 0.001, "loss": 2.3646, "step": 10477 }, { "epoch": 0.44326931212454523, "grad_norm": 0.2589484751224518, "learning_rate": 0.001, "loss": 1.8629, "step": 10478 }, { "epoch": 0.4433116168880616, "grad_norm": 0.171627938747406, "learning_rate": 0.001, "loss": 2.1287, "step": 10479 }, { "epoch": 0.44335392165157794, "grad_norm": 0.18898135423660278, "learning_rate": 0.001, "loss": 1.9837, "step": 10480 }, { "epoch": 0.44339622641509435, "grad_norm": 0.17940890789031982, "learning_rate": 0.001, "loss": 1.8571, "step": 10481 }, { "epoch": 0.4434385311786107, "grad_norm": 0.3940030634403229, "learning_rate": 0.001, "loss": 2.5347, "step": 10482 }, { "epoch": 0.44348083594212706, "grad_norm": 0.16100460290908813, "learning_rate": 0.001, "loss": 3.2192, "step": 10483 }, { "epoch": 0.44352314070564347, "grad_norm": 0.3118045926094055, "learning_rate": 0.001, "loss": 2.7277, "step": 10484 }, { "epoch": 0.4435654454691598, "grad_norm": 0.38664117455482483, "learning_rate": 0.001, "loss": 3.5834, "step": 10485 }, { "epoch": 0.4436077502326762, "grad_norm": 0.15630090236663818, "learning_rate": 0.001, "loss": 1.5872, "step": 10486 }, { "epoch": 0.4436500549961926, "grad_norm": 0.1633961796760559, "learning_rate": 0.001, "loss": 2.2324, "step": 10487 }, { "epoch": 0.44369235975970894, "grad_norm": 0.2306484431028366, "learning_rate": 0.001, "loss": 2.1569, "step": 10488 }, { "epoch": 0.4437346645232253, "grad_norm": 0.3159201741218567, "learning_rate": 0.001, "loss": 2.2158, "step": 10489 }, { "epoch": 0.4437769692867417, "grad_norm": 0.1387176662683487, "learning_rate": 0.001, "loss": 1.596, "step": 10490 }, { "epoch": 0.44381927405025806, "grad_norm": 0.17673452198505402, "learning_rate": 0.001, "loss": 2.1328, "step": 10491 }, { "epoch": 0.4438615788137744, "grad_norm": 0.22988873720169067, "learning_rate": 0.001, "loss": 1.8124, "step": 10492 }, { "epoch": 0.4439038835772908, "grad_norm": 0.1935664266347885, "learning_rate": 0.001, "loss": 1.9753, "step": 10493 }, { "epoch": 0.4439461883408072, "grad_norm": 0.17069995403289795, "learning_rate": 0.001, "loss": 2.0083, "step": 10494 }, { "epoch": 0.44398849310432353, "grad_norm": 1.7595449686050415, "learning_rate": 0.001, "loss": 2.3677, "step": 10495 }, { "epoch": 0.44403079786783994, "grad_norm": 0.2332594096660614, "learning_rate": 0.001, "loss": 1.9425, "step": 10496 }, { "epoch": 0.4440731026313563, "grad_norm": 3.590059757232666, "learning_rate": 0.001, "loss": 2.2229, "step": 10497 }, { "epoch": 0.44411540739487265, "grad_norm": 0.1739518791437149, "learning_rate": 0.001, "loss": 1.8198, "step": 10498 }, { "epoch": 0.44415771215838906, "grad_norm": 0.18792513012886047, "learning_rate": 0.001, "loss": 1.8256, "step": 10499 }, { "epoch": 0.4442000169219054, "grad_norm": 0.16739851236343384, "learning_rate": 0.001, "loss": 1.96, "step": 10500 }, { "epoch": 0.44424232168542177, "grad_norm": 0.17911168932914734, "learning_rate": 0.001, "loss": 2.0014, "step": 10501 }, { "epoch": 0.4442846264489381, "grad_norm": 0.2152254283428192, "learning_rate": 0.001, "loss": 2.1837, "step": 10502 }, { "epoch": 0.44432693121245453, "grad_norm": 0.16652332246303558, "learning_rate": 0.001, "loss": 1.8485, "step": 10503 }, { "epoch": 0.4443692359759709, "grad_norm": 0.1486539989709854, "learning_rate": 0.001, "loss": 1.8012, "step": 10504 }, { "epoch": 0.44441154073948724, "grad_norm": 0.4087856113910675, "learning_rate": 0.001, "loss": 1.6707, "step": 10505 }, { "epoch": 0.44445384550300365, "grad_norm": 0.20888468623161316, "learning_rate": 0.001, "loss": 2.1961, "step": 10506 }, { "epoch": 0.44449615026652, "grad_norm": 0.5997106432914734, "learning_rate": 0.001, "loss": 2.2202, "step": 10507 }, { "epoch": 0.44453845503003636, "grad_norm": 3.278139591217041, "learning_rate": 0.001, "loss": 2.1766, "step": 10508 }, { "epoch": 0.44458075979355277, "grad_norm": 0.16290752589702606, "learning_rate": 0.001, "loss": 3.9578, "step": 10509 }, { "epoch": 0.4446230645570691, "grad_norm": 0.8776594996452332, "learning_rate": 0.001, "loss": 1.7571, "step": 10510 }, { "epoch": 0.4446653693205855, "grad_norm": 0.7716861367225647, "learning_rate": 0.001, "loss": 3.2192, "step": 10511 }, { "epoch": 0.4447076740841019, "grad_norm": 1.821780800819397, "learning_rate": 0.001, "loss": 2.1203, "step": 10512 }, { "epoch": 0.44474997884761824, "grad_norm": 4.94421911239624, "learning_rate": 0.001, "loss": 2.127, "step": 10513 }, { "epoch": 0.4447922836111346, "grad_norm": 0.17742355167865753, "learning_rate": 0.001, "loss": 1.7438, "step": 10514 }, { "epoch": 0.444834588374651, "grad_norm": 0.18554328382015228, "learning_rate": 0.001, "loss": 2.8967, "step": 10515 }, { "epoch": 0.44487689313816736, "grad_norm": 0.1628502756357193, "learning_rate": 0.001, "loss": 1.8562, "step": 10516 }, { "epoch": 0.4449191979016837, "grad_norm": 0.7208130955696106, "learning_rate": 0.001, "loss": 2.7975, "step": 10517 }, { "epoch": 0.4449615026652001, "grad_norm": 0.6698981523513794, "learning_rate": 0.001, "loss": 2.7178, "step": 10518 }, { "epoch": 0.4450038074287165, "grad_norm": 0.20229113101959229, "learning_rate": 0.001, "loss": 2.9919, "step": 10519 }, { "epoch": 0.44504611219223283, "grad_norm": 0.1968151330947876, "learning_rate": 0.001, "loss": 3.2183, "step": 10520 }, { "epoch": 0.44508841695574924, "grad_norm": 0.225248783826828, "learning_rate": 0.001, "loss": 3.2217, "step": 10521 }, { "epoch": 0.4451307217192656, "grad_norm": 0.1952434927225113, "learning_rate": 0.001, "loss": 2.5245, "step": 10522 }, { "epoch": 0.44517302648278195, "grad_norm": 1.0436367988586426, "learning_rate": 0.001, "loss": 3.3446, "step": 10523 }, { "epoch": 0.4452153312462983, "grad_norm": 0.18723340332508087, "learning_rate": 0.001, "loss": 2.3264, "step": 10524 }, { "epoch": 0.4452576360098147, "grad_norm": 0.2530616521835327, "learning_rate": 0.001, "loss": 2.052, "step": 10525 }, { "epoch": 0.44529994077333107, "grad_norm": 0.22954061627388, "learning_rate": 0.001, "loss": 2.7963, "step": 10526 }, { "epoch": 0.4453422455368474, "grad_norm": 0.18304547667503357, "learning_rate": 0.001, "loss": 2.3264, "step": 10527 }, { "epoch": 0.44538455030036384, "grad_norm": 0.39500945806503296, "learning_rate": 0.001, "loss": 1.9825, "step": 10528 }, { "epoch": 0.4454268550638802, "grad_norm": 2.5766665935516357, "learning_rate": 0.001, "loss": 2.5105, "step": 10529 }, { "epoch": 0.44546915982739654, "grad_norm": 0.23480746150016785, "learning_rate": 0.001, "loss": 1.8403, "step": 10530 }, { "epoch": 0.44551146459091295, "grad_norm": 0.18920212984085083, "learning_rate": 0.001, "loss": 2.8969, "step": 10531 }, { "epoch": 0.4455537693544293, "grad_norm": 0.1778506338596344, "learning_rate": 0.001, "loss": 2.9153, "step": 10532 }, { "epoch": 0.44559607411794566, "grad_norm": 0.28486740589141846, "learning_rate": 0.001, "loss": 3.6157, "step": 10533 }, { "epoch": 0.44563837888146207, "grad_norm": 0.19953373074531555, "learning_rate": 0.001, "loss": 2.1183, "step": 10534 }, { "epoch": 0.4456806836449784, "grad_norm": 0.17005318403244019, "learning_rate": 0.001, "loss": 3.2538, "step": 10535 }, { "epoch": 0.4457229884084948, "grad_norm": 0.2750973105430603, "learning_rate": 0.001, "loss": 2.6134, "step": 10536 }, { "epoch": 0.4457652931720112, "grad_norm": 0.18177230656147003, "learning_rate": 0.001, "loss": 2.8142, "step": 10537 }, { "epoch": 0.44580759793552754, "grad_norm": 0.17410710453987122, "learning_rate": 0.001, "loss": 2.2946, "step": 10538 }, { "epoch": 0.4458499026990439, "grad_norm": 0.42573174834251404, "learning_rate": 0.001, "loss": 2.5024, "step": 10539 }, { "epoch": 0.4458922074625603, "grad_norm": 0.19186319410800934, "learning_rate": 0.001, "loss": 1.8359, "step": 10540 }, { "epoch": 0.44593451222607666, "grad_norm": 0.3824551999568939, "learning_rate": 0.001, "loss": 2.5268, "step": 10541 }, { "epoch": 0.445976816989593, "grad_norm": 0.23018528521060944, "learning_rate": 0.001, "loss": 2.834, "step": 10542 }, { "epoch": 0.4460191217531094, "grad_norm": 0.1816767454147339, "learning_rate": 0.001, "loss": 2.0896, "step": 10543 }, { "epoch": 0.4460614265166258, "grad_norm": 0.17159247398376465, "learning_rate": 0.001, "loss": 1.84, "step": 10544 }, { "epoch": 0.44610373128014214, "grad_norm": 1.8396812677383423, "learning_rate": 0.001, "loss": 2.0751, "step": 10545 }, { "epoch": 0.4461460360436585, "grad_norm": 0.3504880666732788, "learning_rate": 0.001, "loss": 2.3788, "step": 10546 }, { "epoch": 0.4461883408071749, "grad_norm": 0.18512053787708282, "learning_rate": 0.001, "loss": 1.6916, "step": 10547 }, { "epoch": 0.44623064557069125, "grad_norm": 0.18409253656864166, "learning_rate": 0.001, "loss": 1.5346, "step": 10548 }, { "epoch": 0.4462729503342076, "grad_norm": 2.5886762142181396, "learning_rate": 0.001, "loss": 1.8567, "step": 10549 }, { "epoch": 0.446315255097724, "grad_norm": 0.6384299993515015, "learning_rate": 0.001, "loss": 2.2381, "step": 10550 }, { "epoch": 0.44635755986124037, "grad_norm": 2.2755072116851807, "learning_rate": 0.001, "loss": 2.2896, "step": 10551 }, { "epoch": 0.4463998646247567, "grad_norm": 0.4430360198020935, "learning_rate": 0.001, "loss": 3.7153, "step": 10552 }, { "epoch": 0.44644216938827314, "grad_norm": 1.0187450647354126, "learning_rate": 0.001, "loss": 3.2753, "step": 10553 }, { "epoch": 0.4464844741517895, "grad_norm": 0.2957771122455597, "learning_rate": 0.001, "loss": 1.7454, "step": 10554 }, { "epoch": 0.44652677891530584, "grad_norm": 0.2080879807472229, "learning_rate": 0.001, "loss": 2.0611, "step": 10555 }, { "epoch": 0.44656908367882225, "grad_norm": 1.7243143320083618, "learning_rate": 0.001, "loss": 2.981, "step": 10556 }, { "epoch": 0.4466113884423386, "grad_norm": 0.2195415198802948, "learning_rate": 0.001, "loss": 1.9373, "step": 10557 }, { "epoch": 0.44665369320585496, "grad_norm": 0.19098582863807678, "learning_rate": 0.001, "loss": 1.9921, "step": 10558 }, { "epoch": 0.4466959979693714, "grad_norm": 0.2138836830854416, "learning_rate": 0.001, "loss": 2.2429, "step": 10559 }, { "epoch": 0.4467383027328877, "grad_norm": 0.18263570964336395, "learning_rate": 0.001, "loss": 2.5905, "step": 10560 }, { "epoch": 0.4467806074964041, "grad_norm": 0.20292994379997253, "learning_rate": 0.001, "loss": 2.3356, "step": 10561 }, { "epoch": 0.4468229122599205, "grad_norm": 0.2716735601425171, "learning_rate": 0.001, "loss": 1.7254, "step": 10562 }, { "epoch": 0.44686521702343684, "grad_norm": 0.16340197622776031, "learning_rate": 0.001, "loss": 1.7591, "step": 10563 }, { "epoch": 0.4469075217869532, "grad_norm": 0.2119547724723816, "learning_rate": 0.001, "loss": 2.3141, "step": 10564 }, { "epoch": 0.4469498265504696, "grad_norm": 0.18071070313453674, "learning_rate": 0.001, "loss": 1.8618, "step": 10565 }, { "epoch": 0.44699213131398596, "grad_norm": 0.3588818311691284, "learning_rate": 0.001, "loss": 3.3872, "step": 10566 }, { "epoch": 0.4470344360775023, "grad_norm": 0.19287428259849548, "learning_rate": 0.001, "loss": 2.3166, "step": 10567 }, { "epoch": 0.44707674084101867, "grad_norm": 0.43625664710998535, "learning_rate": 0.001, "loss": 2.8193, "step": 10568 }, { "epoch": 0.4471190456045351, "grad_norm": 0.4928484559059143, "learning_rate": 0.001, "loss": 1.6347, "step": 10569 }, { "epoch": 0.44716135036805144, "grad_norm": 0.685997724533081, "learning_rate": 0.001, "loss": 2.317, "step": 10570 }, { "epoch": 0.4472036551315678, "grad_norm": 0.15223908424377441, "learning_rate": 0.001, "loss": 2.1719, "step": 10571 }, { "epoch": 0.4472459598950842, "grad_norm": 0.18542735278606415, "learning_rate": 0.001, "loss": 1.5935, "step": 10572 }, { "epoch": 0.44728826465860055, "grad_norm": 0.18469227850437164, "learning_rate": 0.001, "loss": 2.4737, "step": 10573 }, { "epoch": 0.4473305694221169, "grad_norm": 0.24252432584762573, "learning_rate": 0.001, "loss": 1.9852, "step": 10574 }, { "epoch": 0.4473728741856333, "grad_norm": 0.16159304976463318, "learning_rate": 0.001, "loss": 2.6181, "step": 10575 }, { "epoch": 0.44741517894914967, "grad_norm": 3.23504900932312, "learning_rate": 0.001, "loss": 3.0528, "step": 10576 }, { "epoch": 0.447457483712666, "grad_norm": 2.2757198810577393, "learning_rate": 0.001, "loss": 3.2706, "step": 10577 }, { "epoch": 0.44749978847618244, "grad_norm": 0.22294031083583832, "learning_rate": 0.001, "loss": 2.1312, "step": 10578 }, { "epoch": 0.4475420932396988, "grad_norm": 0.19114287197589874, "learning_rate": 0.001, "loss": 2.8998, "step": 10579 }, { "epoch": 0.44758439800321514, "grad_norm": 0.6231216192245483, "learning_rate": 0.001, "loss": 3.3509, "step": 10580 }, { "epoch": 0.44762670276673155, "grad_norm": 0.18667848408222198, "learning_rate": 0.001, "loss": 2.477, "step": 10581 }, { "epoch": 0.4476690075302479, "grad_norm": 0.5230020880699158, "learning_rate": 0.001, "loss": 3.9596, "step": 10582 }, { "epoch": 0.44771131229376426, "grad_norm": 0.2077375054359436, "learning_rate": 0.001, "loss": 2.1221, "step": 10583 }, { "epoch": 0.4477536170572807, "grad_norm": 0.21029135584831238, "learning_rate": 0.001, "loss": 2.7445, "step": 10584 }, { "epoch": 0.447795921820797, "grad_norm": 0.30405527353286743, "learning_rate": 0.001, "loss": 3.0613, "step": 10585 }, { "epoch": 0.4478382265843134, "grad_norm": 0.16450397670269012, "learning_rate": 0.001, "loss": 2.518, "step": 10586 }, { "epoch": 0.4478805313478298, "grad_norm": 1.3755722045898438, "learning_rate": 0.001, "loss": 2.3927, "step": 10587 }, { "epoch": 0.44792283611134615, "grad_norm": 0.18358470499515533, "learning_rate": 0.001, "loss": 2.7458, "step": 10588 }, { "epoch": 0.4479651408748625, "grad_norm": 5.526552200317383, "learning_rate": 0.001, "loss": 1.9233, "step": 10589 }, { "epoch": 0.44800744563837885, "grad_norm": 0.2043481022119522, "learning_rate": 0.001, "loss": 2.0861, "step": 10590 }, { "epoch": 0.44804975040189526, "grad_norm": 0.16888974606990814, "learning_rate": 0.001, "loss": 3.3986, "step": 10591 }, { "epoch": 0.4480920551654116, "grad_norm": 0.208000048995018, "learning_rate": 0.001, "loss": 2.8552, "step": 10592 }, { "epoch": 0.44813435992892797, "grad_norm": 2.04957914352417, "learning_rate": 0.001, "loss": 3.1259, "step": 10593 }, { "epoch": 0.4481766646924444, "grad_norm": 0.4959050118923187, "learning_rate": 0.001, "loss": 2.161, "step": 10594 }, { "epoch": 0.44821896945596074, "grad_norm": 0.20981314778327942, "learning_rate": 0.001, "loss": 2.1842, "step": 10595 }, { "epoch": 0.4482612742194771, "grad_norm": 0.29932335019111633, "learning_rate": 0.001, "loss": 3.8183, "step": 10596 }, { "epoch": 0.4483035789829935, "grad_norm": 0.22173571586608887, "learning_rate": 0.001, "loss": 2.4446, "step": 10597 }, { "epoch": 0.44834588374650985, "grad_norm": 0.19373901188373566, "learning_rate": 0.001, "loss": 2.1564, "step": 10598 }, { "epoch": 0.4483881885100262, "grad_norm": 0.802295446395874, "learning_rate": 0.001, "loss": 1.8698, "step": 10599 }, { "epoch": 0.4484304932735426, "grad_norm": 0.1932401806116104, "learning_rate": 0.001, "loss": 2.4997, "step": 10600 }, { "epoch": 0.448472798037059, "grad_norm": 0.19700828194618225, "learning_rate": 0.001, "loss": 2.4923, "step": 10601 }, { "epoch": 0.4485151028005753, "grad_norm": 0.338742196559906, "learning_rate": 0.001, "loss": 1.6468, "step": 10602 }, { "epoch": 0.44855740756409174, "grad_norm": 0.31011953949928284, "learning_rate": 0.001, "loss": 1.9476, "step": 10603 }, { "epoch": 0.4485997123276081, "grad_norm": 0.1957470327615738, "learning_rate": 0.001, "loss": 1.7986, "step": 10604 }, { "epoch": 0.44864201709112445, "grad_norm": 0.18266044557094574, "learning_rate": 0.001, "loss": 1.9285, "step": 10605 }, { "epoch": 0.44868432185464086, "grad_norm": 0.17342445254325867, "learning_rate": 0.001, "loss": 2.4775, "step": 10606 }, { "epoch": 0.4487266266181572, "grad_norm": 0.17653240263462067, "learning_rate": 0.001, "loss": 2.5431, "step": 10607 }, { "epoch": 0.44876893138167356, "grad_norm": 0.1600387543439865, "learning_rate": 0.001, "loss": 3.0684, "step": 10608 }, { "epoch": 0.44881123614519, "grad_norm": 0.20908235013484955, "learning_rate": 0.001, "loss": 2.4038, "step": 10609 }, { "epoch": 0.4488535409087063, "grad_norm": 0.21198759973049164, "learning_rate": 0.001, "loss": 2.5214, "step": 10610 }, { "epoch": 0.4488958456722227, "grad_norm": 0.16591830551624298, "learning_rate": 0.001, "loss": 2.178, "step": 10611 }, { "epoch": 0.4489381504357391, "grad_norm": 3.6654651165008545, "learning_rate": 0.001, "loss": 2.2057, "step": 10612 }, { "epoch": 0.44898045519925545, "grad_norm": 0.17161937057971954, "learning_rate": 0.001, "loss": 2.2148, "step": 10613 }, { "epoch": 0.4490227599627718, "grad_norm": 4.128261089324951, "learning_rate": 0.001, "loss": 2.2959, "step": 10614 }, { "epoch": 0.44906506472628815, "grad_norm": 0.16964125633239746, "learning_rate": 0.001, "loss": 2.1389, "step": 10615 }, { "epoch": 0.44910736948980456, "grad_norm": 1.7417608499526978, "learning_rate": 0.001, "loss": 1.705, "step": 10616 }, { "epoch": 0.4491496742533209, "grad_norm": 0.9313798546791077, "learning_rate": 0.001, "loss": 2.6124, "step": 10617 }, { "epoch": 0.4491919790168373, "grad_norm": 0.7544777393341064, "learning_rate": 0.001, "loss": 2.773, "step": 10618 }, { "epoch": 0.4492342837803537, "grad_norm": 0.26840439438819885, "learning_rate": 0.001, "loss": 2.0986, "step": 10619 }, { "epoch": 0.44927658854387004, "grad_norm": 0.18352295458316803, "learning_rate": 0.001, "loss": 2.9253, "step": 10620 }, { "epoch": 0.4493188933073864, "grad_norm": 0.25015711784362793, "learning_rate": 0.001, "loss": 2.9165, "step": 10621 }, { "epoch": 0.4493611980709028, "grad_norm": 0.15691161155700684, "learning_rate": 0.001, "loss": 1.6252, "step": 10622 }, { "epoch": 0.44940350283441916, "grad_norm": 0.2128295749425888, "learning_rate": 0.001, "loss": 2.6712, "step": 10623 }, { "epoch": 0.4494458075979355, "grad_norm": 2.622805118560791, "learning_rate": 0.001, "loss": 1.9613, "step": 10624 }, { "epoch": 0.4494881123614519, "grad_norm": 1.922769546508789, "learning_rate": 0.001, "loss": 2.0337, "step": 10625 }, { "epoch": 0.4495304171249683, "grad_norm": 0.1844593733549118, "learning_rate": 0.001, "loss": 2.4377, "step": 10626 }, { "epoch": 0.4495727218884846, "grad_norm": 0.36522769927978516, "learning_rate": 0.001, "loss": 2.3181, "step": 10627 }, { "epoch": 0.44961502665200104, "grad_norm": 0.796236515045166, "learning_rate": 0.001, "loss": 2.1124, "step": 10628 }, { "epoch": 0.4496573314155174, "grad_norm": 25.66647720336914, "learning_rate": 0.001, "loss": 1.9776, "step": 10629 }, { "epoch": 0.44969963617903375, "grad_norm": 0.1876109093427658, "learning_rate": 0.001, "loss": 3.1408, "step": 10630 }, { "epoch": 0.44974194094255016, "grad_norm": 0.2271474301815033, "learning_rate": 0.001, "loss": 2.7732, "step": 10631 }, { "epoch": 0.4497842457060665, "grad_norm": 0.14895124733448029, "learning_rate": 0.001, "loss": 1.4203, "step": 10632 }, { "epoch": 0.44982655046958286, "grad_norm": 0.5558005571365356, "learning_rate": 0.001, "loss": 2.295, "step": 10633 }, { "epoch": 0.4498688552330993, "grad_norm": 0.2572641968727112, "learning_rate": 0.001, "loss": 2.5265, "step": 10634 }, { "epoch": 0.44991115999661563, "grad_norm": 0.22297467291355133, "learning_rate": 0.001, "loss": 2.1774, "step": 10635 }, { "epoch": 0.449953464760132, "grad_norm": 1.5451843738555908, "learning_rate": 0.001, "loss": 2.925, "step": 10636 }, { "epoch": 0.44999576952364834, "grad_norm": 0.1914081871509552, "learning_rate": 0.001, "loss": 2.0183, "step": 10637 }, { "epoch": 0.45003807428716475, "grad_norm": 0.192905992269516, "learning_rate": 0.001, "loss": 1.8431, "step": 10638 }, { "epoch": 0.4500803790506811, "grad_norm": 0.1977374255657196, "learning_rate": 0.001, "loss": 2.5427, "step": 10639 }, { "epoch": 0.45012268381419746, "grad_norm": 0.193924680352211, "learning_rate": 0.001, "loss": 2.4878, "step": 10640 }, { "epoch": 0.45016498857771386, "grad_norm": 0.19579516351222992, "learning_rate": 0.001, "loss": 2.6807, "step": 10641 }, { "epoch": 0.4502072933412302, "grad_norm": 0.17491202056407928, "learning_rate": 0.001, "loss": 1.4516, "step": 10642 }, { "epoch": 0.4502495981047466, "grad_norm": 0.20388638973236084, "learning_rate": 0.001, "loss": 1.9766, "step": 10643 }, { "epoch": 0.450291902868263, "grad_norm": 6.089061737060547, "learning_rate": 0.001, "loss": 1.7728, "step": 10644 }, { "epoch": 0.45033420763177934, "grad_norm": 0.19175101816654205, "learning_rate": 0.001, "loss": 2.4468, "step": 10645 }, { "epoch": 0.4503765123952957, "grad_norm": 0.18164491653442383, "learning_rate": 0.001, "loss": 1.6122, "step": 10646 }, { "epoch": 0.4504188171588121, "grad_norm": 0.28620779514312744, "learning_rate": 0.001, "loss": 2.1983, "step": 10647 }, { "epoch": 0.45046112192232846, "grad_norm": 0.17143702507019043, "learning_rate": 0.001, "loss": 3.1166, "step": 10648 }, { "epoch": 0.4505034266858448, "grad_norm": 0.187702938914299, "learning_rate": 0.001, "loss": 2.7673, "step": 10649 }, { "epoch": 0.4505457314493612, "grad_norm": 0.18621903657913208, "learning_rate": 0.001, "loss": 2.0963, "step": 10650 }, { "epoch": 0.4505880362128776, "grad_norm": 0.18782265484333038, "learning_rate": 0.001, "loss": 2.6116, "step": 10651 }, { "epoch": 0.45063034097639393, "grad_norm": 0.18947267532348633, "learning_rate": 0.001, "loss": 2.3372, "step": 10652 }, { "epoch": 0.45067264573991034, "grad_norm": 0.6221209764480591, "learning_rate": 0.001, "loss": 1.4182, "step": 10653 }, { "epoch": 0.4507149505034267, "grad_norm": 0.18391531705856323, "learning_rate": 0.001, "loss": 2.4517, "step": 10654 }, { "epoch": 0.45075725526694305, "grad_norm": 0.31003570556640625, "learning_rate": 0.001, "loss": 2.6419, "step": 10655 }, { "epoch": 0.45079956003045946, "grad_norm": 3.465576410293579, "learning_rate": 0.001, "loss": 2.7737, "step": 10656 }, { "epoch": 0.4508418647939758, "grad_norm": 0.17568989098072052, "learning_rate": 0.001, "loss": 1.5883, "step": 10657 }, { "epoch": 0.45088416955749216, "grad_norm": 0.18298137187957764, "learning_rate": 0.001, "loss": 2.9265, "step": 10658 }, { "epoch": 0.4509264743210085, "grad_norm": 13.20496654510498, "learning_rate": 0.001, "loss": 2.632, "step": 10659 }, { "epoch": 0.45096877908452493, "grad_norm": 0.2141726016998291, "learning_rate": 0.001, "loss": 1.5008, "step": 10660 }, { "epoch": 0.4510110838480413, "grad_norm": 0.22003300487995148, "learning_rate": 0.001, "loss": 2.2004, "step": 10661 }, { "epoch": 0.45105338861155764, "grad_norm": 0.4876539409160614, "learning_rate": 0.001, "loss": 2.3984, "step": 10662 }, { "epoch": 0.45109569337507405, "grad_norm": 0.2729082703590393, "learning_rate": 0.001, "loss": 1.7232, "step": 10663 }, { "epoch": 0.4511379981385904, "grad_norm": 0.21046875417232513, "learning_rate": 0.001, "loss": 1.6638, "step": 10664 }, { "epoch": 0.45118030290210676, "grad_norm": 6.246835708618164, "learning_rate": 0.001, "loss": 2.2441, "step": 10665 }, { "epoch": 0.45122260766562317, "grad_norm": 0.24696436524391174, "learning_rate": 0.001, "loss": 2.4485, "step": 10666 }, { "epoch": 0.4512649124291395, "grad_norm": 0.20760883390903473, "learning_rate": 0.001, "loss": 1.8772, "step": 10667 }, { "epoch": 0.4513072171926559, "grad_norm": 0.2544911801815033, "learning_rate": 0.001, "loss": 3.1766, "step": 10668 }, { "epoch": 0.4513495219561723, "grad_norm": 0.7529020309448242, "learning_rate": 0.001, "loss": 3.3731, "step": 10669 }, { "epoch": 0.45139182671968864, "grad_norm": 0.23148514330387115, "learning_rate": 0.001, "loss": 2.8049, "step": 10670 }, { "epoch": 0.451434131483205, "grad_norm": 0.28361719846725464, "learning_rate": 0.001, "loss": 2.4099, "step": 10671 }, { "epoch": 0.4514764362467214, "grad_norm": 0.23970754444599152, "learning_rate": 0.001, "loss": 2.2309, "step": 10672 }, { "epoch": 0.45151874101023776, "grad_norm": 0.20568141341209412, "learning_rate": 0.001, "loss": 2.9698, "step": 10673 }, { "epoch": 0.4515610457737541, "grad_norm": 0.1887006312608719, "learning_rate": 0.001, "loss": 1.9361, "step": 10674 }, { "epoch": 0.4516033505372705, "grad_norm": 0.31066638231277466, "learning_rate": 0.001, "loss": 2.2419, "step": 10675 }, { "epoch": 0.4516456553007869, "grad_norm": 0.18941254913806915, "learning_rate": 0.001, "loss": 2.7735, "step": 10676 }, { "epoch": 0.45168796006430323, "grad_norm": 0.16498833894729614, "learning_rate": 0.001, "loss": 3.2135, "step": 10677 }, { "epoch": 0.45173026482781964, "grad_norm": 0.18185345828533173, "learning_rate": 0.001, "loss": 2.1368, "step": 10678 }, { "epoch": 0.451772569591336, "grad_norm": 0.17669661343097687, "learning_rate": 0.001, "loss": 2.4485, "step": 10679 }, { "epoch": 0.45181487435485235, "grad_norm": 0.22324764728546143, "learning_rate": 0.001, "loss": 2.542, "step": 10680 }, { "epoch": 0.4518571791183687, "grad_norm": 0.17187261581420898, "learning_rate": 0.001, "loss": 2.604, "step": 10681 }, { "epoch": 0.4518994838818851, "grad_norm": 0.184734508395195, "learning_rate": 0.001, "loss": 2.2802, "step": 10682 }, { "epoch": 0.45194178864540147, "grad_norm": 0.14700573682785034, "learning_rate": 0.001, "loss": 3.5054, "step": 10683 }, { "epoch": 0.4519840934089178, "grad_norm": 2.5030956268310547, "learning_rate": 0.001, "loss": 2.5592, "step": 10684 }, { "epoch": 0.45202639817243423, "grad_norm": 0.20399565994739532, "learning_rate": 0.001, "loss": 3.7827, "step": 10685 }, { "epoch": 0.4520687029359506, "grad_norm": 1.0105069875717163, "learning_rate": 0.001, "loss": 2.5169, "step": 10686 }, { "epoch": 0.45211100769946694, "grad_norm": 0.39226314425468445, "learning_rate": 0.001, "loss": 1.9134, "step": 10687 }, { "epoch": 0.45215331246298335, "grad_norm": 0.19782952964305878, "learning_rate": 0.001, "loss": 3.265, "step": 10688 }, { "epoch": 0.4521956172264997, "grad_norm": 1.149984359741211, "learning_rate": 0.001, "loss": 2.6159, "step": 10689 }, { "epoch": 0.45223792199001606, "grad_norm": 0.13431769609451294, "learning_rate": 0.001, "loss": 1.4885, "step": 10690 }, { "epoch": 0.45228022675353247, "grad_norm": 0.18614532053470612, "learning_rate": 0.001, "loss": 1.9288, "step": 10691 }, { "epoch": 0.4523225315170488, "grad_norm": 0.2908714711666107, "learning_rate": 0.001, "loss": 3.1537, "step": 10692 }, { "epoch": 0.4523648362805652, "grad_norm": 0.21332935988903046, "learning_rate": 0.001, "loss": 2.2536, "step": 10693 }, { "epoch": 0.4524071410440816, "grad_norm": 0.3098827302455902, "learning_rate": 0.001, "loss": 5.137, "step": 10694 }, { "epoch": 0.45244944580759794, "grad_norm": 0.2915304899215698, "learning_rate": 0.001, "loss": 2.5853, "step": 10695 }, { "epoch": 0.4524917505711143, "grad_norm": 1.5301575660705566, "learning_rate": 0.001, "loss": 2.4361, "step": 10696 }, { "epoch": 0.4525340553346307, "grad_norm": 0.1704961210489273, "learning_rate": 0.001, "loss": 2.1001, "step": 10697 }, { "epoch": 0.45257636009814706, "grad_norm": 0.3829928934574127, "learning_rate": 0.001, "loss": 1.9829, "step": 10698 }, { "epoch": 0.4526186648616634, "grad_norm": 0.14519084990024567, "learning_rate": 0.001, "loss": 1.8952, "step": 10699 }, { "epoch": 0.4526609696251798, "grad_norm": 5.465473651885986, "learning_rate": 0.001, "loss": 2.0073, "step": 10700 }, { "epoch": 0.4527032743886962, "grad_norm": 0.17210935056209564, "learning_rate": 0.001, "loss": 2.0146, "step": 10701 }, { "epoch": 0.45274557915221253, "grad_norm": 2.133603811264038, "learning_rate": 0.001, "loss": 3.2162, "step": 10702 }, { "epoch": 0.4527878839157289, "grad_norm": 0.194070965051651, "learning_rate": 0.001, "loss": 2.5802, "step": 10703 }, { "epoch": 0.4528301886792453, "grad_norm": 0.21091453731060028, "learning_rate": 0.001, "loss": 2.4426, "step": 10704 }, { "epoch": 0.45287249344276165, "grad_norm": 0.1854954957962036, "learning_rate": 0.001, "loss": 1.5763, "step": 10705 }, { "epoch": 0.452914798206278, "grad_norm": 0.1935143917798996, "learning_rate": 0.001, "loss": 1.7313, "step": 10706 }, { "epoch": 0.4529571029697944, "grad_norm": 0.32737401127815247, "learning_rate": 0.001, "loss": 2.1124, "step": 10707 }, { "epoch": 0.45299940773331077, "grad_norm": 3.649876832962036, "learning_rate": 0.001, "loss": 2.0724, "step": 10708 }, { "epoch": 0.4530417124968271, "grad_norm": 1.8369063138961792, "learning_rate": 0.001, "loss": 1.6074, "step": 10709 }, { "epoch": 0.45308401726034353, "grad_norm": 0.22719253599643707, "learning_rate": 0.001, "loss": 2.463, "step": 10710 }, { "epoch": 0.4531263220238599, "grad_norm": 0.38920852541923523, "learning_rate": 0.001, "loss": 1.6571, "step": 10711 }, { "epoch": 0.45316862678737624, "grad_norm": 0.18624505400657654, "learning_rate": 0.001, "loss": 2.507, "step": 10712 }, { "epoch": 0.45321093155089265, "grad_norm": 0.2142438441514969, "learning_rate": 0.001, "loss": 2.3202, "step": 10713 }, { "epoch": 0.453253236314409, "grad_norm": 1.3076987266540527, "learning_rate": 0.001, "loss": 1.6404, "step": 10714 }, { "epoch": 0.45329554107792536, "grad_norm": 0.9134403467178345, "learning_rate": 0.001, "loss": 2.0657, "step": 10715 }, { "epoch": 0.45333784584144177, "grad_norm": 0.3806881904602051, "learning_rate": 0.001, "loss": 2.0147, "step": 10716 }, { "epoch": 0.4533801506049581, "grad_norm": 0.1724647432565689, "learning_rate": 0.001, "loss": 2.2622, "step": 10717 }, { "epoch": 0.4534224553684745, "grad_norm": 0.5232478976249695, "learning_rate": 0.001, "loss": 3.3054, "step": 10718 }, { "epoch": 0.4534647601319909, "grad_norm": 0.16044361889362335, "learning_rate": 0.001, "loss": 3.2017, "step": 10719 }, { "epoch": 0.45350706489550724, "grad_norm": 0.17905324697494507, "learning_rate": 0.001, "loss": 1.8238, "step": 10720 }, { "epoch": 0.4535493696590236, "grad_norm": 0.20264795422554016, "learning_rate": 0.001, "loss": 1.8288, "step": 10721 }, { "epoch": 0.45359167442254, "grad_norm": 42.060760498046875, "learning_rate": 0.001, "loss": 3.1013, "step": 10722 }, { "epoch": 0.45363397918605636, "grad_norm": 0.8245616555213928, "learning_rate": 0.001, "loss": 1.8905, "step": 10723 }, { "epoch": 0.4536762839495727, "grad_norm": 934.2753295898438, "learning_rate": 0.001, "loss": 3.5141, "step": 10724 }, { "epoch": 0.4537185887130891, "grad_norm": 0.4653104245662689, "learning_rate": 0.001, "loss": 2.0465, "step": 10725 }, { "epoch": 0.4537608934766055, "grad_norm": 0.2089160531759262, "learning_rate": 0.001, "loss": 2.6989, "step": 10726 }, { "epoch": 0.45380319824012183, "grad_norm": 1.048384428024292, "learning_rate": 0.001, "loss": 2.7521, "step": 10727 }, { "epoch": 0.4538455030036382, "grad_norm": 0.18600456416606903, "learning_rate": 0.001, "loss": 2.0483, "step": 10728 }, { "epoch": 0.4538878077671546, "grad_norm": 2.7133305072784424, "learning_rate": 0.001, "loss": 2.2104, "step": 10729 }, { "epoch": 0.45393011253067095, "grad_norm": 0.19306029379367828, "learning_rate": 0.001, "loss": 2.466, "step": 10730 }, { "epoch": 0.4539724172941873, "grad_norm": 0.1988273561000824, "learning_rate": 0.001, "loss": 2.7235, "step": 10731 }, { "epoch": 0.4540147220577037, "grad_norm": 6.111566543579102, "learning_rate": 0.001, "loss": 2.7007, "step": 10732 }, { "epoch": 0.45405702682122007, "grad_norm": 0.4075400233268738, "learning_rate": 0.001, "loss": 3.3743, "step": 10733 }, { "epoch": 0.4540993315847364, "grad_norm": 0.1714090257883072, "learning_rate": 0.001, "loss": 2.4183, "step": 10734 }, { "epoch": 0.45414163634825283, "grad_norm": 0.27098438143730164, "learning_rate": 0.001, "loss": 2.5263, "step": 10735 }, { "epoch": 0.4541839411117692, "grad_norm": 0.17201238870620728, "learning_rate": 0.001, "loss": 2.4768, "step": 10736 }, { "epoch": 0.45422624587528554, "grad_norm": 0.23325662314891815, "learning_rate": 0.001, "loss": 3.6854, "step": 10737 }, { "epoch": 0.45426855063880195, "grad_norm": 0.5161541104316711, "learning_rate": 0.001, "loss": 2.359, "step": 10738 }, { "epoch": 0.4543108554023183, "grad_norm": 0.16155464947223663, "learning_rate": 0.001, "loss": 2.0626, "step": 10739 }, { "epoch": 0.45435316016583466, "grad_norm": 0.43314120173454285, "learning_rate": 0.001, "loss": 2.4758, "step": 10740 }, { "epoch": 0.45439546492935107, "grad_norm": 0.17236952483654022, "learning_rate": 0.001, "loss": 2.7448, "step": 10741 }, { "epoch": 0.4544377696928674, "grad_norm": 1.6672812700271606, "learning_rate": 0.001, "loss": 1.8297, "step": 10742 }, { "epoch": 0.4544800744563838, "grad_norm": 0.6431288123130798, "learning_rate": 0.001, "loss": 2.5279, "step": 10743 }, { "epoch": 0.4545223792199002, "grad_norm": 0.1590161770582199, "learning_rate": 0.001, "loss": 1.7153, "step": 10744 }, { "epoch": 0.45456468398341654, "grad_norm": 0.1368952840566635, "learning_rate": 0.001, "loss": 2.4392, "step": 10745 }, { "epoch": 0.4546069887469329, "grad_norm": 0.9663378596305847, "learning_rate": 0.001, "loss": 2.5891, "step": 10746 }, { "epoch": 0.4546492935104493, "grad_norm": 0.19427025318145752, "learning_rate": 0.001, "loss": 1.8954, "step": 10747 }, { "epoch": 0.45469159827396566, "grad_norm": 2.1707139015197754, "learning_rate": 0.001, "loss": 3.155, "step": 10748 }, { "epoch": 0.454733903037482, "grad_norm": 0.20545673370361328, "learning_rate": 0.001, "loss": 3.2715, "step": 10749 }, { "epoch": 0.45477620780099837, "grad_norm": 0.1690516173839569, "learning_rate": 0.001, "loss": 2.7757, "step": 10750 }, { "epoch": 0.4548185125645148, "grad_norm": 0.4233834743499756, "learning_rate": 0.001, "loss": 2.896, "step": 10751 }, { "epoch": 0.45486081732803113, "grad_norm": 0.2022281438112259, "learning_rate": 0.001, "loss": 2.3454, "step": 10752 }, { "epoch": 0.4549031220915475, "grad_norm": 0.17715947329998016, "learning_rate": 0.001, "loss": 2.0872, "step": 10753 }, { "epoch": 0.4549454268550639, "grad_norm": 0.19292153418064117, "learning_rate": 0.001, "loss": 2.1691, "step": 10754 }, { "epoch": 0.45498773161858025, "grad_norm": 1.835961103439331, "learning_rate": 0.001, "loss": 1.9661, "step": 10755 }, { "epoch": 0.4550300363820966, "grad_norm": 0.16486118733882904, "learning_rate": 0.001, "loss": 2.5038, "step": 10756 }, { "epoch": 0.455072341145613, "grad_norm": 0.7095552682876587, "learning_rate": 0.001, "loss": 2.1318, "step": 10757 }, { "epoch": 0.45511464590912937, "grad_norm": 0.8125048875808716, "learning_rate": 0.001, "loss": 2.7182, "step": 10758 }, { "epoch": 0.4551569506726457, "grad_norm": 6.972851753234863, "learning_rate": 0.001, "loss": 2.4598, "step": 10759 }, { "epoch": 0.45519925543616213, "grad_norm": 1.0214871168136597, "learning_rate": 0.001, "loss": 2.0997, "step": 10760 }, { "epoch": 0.4552415601996785, "grad_norm": 8.2625093460083, "learning_rate": 0.001, "loss": 2.4566, "step": 10761 }, { "epoch": 0.45528386496319484, "grad_norm": 0.16502858698368073, "learning_rate": 0.001, "loss": 1.868, "step": 10762 }, { "epoch": 0.45532616972671125, "grad_norm": 0.4280339777469635, "learning_rate": 0.001, "loss": 1.6946, "step": 10763 }, { "epoch": 0.4553684744902276, "grad_norm": 0.23198087513446808, "learning_rate": 0.001, "loss": 3.1451, "step": 10764 }, { "epoch": 0.45541077925374396, "grad_norm": 0.8714398145675659, "learning_rate": 0.001, "loss": 3.1186, "step": 10765 }, { "epoch": 0.45545308401726037, "grad_norm": 0.2438887655735016, "learning_rate": 0.001, "loss": 2.7754, "step": 10766 }, { "epoch": 0.4554953887807767, "grad_norm": 0.3168472349643707, "learning_rate": 0.001, "loss": 2.4378, "step": 10767 }, { "epoch": 0.4555376935442931, "grad_norm": 0.214552104473114, "learning_rate": 0.001, "loss": 2.1332, "step": 10768 }, { "epoch": 0.4555799983078095, "grad_norm": 0.18347600102424622, "learning_rate": 0.001, "loss": 1.9963, "step": 10769 }, { "epoch": 0.45562230307132584, "grad_norm": 0.28068867325782776, "learning_rate": 0.001, "loss": 2.335, "step": 10770 }, { "epoch": 0.4556646078348422, "grad_norm": 0.21685069799423218, "learning_rate": 0.001, "loss": 2.0136, "step": 10771 }, { "epoch": 0.45570691259835855, "grad_norm": 0.18479874730110168, "learning_rate": 0.001, "loss": 2.1247, "step": 10772 }, { "epoch": 0.45574921736187496, "grad_norm": 9.378438949584961, "learning_rate": 0.001, "loss": 1.8411, "step": 10773 }, { "epoch": 0.4557915221253913, "grad_norm": 0.18619363009929657, "learning_rate": 0.001, "loss": 2.4149, "step": 10774 }, { "epoch": 0.45583382688890767, "grad_norm": 0.48514658212661743, "learning_rate": 0.001, "loss": 1.8157, "step": 10775 }, { "epoch": 0.4558761316524241, "grad_norm": 0.21699242293834686, "learning_rate": 0.001, "loss": 2.5075, "step": 10776 }, { "epoch": 0.45591843641594043, "grad_norm": 0.5351647138595581, "learning_rate": 0.001, "loss": 2.0256, "step": 10777 }, { "epoch": 0.4559607411794568, "grad_norm": 0.18083517253398895, "learning_rate": 0.001, "loss": 1.8436, "step": 10778 }, { "epoch": 0.4560030459429732, "grad_norm": 0.16347528994083405, "learning_rate": 0.001, "loss": 1.6929, "step": 10779 }, { "epoch": 0.45604535070648955, "grad_norm": 0.59482342004776, "learning_rate": 0.001, "loss": 2.8624, "step": 10780 }, { "epoch": 0.4560876554700059, "grad_norm": 0.28643858432769775, "learning_rate": 0.001, "loss": 2.5811, "step": 10781 }, { "epoch": 0.4561299602335223, "grad_norm": 0.22716449201107025, "learning_rate": 0.001, "loss": 2.7431, "step": 10782 }, { "epoch": 0.45617226499703867, "grad_norm": 4.5622477531433105, "learning_rate": 0.001, "loss": 1.9823, "step": 10783 }, { "epoch": 0.456214569760555, "grad_norm": 0.4299876391887665, "learning_rate": 0.001, "loss": 1.8034, "step": 10784 }, { "epoch": 0.45625687452407143, "grad_norm": 0.2948051989078522, "learning_rate": 0.001, "loss": 2.2761, "step": 10785 }, { "epoch": 0.4562991792875878, "grad_norm": 0.16705124080181122, "learning_rate": 0.001, "loss": 3.0812, "step": 10786 }, { "epoch": 0.45634148405110414, "grad_norm": 0.33032622933387756, "learning_rate": 0.001, "loss": 2.9178, "step": 10787 }, { "epoch": 0.45638378881462055, "grad_norm": 0.3047260642051697, "learning_rate": 0.001, "loss": 2.0885, "step": 10788 }, { "epoch": 0.4564260935781369, "grad_norm": 0.2009318619966507, "learning_rate": 0.001, "loss": 2.5426, "step": 10789 }, { "epoch": 0.45646839834165326, "grad_norm": 0.26665037870407104, "learning_rate": 0.001, "loss": 3.0866, "step": 10790 }, { "epoch": 0.45651070310516967, "grad_norm": 0.15253359079360962, "learning_rate": 0.001, "loss": 1.9167, "step": 10791 }, { "epoch": 0.456553007868686, "grad_norm": 0.17726293206214905, "learning_rate": 0.001, "loss": 1.9892, "step": 10792 }, { "epoch": 0.4565953126322024, "grad_norm": 0.1824486255645752, "learning_rate": 0.001, "loss": 2.0296, "step": 10793 }, { "epoch": 0.45663761739571873, "grad_norm": 0.7334975004196167, "learning_rate": 0.001, "loss": 2.5563, "step": 10794 }, { "epoch": 0.45667992215923514, "grad_norm": 0.14659246802330017, "learning_rate": 0.001, "loss": 1.8249, "step": 10795 }, { "epoch": 0.4567222269227515, "grad_norm": 1.9629788398742676, "learning_rate": 0.001, "loss": 3.2285, "step": 10796 }, { "epoch": 0.45676453168626785, "grad_norm": 0.18042373657226562, "learning_rate": 0.001, "loss": 3.0951, "step": 10797 }, { "epoch": 0.45680683644978426, "grad_norm": 0.2109190672636032, "learning_rate": 0.001, "loss": 2.0418, "step": 10798 }, { "epoch": 0.4568491412133006, "grad_norm": 0.2082575410604477, "learning_rate": 0.001, "loss": 2.7988, "step": 10799 }, { "epoch": 0.45689144597681697, "grad_norm": 0.14357468485832214, "learning_rate": 0.001, "loss": 1.6557, "step": 10800 }, { "epoch": 0.4569337507403334, "grad_norm": 0.16632072627544403, "learning_rate": 0.001, "loss": 1.9405, "step": 10801 }, { "epoch": 0.45697605550384973, "grad_norm": 0.4018622636795044, "learning_rate": 0.001, "loss": 2.1912, "step": 10802 }, { "epoch": 0.4570183602673661, "grad_norm": 0.19781026244163513, "learning_rate": 0.001, "loss": 2.1403, "step": 10803 }, { "epoch": 0.4570606650308825, "grad_norm": 0.16085539758205414, "learning_rate": 0.001, "loss": 1.8415, "step": 10804 }, { "epoch": 0.45710296979439885, "grad_norm": 0.2825770080089569, "learning_rate": 0.001, "loss": 3.2822, "step": 10805 }, { "epoch": 0.4571452745579152, "grad_norm": 0.15267670154571533, "learning_rate": 0.001, "loss": 2.0494, "step": 10806 }, { "epoch": 0.4571875793214316, "grad_norm": 1.4387484788894653, "learning_rate": 0.001, "loss": 3.0406, "step": 10807 }, { "epoch": 0.45722988408494797, "grad_norm": 0.14792972803115845, "learning_rate": 0.001, "loss": 1.9604, "step": 10808 }, { "epoch": 0.4572721888484643, "grad_norm": 0.17246343195438385, "learning_rate": 0.001, "loss": 2.1818, "step": 10809 }, { "epoch": 0.45731449361198073, "grad_norm": 0.40068310499191284, "learning_rate": 0.001, "loss": 1.7939, "step": 10810 }, { "epoch": 0.4573567983754971, "grad_norm": 0.19033183157444, "learning_rate": 0.001, "loss": 2.0297, "step": 10811 }, { "epoch": 0.45739910313901344, "grad_norm": 0.16076108813285828, "learning_rate": 0.001, "loss": 1.9428, "step": 10812 }, { "epoch": 0.45744140790252985, "grad_norm": 0.1628408133983612, "learning_rate": 0.001, "loss": 2.0893, "step": 10813 }, { "epoch": 0.4574837126660462, "grad_norm": 0.29927513003349304, "learning_rate": 0.001, "loss": 2.7233, "step": 10814 }, { "epoch": 0.45752601742956256, "grad_norm": 0.1823870986700058, "learning_rate": 0.001, "loss": 2.4618, "step": 10815 }, { "epoch": 0.4575683221930789, "grad_norm": 0.4014187455177307, "learning_rate": 0.001, "loss": 2.2749, "step": 10816 }, { "epoch": 0.4576106269565953, "grad_norm": 0.17997021973133087, "learning_rate": 0.001, "loss": 3.2688, "step": 10817 }, { "epoch": 0.4576529317201117, "grad_norm": 0.15339773893356323, "learning_rate": 0.001, "loss": 1.9151, "step": 10818 }, { "epoch": 0.45769523648362803, "grad_norm": 0.36581626534461975, "learning_rate": 0.001, "loss": 1.4648, "step": 10819 }, { "epoch": 0.45773754124714444, "grad_norm": 0.17709127068519592, "learning_rate": 0.001, "loss": 2.4527, "step": 10820 }, { "epoch": 0.4577798460106608, "grad_norm": 0.4405263066291809, "learning_rate": 0.001, "loss": 2.7126, "step": 10821 }, { "epoch": 0.45782215077417715, "grad_norm": 0.6151844263076782, "learning_rate": 0.001, "loss": 2.4587, "step": 10822 }, { "epoch": 0.45786445553769356, "grad_norm": 0.18310926854610443, "learning_rate": 0.001, "loss": 1.8446, "step": 10823 }, { "epoch": 0.4579067603012099, "grad_norm": 0.18017029762268066, "learning_rate": 0.001, "loss": 2.2304, "step": 10824 }, { "epoch": 0.45794906506472627, "grad_norm": 0.9790677428245544, "learning_rate": 0.001, "loss": 3.1814, "step": 10825 }, { "epoch": 0.4579913698282427, "grad_norm": 0.20925338566303253, "learning_rate": 0.001, "loss": 3.3553, "step": 10826 }, { "epoch": 0.45803367459175903, "grad_norm": 0.17660897970199585, "learning_rate": 0.001, "loss": 2.1409, "step": 10827 }, { "epoch": 0.4580759793552754, "grad_norm": 0.48942068219184875, "learning_rate": 0.001, "loss": 2.4424, "step": 10828 }, { "epoch": 0.4581182841187918, "grad_norm": 0.2551790475845337, "learning_rate": 0.001, "loss": 1.7616, "step": 10829 }, { "epoch": 0.45816058888230815, "grad_norm": 0.20451773703098297, "learning_rate": 0.001, "loss": 2.0661, "step": 10830 }, { "epoch": 0.4582028936458245, "grad_norm": 0.2622445225715637, "learning_rate": 0.001, "loss": 1.9234, "step": 10831 }, { "epoch": 0.4582451984093409, "grad_norm": 0.16220338642597198, "learning_rate": 0.001, "loss": 1.6778, "step": 10832 }, { "epoch": 0.45828750317285727, "grad_norm": 0.2827407121658325, "learning_rate": 0.001, "loss": 2.8145, "step": 10833 }, { "epoch": 0.4583298079363736, "grad_norm": 0.21268029510974884, "learning_rate": 0.001, "loss": 2.3485, "step": 10834 }, { "epoch": 0.45837211269989003, "grad_norm": 0.23748518526554108, "learning_rate": 0.001, "loss": 2.6988, "step": 10835 }, { "epoch": 0.4584144174634064, "grad_norm": 0.4688171148300171, "learning_rate": 0.001, "loss": 2.2598, "step": 10836 }, { "epoch": 0.45845672222692274, "grad_norm": 2.4489855766296387, "learning_rate": 0.001, "loss": 2.3562, "step": 10837 }, { "epoch": 0.4584990269904391, "grad_norm": 0.1477189064025879, "learning_rate": 0.001, "loss": 2.064, "step": 10838 }, { "epoch": 0.4585413317539555, "grad_norm": 0.20878483355045319, "learning_rate": 0.001, "loss": 2.477, "step": 10839 }, { "epoch": 0.45858363651747186, "grad_norm": 0.47598791122436523, "learning_rate": 0.001, "loss": 2.7616, "step": 10840 }, { "epoch": 0.4586259412809882, "grad_norm": 0.16954365372657776, "learning_rate": 0.001, "loss": 3.0694, "step": 10841 }, { "epoch": 0.4586682460445046, "grad_norm": 0.25619927048683167, "learning_rate": 0.001, "loss": 1.9389, "step": 10842 }, { "epoch": 0.458710550808021, "grad_norm": 1.3417062759399414, "learning_rate": 0.001, "loss": 2.3684, "step": 10843 }, { "epoch": 0.45875285557153733, "grad_norm": 1.6794036626815796, "learning_rate": 0.001, "loss": 2.2654, "step": 10844 }, { "epoch": 0.45879516033505374, "grad_norm": 0.185557559132576, "learning_rate": 0.001, "loss": 2.1782, "step": 10845 }, { "epoch": 0.4588374650985701, "grad_norm": 0.1776900589466095, "learning_rate": 0.001, "loss": 2.9762, "step": 10846 }, { "epoch": 0.45887976986208645, "grad_norm": 0.19009187817573547, "learning_rate": 0.001, "loss": 2.3947, "step": 10847 }, { "epoch": 0.45892207462560286, "grad_norm": 0.17857715487480164, "learning_rate": 0.001, "loss": 1.9718, "step": 10848 }, { "epoch": 0.4589643793891192, "grad_norm": 0.19130466878414154, "learning_rate": 0.001, "loss": 3.6092, "step": 10849 }, { "epoch": 0.45900668415263557, "grad_norm": 1.0953785181045532, "learning_rate": 0.001, "loss": 2.8111, "step": 10850 }, { "epoch": 0.459048988916152, "grad_norm": 0.1537635773420334, "learning_rate": 0.001, "loss": 1.9802, "step": 10851 }, { "epoch": 0.45909129367966833, "grad_norm": 0.16897273063659668, "learning_rate": 0.001, "loss": 2.0585, "step": 10852 }, { "epoch": 0.4591335984431847, "grad_norm": 0.9091619253158569, "learning_rate": 0.001, "loss": 2.9428, "step": 10853 }, { "epoch": 0.4591759032067011, "grad_norm": 0.16677002608776093, "learning_rate": 0.001, "loss": 1.8913, "step": 10854 }, { "epoch": 0.45921820797021745, "grad_norm": 0.21832408010959625, "learning_rate": 0.001, "loss": 3.3002, "step": 10855 }, { "epoch": 0.4592605127337338, "grad_norm": 0.1814076453447342, "learning_rate": 0.001, "loss": 1.5207, "step": 10856 }, { "epoch": 0.4593028174972502, "grad_norm": 0.17598986625671387, "learning_rate": 0.001, "loss": 2.3064, "step": 10857 }, { "epoch": 0.45934512226076657, "grad_norm": 0.24575437605381012, "learning_rate": 0.001, "loss": 2.6707, "step": 10858 }, { "epoch": 0.4593874270242829, "grad_norm": 0.1765861064195633, "learning_rate": 0.001, "loss": 1.8149, "step": 10859 }, { "epoch": 0.45942973178779933, "grad_norm": 0.7690317034721375, "learning_rate": 0.001, "loss": 2.0892, "step": 10860 }, { "epoch": 0.4594720365513157, "grad_norm": 0.1727459579706192, "learning_rate": 0.001, "loss": 1.9199, "step": 10861 }, { "epoch": 0.45951434131483204, "grad_norm": 0.1738310158252716, "learning_rate": 0.001, "loss": 2.377, "step": 10862 }, { "epoch": 0.4595566460783484, "grad_norm": 12.975968360900879, "learning_rate": 0.001, "loss": 2.9698, "step": 10863 }, { "epoch": 0.4595989508418648, "grad_norm": 0.23020650446414948, "learning_rate": 0.001, "loss": 2.3748, "step": 10864 }, { "epoch": 0.45964125560538116, "grad_norm": 0.14543908834457397, "learning_rate": 0.001, "loss": 1.7494, "step": 10865 }, { "epoch": 0.4596835603688975, "grad_norm": 1.8263896703720093, "learning_rate": 0.001, "loss": 2.5401, "step": 10866 }, { "epoch": 0.4597258651324139, "grad_norm": 0.23865966498851776, "learning_rate": 0.001, "loss": 2.27, "step": 10867 }, { "epoch": 0.4597681698959303, "grad_norm": 0.7951701283454895, "learning_rate": 0.001, "loss": 1.5266, "step": 10868 }, { "epoch": 0.45981047465944663, "grad_norm": 0.18856553733348846, "learning_rate": 0.001, "loss": 3.1963, "step": 10869 }, { "epoch": 0.45985277942296304, "grad_norm": 0.16450615227222443, "learning_rate": 0.001, "loss": 2.5576, "step": 10870 }, { "epoch": 0.4598950841864794, "grad_norm": 0.18288759887218475, "learning_rate": 0.001, "loss": 1.5935, "step": 10871 }, { "epoch": 0.45993738894999575, "grad_norm": 0.16834130883216858, "learning_rate": 0.001, "loss": 1.5081, "step": 10872 }, { "epoch": 0.45997969371351216, "grad_norm": 0.1692802906036377, "learning_rate": 0.001, "loss": 1.8773, "step": 10873 }, { "epoch": 0.4600219984770285, "grad_norm": 0.1614452451467514, "learning_rate": 0.001, "loss": 2.5756, "step": 10874 }, { "epoch": 0.46006430324054487, "grad_norm": 0.18225987255573273, "learning_rate": 0.001, "loss": 1.9148, "step": 10875 }, { "epoch": 0.4601066080040613, "grad_norm": 0.16945257782936096, "learning_rate": 0.001, "loss": 1.9408, "step": 10876 }, { "epoch": 0.46014891276757763, "grad_norm": 0.18923348188400269, "learning_rate": 0.001, "loss": 3.1651, "step": 10877 }, { "epoch": 0.460191217531094, "grad_norm": 0.7960704565048218, "learning_rate": 0.001, "loss": 2.0982, "step": 10878 }, { "epoch": 0.4602335222946104, "grad_norm": 0.16281262040138245, "learning_rate": 0.001, "loss": 2.7911, "step": 10879 }, { "epoch": 0.46027582705812675, "grad_norm": 0.15493243932724, "learning_rate": 0.001, "loss": 1.7558, "step": 10880 }, { "epoch": 0.4603181318216431, "grad_norm": 1.4312360286712646, "learning_rate": 0.001, "loss": 2.6279, "step": 10881 }, { "epoch": 0.4603604365851595, "grad_norm": 0.18182626366615295, "learning_rate": 0.001, "loss": 2.3233, "step": 10882 }, { "epoch": 0.46040274134867587, "grad_norm": 0.17061913013458252, "learning_rate": 0.001, "loss": 2.0113, "step": 10883 }, { "epoch": 0.4604450461121922, "grad_norm": 1.3097604513168335, "learning_rate": 0.001, "loss": 2.1427, "step": 10884 }, { "epoch": 0.4604873508757086, "grad_norm": 0.1752685308456421, "learning_rate": 0.001, "loss": 2.0929, "step": 10885 }, { "epoch": 0.460529655639225, "grad_norm": 0.3267187178134918, "learning_rate": 0.001, "loss": 2.5284, "step": 10886 }, { "epoch": 0.46057196040274134, "grad_norm": 0.17250603437423706, "learning_rate": 0.001, "loss": 2.8212, "step": 10887 }, { "epoch": 0.4606142651662577, "grad_norm": 1.1300095319747925, "learning_rate": 0.001, "loss": 2.0045, "step": 10888 }, { "epoch": 0.4606565699297741, "grad_norm": 0.18050777912139893, "learning_rate": 0.001, "loss": 2.8097, "step": 10889 }, { "epoch": 0.46069887469329046, "grad_norm": 0.16347844898700714, "learning_rate": 0.001, "loss": 2.505, "step": 10890 }, { "epoch": 0.4607411794568068, "grad_norm": 4.939105033874512, "learning_rate": 0.001, "loss": 2.3762, "step": 10891 }, { "epoch": 0.4607834842203232, "grad_norm": 13.591337203979492, "learning_rate": 0.001, "loss": 2.09, "step": 10892 }, { "epoch": 0.4608257889838396, "grad_norm": 0.9323223829269409, "learning_rate": 0.001, "loss": 1.6851, "step": 10893 }, { "epoch": 0.46086809374735593, "grad_norm": 0.7729269862174988, "learning_rate": 0.001, "loss": 1.4762, "step": 10894 }, { "epoch": 0.46091039851087234, "grad_norm": 8.235560417175293, "learning_rate": 0.001, "loss": 2.5788, "step": 10895 }, { "epoch": 0.4609527032743887, "grad_norm": 0.15181410312652588, "learning_rate": 0.001, "loss": 1.448, "step": 10896 }, { "epoch": 0.46099500803790505, "grad_norm": 0.18965478241443634, "learning_rate": 0.001, "loss": 1.7609, "step": 10897 }, { "epoch": 0.46103731280142146, "grad_norm": 1.7368152141571045, "learning_rate": 0.001, "loss": 3.6529, "step": 10898 }, { "epoch": 0.4610796175649378, "grad_norm": 0.22795704007148743, "learning_rate": 0.001, "loss": 2.778, "step": 10899 }, { "epoch": 0.46112192232845417, "grad_norm": 0.15993206202983856, "learning_rate": 0.001, "loss": 1.6612, "step": 10900 }, { "epoch": 0.4611642270919706, "grad_norm": 0.1846051812171936, "learning_rate": 0.001, "loss": 1.5179, "step": 10901 }, { "epoch": 0.46120653185548693, "grad_norm": 0.22482596337795258, "learning_rate": 0.001, "loss": 2.6861, "step": 10902 }, { "epoch": 0.4612488366190033, "grad_norm": 10.483909606933594, "learning_rate": 0.001, "loss": 2.1016, "step": 10903 }, { "epoch": 0.4612911413825197, "grad_norm": 0.45602327585220337, "learning_rate": 0.001, "loss": 2.7724, "step": 10904 }, { "epoch": 0.46133344614603605, "grad_norm": 0.19587081670761108, "learning_rate": 0.001, "loss": 1.8976, "step": 10905 }, { "epoch": 0.4613757509095524, "grad_norm": 0.22737310826778412, "learning_rate": 0.001, "loss": 3.4405, "step": 10906 }, { "epoch": 0.46141805567306876, "grad_norm": 0.17420297861099243, "learning_rate": 0.001, "loss": 2.4252, "step": 10907 }, { "epoch": 0.46146036043658517, "grad_norm": 0.15364967286586761, "learning_rate": 0.001, "loss": 1.766, "step": 10908 }, { "epoch": 0.4615026652001015, "grad_norm": 1.44530189037323, "learning_rate": 0.001, "loss": 2.0774, "step": 10909 }, { "epoch": 0.4615449699636179, "grad_norm": 0.16581588983535767, "learning_rate": 0.001, "loss": 1.5537, "step": 10910 }, { "epoch": 0.4615872747271343, "grad_norm": 0.18303801119327545, "learning_rate": 0.001, "loss": 2.2337, "step": 10911 }, { "epoch": 0.46162957949065064, "grad_norm": 0.1899944394826889, "learning_rate": 0.001, "loss": 1.3449, "step": 10912 }, { "epoch": 0.461671884254167, "grad_norm": 1.3561519384384155, "learning_rate": 0.001, "loss": 2.3431, "step": 10913 }, { "epoch": 0.4617141890176834, "grad_norm": 0.17157615721225739, "learning_rate": 0.001, "loss": 2.5933, "step": 10914 }, { "epoch": 0.46175649378119976, "grad_norm": 1.1707208156585693, "learning_rate": 0.001, "loss": 2.3276, "step": 10915 }, { "epoch": 0.4617987985447161, "grad_norm": 0.3634940981864929, "learning_rate": 0.001, "loss": 2.8591, "step": 10916 }, { "epoch": 0.4618411033082325, "grad_norm": 0.3375161588191986, "learning_rate": 0.001, "loss": 2.2901, "step": 10917 }, { "epoch": 0.4618834080717489, "grad_norm": 31.233200073242188, "learning_rate": 0.001, "loss": 2.6661, "step": 10918 }, { "epoch": 0.46192571283526523, "grad_norm": 0.4107927083969116, "learning_rate": 0.001, "loss": 2.019, "step": 10919 }, { "epoch": 0.46196801759878164, "grad_norm": 0.7090280652046204, "learning_rate": 0.001, "loss": 2.5535, "step": 10920 }, { "epoch": 0.462010322362298, "grad_norm": 2.35701060295105, "learning_rate": 0.001, "loss": 1.7864, "step": 10921 }, { "epoch": 0.46205262712581435, "grad_norm": 0.16083607077598572, "learning_rate": 0.001, "loss": 1.9362, "step": 10922 }, { "epoch": 0.46209493188933076, "grad_norm": 0.20161986351013184, "learning_rate": 0.001, "loss": 2.4446, "step": 10923 }, { "epoch": 0.4621372366528471, "grad_norm": 7.899982929229736, "learning_rate": 0.001, "loss": 3.1712, "step": 10924 }, { "epoch": 0.46217954141636347, "grad_norm": 0.15877409279346466, "learning_rate": 0.001, "loss": 2.2344, "step": 10925 }, { "epoch": 0.4622218461798799, "grad_norm": 0.19421695172786713, "learning_rate": 0.001, "loss": 2.574, "step": 10926 }, { "epoch": 0.46226415094339623, "grad_norm": 0.5524720549583435, "learning_rate": 0.001, "loss": 2.2769, "step": 10927 }, { "epoch": 0.4623064557069126, "grad_norm": 0.30834636092185974, "learning_rate": 0.001, "loss": 1.5401, "step": 10928 }, { "epoch": 0.46234876047042894, "grad_norm": 0.17443600296974182, "learning_rate": 0.001, "loss": 3.5821, "step": 10929 }, { "epoch": 0.46239106523394535, "grad_norm": 0.17415951192378998, "learning_rate": 0.001, "loss": 2.5898, "step": 10930 }, { "epoch": 0.4624333699974617, "grad_norm": 0.14136327803134918, "learning_rate": 0.001, "loss": 2.4831, "step": 10931 }, { "epoch": 0.46247567476097806, "grad_norm": 13.823705673217773, "learning_rate": 0.001, "loss": 3.3177, "step": 10932 }, { "epoch": 0.46251797952449447, "grad_norm": 0.24503834545612335, "learning_rate": 0.001, "loss": 2.119, "step": 10933 }, { "epoch": 0.4625602842880108, "grad_norm": 0.24650239944458008, "learning_rate": 0.001, "loss": 2.9394, "step": 10934 }, { "epoch": 0.4626025890515272, "grad_norm": 0.193034827709198, "learning_rate": 0.001, "loss": 2.3874, "step": 10935 }, { "epoch": 0.4626448938150436, "grad_norm": 0.16548699140548706, "learning_rate": 0.001, "loss": 2.2558, "step": 10936 }, { "epoch": 0.46268719857855994, "grad_norm": 0.2028469741344452, "learning_rate": 0.001, "loss": 3.2924, "step": 10937 }, { "epoch": 0.4627295033420763, "grad_norm": 0.25958749651908875, "learning_rate": 0.001, "loss": 1.914, "step": 10938 }, { "epoch": 0.4627718081055927, "grad_norm": 0.2172216922044754, "learning_rate": 0.001, "loss": 2.3915, "step": 10939 }, { "epoch": 0.46281411286910906, "grad_norm": 0.22597426176071167, "learning_rate": 0.001, "loss": 2.447, "step": 10940 }, { "epoch": 0.4628564176326254, "grad_norm": 0.6521003842353821, "learning_rate": 0.001, "loss": 3.1807, "step": 10941 }, { "epoch": 0.4628987223961418, "grad_norm": 0.2350475788116455, "learning_rate": 0.001, "loss": 2.4876, "step": 10942 }, { "epoch": 0.4629410271596582, "grad_norm": 0.15000639855861664, "learning_rate": 0.001, "loss": 2.5798, "step": 10943 }, { "epoch": 0.46298333192317453, "grad_norm": 13.369725227355957, "learning_rate": 0.001, "loss": 2.4465, "step": 10944 }, { "epoch": 0.46302563668669094, "grad_norm": 0.21107615530490875, "learning_rate": 0.001, "loss": 3.5766, "step": 10945 }, { "epoch": 0.4630679414502073, "grad_norm": 0.45922183990478516, "learning_rate": 0.001, "loss": 3.318, "step": 10946 }, { "epoch": 0.46311024621372365, "grad_norm": 0.15640473365783691, "learning_rate": 0.001, "loss": 1.9649, "step": 10947 }, { "epoch": 0.46315255097724006, "grad_norm": 0.17740632593631744, "learning_rate": 0.001, "loss": 2.4352, "step": 10948 }, { "epoch": 0.4631948557407564, "grad_norm": 0.21806718409061432, "learning_rate": 0.001, "loss": 3.8975, "step": 10949 }, { "epoch": 0.46323716050427277, "grad_norm": 0.17442677915096283, "learning_rate": 0.001, "loss": 2.0025, "step": 10950 }, { "epoch": 0.4632794652677891, "grad_norm": 0.18725450336933136, "learning_rate": 0.001, "loss": 1.6942, "step": 10951 }, { "epoch": 0.46332177003130554, "grad_norm": 0.2647855281829834, "learning_rate": 0.001, "loss": 1.9792, "step": 10952 }, { "epoch": 0.4633640747948219, "grad_norm": 0.22588206827640533, "learning_rate": 0.001, "loss": 2.6015, "step": 10953 }, { "epoch": 0.46340637955833824, "grad_norm": 0.18170830607414246, "learning_rate": 0.001, "loss": 2.4247, "step": 10954 }, { "epoch": 0.46344868432185465, "grad_norm": 0.19047954678535461, "learning_rate": 0.001, "loss": 2.5552, "step": 10955 }, { "epoch": 0.463490989085371, "grad_norm": 0.18378345668315887, "learning_rate": 0.001, "loss": 3.1469, "step": 10956 }, { "epoch": 0.46353329384888736, "grad_norm": 1.0354962348937988, "learning_rate": 0.001, "loss": 3.1662, "step": 10957 }, { "epoch": 0.46357559861240377, "grad_norm": 0.16644562780857086, "learning_rate": 0.001, "loss": 1.9066, "step": 10958 }, { "epoch": 0.4636179033759201, "grad_norm": 0.17657527327537537, "learning_rate": 0.001, "loss": 2.6647, "step": 10959 }, { "epoch": 0.4636602081394365, "grad_norm": 0.43034595251083374, "learning_rate": 0.001, "loss": 2.2832, "step": 10960 }, { "epoch": 0.4637025129029529, "grad_norm": 1.4906646013259888, "learning_rate": 0.001, "loss": 2.355, "step": 10961 }, { "epoch": 0.46374481766646924, "grad_norm": 0.324938029050827, "learning_rate": 0.001, "loss": 1.9415, "step": 10962 }, { "epoch": 0.4637871224299856, "grad_norm": 1.2835636138916016, "learning_rate": 0.001, "loss": 1.9729, "step": 10963 }, { "epoch": 0.463829427193502, "grad_norm": 0.1738107055425644, "learning_rate": 0.001, "loss": 2.4461, "step": 10964 }, { "epoch": 0.46387173195701836, "grad_norm": 0.23237141966819763, "learning_rate": 0.001, "loss": 1.4232, "step": 10965 }, { "epoch": 0.4639140367205347, "grad_norm": 0.30422407388687134, "learning_rate": 0.001, "loss": 2.2135, "step": 10966 }, { "epoch": 0.4639563414840511, "grad_norm": 0.17090842127799988, "learning_rate": 0.001, "loss": 2.0125, "step": 10967 }, { "epoch": 0.4639986462475675, "grad_norm": 0.1509457677602768, "learning_rate": 0.001, "loss": 2.2202, "step": 10968 }, { "epoch": 0.46404095101108384, "grad_norm": 0.16378089785575867, "learning_rate": 0.001, "loss": 2.301, "step": 10969 }, { "epoch": 0.46408325577460025, "grad_norm": 6.196132659912109, "learning_rate": 0.001, "loss": 2.2314, "step": 10970 }, { "epoch": 0.4641255605381166, "grad_norm": 0.44474777579307556, "learning_rate": 0.001, "loss": 2.6964, "step": 10971 }, { "epoch": 0.46416786530163295, "grad_norm": 0.21871718764305115, "learning_rate": 0.001, "loss": 2.0155, "step": 10972 }, { "epoch": 0.46421017006514936, "grad_norm": 0.24714459478855133, "learning_rate": 0.001, "loss": 1.7315, "step": 10973 }, { "epoch": 0.4642524748286657, "grad_norm": 0.2337181717157364, "learning_rate": 0.001, "loss": 3.461, "step": 10974 }, { "epoch": 0.46429477959218207, "grad_norm": 0.19878096878528595, "learning_rate": 0.001, "loss": 2.4174, "step": 10975 }, { "epoch": 0.4643370843556984, "grad_norm": 0.19646792113780975, "learning_rate": 0.001, "loss": 3.2401, "step": 10976 }, { "epoch": 0.46437938911921484, "grad_norm": 0.5441222786903381, "learning_rate": 0.001, "loss": 2.0292, "step": 10977 }, { "epoch": 0.4644216938827312, "grad_norm": 0.19950753450393677, "learning_rate": 0.001, "loss": 2.2161, "step": 10978 }, { "epoch": 0.46446399864624754, "grad_norm": 0.301084965467453, "learning_rate": 0.001, "loss": 1.6781, "step": 10979 }, { "epoch": 0.46450630340976395, "grad_norm": 0.1772916615009308, "learning_rate": 0.001, "loss": 1.5019, "step": 10980 }, { "epoch": 0.4645486081732803, "grad_norm": 0.170380100607872, "learning_rate": 0.001, "loss": 1.8409, "step": 10981 }, { "epoch": 0.46459091293679666, "grad_norm": 0.1674899011850357, "learning_rate": 0.001, "loss": 1.9776, "step": 10982 }, { "epoch": 0.4646332177003131, "grad_norm": 0.20366886258125305, "learning_rate": 0.001, "loss": 2.4532, "step": 10983 }, { "epoch": 0.4646755224638294, "grad_norm": 0.18768104910850525, "learning_rate": 0.001, "loss": 1.868, "step": 10984 }, { "epoch": 0.4647178272273458, "grad_norm": 0.19560806453227997, "learning_rate": 0.001, "loss": 2.4912, "step": 10985 }, { "epoch": 0.4647601319908622, "grad_norm": 0.3951105773448944, "learning_rate": 0.001, "loss": 2.1868, "step": 10986 }, { "epoch": 0.46480243675437855, "grad_norm": 0.9056179523468018, "learning_rate": 0.001, "loss": 2.7611, "step": 10987 }, { "epoch": 0.4648447415178949, "grad_norm": 0.1860157996416092, "learning_rate": 0.001, "loss": 2.0899, "step": 10988 }, { "epoch": 0.4648870462814113, "grad_norm": 0.7894105911254883, "learning_rate": 0.001, "loss": 2.2895, "step": 10989 }, { "epoch": 0.46492935104492766, "grad_norm": 0.16466152667999268, "learning_rate": 0.001, "loss": 2.6064, "step": 10990 }, { "epoch": 0.464971655808444, "grad_norm": 0.17930325865745544, "learning_rate": 0.001, "loss": 1.7395, "step": 10991 }, { "epoch": 0.4650139605719604, "grad_norm": 0.1949310451745987, "learning_rate": 0.001, "loss": 2.3786, "step": 10992 }, { "epoch": 0.4650562653354768, "grad_norm": 0.17911165952682495, "learning_rate": 0.001, "loss": 1.5416, "step": 10993 }, { "epoch": 0.46509857009899314, "grad_norm": 0.207133486866951, "learning_rate": 0.001, "loss": 2.3997, "step": 10994 }, { "epoch": 0.46514087486250955, "grad_norm": 0.1573377251625061, "learning_rate": 0.001, "loss": 1.966, "step": 10995 }, { "epoch": 0.4651831796260259, "grad_norm": 0.7414722442626953, "learning_rate": 0.001, "loss": 3.0765, "step": 10996 }, { "epoch": 0.46522548438954225, "grad_norm": 0.17348453402519226, "learning_rate": 0.001, "loss": 2.8971, "step": 10997 }, { "epoch": 0.4652677891530586, "grad_norm": 1.594288945198059, "learning_rate": 0.001, "loss": 2.3042, "step": 10998 }, { "epoch": 0.465310093916575, "grad_norm": 0.24077658355236053, "learning_rate": 0.001, "loss": 2.2233, "step": 10999 }, { "epoch": 0.4653523986800914, "grad_norm": 0.2647017538547516, "learning_rate": 0.001, "loss": 1.2681, "step": 11000 }, { "epoch": 0.4653947034436077, "grad_norm": 0.15793390572071075, "learning_rate": 0.001, "loss": 1.6003, "step": 11001 }, { "epoch": 0.46543700820712414, "grad_norm": 0.1997009813785553, "learning_rate": 0.001, "loss": 1.7316, "step": 11002 }, { "epoch": 0.4654793129706405, "grad_norm": 0.18965008854866028, "learning_rate": 0.001, "loss": 2.4437, "step": 11003 }, { "epoch": 0.46552161773415685, "grad_norm": 0.24327543377876282, "learning_rate": 0.001, "loss": 2.5611, "step": 11004 }, { "epoch": 0.46556392249767325, "grad_norm": 6.968198776245117, "learning_rate": 0.001, "loss": 2.3886, "step": 11005 }, { "epoch": 0.4656062272611896, "grad_norm": 0.2511080503463745, "learning_rate": 0.001, "loss": 2.9208, "step": 11006 }, { "epoch": 0.46564853202470596, "grad_norm": 0.1881697177886963, "learning_rate": 0.001, "loss": 1.5614, "step": 11007 }, { "epoch": 0.4656908367882224, "grad_norm": 0.4397967755794525, "learning_rate": 0.001, "loss": 3.3356, "step": 11008 }, { "epoch": 0.4657331415517387, "grad_norm": 0.2115083485841751, "learning_rate": 0.001, "loss": 2.1108, "step": 11009 }, { "epoch": 0.4657754463152551, "grad_norm": 0.17709064483642578, "learning_rate": 0.001, "loss": 3.367, "step": 11010 }, { "epoch": 0.4658177510787715, "grad_norm": 0.263375848531723, "learning_rate": 0.001, "loss": 2.0427, "step": 11011 }, { "epoch": 0.46586005584228785, "grad_norm": 0.20178444683551788, "learning_rate": 0.001, "loss": 2.385, "step": 11012 }, { "epoch": 0.4659023606058042, "grad_norm": 0.19885046780109406, "learning_rate": 0.001, "loss": 3.0593, "step": 11013 }, { "epoch": 0.4659446653693206, "grad_norm": 0.27875369787216187, "learning_rate": 0.001, "loss": 3.4957, "step": 11014 }, { "epoch": 0.46598697013283696, "grad_norm": 0.21964240074157715, "learning_rate": 0.001, "loss": 2.5046, "step": 11015 }, { "epoch": 0.4660292748963533, "grad_norm": 0.16891005635261536, "learning_rate": 0.001, "loss": 2.9566, "step": 11016 }, { "epoch": 0.46607157965986973, "grad_norm": 0.17071029543876648, "learning_rate": 0.001, "loss": 2.141, "step": 11017 }, { "epoch": 0.4661138844233861, "grad_norm": 17.83469009399414, "learning_rate": 0.001, "loss": 2.0753, "step": 11018 }, { "epoch": 0.46615618918690244, "grad_norm": 0.20334742963314056, "learning_rate": 0.001, "loss": 3.0571, "step": 11019 }, { "epoch": 0.4661984939504188, "grad_norm": 0.1513107866048813, "learning_rate": 0.001, "loss": 2.1704, "step": 11020 }, { "epoch": 0.4662407987139352, "grad_norm": 0.15699905157089233, "learning_rate": 0.001, "loss": 2.4362, "step": 11021 }, { "epoch": 0.46628310347745155, "grad_norm": 0.25063201785087585, "learning_rate": 0.001, "loss": 2.6234, "step": 11022 }, { "epoch": 0.4663254082409679, "grad_norm": 3.148904323577881, "learning_rate": 0.001, "loss": 3.4478, "step": 11023 }, { "epoch": 0.4663677130044843, "grad_norm": 0.22315865755081177, "learning_rate": 0.001, "loss": 2.1858, "step": 11024 }, { "epoch": 0.4664100177680007, "grad_norm": 0.24025224149227142, "learning_rate": 0.001, "loss": 2.9627, "step": 11025 }, { "epoch": 0.466452322531517, "grad_norm": 0.2143600881099701, "learning_rate": 0.001, "loss": 1.9114, "step": 11026 }, { "epoch": 0.46649462729503344, "grad_norm": 0.5619837641716003, "learning_rate": 0.001, "loss": 2.6092, "step": 11027 }, { "epoch": 0.4665369320585498, "grad_norm": 0.2009797990322113, "learning_rate": 0.001, "loss": 1.9033, "step": 11028 }, { "epoch": 0.46657923682206615, "grad_norm": 1.4359869956970215, "learning_rate": 0.001, "loss": 2.7386, "step": 11029 }, { "epoch": 0.46662154158558256, "grad_norm": 0.15855303406715393, "learning_rate": 0.001, "loss": 3.3832, "step": 11030 }, { "epoch": 0.4666638463490989, "grad_norm": 0.1745101362466812, "learning_rate": 0.001, "loss": 2.5427, "step": 11031 }, { "epoch": 0.46670615111261526, "grad_norm": 0.20111802220344543, "learning_rate": 0.001, "loss": 2.204, "step": 11032 }, { "epoch": 0.4667484558761317, "grad_norm": 0.22334951162338257, "learning_rate": 0.001, "loss": 2.4836, "step": 11033 }, { "epoch": 0.46679076063964803, "grad_norm": 0.17813622951507568, "learning_rate": 0.001, "loss": 2.9059, "step": 11034 }, { "epoch": 0.4668330654031644, "grad_norm": 0.19768601655960083, "learning_rate": 0.001, "loss": 1.7435, "step": 11035 }, { "epoch": 0.4668753701666808, "grad_norm": 0.4055560529232025, "learning_rate": 0.001, "loss": 1.8149, "step": 11036 }, { "epoch": 0.46691767493019715, "grad_norm": 0.14512808620929718, "learning_rate": 0.001, "loss": 2.3372, "step": 11037 }, { "epoch": 0.4669599796937135, "grad_norm": 0.2370501309633255, "learning_rate": 0.001, "loss": 2.0583, "step": 11038 }, { "epoch": 0.4670022844572299, "grad_norm": 0.16900447010993958, "learning_rate": 0.001, "loss": 1.8195, "step": 11039 }, { "epoch": 0.46704458922074626, "grad_norm": 0.16092944145202637, "learning_rate": 0.001, "loss": 2.3941, "step": 11040 }, { "epoch": 0.4670868939842626, "grad_norm": 0.1543259471654892, "learning_rate": 0.001, "loss": 1.7284, "step": 11041 }, { "epoch": 0.467129198747779, "grad_norm": 0.16476312279701233, "learning_rate": 0.001, "loss": 2.297, "step": 11042 }, { "epoch": 0.4671715035112954, "grad_norm": 0.1504514068365097, "learning_rate": 0.001, "loss": 2.2147, "step": 11043 }, { "epoch": 0.46721380827481174, "grad_norm": 0.1745976358652115, "learning_rate": 0.001, "loss": 2.5716, "step": 11044 }, { "epoch": 0.4672561130383281, "grad_norm": 0.17237746715545654, "learning_rate": 0.001, "loss": 2.1571, "step": 11045 }, { "epoch": 0.4672984178018445, "grad_norm": 0.14179447293281555, "learning_rate": 0.001, "loss": 1.7693, "step": 11046 }, { "epoch": 0.46734072256536086, "grad_norm": 0.2136073112487793, "learning_rate": 0.001, "loss": 1.9455, "step": 11047 }, { "epoch": 0.4673830273288772, "grad_norm": 0.15894265472888947, "learning_rate": 0.001, "loss": 1.9635, "step": 11048 }, { "epoch": 0.4674253320923936, "grad_norm": 1.5329841375350952, "learning_rate": 0.001, "loss": 2.4782, "step": 11049 }, { "epoch": 0.46746763685591, "grad_norm": 0.17458941042423248, "learning_rate": 0.001, "loss": 2.1225, "step": 11050 }, { "epoch": 0.46750994161942633, "grad_norm": 0.21708954870700836, "learning_rate": 0.001, "loss": 2.2734, "step": 11051 }, { "epoch": 0.46755224638294274, "grad_norm": 0.7924477458000183, "learning_rate": 0.001, "loss": 1.9928, "step": 11052 }, { "epoch": 0.4675945511464591, "grad_norm": 0.16856947541236877, "learning_rate": 0.001, "loss": 2.1516, "step": 11053 }, { "epoch": 0.46763685590997545, "grad_norm": 0.15970207750797272, "learning_rate": 0.001, "loss": 2.7033, "step": 11054 }, { "epoch": 0.46767916067349186, "grad_norm": 8.105451583862305, "learning_rate": 0.001, "loss": 2.0742, "step": 11055 }, { "epoch": 0.4677214654370082, "grad_norm": 0.1840921938419342, "learning_rate": 0.001, "loss": 1.9796, "step": 11056 }, { "epoch": 0.46776377020052456, "grad_norm": 0.15253804624080658, "learning_rate": 0.001, "loss": 2.0105, "step": 11057 }, { "epoch": 0.467806074964041, "grad_norm": 5.88224458694458, "learning_rate": 0.001, "loss": 1.9661, "step": 11058 }, { "epoch": 0.46784837972755733, "grad_norm": 0.16145947575569153, "learning_rate": 0.001, "loss": 2.1938, "step": 11059 }, { "epoch": 0.4678906844910737, "grad_norm": 0.19684232771396637, "learning_rate": 0.001, "loss": 1.6197, "step": 11060 }, { "epoch": 0.4679329892545901, "grad_norm": 45.53775405883789, "learning_rate": 0.001, "loss": 1.5319, "step": 11061 }, { "epoch": 0.46797529401810645, "grad_norm": 2.51239275932312, "learning_rate": 0.001, "loss": 2.2339, "step": 11062 }, { "epoch": 0.4680175987816228, "grad_norm": 0.19354106485843658, "learning_rate": 0.001, "loss": 2.2878, "step": 11063 }, { "epoch": 0.46805990354513916, "grad_norm": 63.523895263671875, "learning_rate": 0.001, "loss": 2.1583, "step": 11064 }, { "epoch": 0.46810220830865557, "grad_norm": 0.21123424172401428, "learning_rate": 0.001, "loss": 1.6651, "step": 11065 }, { "epoch": 0.4681445130721719, "grad_norm": 0.5979301333427429, "learning_rate": 0.001, "loss": 1.8025, "step": 11066 }, { "epoch": 0.4681868178356883, "grad_norm": 0.15575318038463593, "learning_rate": 0.001, "loss": 2.2109, "step": 11067 }, { "epoch": 0.4682291225992047, "grad_norm": 0.2060658186674118, "learning_rate": 0.001, "loss": 1.9252, "step": 11068 }, { "epoch": 0.46827142736272104, "grad_norm": 0.19070769846439362, "learning_rate": 0.001, "loss": 2.1284, "step": 11069 }, { "epoch": 0.4683137321262374, "grad_norm": 0.14260248839855194, "learning_rate": 0.001, "loss": 1.8289, "step": 11070 }, { "epoch": 0.4683560368897538, "grad_norm": 0.2190844863653183, "learning_rate": 0.001, "loss": 2.1024, "step": 11071 }, { "epoch": 0.46839834165327016, "grad_norm": 0.1667780727148056, "learning_rate": 0.001, "loss": 2.5757, "step": 11072 }, { "epoch": 0.4684406464167865, "grad_norm": 0.19789689779281616, "learning_rate": 0.001, "loss": 2.0722, "step": 11073 }, { "epoch": 0.4684829511803029, "grad_norm": 0.16835686564445496, "learning_rate": 0.001, "loss": 1.8042, "step": 11074 }, { "epoch": 0.4685252559438193, "grad_norm": 0.9263663291931152, "learning_rate": 0.001, "loss": 2.0782, "step": 11075 }, { "epoch": 0.46856756070733563, "grad_norm": 0.21614162623882294, "learning_rate": 0.001, "loss": 2.924, "step": 11076 }, { "epoch": 0.46860986547085204, "grad_norm": 3.207810163497925, "learning_rate": 0.001, "loss": 2.6613, "step": 11077 }, { "epoch": 0.4686521702343684, "grad_norm": 0.5324118733406067, "learning_rate": 0.001, "loss": 2.3755, "step": 11078 }, { "epoch": 0.46869447499788475, "grad_norm": 0.17789721488952637, "learning_rate": 0.001, "loss": 2.0421, "step": 11079 }, { "epoch": 0.46873677976140116, "grad_norm": 0.17907847464084625, "learning_rate": 0.001, "loss": 2.6084, "step": 11080 }, { "epoch": 0.4687790845249175, "grad_norm": 0.23135465383529663, "learning_rate": 0.001, "loss": 1.7174, "step": 11081 }, { "epoch": 0.46882138928843387, "grad_norm": 0.17100471258163452, "learning_rate": 0.001, "loss": 2.5313, "step": 11082 }, { "epoch": 0.4688636940519503, "grad_norm": 0.16610606014728546, "learning_rate": 0.001, "loss": 2.9865, "step": 11083 }, { "epoch": 0.46890599881546663, "grad_norm": 0.15432819724082947, "learning_rate": 0.001, "loss": 3.2703, "step": 11084 }, { "epoch": 0.468948303578983, "grad_norm": 0.18594351410865784, "learning_rate": 0.001, "loss": 3.4266, "step": 11085 }, { "epoch": 0.46899060834249934, "grad_norm": 6.842674732208252, "learning_rate": 0.001, "loss": 2.4358, "step": 11086 }, { "epoch": 0.46903291310601575, "grad_norm": 0.17697936296463013, "learning_rate": 0.001, "loss": 1.7158, "step": 11087 }, { "epoch": 0.4690752178695321, "grad_norm": 0.17671042680740356, "learning_rate": 0.001, "loss": 2.4786, "step": 11088 }, { "epoch": 0.46911752263304846, "grad_norm": 1.0439887046813965, "learning_rate": 0.001, "loss": 2.1289, "step": 11089 }, { "epoch": 0.46915982739656487, "grad_norm": 0.21606509387493134, "learning_rate": 0.001, "loss": 1.9614, "step": 11090 }, { "epoch": 0.4692021321600812, "grad_norm": 0.24721235036849976, "learning_rate": 0.001, "loss": 2.8101, "step": 11091 }, { "epoch": 0.4692444369235976, "grad_norm": 0.21566319465637207, "learning_rate": 0.001, "loss": 2.1605, "step": 11092 }, { "epoch": 0.469286741687114, "grad_norm": 0.5193756818771362, "learning_rate": 0.001, "loss": 2.787, "step": 11093 }, { "epoch": 0.46932904645063034, "grad_norm": 0.20842444896697998, "learning_rate": 0.001, "loss": 1.9445, "step": 11094 }, { "epoch": 0.4693713512141467, "grad_norm": 0.1922614574432373, "learning_rate": 0.001, "loss": 2.0826, "step": 11095 }, { "epoch": 0.4694136559776631, "grad_norm": 0.9516851305961609, "learning_rate": 0.001, "loss": 1.9346, "step": 11096 }, { "epoch": 0.46945596074117946, "grad_norm": 0.18452809751033783, "learning_rate": 0.001, "loss": 2.7229, "step": 11097 }, { "epoch": 0.4694982655046958, "grad_norm": 0.21180051565170288, "learning_rate": 0.001, "loss": 2.497, "step": 11098 }, { "epoch": 0.4695405702682122, "grad_norm": 3.082894802093506, "learning_rate": 0.001, "loss": 1.9389, "step": 11099 }, { "epoch": 0.4695828750317286, "grad_norm": 0.17565667629241943, "learning_rate": 0.001, "loss": 1.4547, "step": 11100 }, { "epoch": 0.46962517979524493, "grad_norm": 0.2029939591884613, "learning_rate": 0.001, "loss": 2.4275, "step": 11101 }, { "epoch": 0.46966748455876134, "grad_norm": 0.20764219760894775, "learning_rate": 0.001, "loss": 2.1751, "step": 11102 }, { "epoch": 0.4697097893222777, "grad_norm": 0.19430102407932281, "learning_rate": 0.001, "loss": 1.736, "step": 11103 }, { "epoch": 0.46975209408579405, "grad_norm": 0.180740088224411, "learning_rate": 0.001, "loss": 1.6614, "step": 11104 }, { "epoch": 0.46979439884931046, "grad_norm": 8.314878463745117, "learning_rate": 0.001, "loss": 2.2919, "step": 11105 }, { "epoch": 0.4698367036128268, "grad_norm": 0.21296004951000214, "learning_rate": 0.001, "loss": 2.1662, "step": 11106 }, { "epoch": 0.46987900837634317, "grad_norm": 0.21107828617095947, "learning_rate": 0.001, "loss": 2.2709, "step": 11107 }, { "epoch": 0.4699213131398596, "grad_norm": 0.19116267561912537, "learning_rate": 0.001, "loss": 1.8983, "step": 11108 }, { "epoch": 0.46996361790337593, "grad_norm": 0.17168934643268585, "learning_rate": 0.001, "loss": 1.8434, "step": 11109 }, { "epoch": 0.4700059226668923, "grad_norm": 0.18456289172172546, "learning_rate": 0.001, "loss": 2.4683, "step": 11110 }, { "epoch": 0.47004822743040864, "grad_norm": 0.874439001083374, "learning_rate": 0.001, "loss": 3.1001, "step": 11111 }, { "epoch": 0.47009053219392505, "grad_norm": 0.16725631058216095, "learning_rate": 0.001, "loss": 1.8565, "step": 11112 }, { "epoch": 0.4701328369574414, "grad_norm": 0.1976451575756073, "learning_rate": 0.001, "loss": 2.5576, "step": 11113 }, { "epoch": 0.47017514172095776, "grad_norm": 0.17343725264072418, "learning_rate": 0.001, "loss": 2.7862, "step": 11114 }, { "epoch": 0.47021744648447417, "grad_norm": 0.22081388533115387, "learning_rate": 0.001, "loss": 2.4001, "step": 11115 }, { "epoch": 0.4702597512479905, "grad_norm": 0.375754177570343, "learning_rate": 0.001, "loss": 2.0034, "step": 11116 }, { "epoch": 0.4703020560115069, "grad_norm": 0.20346909761428833, "learning_rate": 0.001, "loss": 2.6006, "step": 11117 }, { "epoch": 0.4703443607750233, "grad_norm": 0.21501335501670837, "learning_rate": 0.001, "loss": 1.8364, "step": 11118 }, { "epoch": 0.47038666553853964, "grad_norm": 2.512514352798462, "learning_rate": 0.001, "loss": 2.0339, "step": 11119 }, { "epoch": 0.470428970302056, "grad_norm": 0.18703396618366241, "learning_rate": 0.001, "loss": 2.2686, "step": 11120 }, { "epoch": 0.4704712750655724, "grad_norm": 0.2161937803030014, "learning_rate": 0.001, "loss": 2.3499, "step": 11121 }, { "epoch": 0.47051357982908876, "grad_norm": 0.22765906155109406, "learning_rate": 0.001, "loss": 2.2785, "step": 11122 }, { "epoch": 0.4705558845926051, "grad_norm": 0.16340266168117523, "learning_rate": 0.001, "loss": 2.2066, "step": 11123 }, { "epoch": 0.4705981893561215, "grad_norm": 0.17582248151302338, "learning_rate": 0.001, "loss": 3.2365, "step": 11124 }, { "epoch": 0.4706404941196379, "grad_norm": 2.7968180179595947, "learning_rate": 0.001, "loss": 2.1311, "step": 11125 }, { "epoch": 0.47068279888315423, "grad_norm": 0.19796128571033478, "learning_rate": 0.001, "loss": 2.6811, "step": 11126 }, { "epoch": 0.47072510364667064, "grad_norm": 0.3247911334037781, "learning_rate": 0.001, "loss": 2.0883, "step": 11127 }, { "epoch": 0.470767408410187, "grad_norm": 0.21730183064937592, "learning_rate": 0.001, "loss": 2.1616, "step": 11128 }, { "epoch": 0.47080971317370335, "grad_norm": 0.226370707154274, "learning_rate": 0.001, "loss": 3.4228, "step": 11129 }, { "epoch": 0.47085201793721976, "grad_norm": 2.1633243560791016, "learning_rate": 0.001, "loss": 2.2012, "step": 11130 }, { "epoch": 0.4708943227007361, "grad_norm": 0.2938755750656128, "learning_rate": 0.001, "loss": 3.1938, "step": 11131 }, { "epoch": 0.47093662746425247, "grad_norm": 0.15652383863925934, "learning_rate": 0.001, "loss": 1.8906, "step": 11132 }, { "epoch": 0.4709789322277688, "grad_norm": 0.17891067266464233, "learning_rate": 0.001, "loss": 3.3451, "step": 11133 }, { "epoch": 0.47102123699128523, "grad_norm": 0.47045284509658813, "learning_rate": 0.001, "loss": 2.3286, "step": 11134 }, { "epoch": 0.4710635417548016, "grad_norm": 0.3203991949558258, "learning_rate": 0.001, "loss": 2.0339, "step": 11135 }, { "epoch": 0.47110584651831794, "grad_norm": 0.1709599643945694, "learning_rate": 0.001, "loss": 1.9023, "step": 11136 }, { "epoch": 0.47114815128183435, "grad_norm": 0.2922183871269226, "learning_rate": 0.001, "loss": 2.1608, "step": 11137 }, { "epoch": 0.4711904560453507, "grad_norm": 8.45193862915039, "learning_rate": 0.001, "loss": 1.8432, "step": 11138 }, { "epoch": 0.47123276080886706, "grad_norm": 0.18489998579025269, "learning_rate": 0.001, "loss": 2.5255, "step": 11139 }, { "epoch": 0.47127506557238347, "grad_norm": 0.5812740325927734, "learning_rate": 0.001, "loss": 2.3951, "step": 11140 }, { "epoch": 0.4713173703358998, "grad_norm": 0.18123511970043182, "learning_rate": 0.001, "loss": 1.8583, "step": 11141 }, { "epoch": 0.4713596750994162, "grad_norm": 0.22358356416225433, "learning_rate": 0.001, "loss": 2.1568, "step": 11142 }, { "epoch": 0.4714019798629326, "grad_norm": 0.363231360912323, "learning_rate": 0.001, "loss": 1.5782, "step": 11143 }, { "epoch": 0.47144428462644894, "grad_norm": 0.2926989793777466, "learning_rate": 0.001, "loss": 2.6199, "step": 11144 }, { "epoch": 0.4714865893899653, "grad_norm": 0.17961940169334412, "learning_rate": 0.001, "loss": 1.9892, "step": 11145 }, { "epoch": 0.4715288941534817, "grad_norm": 0.17067760229110718, "learning_rate": 0.001, "loss": 2.2848, "step": 11146 }, { "epoch": 0.47157119891699806, "grad_norm": 0.16751837730407715, "learning_rate": 0.001, "loss": 1.7813, "step": 11147 }, { "epoch": 0.4716135036805144, "grad_norm": 0.20085522532463074, "learning_rate": 0.001, "loss": 2.9713, "step": 11148 }, { "epoch": 0.4716558084440308, "grad_norm": 0.20381022989749908, "learning_rate": 0.001, "loss": 3.0651, "step": 11149 }, { "epoch": 0.4716981132075472, "grad_norm": 0.2398488074541092, "learning_rate": 0.001, "loss": 2.5814, "step": 11150 }, { "epoch": 0.47174041797106353, "grad_norm": 1.2999179363250732, "learning_rate": 0.001, "loss": 1.6057, "step": 11151 }, { "epoch": 0.47178272273457994, "grad_norm": 0.7444708347320557, "learning_rate": 0.001, "loss": 2.5191, "step": 11152 }, { "epoch": 0.4718250274980963, "grad_norm": 0.16335336863994598, "learning_rate": 0.001, "loss": 2.7914, "step": 11153 }, { "epoch": 0.47186733226161265, "grad_norm": 0.5560107827186584, "learning_rate": 0.001, "loss": 2.2272, "step": 11154 }, { "epoch": 0.471909637025129, "grad_norm": 0.16256298124790192, "learning_rate": 0.001, "loss": 1.6319, "step": 11155 }, { "epoch": 0.4719519417886454, "grad_norm": 0.23074427247047424, "learning_rate": 0.001, "loss": 2.5899, "step": 11156 }, { "epoch": 0.47199424655216177, "grad_norm": 0.17271144688129425, "learning_rate": 0.001, "loss": 2.4049, "step": 11157 }, { "epoch": 0.4720365513156781, "grad_norm": 0.21863944828510284, "learning_rate": 0.001, "loss": 2.4901, "step": 11158 }, { "epoch": 0.47207885607919453, "grad_norm": 0.15925271809101105, "learning_rate": 0.001, "loss": 2.034, "step": 11159 }, { "epoch": 0.4721211608427109, "grad_norm": 0.1758195459842682, "learning_rate": 0.001, "loss": 2.6075, "step": 11160 }, { "epoch": 0.47216346560622724, "grad_norm": 0.2449083775281906, "learning_rate": 0.001, "loss": 2.4067, "step": 11161 }, { "epoch": 0.47220577036974365, "grad_norm": 0.14552642405033112, "learning_rate": 0.001, "loss": 1.9266, "step": 11162 }, { "epoch": 0.47224807513326, "grad_norm": 0.3583971858024597, "learning_rate": 0.001, "loss": 1.9896, "step": 11163 }, { "epoch": 0.47229037989677636, "grad_norm": 0.22816874086856842, "learning_rate": 0.001, "loss": 2.1876, "step": 11164 }, { "epoch": 0.47233268466029277, "grad_norm": 0.18025673925876617, "learning_rate": 0.001, "loss": 1.8776, "step": 11165 }, { "epoch": 0.4723749894238091, "grad_norm": 0.17543093860149384, "learning_rate": 0.001, "loss": 2.6487, "step": 11166 }, { "epoch": 0.4724172941873255, "grad_norm": 0.1770016998052597, "learning_rate": 0.001, "loss": 3.3751, "step": 11167 }, { "epoch": 0.4724595989508419, "grad_norm": 0.1971733570098877, "learning_rate": 0.001, "loss": 2.0229, "step": 11168 }, { "epoch": 0.47250190371435824, "grad_norm": 0.22783638536930084, "learning_rate": 0.001, "loss": 1.7606, "step": 11169 }, { "epoch": 0.4725442084778746, "grad_norm": 0.17248298227787018, "learning_rate": 0.001, "loss": 1.8782, "step": 11170 }, { "epoch": 0.472586513241391, "grad_norm": 0.1579669564962387, "learning_rate": 0.001, "loss": 1.4778, "step": 11171 }, { "epoch": 0.47262881800490736, "grad_norm": 0.5877176523208618, "learning_rate": 0.001, "loss": 1.8328, "step": 11172 }, { "epoch": 0.4726711227684237, "grad_norm": 0.2934803366661072, "learning_rate": 0.001, "loss": 2.7896, "step": 11173 }, { "epoch": 0.4727134275319401, "grad_norm": 0.28287965059280396, "learning_rate": 0.001, "loss": 3.311, "step": 11174 }, { "epoch": 0.4727557322954565, "grad_norm": 0.1633968949317932, "learning_rate": 0.001, "loss": 1.8999, "step": 11175 }, { "epoch": 0.47279803705897283, "grad_norm": 0.2607437074184418, "learning_rate": 0.001, "loss": 1.9724, "step": 11176 }, { "epoch": 0.4728403418224892, "grad_norm": 0.6347095370292664, "learning_rate": 0.001, "loss": 2.791, "step": 11177 }, { "epoch": 0.4728826465860056, "grad_norm": 0.20074185729026794, "learning_rate": 0.001, "loss": 3.0955, "step": 11178 }, { "epoch": 0.47292495134952195, "grad_norm": 0.19658905267715454, "learning_rate": 0.001, "loss": 2.3828, "step": 11179 }, { "epoch": 0.4729672561130383, "grad_norm": 0.14689096808433533, "learning_rate": 0.001, "loss": 2.1146, "step": 11180 }, { "epoch": 0.4730095608765547, "grad_norm": 0.21568824350833893, "learning_rate": 0.001, "loss": 3.265, "step": 11181 }, { "epoch": 0.47305186564007107, "grad_norm": 0.20874953269958496, "learning_rate": 0.001, "loss": 2.4134, "step": 11182 }, { "epoch": 0.4730941704035874, "grad_norm": 5.027991771697998, "learning_rate": 0.001, "loss": 3.1703, "step": 11183 }, { "epoch": 0.47313647516710383, "grad_norm": 0.1729307919740677, "learning_rate": 0.001, "loss": 1.7616, "step": 11184 }, { "epoch": 0.4731787799306202, "grad_norm": 0.1840585619211197, "learning_rate": 0.001, "loss": 2.9509, "step": 11185 }, { "epoch": 0.47322108469413654, "grad_norm": 0.19434517621994019, "learning_rate": 0.001, "loss": 2.1666, "step": 11186 }, { "epoch": 0.47326338945765295, "grad_norm": 0.1548105627298355, "learning_rate": 0.001, "loss": 2.6967, "step": 11187 }, { "epoch": 0.4733056942211693, "grad_norm": 0.15446540713310242, "learning_rate": 0.001, "loss": 1.9184, "step": 11188 }, { "epoch": 0.47334799898468566, "grad_norm": 0.15472517907619476, "learning_rate": 0.001, "loss": 1.9038, "step": 11189 }, { "epoch": 0.47339030374820207, "grad_norm": 0.29968515038490295, "learning_rate": 0.001, "loss": 1.7286, "step": 11190 }, { "epoch": 0.4734326085117184, "grad_norm": 3.557898759841919, "learning_rate": 0.001, "loss": 3.0507, "step": 11191 }, { "epoch": 0.4734749132752348, "grad_norm": 1.0494880676269531, "learning_rate": 0.001, "loss": 1.6712, "step": 11192 }, { "epoch": 0.4735172180387512, "grad_norm": 0.18932278454303741, "learning_rate": 0.001, "loss": 2.3845, "step": 11193 }, { "epoch": 0.47355952280226754, "grad_norm": 0.7504813075065613, "learning_rate": 0.001, "loss": 2.8903, "step": 11194 }, { "epoch": 0.4736018275657839, "grad_norm": 0.2353019416332245, "learning_rate": 0.001, "loss": 2.2606, "step": 11195 }, { "epoch": 0.4736441323293003, "grad_norm": 0.22455672919750214, "learning_rate": 0.001, "loss": 2.4879, "step": 11196 }, { "epoch": 0.47368643709281666, "grad_norm": 0.28473368287086487, "learning_rate": 0.001, "loss": 2.6755, "step": 11197 }, { "epoch": 0.473728741856333, "grad_norm": 0.19479401409626007, "learning_rate": 0.001, "loss": 2.784, "step": 11198 }, { "epoch": 0.47377104661984937, "grad_norm": 0.17094643414020538, "learning_rate": 0.001, "loss": 2.3451, "step": 11199 }, { "epoch": 0.4738133513833658, "grad_norm": 0.17563167214393616, "learning_rate": 0.001, "loss": 2.3602, "step": 11200 }, { "epoch": 0.47385565614688213, "grad_norm": 0.19318290054798126, "learning_rate": 0.001, "loss": 1.8098, "step": 11201 }, { "epoch": 0.4738979609103985, "grad_norm": 0.1785099357366562, "learning_rate": 0.001, "loss": 2.6017, "step": 11202 }, { "epoch": 0.4739402656739149, "grad_norm": 0.23971965909004211, "learning_rate": 0.001, "loss": 3.1176, "step": 11203 }, { "epoch": 0.47398257043743125, "grad_norm": 0.17552334070205688, "learning_rate": 0.001, "loss": 2.6471, "step": 11204 }, { "epoch": 0.4740248752009476, "grad_norm": 0.309535950422287, "learning_rate": 0.001, "loss": 1.5446, "step": 11205 }, { "epoch": 0.474067179964464, "grad_norm": 0.23233279585838318, "learning_rate": 0.001, "loss": 2.4261, "step": 11206 }, { "epoch": 0.47410948472798037, "grad_norm": 0.17341110110282898, "learning_rate": 0.001, "loss": 1.5443, "step": 11207 }, { "epoch": 0.4741517894914967, "grad_norm": 0.9063680768013, "learning_rate": 0.001, "loss": 1.5549, "step": 11208 }, { "epoch": 0.47419409425501313, "grad_norm": 0.20930500328540802, "learning_rate": 0.001, "loss": 2.9428, "step": 11209 }, { "epoch": 0.4742363990185295, "grad_norm": 0.1631225347518921, "learning_rate": 0.001, "loss": 2.0326, "step": 11210 }, { "epoch": 0.47427870378204584, "grad_norm": 0.3474618196487427, "learning_rate": 0.001, "loss": 2.0844, "step": 11211 }, { "epoch": 0.47432100854556225, "grad_norm": 0.16792355477809906, "learning_rate": 0.001, "loss": 2.1918, "step": 11212 }, { "epoch": 0.4743633133090786, "grad_norm": 0.2315523475408554, "learning_rate": 0.001, "loss": 2.7786, "step": 11213 }, { "epoch": 0.47440561807259496, "grad_norm": 0.21026229858398438, "learning_rate": 0.001, "loss": 2.1381, "step": 11214 }, { "epoch": 0.47444792283611137, "grad_norm": 0.1558665633201599, "learning_rate": 0.001, "loss": 1.828, "step": 11215 }, { "epoch": 0.4744902275996277, "grad_norm": 0.19669584929943085, "learning_rate": 0.001, "loss": 2.3019, "step": 11216 }, { "epoch": 0.4745325323631441, "grad_norm": 0.16526706516742706, "learning_rate": 0.001, "loss": 2.1734, "step": 11217 }, { "epoch": 0.4745748371266605, "grad_norm": 0.1388012170791626, "learning_rate": 0.001, "loss": 2.5851, "step": 11218 }, { "epoch": 0.47461714189017684, "grad_norm": 0.20129254460334778, "learning_rate": 0.001, "loss": 2.3251, "step": 11219 }, { "epoch": 0.4746594466536932, "grad_norm": 0.1600094735622406, "learning_rate": 0.001, "loss": 1.8666, "step": 11220 }, { "epoch": 0.4747017514172096, "grad_norm": 0.1788060963153839, "learning_rate": 0.001, "loss": 3.1572, "step": 11221 }, { "epoch": 0.47474405618072596, "grad_norm": 0.34340453147888184, "learning_rate": 0.001, "loss": 1.7197, "step": 11222 }, { "epoch": 0.4747863609442423, "grad_norm": 0.18429534137248993, "learning_rate": 0.001, "loss": 2.1796, "step": 11223 }, { "epoch": 0.47482866570775867, "grad_norm": 0.19663597643375397, "learning_rate": 0.001, "loss": 2.397, "step": 11224 }, { "epoch": 0.4748709704712751, "grad_norm": 0.16480304300785065, "learning_rate": 0.001, "loss": 1.901, "step": 11225 }, { "epoch": 0.47491327523479143, "grad_norm": 0.6021777391433716, "learning_rate": 0.001, "loss": 2.8861, "step": 11226 }, { "epoch": 0.4749555799983078, "grad_norm": 0.14651986956596375, "learning_rate": 0.001, "loss": 2.2698, "step": 11227 }, { "epoch": 0.4749978847618242, "grad_norm": 0.521893322467804, "learning_rate": 0.001, "loss": 2.3953, "step": 11228 }, { "epoch": 0.47504018952534055, "grad_norm": 0.20987311005592346, "learning_rate": 0.001, "loss": 2.5025, "step": 11229 }, { "epoch": 0.4750824942888569, "grad_norm": 0.14994093775749207, "learning_rate": 0.001, "loss": 1.6545, "step": 11230 }, { "epoch": 0.4751247990523733, "grad_norm": 0.47113507986068726, "learning_rate": 0.001, "loss": 2.1639, "step": 11231 }, { "epoch": 0.47516710381588967, "grad_norm": 0.14544245600700378, "learning_rate": 0.001, "loss": 2.5572, "step": 11232 }, { "epoch": 0.475209408579406, "grad_norm": 2.366591215133667, "learning_rate": 0.001, "loss": 2.1375, "step": 11233 }, { "epoch": 0.47525171334292243, "grad_norm": 0.14105114340782166, "learning_rate": 0.001, "loss": 2.686, "step": 11234 }, { "epoch": 0.4752940181064388, "grad_norm": 0.49145644903182983, "learning_rate": 0.001, "loss": 1.8348, "step": 11235 }, { "epoch": 0.47533632286995514, "grad_norm": 0.18383541703224182, "learning_rate": 0.001, "loss": 1.5315, "step": 11236 }, { "epoch": 0.47537862763347155, "grad_norm": 0.17911598086357117, "learning_rate": 0.001, "loss": 1.9033, "step": 11237 }, { "epoch": 0.4754209323969879, "grad_norm": 3.2058968544006348, "learning_rate": 0.001, "loss": 3.8925, "step": 11238 }, { "epoch": 0.47546323716050426, "grad_norm": 0.1748027503490448, "learning_rate": 0.001, "loss": 2.9772, "step": 11239 }, { "epoch": 0.47550554192402067, "grad_norm": 0.16488003730773926, "learning_rate": 0.001, "loss": 1.9438, "step": 11240 }, { "epoch": 0.475547846687537, "grad_norm": 0.2031308114528656, "learning_rate": 0.001, "loss": 1.5888, "step": 11241 }, { "epoch": 0.4755901514510534, "grad_norm": 0.2005867063999176, "learning_rate": 0.001, "loss": 2.3343, "step": 11242 }, { "epoch": 0.4756324562145698, "grad_norm": 5.769382953643799, "learning_rate": 0.001, "loss": 1.9595, "step": 11243 }, { "epoch": 0.47567476097808614, "grad_norm": 0.16878317296504974, "learning_rate": 0.001, "loss": 1.9647, "step": 11244 }, { "epoch": 0.4757170657416025, "grad_norm": 0.1860329806804657, "learning_rate": 0.001, "loss": 1.8979, "step": 11245 }, { "epoch": 0.47575937050511885, "grad_norm": 0.25839003920555115, "learning_rate": 0.001, "loss": 2.7318, "step": 11246 }, { "epoch": 0.47580167526863526, "grad_norm": 3.520124912261963, "learning_rate": 0.001, "loss": 2.6687, "step": 11247 }, { "epoch": 0.4758439800321516, "grad_norm": 0.2002669721841812, "learning_rate": 0.001, "loss": 3.1077, "step": 11248 }, { "epoch": 0.47588628479566797, "grad_norm": 0.31239837408065796, "learning_rate": 0.001, "loss": 2.4069, "step": 11249 }, { "epoch": 0.4759285895591844, "grad_norm": 0.2110671103000641, "learning_rate": 0.001, "loss": 2.3489, "step": 11250 }, { "epoch": 0.47597089432270073, "grad_norm": 0.22103223204612732, "learning_rate": 0.001, "loss": 2.9316, "step": 11251 }, { "epoch": 0.4760131990862171, "grad_norm": 0.7828096747398376, "learning_rate": 0.001, "loss": 2.5561, "step": 11252 }, { "epoch": 0.4760555038497335, "grad_norm": 0.20706386864185333, "learning_rate": 0.001, "loss": 2.9993, "step": 11253 }, { "epoch": 0.47609780861324985, "grad_norm": 0.19384732842445374, "learning_rate": 0.001, "loss": 1.7905, "step": 11254 }, { "epoch": 0.4761401133767662, "grad_norm": 0.25102004408836365, "learning_rate": 0.001, "loss": 2.7746, "step": 11255 }, { "epoch": 0.4761824181402826, "grad_norm": 0.1942043900489807, "learning_rate": 0.001, "loss": 2.215, "step": 11256 }, { "epoch": 0.47622472290379897, "grad_norm": 0.2124514877796173, "learning_rate": 0.001, "loss": 2.1071, "step": 11257 }, { "epoch": 0.4762670276673153, "grad_norm": 0.1634020060300827, "learning_rate": 0.001, "loss": 1.5839, "step": 11258 }, { "epoch": 0.47630933243083173, "grad_norm": 0.19070233404636383, "learning_rate": 0.001, "loss": 3.202, "step": 11259 }, { "epoch": 0.4763516371943481, "grad_norm": 0.18325842916965485, "learning_rate": 0.001, "loss": 3.1532, "step": 11260 }, { "epoch": 0.47639394195786444, "grad_norm": 0.23473717272281647, "learning_rate": 0.001, "loss": 2.2308, "step": 11261 }, { "epoch": 0.47643624672138085, "grad_norm": 0.17958499491214752, "learning_rate": 0.001, "loss": 2.0782, "step": 11262 }, { "epoch": 0.4764785514848972, "grad_norm": 0.31303510069847107, "learning_rate": 0.001, "loss": 2.5105, "step": 11263 }, { "epoch": 0.47652085624841356, "grad_norm": 0.18192079663276672, "learning_rate": 0.001, "loss": 1.7983, "step": 11264 }, { "epoch": 0.47656316101192997, "grad_norm": 0.31585457921028137, "learning_rate": 0.001, "loss": 2.8891, "step": 11265 }, { "epoch": 0.4766054657754463, "grad_norm": 0.19254378974437714, "learning_rate": 0.001, "loss": 1.9023, "step": 11266 }, { "epoch": 0.4766477705389627, "grad_norm": 0.5017719268798828, "learning_rate": 0.001, "loss": 1.9545, "step": 11267 }, { "epoch": 0.47669007530247903, "grad_norm": 0.1998423933982849, "learning_rate": 0.001, "loss": 2.9385, "step": 11268 }, { "epoch": 0.47673238006599544, "grad_norm": 0.18288664519786835, "learning_rate": 0.001, "loss": 1.5928, "step": 11269 }, { "epoch": 0.4767746848295118, "grad_norm": 0.1892680823802948, "learning_rate": 0.001, "loss": 2.1967, "step": 11270 }, { "epoch": 0.47681698959302815, "grad_norm": 0.7018859386444092, "learning_rate": 0.001, "loss": 2.2363, "step": 11271 }, { "epoch": 0.47685929435654456, "grad_norm": 0.6510068774223328, "learning_rate": 0.001, "loss": 2.1761, "step": 11272 }, { "epoch": 0.4769015991200609, "grad_norm": 1.0637061595916748, "learning_rate": 0.001, "loss": 1.9906, "step": 11273 }, { "epoch": 0.47694390388357727, "grad_norm": 0.15826399624347687, "learning_rate": 0.001, "loss": 2.4301, "step": 11274 }, { "epoch": 0.4769862086470937, "grad_norm": 0.17290960252285004, "learning_rate": 0.001, "loss": 3.1263, "step": 11275 }, { "epoch": 0.47702851341061003, "grad_norm": 0.19678086042404175, "learning_rate": 0.001, "loss": 2.8927, "step": 11276 }, { "epoch": 0.4770708181741264, "grad_norm": 0.19402678310871124, "learning_rate": 0.001, "loss": 2.1192, "step": 11277 }, { "epoch": 0.4771131229376428, "grad_norm": 0.20166024565696716, "learning_rate": 0.001, "loss": 2.2653, "step": 11278 }, { "epoch": 0.47715542770115915, "grad_norm": 0.15184548497200012, "learning_rate": 0.001, "loss": 3.5353, "step": 11279 }, { "epoch": 0.4771977324646755, "grad_norm": 0.15294674038887024, "learning_rate": 0.001, "loss": 2.1343, "step": 11280 }, { "epoch": 0.4772400372281919, "grad_norm": 0.23221707344055176, "learning_rate": 0.001, "loss": 1.9696, "step": 11281 }, { "epoch": 0.47728234199170827, "grad_norm": 0.541606605052948, "learning_rate": 0.001, "loss": 1.8145, "step": 11282 }, { "epoch": 0.4773246467552246, "grad_norm": 12.742988586425781, "learning_rate": 0.001, "loss": 2.5281, "step": 11283 }, { "epoch": 0.47736695151874103, "grad_norm": 0.15748733282089233, "learning_rate": 0.001, "loss": 2.6597, "step": 11284 }, { "epoch": 0.4774092562822574, "grad_norm": 0.1852605640888214, "learning_rate": 0.001, "loss": 2.1034, "step": 11285 }, { "epoch": 0.47745156104577374, "grad_norm": 0.5693913102149963, "learning_rate": 0.001, "loss": 3.0643, "step": 11286 }, { "epoch": 0.47749386580929015, "grad_norm": 0.570594072341919, "learning_rate": 0.001, "loss": 3.6691, "step": 11287 }, { "epoch": 0.4775361705728065, "grad_norm": 0.20175811648368835, "learning_rate": 0.001, "loss": 2.628, "step": 11288 }, { "epoch": 0.47757847533632286, "grad_norm": 0.2098332792520523, "learning_rate": 0.001, "loss": 2.3156, "step": 11289 }, { "epoch": 0.4776207800998392, "grad_norm": 0.18259800970554352, "learning_rate": 0.001, "loss": 3.0061, "step": 11290 }, { "epoch": 0.4776630848633556, "grad_norm": 0.17582936584949493, "learning_rate": 0.001, "loss": 2.0574, "step": 11291 }, { "epoch": 0.477705389626872, "grad_norm": 3.1900362968444824, "learning_rate": 0.001, "loss": 2.023, "step": 11292 }, { "epoch": 0.47774769439038833, "grad_norm": 0.18563833832740784, "learning_rate": 0.001, "loss": 2.3776, "step": 11293 }, { "epoch": 0.47778999915390474, "grad_norm": 0.1735265702009201, "learning_rate": 0.001, "loss": 3.3993, "step": 11294 }, { "epoch": 0.4778323039174211, "grad_norm": 0.17807775735855103, "learning_rate": 0.001, "loss": 1.9739, "step": 11295 }, { "epoch": 0.47787460868093745, "grad_norm": 0.42924782633781433, "learning_rate": 0.001, "loss": 2.0357, "step": 11296 }, { "epoch": 0.47791691344445386, "grad_norm": 0.2000165730714798, "learning_rate": 0.001, "loss": 2.0919, "step": 11297 }, { "epoch": 0.4779592182079702, "grad_norm": 0.14181675016880035, "learning_rate": 0.001, "loss": 2.5228, "step": 11298 }, { "epoch": 0.47800152297148657, "grad_norm": 5.60988187789917, "learning_rate": 0.001, "loss": 1.5685, "step": 11299 }, { "epoch": 0.478043827735003, "grad_norm": 0.1896638423204422, "learning_rate": 0.001, "loss": 2.7071, "step": 11300 }, { "epoch": 0.47808613249851933, "grad_norm": 46.45356750488281, "learning_rate": 0.001, "loss": 3.0575, "step": 11301 }, { "epoch": 0.4781284372620357, "grad_norm": 0.5738785266876221, "learning_rate": 0.001, "loss": 2.3252, "step": 11302 }, { "epoch": 0.4781707420255521, "grad_norm": 0.18505975604057312, "learning_rate": 0.001, "loss": 2.2331, "step": 11303 }, { "epoch": 0.47821304678906845, "grad_norm": 0.18580776453018188, "learning_rate": 0.001, "loss": 2.7998, "step": 11304 }, { "epoch": 0.4782553515525848, "grad_norm": 2.873171329498291, "learning_rate": 0.001, "loss": 3.3834, "step": 11305 }, { "epoch": 0.4782976563161012, "grad_norm": 0.17090004682540894, "learning_rate": 0.001, "loss": 2.4539, "step": 11306 }, { "epoch": 0.47833996107961757, "grad_norm": 0.5887268781661987, "learning_rate": 0.001, "loss": 1.9116, "step": 11307 }, { "epoch": 0.4783822658431339, "grad_norm": 0.2121281623840332, "learning_rate": 0.001, "loss": 2.3149, "step": 11308 }, { "epoch": 0.47842457060665033, "grad_norm": 0.31115978956222534, "learning_rate": 0.001, "loss": 2.1889, "step": 11309 }, { "epoch": 0.4784668753701667, "grad_norm": 0.1875309944152832, "learning_rate": 0.001, "loss": 2.1295, "step": 11310 }, { "epoch": 0.47850918013368304, "grad_norm": 0.1967727243900299, "learning_rate": 0.001, "loss": 1.7845, "step": 11311 }, { "epoch": 0.4785514848971994, "grad_norm": 2.0214593410491943, "learning_rate": 0.001, "loss": 2.2464, "step": 11312 }, { "epoch": 0.4785937896607158, "grad_norm": 0.18990519642829895, "learning_rate": 0.001, "loss": 2.0703, "step": 11313 }, { "epoch": 0.47863609442423216, "grad_norm": 1.7356641292572021, "learning_rate": 0.001, "loss": 2.1795, "step": 11314 }, { "epoch": 0.4786783991877485, "grad_norm": 0.220098614692688, "learning_rate": 0.001, "loss": 2.4705, "step": 11315 }, { "epoch": 0.4787207039512649, "grad_norm": 0.1770542711019516, "learning_rate": 0.001, "loss": 2.8367, "step": 11316 }, { "epoch": 0.4787630087147813, "grad_norm": 0.22387665510177612, "learning_rate": 0.001, "loss": 2.1766, "step": 11317 }, { "epoch": 0.47880531347829763, "grad_norm": 0.1825076937675476, "learning_rate": 0.001, "loss": 3.0979, "step": 11318 }, { "epoch": 0.47884761824181404, "grad_norm": 0.2045036405324936, "learning_rate": 0.001, "loss": 2.9486, "step": 11319 }, { "epoch": 0.4788899230053304, "grad_norm": 0.20427003502845764, "learning_rate": 0.001, "loss": 2.7808, "step": 11320 }, { "epoch": 0.47893222776884675, "grad_norm": 0.21618923544883728, "learning_rate": 0.001, "loss": 2.3455, "step": 11321 }, { "epoch": 0.47897453253236316, "grad_norm": 0.19083094596862793, "learning_rate": 0.001, "loss": 2.621, "step": 11322 }, { "epoch": 0.4790168372958795, "grad_norm": 10.630045890808105, "learning_rate": 0.001, "loss": 2.8354, "step": 11323 }, { "epoch": 0.47905914205939587, "grad_norm": 0.20413336157798767, "learning_rate": 0.001, "loss": 2.2648, "step": 11324 }, { "epoch": 0.4791014468229123, "grad_norm": 0.23747549951076508, "learning_rate": 0.001, "loss": 3.3187, "step": 11325 }, { "epoch": 0.47914375158642863, "grad_norm": 21.391681671142578, "learning_rate": 0.001, "loss": 1.6874, "step": 11326 }, { "epoch": 0.479186056349945, "grad_norm": 0.2882399559020996, "learning_rate": 0.001, "loss": 2.4637, "step": 11327 }, { "epoch": 0.4792283611134614, "grad_norm": 0.330314576625824, "learning_rate": 0.001, "loss": 3.253, "step": 11328 }, { "epoch": 0.47927066587697775, "grad_norm": 1.4046648740768433, "learning_rate": 0.001, "loss": 2.8288, "step": 11329 }, { "epoch": 0.4793129706404941, "grad_norm": 0.7811962962150574, "learning_rate": 0.001, "loss": 2.6857, "step": 11330 }, { "epoch": 0.4793552754040105, "grad_norm": 0.2128724753856659, "learning_rate": 0.001, "loss": 2.478, "step": 11331 }, { "epoch": 0.47939758016752687, "grad_norm": 0.2501393258571625, "learning_rate": 0.001, "loss": 2.4157, "step": 11332 }, { "epoch": 0.4794398849310432, "grad_norm": 0.22114041447639465, "learning_rate": 0.001, "loss": 1.7828, "step": 11333 }, { "epoch": 0.47948218969455964, "grad_norm": 0.1902758628129959, "learning_rate": 0.001, "loss": 2.0687, "step": 11334 }, { "epoch": 0.479524494458076, "grad_norm": 0.548904538154602, "learning_rate": 0.001, "loss": 2.5692, "step": 11335 }, { "epoch": 0.47956679922159234, "grad_norm": 0.18059691786766052, "learning_rate": 0.001, "loss": 2.0568, "step": 11336 }, { "epoch": 0.4796091039851087, "grad_norm": 0.17880599200725555, "learning_rate": 0.001, "loss": 2.1907, "step": 11337 }, { "epoch": 0.4796514087486251, "grad_norm": 0.3715362548828125, "learning_rate": 0.001, "loss": 2.5385, "step": 11338 }, { "epoch": 0.47969371351214146, "grad_norm": 0.1764996498823166, "learning_rate": 0.001, "loss": 1.8525, "step": 11339 }, { "epoch": 0.4797360182756578, "grad_norm": 0.8168947696685791, "learning_rate": 0.001, "loss": 2.5725, "step": 11340 }, { "epoch": 0.4797783230391742, "grad_norm": 0.161908358335495, "learning_rate": 0.001, "loss": 2.0986, "step": 11341 }, { "epoch": 0.4798206278026906, "grad_norm": 0.1834360510110855, "learning_rate": 0.001, "loss": 2.0397, "step": 11342 }, { "epoch": 0.47986293256620693, "grad_norm": 0.17309635877609253, "learning_rate": 0.001, "loss": 2.5099, "step": 11343 }, { "epoch": 0.47990523732972334, "grad_norm": 0.19647128880023956, "learning_rate": 0.001, "loss": 2.0393, "step": 11344 }, { "epoch": 0.4799475420932397, "grad_norm": 0.670528769493103, "learning_rate": 0.001, "loss": 3.4804, "step": 11345 }, { "epoch": 0.47998984685675605, "grad_norm": 0.1826171725988388, "learning_rate": 0.001, "loss": 2.0048, "step": 11346 }, { "epoch": 0.48003215162027246, "grad_norm": 0.20341645181179047, "learning_rate": 0.001, "loss": 2.7362, "step": 11347 }, { "epoch": 0.4800744563837888, "grad_norm": 0.2916952967643738, "learning_rate": 0.001, "loss": 1.6388, "step": 11348 }, { "epoch": 0.48011676114730517, "grad_norm": 0.5783160328865051, "learning_rate": 0.001, "loss": 2.3783, "step": 11349 }, { "epoch": 0.4801590659108216, "grad_norm": 0.20249702036380768, "learning_rate": 0.001, "loss": 2.3376, "step": 11350 }, { "epoch": 0.48020137067433794, "grad_norm": 0.18847358226776123, "learning_rate": 0.001, "loss": 2.1271, "step": 11351 }, { "epoch": 0.4802436754378543, "grad_norm": 2.4059906005859375, "learning_rate": 0.001, "loss": 1.7613, "step": 11352 }, { "epoch": 0.4802859802013707, "grad_norm": 0.1835888773202896, "learning_rate": 0.001, "loss": 2.379, "step": 11353 }, { "epoch": 0.48032828496488705, "grad_norm": 0.34716445207595825, "learning_rate": 0.001, "loss": 2.0613, "step": 11354 }, { "epoch": 0.4803705897284034, "grad_norm": 0.154496431350708, "learning_rate": 0.001, "loss": 2.3463, "step": 11355 }, { "epoch": 0.4804128944919198, "grad_norm": 0.5748295783996582, "learning_rate": 0.001, "loss": 2.2405, "step": 11356 }, { "epoch": 0.48045519925543617, "grad_norm": 0.3923312723636627, "learning_rate": 0.001, "loss": 2.7319, "step": 11357 }, { "epoch": 0.4804975040189525, "grad_norm": 0.5746713280677795, "learning_rate": 0.001, "loss": 1.8951, "step": 11358 }, { "epoch": 0.4805398087824689, "grad_norm": 1.0330185890197754, "learning_rate": 0.001, "loss": 3.1664, "step": 11359 }, { "epoch": 0.4805821135459853, "grad_norm": 0.1738303303718567, "learning_rate": 0.001, "loss": 2.808, "step": 11360 }, { "epoch": 0.48062441830950164, "grad_norm": 8.028282165527344, "learning_rate": 0.001, "loss": 1.9271, "step": 11361 }, { "epoch": 0.480666723073018, "grad_norm": 0.18833428621292114, "learning_rate": 0.001, "loss": 2.1628, "step": 11362 }, { "epoch": 0.4807090278365344, "grad_norm": 0.1686827838420868, "learning_rate": 0.001, "loss": 1.6737, "step": 11363 }, { "epoch": 0.48075133260005076, "grad_norm": 0.18253150582313538, "learning_rate": 0.001, "loss": 2.337, "step": 11364 }, { "epoch": 0.4807936373635671, "grad_norm": 0.23626413941383362, "learning_rate": 0.001, "loss": 3.2853, "step": 11365 }, { "epoch": 0.4808359421270835, "grad_norm": 0.718061089515686, "learning_rate": 0.001, "loss": 2.8863, "step": 11366 }, { "epoch": 0.4808782468905999, "grad_norm": 0.21760138869285583, "learning_rate": 0.001, "loss": 2.4297, "step": 11367 }, { "epoch": 0.48092055165411624, "grad_norm": 0.19987843930721283, "learning_rate": 0.001, "loss": 3.1579, "step": 11368 }, { "epoch": 0.48096285641763264, "grad_norm": 0.16361162066459656, "learning_rate": 0.001, "loss": 1.5716, "step": 11369 }, { "epoch": 0.481005161181149, "grad_norm": 0.169572651386261, "learning_rate": 0.001, "loss": 1.8561, "step": 11370 }, { "epoch": 0.48104746594466535, "grad_norm": 1.216856598854065, "learning_rate": 0.001, "loss": 2.5188, "step": 11371 }, { "epoch": 0.48108977070818176, "grad_norm": 0.20964156091213226, "learning_rate": 0.001, "loss": 2.9859, "step": 11372 }, { "epoch": 0.4811320754716981, "grad_norm": 0.5650727152824402, "learning_rate": 0.001, "loss": 3.1159, "step": 11373 }, { "epoch": 0.48117438023521447, "grad_norm": 0.2616911232471466, "learning_rate": 0.001, "loss": 2.1743, "step": 11374 }, { "epoch": 0.4812166849987309, "grad_norm": 0.22309203445911407, "learning_rate": 0.001, "loss": 2.7001, "step": 11375 }, { "epoch": 0.48125898976224724, "grad_norm": 0.18695230782032013, "learning_rate": 0.001, "loss": 2.203, "step": 11376 }, { "epoch": 0.4813012945257636, "grad_norm": 0.1963418573141098, "learning_rate": 0.001, "loss": 3.2709, "step": 11377 }, { "epoch": 0.48134359928928, "grad_norm": 0.16834786534309387, "learning_rate": 0.001, "loss": 2.769, "step": 11378 }, { "epoch": 0.48138590405279635, "grad_norm": 0.1826411634683609, "learning_rate": 0.001, "loss": 1.7105, "step": 11379 }, { "epoch": 0.4814282088163127, "grad_norm": 0.43672510981559753, "learning_rate": 0.001, "loss": 2.3934, "step": 11380 }, { "epoch": 0.48147051357982906, "grad_norm": 0.16782554984092712, "learning_rate": 0.001, "loss": 2.589, "step": 11381 }, { "epoch": 0.4815128183433455, "grad_norm": 0.16116951406002045, "learning_rate": 0.001, "loss": 2.2085, "step": 11382 }, { "epoch": 0.4815551231068618, "grad_norm": 0.18043899536132812, "learning_rate": 0.001, "loss": 1.7793, "step": 11383 }, { "epoch": 0.4815974278703782, "grad_norm": 0.5931171774864197, "learning_rate": 0.001, "loss": 1.7696, "step": 11384 }, { "epoch": 0.4816397326338946, "grad_norm": 0.4791402816772461, "learning_rate": 0.001, "loss": 3.4351, "step": 11385 }, { "epoch": 0.48168203739741094, "grad_norm": 0.2714325487613678, "learning_rate": 0.001, "loss": 2.2362, "step": 11386 }, { "epoch": 0.4817243421609273, "grad_norm": 0.19778069853782654, "learning_rate": 0.001, "loss": 2.5545, "step": 11387 }, { "epoch": 0.4817666469244437, "grad_norm": 0.208540141582489, "learning_rate": 0.001, "loss": 2.9821, "step": 11388 }, { "epoch": 0.48180895168796006, "grad_norm": 0.16977502405643463, "learning_rate": 0.001, "loss": 1.8149, "step": 11389 }, { "epoch": 0.4818512564514764, "grad_norm": 0.22707661986351013, "learning_rate": 0.001, "loss": 2.7141, "step": 11390 }, { "epoch": 0.4818935612149928, "grad_norm": 0.38424187898635864, "learning_rate": 0.001, "loss": 3.1576, "step": 11391 }, { "epoch": 0.4819358659785092, "grad_norm": 1.370203971862793, "learning_rate": 0.001, "loss": 2.2048, "step": 11392 }, { "epoch": 0.48197817074202554, "grad_norm": 0.18375247716903687, "learning_rate": 0.001, "loss": 2.3193, "step": 11393 }, { "epoch": 0.48202047550554195, "grad_norm": 0.9614766836166382, "learning_rate": 0.001, "loss": 1.794, "step": 11394 }, { "epoch": 0.4820627802690583, "grad_norm": 0.18902945518493652, "learning_rate": 0.001, "loss": 2.1396, "step": 11395 }, { "epoch": 0.48210508503257465, "grad_norm": 0.16414183378219604, "learning_rate": 0.001, "loss": 1.6427, "step": 11396 }, { "epoch": 0.48214738979609106, "grad_norm": 0.16200171411037445, "learning_rate": 0.001, "loss": 2.0166, "step": 11397 }, { "epoch": 0.4821896945596074, "grad_norm": 3.9230146408081055, "learning_rate": 0.001, "loss": 2.5453, "step": 11398 }, { "epoch": 0.4822319993231238, "grad_norm": 0.211333766579628, "learning_rate": 0.001, "loss": 2.9545, "step": 11399 }, { "epoch": 0.4822743040866402, "grad_norm": 0.2078399807214737, "learning_rate": 0.001, "loss": 2.085, "step": 11400 }, { "epoch": 0.48231660885015654, "grad_norm": 0.1923077553510666, "learning_rate": 0.001, "loss": 2.3923, "step": 11401 }, { "epoch": 0.4823589136136729, "grad_norm": 1.6450941562652588, "learning_rate": 0.001, "loss": 2.4251, "step": 11402 }, { "epoch": 0.48240121837718924, "grad_norm": 0.2333688586950302, "learning_rate": 0.001, "loss": 2.2939, "step": 11403 }, { "epoch": 0.48244352314070565, "grad_norm": 6.017459392547607, "learning_rate": 0.001, "loss": 1.9482, "step": 11404 }, { "epoch": 0.482485827904222, "grad_norm": 1.64377760887146, "learning_rate": 0.001, "loss": 1.9915, "step": 11405 }, { "epoch": 0.48252813266773836, "grad_norm": 0.22818098962306976, "learning_rate": 0.001, "loss": 3.1697, "step": 11406 }, { "epoch": 0.4825704374312548, "grad_norm": 0.20856161415576935, "learning_rate": 0.001, "loss": 1.7394, "step": 11407 }, { "epoch": 0.4826127421947711, "grad_norm": 0.2079148292541504, "learning_rate": 0.001, "loss": 2.85, "step": 11408 }, { "epoch": 0.4826550469582875, "grad_norm": 3.2609381675720215, "learning_rate": 0.001, "loss": 2.744, "step": 11409 }, { "epoch": 0.4826973517218039, "grad_norm": 0.4268743395805359, "learning_rate": 0.001, "loss": 3.3981, "step": 11410 }, { "epoch": 0.48273965648532025, "grad_norm": 0.260139137506485, "learning_rate": 0.001, "loss": 2.0163, "step": 11411 }, { "epoch": 0.4827819612488366, "grad_norm": 0.21238379180431366, "learning_rate": 0.001, "loss": 2.566, "step": 11412 }, { "epoch": 0.482824266012353, "grad_norm": 0.19492459297180176, "learning_rate": 0.001, "loss": 2.2858, "step": 11413 }, { "epoch": 0.48286657077586936, "grad_norm": 2.3117878437042236, "learning_rate": 0.001, "loss": 2.0708, "step": 11414 }, { "epoch": 0.4829088755393857, "grad_norm": 0.20078395307064056, "learning_rate": 0.001, "loss": 3.0703, "step": 11415 }, { "epoch": 0.4829511803029021, "grad_norm": 0.18445464968681335, "learning_rate": 0.001, "loss": 2.7102, "step": 11416 }, { "epoch": 0.4829934850664185, "grad_norm": 0.2198317050933838, "learning_rate": 0.001, "loss": 2.8357, "step": 11417 }, { "epoch": 0.48303578982993484, "grad_norm": 2.0135302543640137, "learning_rate": 0.001, "loss": 1.8806, "step": 11418 }, { "epoch": 0.48307809459345125, "grad_norm": 0.25936415791511536, "learning_rate": 0.001, "loss": 3.3516, "step": 11419 }, { "epoch": 0.4831203993569676, "grad_norm": 0.2253940850496292, "learning_rate": 0.001, "loss": 2.3307, "step": 11420 }, { "epoch": 0.48316270412048395, "grad_norm": 0.2977537214756012, "learning_rate": 0.001, "loss": 2.1588, "step": 11421 }, { "epoch": 0.48320500888400036, "grad_norm": 1.3595587015151978, "learning_rate": 0.001, "loss": 2.6004, "step": 11422 }, { "epoch": 0.4832473136475167, "grad_norm": 2.3829686641693115, "learning_rate": 0.001, "loss": 2.6128, "step": 11423 }, { "epoch": 0.4832896184110331, "grad_norm": 0.2193344682455063, "learning_rate": 0.001, "loss": 2.9908, "step": 11424 }, { "epoch": 0.4833319231745494, "grad_norm": 0.19066233932971954, "learning_rate": 0.001, "loss": 2.025, "step": 11425 }, { "epoch": 0.48337422793806584, "grad_norm": 0.20256930589675903, "learning_rate": 0.001, "loss": 1.9787, "step": 11426 }, { "epoch": 0.4834165327015822, "grad_norm": 0.22213421761989594, "learning_rate": 0.001, "loss": 1.9955, "step": 11427 }, { "epoch": 0.48345883746509855, "grad_norm": 0.2642061412334442, "learning_rate": 0.001, "loss": 2.1242, "step": 11428 }, { "epoch": 0.48350114222861496, "grad_norm": 0.23516133427619934, "learning_rate": 0.001, "loss": 2.2582, "step": 11429 }, { "epoch": 0.4835434469921313, "grad_norm": 0.22922705113887787, "learning_rate": 0.001, "loss": 2.3993, "step": 11430 }, { "epoch": 0.48358575175564766, "grad_norm": 0.2899877727031708, "learning_rate": 0.001, "loss": 3.879, "step": 11431 }, { "epoch": 0.4836280565191641, "grad_norm": 0.17571720480918884, "learning_rate": 0.001, "loss": 2.1495, "step": 11432 }, { "epoch": 0.4836703612826804, "grad_norm": 2.013770580291748, "learning_rate": 0.001, "loss": 2.7839, "step": 11433 }, { "epoch": 0.4837126660461968, "grad_norm": 3.0192947387695312, "learning_rate": 0.001, "loss": 2.1995, "step": 11434 }, { "epoch": 0.4837549708097132, "grad_norm": 0.20939669013023376, "learning_rate": 0.001, "loss": 2.1262, "step": 11435 }, { "epoch": 0.48379727557322955, "grad_norm": 0.2533853352069855, "learning_rate": 0.001, "loss": 2.8203, "step": 11436 }, { "epoch": 0.4838395803367459, "grad_norm": 0.2081190049648285, "learning_rate": 0.001, "loss": 2.3127, "step": 11437 }, { "epoch": 0.4838818851002623, "grad_norm": 0.1944902241230011, "learning_rate": 0.001, "loss": 1.8991, "step": 11438 }, { "epoch": 0.48392418986377866, "grad_norm": 0.19556209444999695, "learning_rate": 0.001, "loss": 2.3796, "step": 11439 }, { "epoch": 0.483966494627295, "grad_norm": 0.27376073598861694, "learning_rate": 0.001, "loss": 2.5798, "step": 11440 }, { "epoch": 0.48400879939081143, "grad_norm": 0.8901777863502502, "learning_rate": 0.001, "loss": 3.6993, "step": 11441 }, { "epoch": 0.4840511041543278, "grad_norm": 0.5265764594078064, "learning_rate": 0.001, "loss": 1.7463, "step": 11442 }, { "epoch": 0.48409340891784414, "grad_norm": 0.4732452630996704, "learning_rate": 0.001, "loss": 1.6442, "step": 11443 }, { "epoch": 0.48413571368136055, "grad_norm": 0.23689015209674835, "learning_rate": 0.001, "loss": 2.367, "step": 11444 }, { "epoch": 0.4841780184448769, "grad_norm": 0.1857021003961563, "learning_rate": 0.001, "loss": 2.0991, "step": 11445 }, { "epoch": 0.48422032320839326, "grad_norm": 9.68715763092041, "learning_rate": 0.001, "loss": 1.781, "step": 11446 }, { "epoch": 0.4842626279719096, "grad_norm": 0.18469716608524323, "learning_rate": 0.001, "loss": 2.3133, "step": 11447 }, { "epoch": 0.484304932735426, "grad_norm": 0.195147305727005, "learning_rate": 0.001, "loss": 3.6543, "step": 11448 }, { "epoch": 0.4843472374989424, "grad_norm": 0.19625887274742126, "learning_rate": 0.001, "loss": 1.7704, "step": 11449 }, { "epoch": 0.4843895422624587, "grad_norm": 1.0234781503677368, "learning_rate": 0.001, "loss": 2.4147, "step": 11450 }, { "epoch": 0.48443184702597514, "grad_norm": 1.8966562747955322, "learning_rate": 0.001, "loss": 1.4547, "step": 11451 }, { "epoch": 0.4844741517894915, "grad_norm": 0.29470208287239075, "learning_rate": 0.001, "loss": 2.3858, "step": 11452 }, { "epoch": 0.48451645655300785, "grad_norm": 0.4111064672470093, "learning_rate": 0.001, "loss": 2.43, "step": 11453 }, { "epoch": 0.48455876131652426, "grad_norm": 0.1965835988521576, "learning_rate": 0.001, "loss": 1.5909, "step": 11454 }, { "epoch": 0.4846010660800406, "grad_norm": 0.19790798425674438, "learning_rate": 0.001, "loss": 2.1684, "step": 11455 }, { "epoch": 0.48464337084355696, "grad_norm": 0.20888280868530273, "learning_rate": 0.001, "loss": 2.1558, "step": 11456 }, { "epoch": 0.4846856756070734, "grad_norm": 0.16559502482414246, "learning_rate": 0.001, "loss": 1.9507, "step": 11457 }, { "epoch": 0.48472798037058973, "grad_norm": 0.1633719652891159, "learning_rate": 0.001, "loss": 2.0244, "step": 11458 }, { "epoch": 0.4847702851341061, "grad_norm": 0.1771150678396225, "learning_rate": 0.001, "loss": 2.4581, "step": 11459 }, { "epoch": 0.4848125898976225, "grad_norm": 0.2200511246919632, "learning_rate": 0.001, "loss": 2.4251, "step": 11460 }, { "epoch": 0.48485489466113885, "grad_norm": 0.17225952446460724, "learning_rate": 0.001, "loss": 2.7225, "step": 11461 }, { "epoch": 0.4848971994246552, "grad_norm": 0.555501401424408, "learning_rate": 0.001, "loss": 2.3918, "step": 11462 }, { "epoch": 0.4849395041881716, "grad_norm": 0.16668234765529633, "learning_rate": 0.001, "loss": 2.0368, "step": 11463 }, { "epoch": 0.48498180895168796, "grad_norm": 0.27358829975128174, "learning_rate": 0.001, "loss": 1.9445, "step": 11464 }, { "epoch": 0.4850241137152043, "grad_norm": 0.18736232817173004, "learning_rate": 0.001, "loss": 2.7039, "step": 11465 }, { "epoch": 0.48506641847872073, "grad_norm": 0.15612122416496277, "learning_rate": 0.001, "loss": 2.6935, "step": 11466 }, { "epoch": 0.4851087232422371, "grad_norm": 2.6563851833343506, "learning_rate": 0.001, "loss": 1.7286, "step": 11467 }, { "epoch": 0.48515102800575344, "grad_norm": 0.1503165066242218, "learning_rate": 0.001, "loss": 2.1244, "step": 11468 }, { "epoch": 0.48519333276926985, "grad_norm": 0.19594672322273254, "learning_rate": 0.001, "loss": 2.0981, "step": 11469 }, { "epoch": 0.4852356375327862, "grad_norm": 0.1888262778520584, "learning_rate": 0.001, "loss": 2.8126, "step": 11470 }, { "epoch": 0.48527794229630256, "grad_norm": 0.20222362875938416, "learning_rate": 0.001, "loss": 2.2918, "step": 11471 }, { "epoch": 0.4853202470598189, "grad_norm": 1.1823304891586304, "learning_rate": 0.001, "loss": 1.7587, "step": 11472 }, { "epoch": 0.4853625518233353, "grad_norm": 0.19512400031089783, "learning_rate": 0.001, "loss": 2.1734, "step": 11473 }, { "epoch": 0.4854048565868517, "grad_norm": 1.402773380279541, "learning_rate": 0.001, "loss": 2.6338, "step": 11474 }, { "epoch": 0.48544716135036803, "grad_norm": 0.22815269231796265, "learning_rate": 0.001, "loss": 2.9793, "step": 11475 }, { "epoch": 0.48548946611388444, "grad_norm": 0.17088262736797333, "learning_rate": 0.001, "loss": 2.0868, "step": 11476 }, { "epoch": 0.4855317708774008, "grad_norm": 0.16063320636749268, "learning_rate": 0.001, "loss": 1.6893, "step": 11477 }, { "epoch": 0.48557407564091715, "grad_norm": 0.8555530309677124, "learning_rate": 0.001, "loss": 2.3043, "step": 11478 }, { "epoch": 0.48561638040443356, "grad_norm": 0.27816876769065857, "learning_rate": 0.001, "loss": 3.0965, "step": 11479 }, { "epoch": 0.4856586851679499, "grad_norm": 0.7349559664726257, "learning_rate": 0.001, "loss": 2.1048, "step": 11480 }, { "epoch": 0.48570098993146626, "grad_norm": 0.16931132972240448, "learning_rate": 0.001, "loss": 2.0924, "step": 11481 }, { "epoch": 0.4857432946949827, "grad_norm": 0.15484954416751862, "learning_rate": 0.001, "loss": 1.9129, "step": 11482 }, { "epoch": 0.48578559945849903, "grad_norm": 0.3390078842639923, "learning_rate": 0.001, "loss": 2.2157, "step": 11483 }, { "epoch": 0.4858279042220154, "grad_norm": 0.20084303617477417, "learning_rate": 0.001, "loss": 2.4884, "step": 11484 }, { "epoch": 0.4858702089855318, "grad_norm": 0.24924889206886292, "learning_rate": 0.001, "loss": 2.4044, "step": 11485 }, { "epoch": 0.48591251374904815, "grad_norm": 0.19741028547286987, "learning_rate": 0.001, "loss": 2.1854, "step": 11486 }, { "epoch": 0.4859548185125645, "grad_norm": 0.20189212262630463, "learning_rate": 0.001, "loss": 2.4314, "step": 11487 }, { "epoch": 0.4859971232760809, "grad_norm": 1.0460984706878662, "learning_rate": 0.001, "loss": 2.094, "step": 11488 }, { "epoch": 0.48603942803959727, "grad_norm": 2.5869855880737305, "learning_rate": 0.001, "loss": 2.3084, "step": 11489 }, { "epoch": 0.4860817328031136, "grad_norm": 0.25881245732307434, "learning_rate": 0.001, "loss": 3.5865, "step": 11490 }, { "epoch": 0.48612403756663003, "grad_norm": 3.5187735557556152, "learning_rate": 0.001, "loss": 1.9536, "step": 11491 }, { "epoch": 0.4861663423301464, "grad_norm": 0.179852694272995, "learning_rate": 0.001, "loss": 2.2011, "step": 11492 }, { "epoch": 0.48620864709366274, "grad_norm": 0.37321174144744873, "learning_rate": 0.001, "loss": 2.0074, "step": 11493 }, { "epoch": 0.4862509518571791, "grad_norm": 0.5720106959342957, "learning_rate": 0.001, "loss": 2.0811, "step": 11494 }, { "epoch": 0.4862932566206955, "grad_norm": 0.31446918845176697, "learning_rate": 0.001, "loss": 2.0816, "step": 11495 }, { "epoch": 0.48633556138421186, "grad_norm": 0.24225953221321106, "learning_rate": 0.001, "loss": 1.9998, "step": 11496 }, { "epoch": 0.4863778661477282, "grad_norm": 0.20280331373214722, "learning_rate": 0.001, "loss": 2.222, "step": 11497 }, { "epoch": 0.4864201709112446, "grad_norm": 0.1985984891653061, "learning_rate": 0.001, "loss": 2.5404, "step": 11498 }, { "epoch": 0.486462475674761, "grad_norm": 0.25399044156074524, "learning_rate": 0.001, "loss": 2.2409, "step": 11499 }, { "epoch": 0.48650478043827733, "grad_norm": 0.376498281955719, "learning_rate": 0.001, "loss": 2.584, "step": 11500 }, { "epoch": 0.48654708520179374, "grad_norm": 0.15857632458209991, "learning_rate": 0.001, "loss": 2.2376, "step": 11501 }, { "epoch": 0.4865893899653101, "grad_norm": 0.19442448019981384, "learning_rate": 0.001, "loss": 1.911, "step": 11502 }, { "epoch": 0.48663169472882645, "grad_norm": 0.28401103615760803, "learning_rate": 0.001, "loss": 2.037, "step": 11503 }, { "epoch": 0.48667399949234286, "grad_norm": 0.1915339082479477, "learning_rate": 0.001, "loss": 2.5886, "step": 11504 }, { "epoch": 0.4867163042558592, "grad_norm": 0.13712449371814728, "learning_rate": 0.001, "loss": 1.6988, "step": 11505 }, { "epoch": 0.48675860901937557, "grad_norm": 0.16903550922870636, "learning_rate": 0.001, "loss": 2.3553, "step": 11506 }, { "epoch": 0.486800913782892, "grad_norm": 0.3302583396434784, "learning_rate": 0.001, "loss": 2.4041, "step": 11507 }, { "epoch": 0.48684321854640833, "grad_norm": 0.16169388592243195, "learning_rate": 0.001, "loss": 1.5953, "step": 11508 }, { "epoch": 0.4868855233099247, "grad_norm": 0.19090786576271057, "learning_rate": 0.001, "loss": 2.6609, "step": 11509 }, { "epoch": 0.4869278280734411, "grad_norm": 0.2367997020483017, "learning_rate": 0.001, "loss": 2.7336, "step": 11510 }, { "epoch": 0.48697013283695745, "grad_norm": 29.403594970703125, "learning_rate": 0.001, "loss": 2.4013, "step": 11511 }, { "epoch": 0.4870124376004738, "grad_norm": 0.26779159903526306, "learning_rate": 0.001, "loss": 2.9784, "step": 11512 }, { "epoch": 0.4870547423639902, "grad_norm": 0.4845396876335144, "learning_rate": 0.001, "loss": 2.1083, "step": 11513 }, { "epoch": 0.48709704712750657, "grad_norm": 0.5120859742164612, "learning_rate": 0.001, "loss": 1.9852, "step": 11514 }, { "epoch": 0.4871393518910229, "grad_norm": 0.1959608644247055, "learning_rate": 0.001, "loss": 2.3048, "step": 11515 }, { "epoch": 0.4871816566545393, "grad_norm": 5.542116641998291, "learning_rate": 0.001, "loss": 2.3137, "step": 11516 }, { "epoch": 0.4872239614180557, "grad_norm": 0.26933497190475464, "learning_rate": 0.001, "loss": 2.1619, "step": 11517 }, { "epoch": 0.48726626618157204, "grad_norm": 0.24525229632854462, "learning_rate": 0.001, "loss": 2.0777, "step": 11518 }, { "epoch": 0.4873085709450884, "grad_norm": 0.20497070252895355, "learning_rate": 0.001, "loss": 2.317, "step": 11519 }, { "epoch": 0.4873508757086048, "grad_norm": 0.31247276067733765, "learning_rate": 0.001, "loss": 2.4255, "step": 11520 }, { "epoch": 0.48739318047212116, "grad_norm": 0.29333776235580444, "learning_rate": 0.001, "loss": 1.945, "step": 11521 }, { "epoch": 0.4874354852356375, "grad_norm": 1.8194544315338135, "learning_rate": 0.001, "loss": 2.4755, "step": 11522 }, { "epoch": 0.4874777899991539, "grad_norm": 0.182512104511261, "learning_rate": 0.001, "loss": 2.1394, "step": 11523 }, { "epoch": 0.4875200947626703, "grad_norm": 0.19571569561958313, "learning_rate": 0.001, "loss": 2.2593, "step": 11524 }, { "epoch": 0.48756239952618663, "grad_norm": 0.21386699378490448, "learning_rate": 0.001, "loss": 1.9803, "step": 11525 }, { "epoch": 0.48760470428970304, "grad_norm": 1.0484811067581177, "learning_rate": 0.001, "loss": 1.9055, "step": 11526 }, { "epoch": 0.4876470090532194, "grad_norm": 0.19381646811962128, "learning_rate": 0.001, "loss": 2.8984, "step": 11527 }, { "epoch": 0.48768931381673575, "grad_norm": 0.18061847984790802, "learning_rate": 0.001, "loss": 2.9683, "step": 11528 }, { "epoch": 0.48773161858025216, "grad_norm": 1.0904494524002075, "learning_rate": 0.001, "loss": 1.7487, "step": 11529 }, { "epoch": 0.4877739233437685, "grad_norm": 0.5851990580558777, "learning_rate": 0.001, "loss": 4.4732, "step": 11530 }, { "epoch": 0.48781622810728487, "grad_norm": 0.21629658341407776, "learning_rate": 0.001, "loss": 3.3176, "step": 11531 }, { "epoch": 0.4878585328708013, "grad_norm": 0.1830849051475525, "learning_rate": 0.001, "loss": 1.9741, "step": 11532 }, { "epoch": 0.48790083763431763, "grad_norm": 0.6447810530662537, "learning_rate": 0.001, "loss": 1.6206, "step": 11533 }, { "epoch": 0.487943142397834, "grad_norm": 0.22213850915431976, "learning_rate": 0.001, "loss": 2.1542, "step": 11534 }, { "epoch": 0.4879854471613504, "grad_norm": 0.6052437424659729, "learning_rate": 0.001, "loss": 2.0151, "step": 11535 }, { "epoch": 0.48802775192486675, "grad_norm": 0.15839864313602448, "learning_rate": 0.001, "loss": 1.9729, "step": 11536 }, { "epoch": 0.4880700566883831, "grad_norm": 1.3613576889038086, "learning_rate": 0.001, "loss": 2.4232, "step": 11537 }, { "epoch": 0.48811236145189946, "grad_norm": 0.19417421519756317, "learning_rate": 0.001, "loss": 1.7537, "step": 11538 }, { "epoch": 0.48815466621541587, "grad_norm": 0.3457423448562622, "learning_rate": 0.001, "loss": 2.1222, "step": 11539 }, { "epoch": 0.4881969709789322, "grad_norm": 0.20159010589122772, "learning_rate": 0.001, "loss": 2.4426, "step": 11540 }, { "epoch": 0.4882392757424486, "grad_norm": 0.188156396150589, "learning_rate": 0.001, "loss": 2.3599, "step": 11541 }, { "epoch": 0.488281580505965, "grad_norm": 0.5900790095329285, "learning_rate": 0.001, "loss": 2.0175, "step": 11542 }, { "epoch": 0.48832388526948134, "grad_norm": 0.2750971019268036, "learning_rate": 0.001, "loss": 2.0602, "step": 11543 }, { "epoch": 0.4883661900329977, "grad_norm": 0.1912364661693573, "learning_rate": 0.001, "loss": 2.2475, "step": 11544 }, { "epoch": 0.4884084947965141, "grad_norm": 0.20477798581123352, "learning_rate": 0.001, "loss": 2.7998, "step": 11545 }, { "epoch": 0.48845079956003046, "grad_norm": 0.1736663430929184, "learning_rate": 0.001, "loss": 2.1871, "step": 11546 }, { "epoch": 0.4884931043235468, "grad_norm": 0.1695953756570816, "learning_rate": 0.001, "loss": 2.4564, "step": 11547 }, { "epoch": 0.4885354090870632, "grad_norm": 0.19503745436668396, "learning_rate": 0.001, "loss": 2.5058, "step": 11548 }, { "epoch": 0.4885777138505796, "grad_norm": 0.1817982941865921, "learning_rate": 0.001, "loss": 2.7636, "step": 11549 }, { "epoch": 0.48862001861409593, "grad_norm": 0.18275293707847595, "learning_rate": 0.001, "loss": 3.3698, "step": 11550 }, { "epoch": 0.48866232337761234, "grad_norm": 5.7995758056640625, "learning_rate": 0.001, "loss": 3.3286, "step": 11551 }, { "epoch": 0.4887046281411287, "grad_norm": 0.1749723106622696, "learning_rate": 0.001, "loss": 2.4678, "step": 11552 }, { "epoch": 0.48874693290464505, "grad_norm": 0.22669129073619843, "learning_rate": 0.001, "loss": 2.0439, "step": 11553 }, { "epoch": 0.48878923766816146, "grad_norm": 0.4565039277076721, "learning_rate": 0.001, "loss": 2.2781, "step": 11554 }, { "epoch": 0.4888315424316778, "grad_norm": 0.15998007357120514, "learning_rate": 0.001, "loss": 2.4679, "step": 11555 }, { "epoch": 0.48887384719519417, "grad_norm": 0.17309017479419708, "learning_rate": 0.001, "loss": 2.0567, "step": 11556 }, { "epoch": 0.4889161519587106, "grad_norm": 0.16389106214046478, "learning_rate": 0.001, "loss": 2.0645, "step": 11557 }, { "epoch": 0.48895845672222693, "grad_norm": 0.1628233641386032, "learning_rate": 0.001, "loss": 2.142, "step": 11558 }, { "epoch": 0.4890007614857433, "grad_norm": 0.17292705178260803, "learning_rate": 0.001, "loss": 2.4255, "step": 11559 }, { "epoch": 0.48904306624925964, "grad_norm": 0.13833680748939514, "learning_rate": 0.001, "loss": 1.5506, "step": 11560 }, { "epoch": 0.48908537101277605, "grad_norm": 0.1819964051246643, "learning_rate": 0.001, "loss": 2.4794, "step": 11561 }, { "epoch": 0.4891276757762924, "grad_norm": 0.17787259817123413, "learning_rate": 0.001, "loss": 2.5472, "step": 11562 }, { "epoch": 0.48916998053980876, "grad_norm": 0.1847681850194931, "learning_rate": 0.001, "loss": 1.929, "step": 11563 }, { "epoch": 0.48921228530332517, "grad_norm": 0.16343270242214203, "learning_rate": 0.001, "loss": 1.9423, "step": 11564 }, { "epoch": 0.4892545900668415, "grad_norm": 0.18636071681976318, "learning_rate": 0.001, "loss": 1.5396, "step": 11565 }, { "epoch": 0.4892968948303579, "grad_norm": 0.3037877678871155, "learning_rate": 0.001, "loss": 1.9659, "step": 11566 }, { "epoch": 0.4893391995938743, "grad_norm": 5.196141242980957, "learning_rate": 0.001, "loss": 2.4534, "step": 11567 }, { "epoch": 0.48938150435739064, "grad_norm": 0.16297638416290283, "learning_rate": 0.001, "loss": 2.0671, "step": 11568 }, { "epoch": 0.489423809120907, "grad_norm": 0.14544682204723358, "learning_rate": 0.001, "loss": 2.069, "step": 11569 }, { "epoch": 0.4894661138844234, "grad_norm": 0.922217071056366, "learning_rate": 0.001, "loss": 2.0644, "step": 11570 }, { "epoch": 0.48950841864793976, "grad_norm": 0.13835956156253815, "learning_rate": 0.001, "loss": 1.6519, "step": 11571 }, { "epoch": 0.4895507234114561, "grad_norm": 0.3947632610797882, "learning_rate": 0.001, "loss": 2.2417, "step": 11572 }, { "epoch": 0.4895930281749725, "grad_norm": 0.15445981919765472, "learning_rate": 0.001, "loss": 2.5499, "step": 11573 }, { "epoch": 0.4896353329384889, "grad_norm": 0.17741024494171143, "learning_rate": 0.001, "loss": 1.8943, "step": 11574 }, { "epoch": 0.48967763770200523, "grad_norm": 0.17265914380550385, "learning_rate": 0.001, "loss": 2.0673, "step": 11575 }, { "epoch": 0.48971994246552164, "grad_norm": 0.640771210193634, "learning_rate": 0.001, "loss": 3.3352, "step": 11576 }, { "epoch": 0.489762247229038, "grad_norm": 0.17222177982330322, "learning_rate": 0.001, "loss": 1.9422, "step": 11577 }, { "epoch": 0.48980455199255435, "grad_norm": 0.18314416706562042, "learning_rate": 0.001, "loss": 1.9239, "step": 11578 }, { "epoch": 0.48984685675607076, "grad_norm": 0.3542311489582062, "learning_rate": 0.001, "loss": 3.8196, "step": 11579 }, { "epoch": 0.4898891615195871, "grad_norm": 0.14956578612327576, "learning_rate": 0.001, "loss": 1.9715, "step": 11580 }, { "epoch": 0.48993146628310347, "grad_norm": 0.17212481796741486, "learning_rate": 0.001, "loss": 2.7058, "step": 11581 }, { "epoch": 0.4899737710466199, "grad_norm": 0.2047460526227951, "learning_rate": 0.001, "loss": 2.4195, "step": 11582 }, { "epoch": 0.49001607581013623, "grad_norm": 0.16925156116485596, "learning_rate": 0.001, "loss": 1.4925, "step": 11583 }, { "epoch": 0.4900583805736526, "grad_norm": 0.17683045566082, "learning_rate": 0.001, "loss": 2.1021, "step": 11584 }, { "epoch": 0.49010068533716894, "grad_norm": 0.19544382393360138, "learning_rate": 0.001, "loss": 2.8976, "step": 11585 }, { "epoch": 0.49014299010068535, "grad_norm": 0.17539872229099274, "learning_rate": 0.001, "loss": 2.7534, "step": 11586 }, { "epoch": 0.4901852948642017, "grad_norm": 0.17753051221370697, "learning_rate": 0.001, "loss": 2.2047, "step": 11587 }, { "epoch": 0.49022759962771806, "grad_norm": 0.16815239191055298, "learning_rate": 0.001, "loss": 1.6325, "step": 11588 }, { "epoch": 0.49026990439123447, "grad_norm": 0.2511793375015259, "learning_rate": 0.001, "loss": 2.8271, "step": 11589 }, { "epoch": 0.4903122091547508, "grad_norm": 3.128819465637207, "learning_rate": 0.001, "loss": 1.7008, "step": 11590 }, { "epoch": 0.4903545139182672, "grad_norm": 0.2257847785949707, "learning_rate": 0.001, "loss": 2.4141, "step": 11591 }, { "epoch": 0.4903968186817836, "grad_norm": 1.591920256614685, "learning_rate": 0.001, "loss": 1.9112, "step": 11592 }, { "epoch": 0.49043912344529994, "grad_norm": 0.2209502011537552, "learning_rate": 0.001, "loss": 2.9542, "step": 11593 }, { "epoch": 0.4904814282088163, "grad_norm": 0.19603808224201202, "learning_rate": 0.001, "loss": 2.5474, "step": 11594 }, { "epoch": 0.4905237329723327, "grad_norm": 0.6029075980186462, "learning_rate": 0.001, "loss": 1.8916, "step": 11595 }, { "epoch": 0.49056603773584906, "grad_norm": 0.1723041832447052, "learning_rate": 0.001, "loss": 2.0029, "step": 11596 }, { "epoch": 0.4906083424993654, "grad_norm": 0.156670480966568, "learning_rate": 0.001, "loss": 1.8345, "step": 11597 }, { "epoch": 0.4906506472628818, "grad_norm": 0.2527156174182892, "learning_rate": 0.001, "loss": 3.4863, "step": 11598 }, { "epoch": 0.4906929520263982, "grad_norm": 0.15961304306983948, "learning_rate": 0.001, "loss": 2.3809, "step": 11599 }, { "epoch": 0.49073525678991453, "grad_norm": 0.16630440950393677, "learning_rate": 0.001, "loss": 2.33, "step": 11600 }, { "epoch": 0.49077756155343094, "grad_norm": 0.1635364443063736, "learning_rate": 0.001, "loss": 2.1123, "step": 11601 }, { "epoch": 0.4908198663169473, "grad_norm": 12.366936683654785, "learning_rate": 0.001, "loss": 2.755, "step": 11602 }, { "epoch": 0.49086217108046365, "grad_norm": 0.16810069978237152, "learning_rate": 0.001, "loss": 2.0871, "step": 11603 }, { "epoch": 0.49090447584398006, "grad_norm": 0.16595399379730225, "learning_rate": 0.001, "loss": 2.1468, "step": 11604 }, { "epoch": 0.4909467806074964, "grad_norm": 0.5384510159492493, "learning_rate": 0.001, "loss": 2.1914, "step": 11605 }, { "epoch": 0.49098908537101277, "grad_norm": 0.18624725937843323, "learning_rate": 0.001, "loss": 3.0921, "step": 11606 }, { "epoch": 0.4910313901345291, "grad_norm": 0.18056440353393555, "learning_rate": 0.001, "loss": 1.936, "step": 11607 }, { "epoch": 0.49107369489804553, "grad_norm": 1.4659069776535034, "learning_rate": 0.001, "loss": 2.5418, "step": 11608 }, { "epoch": 0.4911159996615619, "grad_norm": 0.23289033770561218, "learning_rate": 0.001, "loss": 3.2947, "step": 11609 }, { "epoch": 0.49115830442507824, "grad_norm": 0.18133248388767242, "learning_rate": 0.001, "loss": 1.8595, "step": 11610 }, { "epoch": 0.49120060918859465, "grad_norm": 0.16130225360393524, "learning_rate": 0.001, "loss": 2.1366, "step": 11611 }, { "epoch": 0.491242913952111, "grad_norm": 0.6706212759017944, "learning_rate": 0.001, "loss": 3.0816, "step": 11612 }, { "epoch": 0.49128521871562736, "grad_norm": 0.2708061635494232, "learning_rate": 0.001, "loss": 2.2324, "step": 11613 }, { "epoch": 0.49132752347914377, "grad_norm": 0.1868041753768921, "learning_rate": 0.001, "loss": 2.1089, "step": 11614 }, { "epoch": 0.4913698282426601, "grad_norm": 0.21697242558002472, "learning_rate": 0.001, "loss": 2.0443, "step": 11615 }, { "epoch": 0.4914121330061765, "grad_norm": 0.1955055445432663, "learning_rate": 0.001, "loss": 2.3835, "step": 11616 }, { "epoch": 0.4914544377696929, "grad_norm": 0.18908244371414185, "learning_rate": 0.001, "loss": 3.2207, "step": 11617 }, { "epoch": 0.49149674253320924, "grad_norm": 0.17205405235290527, "learning_rate": 0.001, "loss": 2.0338, "step": 11618 }, { "epoch": 0.4915390472967256, "grad_norm": 0.2222311645746231, "learning_rate": 0.001, "loss": 2.4758, "step": 11619 }, { "epoch": 0.491581352060242, "grad_norm": 0.552980899810791, "learning_rate": 0.001, "loss": 1.7828, "step": 11620 }, { "epoch": 0.49162365682375836, "grad_norm": 0.17426225543022156, "learning_rate": 0.001, "loss": 2.4719, "step": 11621 }, { "epoch": 0.4916659615872747, "grad_norm": 0.17539294064044952, "learning_rate": 0.001, "loss": 2.2529, "step": 11622 }, { "epoch": 0.4917082663507911, "grad_norm": 0.38558903336524963, "learning_rate": 0.001, "loss": 1.621, "step": 11623 }, { "epoch": 0.4917505711143075, "grad_norm": 12.589545249938965, "learning_rate": 0.001, "loss": 3.9498, "step": 11624 }, { "epoch": 0.49179287587782383, "grad_norm": 0.18124458193778992, "learning_rate": 0.001, "loss": 2.783, "step": 11625 }, { "epoch": 0.49183518064134024, "grad_norm": 0.1963767558336258, "learning_rate": 0.001, "loss": 1.9605, "step": 11626 }, { "epoch": 0.4918774854048566, "grad_norm": 0.6345813274383545, "learning_rate": 0.001, "loss": 2.0434, "step": 11627 }, { "epoch": 0.49191979016837295, "grad_norm": 0.21551281213760376, "learning_rate": 0.001, "loss": 3.1873, "step": 11628 }, { "epoch": 0.4919620949318893, "grad_norm": 0.17613928020000458, "learning_rate": 0.001, "loss": 2.0574, "step": 11629 }, { "epoch": 0.4920043996954057, "grad_norm": 0.20698675513267517, "learning_rate": 0.001, "loss": 3.0403, "step": 11630 }, { "epoch": 0.49204670445892207, "grad_norm": 0.2309427410364151, "learning_rate": 0.001, "loss": 2.8583, "step": 11631 }, { "epoch": 0.4920890092224384, "grad_norm": 0.18390582501888275, "learning_rate": 0.001, "loss": 1.6705, "step": 11632 }, { "epoch": 0.49213131398595483, "grad_norm": 0.2480992078781128, "learning_rate": 0.001, "loss": 2.2437, "step": 11633 }, { "epoch": 0.4921736187494712, "grad_norm": 0.5748304724693298, "learning_rate": 0.001, "loss": 2.3204, "step": 11634 }, { "epoch": 0.49221592351298754, "grad_norm": 0.151663139462471, "learning_rate": 0.001, "loss": 2.4468, "step": 11635 }, { "epoch": 0.49225822827650395, "grad_norm": 0.7663713693618774, "learning_rate": 0.001, "loss": 2.0351, "step": 11636 }, { "epoch": 0.4923005330400203, "grad_norm": 0.2127770632505417, "learning_rate": 0.001, "loss": 2.0751, "step": 11637 }, { "epoch": 0.49234283780353666, "grad_norm": 0.6651235222816467, "learning_rate": 0.001, "loss": 2.5762, "step": 11638 }, { "epoch": 0.49238514256705307, "grad_norm": 0.18715116381645203, "learning_rate": 0.001, "loss": 1.8795, "step": 11639 }, { "epoch": 0.4924274473305694, "grad_norm": 0.45163238048553467, "learning_rate": 0.001, "loss": 2.6674, "step": 11640 }, { "epoch": 0.4924697520940858, "grad_norm": 0.2655963599681854, "learning_rate": 0.001, "loss": 1.9527, "step": 11641 }, { "epoch": 0.4925120568576022, "grad_norm": 0.2013363540172577, "learning_rate": 0.001, "loss": 3.415, "step": 11642 }, { "epoch": 0.49255436162111854, "grad_norm": 0.20579899847507477, "learning_rate": 0.001, "loss": 2.5534, "step": 11643 }, { "epoch": 0.4925966663846349, "grad_norm": 0.6283543705940247, "learning_rate": 0.001, "loss": 2.4812, "step": 11644 }, { "epoch": 0.4926389711481513, "grad_norm": 0.19560718536376953, "learning_rate": 0.001, "loss": 1.6794, "step": 11645 }, { "epoch": 0.49268127591166766, "grad_norm": 0.20248031616210938, "learning_rate": 0.001, "loss": 2.4745, "step": 11646 }, { "epoch": 0.492723580675184, "grad_norm": 0.20276713371276855, "learning_rate": 0.001, "loss": 2.1166, "step": 11647 }, { "epoch": 0.4927658854387004, "grad_norm": 0.21462062001228333, "learning_rate": 0.001, "loss": 2.0011, "step": 11648 }, { "epoch": 0.4928081902022168, "grad_norm": 0.1634545773267746, "learning_rate": 0.001, "loss": 1.8337, "step": 11649 }, { "epoch": 0.49285049496573313, "grad_norm": 0.18558084964752197, "learning_rate": 0.001, "loss": 1.4515, "step": 11650 }, { "epoch": 0.4928927997292495, "grad_norm": 0.21327702701091766, "learning_rate": 0.001, "loss": 2.023, "step": 11651 }, { "epoch": 0.4929351044927659, "grad_norm": 0.18165309727191925, "learning_rate": 0.001, "loss": 2.1323, "step": 11652 }, { "epoch": 0.49297740925628225, "grad_norm": 0.16911716759204865, "learning_rate": 0.001, "loss": 2.1337, "step": 11653 }, { "epoch": 0.4930197140197986, "grad_norm": 0.7974897623062134, "learning_rate": 0.001, "loss": 3.1178, "step": 11654 }, { "epoch": 0.493062018783315, "grad_norm": 0.2036375254392624, "learning_rate": 0.001, "loss": 2.142, "step": 11655 }, { "epoch": 0.49310432354683137, "grad_norm": 0.21488189697265625, "learning_rate": 0.001, "loss": 2.9894, "step": 11656 }, { "epoch": 0.4931466283103477, "grad_norm": 0.1535966992378235, "learning_rate": 0.001, "loss": 2.5775, "step": 11657 }, { "epoch": 0.49318893307386413, "grad_norm": 0.19664905965328217, "learning_rate": 0.001, "loss": 2.1606, "step": 11658 }, { "epoch": 0.4932312378373805, "grad_norm": 0.45215147733688354, "learning_rate": 0.001, "loss": 1.9469, "step": 11659 }, { "epoch": 0.49327354260089684, "grad_norm": 0.17420543730258942, "learning_rate": 0.001, "loss": 1.9453, "step": 11660 }, { "epoch": 0.49331584736441325, "grad_norm": 0.1706647127866745, "learning_rate": 0.001, "loss": 2.4864, "step": 11661 }, { "epoch": 0.4933581521279296, "grad_norm": 0.17685231566429138, "learning_rate": 0.001, "loss": 2.1576, "step": 11662 }, { "epoch": 0.49340045689144596, "grad_norm": 0.14751426875591278, "learning_rate": 0.001, "loss": 2.7795, "step": 11663 }, { "epoch": 0.49344276165496237, "grad_norm": 0.33146944642066956, "learning_rate": 0.001, "loss": 2.1473, "step": 11664 }, { "epoch": 0.4934850664184787, "grad_norm": 0.1822434514760971, "learning_rate": 0.001, "loss": 2.3431, "step": 11665 }, { "epoch": 0.4935273711819951, "grad_norm": 0.17229153215885162, "learning_rate": 0.001, "loss": 1.9659, "step": 11666 }, { "epoch": 0.4935696759455115, "grad_norm": 0.17021816968917847, "learning_rate": 0.001, "loss": 2.4419, "step": 11667 }, { "epoch": 0.49361198070902784, "grad_norm": 0.15960778295993805, "learning_rate": 0.001, "loss": 1.9672, "step": 11668 }, { "epoch": 0.4936542854725442, "grad_norm": 0.28872665762901306, "learning_rate": 0.001, "loss": 4.1413, "step": 11669 }, { "epoch": 0.4936965902360606, "grad_norm": 0.16721804440021515, "learning_rate": 0.001, "loss": 1.9916, "step": 11670 }, { "epoch": 0.49373889499957696, "grad_norm": 0.16407166421413422, "learning_rate": 0.001, "loss": 1.6337, "step": 11671 }, { "epoch": 0.4937811997630933, "grad_norm": 0.16153931617736816, "learning_rate": 0.001, "loss": 1.9922, "step": 11672 }, { "epoch": 0.49382350452660967, "grad_norm": 0.14842648804187775, "learning_rate": 0.001, "loss": 1.9083, "step": 11673 }, { "epoch": 0.4938658092901261, "grad_norm": 0.301229327917099, "learning_rate": 0.001, "loss": 2.2091, "step": 11674 }, { "epoch": 0.49390811405364243, "grad_norm": 0.1671200543642044, "learning_rate": 0.001, "loss": 2.6286, "step": 11675 }, { "epoch": 0.4939504188171588, "grad_norm": 0.1930837333202362, "learning_rate": 0.001, "loss": 2.7532, "step": 11676 }, { "epoch": 0.4939927235806752, "grad_norm": 0.1453826129436493, "learning_rate": 0.001, "loss": 2.1149, "step": 11677 }, { "epoch": 0.49403502834419155, "grad_norm": 0.16736523807048798, "learning_rate": 0.001, "loss": 2.0743, "step": 11678 }, { "epoch": 0.4940773331077079, "grad_norm": 0.14059995114803314, "learning_rate": 0.001, "loss": 1.8672, "step": 11679 }, { "epoch": 0.4941196378712243, "grad_norm": 0.8272671699523926, "learning_rate": 0.001, "loss": 2.879, "step": 11680 }, { "epoch": 0.49416194263474067, "grad_norm": 0.1517970710992813, "learning_rate": 0.001, "loss": 2.3603, "step": 11681 }, { "epoch": 0.494204247398257, "grad_norm": 0.19491681456565857, "learning_rate": 0.001, "loss": 1.7711, "step": 11682 }, { "epoch": 0.49424655216177343, "grad_norm": 0.16503435373306274, "learning_rate": 0.001, "loss": 2.2452, "step": 11683 }, { "epoch": 0.4942888569252898, "grad_norm": 0.2520374059677124, "learning_rate": 0.001, "loss": 2.4305, "step": 11684 }, { "epoch": 0.49433116168880614, "grad_norm": 0.325879842042923, "learning_rate": 0.001, "loss": 1.5981, "step": 11685 }, { "epoch": 0.49437346645232255, "grad_norm": 0.17779172956943512, "learning_rate": 0.001, "loss": 2.5801, "step": 11686 }, { "epoch": 0.4944157712158389, "grad_norm": 0.21204549074172974, "learning_rate": 0.001, "loss": 2.7278, "step": 11687 }, { "epoch": 0.49445807597935526, "grad_norm": 0.21863149106502533, "learning_rate": 0.001, "loss": 2.3487, "step": 11688 }, { "epoch": 0.49450038074287167, "grad_norm": 0.2039804309606552, "learning_rate": 0.001, "loss": 2.3773, "step": 11689 }, { "epoch": 0.494542685506388, "grad_norm": 0.39557844400405884, "learning_rate": 0.001, "loss": 1.7114, "step": 11690 }, { "epoch": 0.4945849902699044, "grad_norm": 0.28419822454452515, "learning_rate": 0.001, "loss": 1.9244, "step": 11691 }, { "epoch": 0.4946272950334208, "grad_norm": 3.456125020980835, "learning_rate": 0.001, "loss": 2.3652, "step": 11692 }, { "epoch": 0.49466959979693714, "grad_norm": 0.1735409051179886, "learning_rate": 0.001, "loss": 1.5861, "step": 11693 }, { "epoch": 0.4947119045604535, "grad_norm": 0.2236192673444748, "learning_rate": 0.001, "loss": 2.8274, "step": 11694 }, { "epoch": 0.49475420932396985, "grad_norm": 4.79661226272583, "learning_rate": 0.001, "loss": 2.3105, "step": 11695 }, { "epoch": 0.49479651408748626, "grad_norm": 0.15469114482402802, "learning_rate": 0.001, "loss": 1.5461, "step": 11696 }, { "epoch": 0.4948388188510026, "grad_norm": 0.383112370967865, "learning_rate": 0.001, "loss": 1.8817, "step": 11697 }, { "epoch": 0.49488112361451897, "grad_norm": 0.17388460040092468, "learning_rate": 0.001, "loss": 2.0987, "step": 11698 }, { "epoch": 0.4949234283780354, "grad_norm": 0.21153829991817474, "learning_rate": 0.001, "loss": 2.1188, "step": 11699 }, { "epoch": 0.49496573314155173, "grad_norm": 0.19076654314994812, "learning_rate": 0.001, "loss": 2.7168, "step": 11700 }, { "epoch": 0.4950080379050681, "grad_norm": 0.17775197327136993, "learning_rate": 0.001, "loss": 2.3872, "step": 11701 }, { "epoch": 0.4950503426685845, "grad_norm": 0.18127349019050598, "learning_rate": 0.001, "loss": 2.3911, "step": 11702 }, { "epoch": 0.49509264743210085, "grad_norm": 0.20719602704048157, "learning_rate": 0.001, "loss": 2.4219, "step": 11703 }, { "epoch": 0.4951349521956172, "grad_norm": 1.0996108055114746, "learning_rate": 0.001, "loss": 2.0785, "step": 11704 }, { "epoch": 0.4951772569591336, "grad_norm": 0.20830421149730682, "learning_rate": 0.001, "loss": 2.3977, "step": 11705 }, { "epoch": 0.49521956172264997, "grad_norm": 0.16215115785598755, "learning_rate": 0.001, "loss": 2.1351, "step": 11706 }, { "epoch": 0.4952618664861663, "grad_norm": 0.18560096621513367, "learning_rate": 0.001, "loss": 2.045, "step": 11707 }, { "epoch": 0.49530417124968273, "grad_norm": 1.037510871887207, "learning_rate": 0.001, "loss": 3.0518, "step": 11708 }, { "epoch": 0.4953464760131991, "grad_norm": 0.197851300239563, "learning_rate": 0.001, "loss": 2.118, "step": 11709 }, { "epoch": 0.49538878077671544, "grad_norm": 0.38033074140548706, "learning_rate": 0.001, "loss": 1.8406, "step": 11710 }, { "epoch": 0.49543108554023185, "grad_norm": 0.17631156742572784, "learning_rate": 0.001, "loss": 1.9683, "step": 11711 }, { "epoch": 0.4954733903037482, "grad_norm": 51.784454345703125, "learning_rate": 0.001, "loss": 2.3855, "step": 11712 }, { "epoch": 0.49551569506726456, "grad_norm": 0.14018891751766205, "learning_rate": 0.001, "loss": 2.3842, "step": 11713 }, { "epoch": 0.49555799983078097, "grad_norm": 0.2898732125759125, "learning_rate": 0.001, "loss": 2.951, "step": 11714 }, { "epoch": 0.4956003045942973, "grad_norm": 0.1552821397781372, "learning_rate": 0.001, "loss": 1.5485, "step": 11715 }, { "epoch": 0.4956426093578137, "grad_norm": 0.2228671759366989, "learning_rate": 0.001, "loss": 3.3397, "step": 11716 }, { "epoch": 0.4956849141213301, "grad_norm": 0.17871029675006866, "learning_rate": 0.001, "loss": 2.4764, "step": 11717 }, { "epoch": 0.49572721888484644, "grad_norm": 0.15905271470546722, "learning_rate": 0.001, "loss": 2.8845, "step": 11718 }, { "epoch": 0.4957695236483628, "grad_norm": 0.19031955301761627, "learning_rate": 0.001, "loss": 2.0218, "step": 11719 }, { "epoch": 0.49581182841187915, "grad_norm": 0.1882801651954651, "learning_rate": 0.001, "loss": 2.6585, "step": 11720 }, { "epoch": 0.49585413317539556, "grad_norm": 0.19072453677654266, "learning_rate": 0.001, "loss": 1.9483, "step": 11721 }, { "epoch": 0.4958964379389119, "grad_norm": 0.1833895891904831, "learning_rate": 0.001, "loss": 1.8572, "step": 11722 }, { "epoch": 0.49593874270242827, "grad_norm": 0.1797013133764267, "learning_rate": 0.001, "loss": 1.9646, "step": 11723 }, { "epoch": 0.4959810474659447, "grad_norm": 0.15591877698898315, "learning_rate": 0.001, "loss": 1.7613, "step": 11724 }, { "epoch": 0.49602335222946103, "grad_norm": 0.19704052805900574, "learning_rate": 0.001, "loss": 2.5827, "step": 11725 }, { "epoch": 0.4960656569929774, "grad_norm": 0.14575530588626862, "learning_rate": 0.001, "loss": 1.9924, "step": 11726 }, { "epoch": 0.4961079617564938, "grad_norm": 0.27594003081321716, "learning_rate": 0.001, "loss": 2.2579, "step": 11727 }, { "epoch": 0.49615026652001015, "grad_norm": 0.4626797139644623, "learning_rate": 0.001, "loss": 1.6564, "step": 11728 }, { "epoch": 0.4961925712835265, "grad_norm": 0.16353465616703033, "learning_rate": 0.001, "loss": 2.0006, "step": 11729 }, { "epoch": 0.4962348760470429, "grad_norm": 0.12566418945789337, "learning_rate": 0.001, "loss": 1.9094, "step": 11730 }, { "epoch": 0.49627718081055927, "grad_norm": 0.24055039882659912, "learning_rate": 0.001, "loss": 2.1373, "step": 11731 }, { "epoch": 0.4963194855740756, "grad_norm": 0.5243616104125977, "learning_rate": 0.001, "loss": 1.875, "step": 11732 }, { "epoch": 0.49636179033759203, "grad_norm": 0.2162822037935257, "learning_rate": 0.001, "loss": 1.5605, "step": 11733 }, { "epoch": 0.4964040951011084, "grad_norm": 0.3283466398715973, "learning_rate": 0.001, "loss": 1.8846, "step": 11734 }, { "epoch": 0.49644639986462474, "grad_norm": 0.20547711849212646, "learning_rate": 0.001, "loss": 2.197, "step": 11735 }, { "epoch": 0.49648870462814115, "grad_norm": 0.26368358731269836, "learning_rate": 0.001, "loss": 3.4798, "step": 11736 }, { "epoch": 0.4965310093916575, "grad_norm": 0.6923643350601196, "learning_rate": 0.001, "loss": 2.3685, "step": 11737 }, { "epoch": 0.49657331415517386, "grad_norm": 0.21621890366077423, "learning_rate": 0.001, "loss": 2.4319, "step": 11738 }, { "epoch": 0.49661561891869027, "grad_norm": 0.16339389979839325, "learning_rate": 0.001, "loss": 2.7695, "step": 11739 }, { "epoch": 0.4966579236822066, "grad_norm": 0.16505463421344757, "learning_rate": 0.001, "loss": 2.034, "step": 11740 }, { "epoch": 0.496700228445723, "grad_norm": 0.1787840873003006, "learning_rate": 0.001, "loss": 2.2629, "step": 11741 }, { "epoch": 0.49674253320923933, "grad_norm": 0.34870707988739014, "learning_rate": 0.001, "loss": 2.0484, "step": 11742 }, { "epoch": 0.49678483797275574, "grad_norm": 0.20475776493549347, "learning_rate": 0.001, "loss": 2.0176, "step": 11743 }, { "epoch": 0.4968271427362721, "grad_norm": 3.398766040802002, "learning_rate": 0.001, "loss": 1.8543, "step": 11744 }, { "epoch": 0.49686944749978845, "grad_norm": 0.15450245141983032, "learning_rate": 0.001, "loss": 2.6702, "step": 11745 }, { "epoch": 0.49691175226330486, "grad_norm": 0.58338463306427, "learning_rate": 0.001, "loss": 3.001, "step": 11746 }, { "epoch": 0.4969540570268212, "grad_norm": 0.1919245570898056, "learning_rate": 0.001, "loss": 2.2014, "step": 11747 }, { "epoch": 0.49699636179033757, "grad_norm": 0.2006487101316452, "learning_rate": 0.001, "loss": 1.9795, "step": 11748 }, { "epoch": 0.497038666553854, "grad_norm": 2.63030743598938, "learning_rate": 0.001, "loss": 2.3299, "step": 11749 }, { "epoch": 0.49708097131737033, "grad_norm": 0.16151180863380432, "learning_rate": 0.001, "loss": 1.5329, "step": 11750 }, { "epoch": 0.4971232760808867, "grad_norm": 0.2099246382713318, "learning_rate": 0.001, "loss": 1.9674, "step": 11751 }, { "epoch": 0.4971655808444031, "grad_norm": 0.18836641311645508, "learning_rate": 0.001, "loss": 2.001, "step": 11752 }, { "epoch": 0.49720788560791945, "grad_norm": 0.17593342065811157, "learning_rate": 0.001, "loss": 2.0003, "step": 11753 }, { "epoch": 0.4972501903714358, "grad_norm": 0.21019577980041504, "learning_rate": 0.001, "loss": 3.3791, "step": 11754 }, { "epoch": 0.4972924951349522, "grad_norm": 0.19833973050117493, "learning_rate": 0.001, "loss": 1.5016, "step": 11755 }, { "epoch": 0.49733479989846857, "grad_norm": 0.1627396196126938, "learning_rate": 0.001, "loss": 1.9796, "step": 11756 }, { "epoch": 0.4973771046619849, "grad_norm": 0.18876367807388306, "learning_rate": 0.001, "loss": 2.0286, "step": 11757 }, { "epoch": 0.49741940942550134, "grad_norm": 0.2460365891456604, "learning_rate": 0.001, "loss": 2.237, "step": 11758 }, { "epoch": 0.4974617141890177, "grad_norm": 0.34945163130760193, "learning_rate": 0.001, "loss": 1.7602, "step": 11759 }, { "epoch": 0.49750401895253404, "grad_norm": 0.24172699451446533, "learning_rate": 0.001, "loss": 3.5869, "step": 11760 }, { "epoch": 0.49754632371605045, "grad_norm": 0.19941669702529907, "learning_rate": 0.001, "loss": 1.7282, "step": 11761 }, { "epoch": 0.4975886284795668, "grad_norm": 0.16542641818523407, "learning_rate": 0.001, "loss": 2.2762, "step": 11762 }, { "epoch": 0.49763093324308316, "grad_norm": 0.18127164244651794, "learning_rate": 0.001, "loss": 1.7135, "step": 11763 }, { "epoch": 0.4976732380065995, "grad_norm": 0.16587230563163757, "learning_rate": 0.001, "loss": 1.7269, "step": 11764 }, { "epoch": 0.4977155427701159, "grad_norm": 0.15559205412864685, "learning_rate": 0.001, "loss": 2.0026, "step": 11765 }, { "epoch": 0.4977578475336323, "grad_norm": 0.1646786332130432, "learning_rate": 0.001, "loss": 2.0569, "step": 11766 }, { "epoch": 0.49780015229714863, "grad_norm": 0.1686537265777588, "learning_rate": 0.001, "loss": 2.0358, "step": 11767 }, { "epoch": 0.49784245706066504, "grad_norm": 0.20397379994392395, "learning_rate": 0.001, "loss": 2.5713, "step": 11768 }, { "epoch": 0.4978847618241814, "grad_norm": 0.2577773928642273, "learning_rate": 0.001, "loss": 1.9993, "step": 11769 }, { "epoch": 0.49792706658769775, "grad_norm": 0.1761125773191452, "learning_rate": 0.001, "loss": 2.0988, "step": 11770 }, { "epoch": 0.49796937135121416, "grad_norm": 0.3869670629501343, "learning_rate": 0.001, "loss": 2.7036, "step": 11771 }, { "epoch": 0.4980116761147305, "grad_norm": 0.205720916390419, "learning_rate": 0.001, "loss": 2.7888, "step": 11772 }, { "epoch": 0.49805398087824687, "grad_norm": 0.17144684493541718, "learning_rate": 0.001, "loss": 2.9138, "step": 11773 }, { "epoch": 0.4980962856417633, "grad_norm": 0.4699496626853943, "learning_rate": 0.001, "loss": 2.1829, "step": 11774 }, { "epoch": 0.49813859040527964, "grad_norm": 0.19264104962348938, "learning_rate": 0.001, "loss": 1.7184, "step": 11775 }, { "epoch": 0.498180895168796, "grad_norm": 0.21069301664829254, "learning_rate": 0.001, "loss": 1.8648, "step": 11776 }, { "epoch": 0.4982231999323124, "grad_norm": 0.17750690877437592, "learning_rate": 0.001, "loss": 2.6243, "step": 11777 }, { "epoch": 0.49826550469582875, "grad_norm": 0.17451412975788116, "learning_rate": 0.001, "loss": 1.9341, "step": 11778 }, { "epoch": 0.4983078094593451, "grad_norm": 0.18429610133171082, "learning_rate": 0.001, "loss": 1.8176, "step": 11779 }, { "epoch": 0.4983501142228615, "grad_norm": 0.18034300208091736, "learning_rate": 0.001, "loss": 1.4398, "step": 11780 }, { "epoch": 0.49839241898637787, "grad_norm": 0.2214241474866867, "learning_rate": 0.001, "loss": 2.8547, "step": 11781 }, { "epoch": 0.4984347237498942, "grad_norm": 0.3767451047897339, "learning_rate": 0.001, "loss": 1.8602, "step": 11782 }, { "epoch": 0.49847702851341064, "grad_norm": 0.16168661415576935, "learning_rate": 0.001, "loss": 1.378, "step": 11783 }, { "epoch": 0.498519333276927, "grad_norm": 0.19141288101673126, "learning_rate": 0.001, "loss": 2.4181, "step": 11784 }, { "epoch": 0.49856163804044334, "grad_norm": 0.17355014383792877, "learning_rate": 0.001, "loss": 1.8218, "step": 11785 }, { "epoch": 0.4986039428039597, "grad_norm": 0.15057973563671112, "learning_rate": 0.001, "loss": 2.808, "step": 11786 }, { "epoch": 0.4986462475674761, "grad_norm": 1.3171411752700806, "learning_rate": 0.001, "loss": 2.5091, "step": 11787 }, { "epoch": 0.49868855233099246, "grad_norm": 0.20347653329372406, "learning_rate": 0.001, "loss": 2.2627, "step": 11788 }, { "epoch": 0.4987308570945088, "grad_norm": 0.189688041806221, "learning_rate": 0.001, "loss": 2.2906, "step": 11789 }, { "epoch": 0.4987731618580252, "grad_norm": 3.7641372680664062, "learning_rate": 0.001, "loss": 2.4856, "step": 11790 }, { "epoch": 0.4988154666215416, "grad_norm": 0.2324923574924469, "learning_rate": 0.001, "loss": 3.4146, "step": 11791 }, { "epoch": 0.49885777138505794, "grad_norm": 0.9900776147842407, "learning_rate": 0.001, "loss": 1.8225, "step": 11792 }, { "epoch": 0.49890007614857435, "grad_norm": 0.4273008406162262, "learning_rate": 0.001, "loss": 2.9934, "step": 11793 }, { "epoch": 0.4989423809120907, "grad_norm": 0.14736799895763397, "learning_rate": 0.001, "loss": 2.5134, "step": 11794 }, { "epoch": 0.49898468567560705, "grad_norm": 2.7909343242645264, "learning_rate": 0.001, "loss": 1.9295, "step": 11795 }, { "epoch": 0.49902699043912346, "grad_norm": 0.27986812591552734, "learning_rate": 0.001, "loss": 3.138, "step": 11796 }, { "epoch": 0.4990692952026398, "grad_norm": 0.18478405475616455, "learning_rate": 0.001, "loss": 1.9531, "step": 11797 }, { "epoch": 0.49911159996615617, "grad_norm": 0.18496938049793243, "learning_rate": 0.001, "loss": 2.3311, "step": 11798 }, { "epoch": 0.4991539047296726, "grad_norm": 0.18009823560714722, "learning_rate": 0.001, "loss": 2.0114, "step": 11799 }, { "epoch": 0.49919620949318894, "grad_norm": 0.23303596675395966, "learning_rate": 0.001, "loss": 1.9756, "step": 11800 }, { "epoch": 0.4992385142567053, "grad_norm": 0.1994001865386963, "learning_rate": 0.001, "loss": 2.563, "step": 11801 }, { "epoch": 0.4992808190202217, "grad_norm": 0.2099967747926712, "learning_rate": 0.001, "loss": 1.5605, "step": 11802 }, { "epoch": 0.49932312378373805, "grad_norm": 0.34757867455482483, "learning_rate": 0.001, "loss": 3.3986, "step": 11803 }, { "epoch": 0.4993654285472544, "grad_norm": 0.15620851516723633, "learning_rate": 0.001, "loss": 2.5944, "step": 11804 }, { "epoch": 0.4994077333107708, "grad_norm": 0.2044965624809265, "learning_rate": 0.001, "loss": 3.5678, "step": 11805 }, { "epoch": 0.4994500380742872, "grad_norm": 0.1616724729537964, "learning_rate": 0.001, "loss": 2.2451, "step": 11806 }, { "epoch": 0.4994923428378035, "grad_norm": 0.21824952960014343, "learning_rate": 0.001, "loss": 3.1134, "step": 11807 }, { "epoch": 0.4995346476013199, "grad_norm": 0.19981542229652405, "learning_rate": 0.001, "loss": 2.2409, "step": 11808 }, { "epoch": 0.4995769523648363, "grad_norm": 0.538329541683197, "learning_rate": 0.001, "loss": 1.8192, "step": 11809 }, { "epoch": 0.49961925712835265, "grad_norm": 0.18705959618091583, "learning_rate": 0.001, "loss": 2.367, "step": 11810 }, { "epoch": 0.499661561891869, "grad_norm": 1.9119819402694702, "learning_rate": 0.001, "loss": 1.9958, "step": 11811 }, { "epoch": 0.4997038666553854, "grad_norm": 0.1916486769914627, "learning_rate": 0.001, "loss": 2.6751, "step": 11812 }, { "epoch": 0.49974617141890176, "grad_norm": 0.17251789569854736, "learning_rate": 0.001, "loss": 1.6438, "step": 11813 }, { "epoch": 0.4997884761824181, "grad_norm": 0.2549959421157837, "learning_rate": 0.001, "loss": 2.6528, "step": 11814 }, { "epoch": 0.4998307809459345, "grad_norm": 0.16786809265613556, "learning_rate": 0.001, "loss": 1.7551, "step": 11815 }, { "epoch": 0.4998730857094509, "grad_norm": 0.22102710604667664, "learning_rate": 0.001, "loss": 1.6668, "step": 11816 }, { "epoch": 0.49991539047296724, "grad_norm": 0.2043444961309433, "learning_rate": 0.001, "loss": 2.0156, "step": 11817 }, { "epoch": 0.49995769523648365, "grad_norm": 0.1693449169397354, "learning_rate": 0.001, "loss": 1.7542, "step": 11818 }, { "epoch": 0.5, "grad_norm": 0.18922848999500275, "learning_rate": 0.001, "loss": 3.11, "step": 11819 }, { "epoch": 0.5000423047635164, "grad_norm": 0.19555483758449554, "learning_rate": 0.001, "loss": 3.5945, "step": 11820 }, { "epoch": 0.5000846095270327, "grad_norm": 0.22335830330848694, "learning_rate": 0.001, "loss": 3.515, "step": 11821 }, { "epoch": 0.5001269142905491, "grad_norm": 0.17894285917282104, "learning_rate": 0.001, "loss": 1.6961, "step": 11822 }, { "epoch": 0.5001692190540655, "grad_norm": 0.16279856860637665, "learning_rate": 0.001, "loss": 2.7065, "step": 11823 }, { "epoch": 0.5002115238175818, "grad_norm": 0.2117103487253189, "learning_rate": 0.001, "loss": 2.481, "step": 11824 }, { "epoch": 0.5002538285810982, "grad_norm": 0.1833728402853012, "learning_rate": 0.001, "loss": 2.1502, "step": 11825 }, { "epoch": 0.5002961333446146, "grad_norm": 0.1608177125453949, "learning_rate": 0.001, "loss": 1.8915, "step": 11826 }, { "epoch": 0.500338438108131, "grad_norm": 0.15755756199359894, "learning_rate": 0.001, "loss": 2.0134, "step": 11827 }, { "epoch": 0.5003807428716474, "grad_norm": 1.6536004543304443, "learning_rate": 0.001, "loss": 2.4632, "step": 11828 }, { "epoch": 0.5004230476351638, "grad_norm": 0.17885924875736237, "learning_rate": 0.001, "loss": 2.8906, "step": 11829 }, { "epoch": 0.5004653523986801, "grad_norm": 0.17914816737174988, "learning_rate": 0.001, "loss": 1.82, "step": 11830 }, { "epoch": 0.5005076571621965, "grad_norm": 0.4052477478981018, "learning_rate": 0.001, "loss": 1.7167, "step": 11831 }, { "epoch": 0.5005499619257129, "grad_norm": 0.18463678658008575, "learning_rate": 0.001, "loss": 2.2511, "step": 11832 }, { "epoch": 0.5005922666892292, "grad_norm": 0.194928377866745, "learning_rate": 0.001, "loss": 1.9792, "step": 11833 }, { "epoch": 0.5006345714527456, "grad_norm": 0.17245490849018097, "learning_rate": 0.001, "loss": 1.9821, "step": 11834 }, { "epoch": 0.500676876216262, "grad_norm": 0.1762954294681549, "learning_rate": 0.001, "loss": 1.6249, "step": 11835 }, { "epoch": 0.5007191809797783, "grad_norm": 0.16809524595737457, "learning_rate": 0.001, "loss": 1.6515, "step": 11836 }, { "epoch": 0.5007614857432947, "grad_norm": 0.2633932828903198, "learning_rate": 0.001, "loss": 2.8323, "step": 11837 }, { "epoch": 0.5008037905068111, "grad_norm": 1.2055695056915283, "learning_rate": 0.001, "loss": 2.7611, "step": 11838 }, { "epoch": 0.5008460952703274, "grad_norm": 0.5635671019554138, "learning_rate": 0.001, "loss": 2.3381, "step": 11839 }, { "epoch": 0.5008884000338438, "grad_norm": 0.22210033237934113, "learning_rate": 0.001, "loss": 2.4092, "step": 11840 }, { "epoch": 0.5009307047973602, "grad_norm": 0.3583562672138214, "learning_rate": 0.001, "loss": 2.3941, "step": 11841 }, { "epoch": 0.5009730095608765, "grad_norm": 1.5744633674621582, "learning_rate": 0.001, "loss": 2.1961, "step": 11842 }, { "epoch": 0.501015314324393, "grad_norm": 0.19284333288669586, "learning_rate": 0.001, "loss": 2.6481, "step": 11843 }, { "epoch": 0.5010576190879092, "grad_norm": 0.16375435888767242, "learning_rate": 0.001, "loss": 3.0202, "step": 11844 }, { "epoch": 0.5010999238514257, "grad_norm": 0.1810348629951477, "learning_rate": 0.001, "loss": 2.0206, "step": 11845 }, { "epoch": 0.5011422286149421, "grad_norm": 0.15742111206054688, "learning_rate": 0.001, "loss": 2.0867, "step": 11846 }, { "epoch": 0.5011845333784584, "grad_norm": 0.18012715876102448, "learning_rate": 0.001, "loss": 1.8844, "step": 11847 }, { "epoch": 0.5012268381419748, "grad_norm": 0.21920199692249298, "learning_rate": 0.001, "loss": 2.0386, "step": 11848 }, { "epoch": 0.5012691429054912, "grad_norm": 0.20530590415000916, "learning_rate": 0.001, "loss": 2.7012, "step": 11849 }, { "epoch": 0.5013114476690075, "grad_norm": 0.1777116060256958, "learning_rate": 0.001, "loss": 2.2618, "step": 11850 }, { "epoch": 0.5013537524325239, "grad_norm": 0.9319034814834595, "learning_rate": 0.001, "loss": 2.1148, "step": 11851 }, { "epoch": 0.5013960571960403, "grad_norm": 0.22064313292503357, "learning_rate": 0.001, "loss": 2.2856, "step": 11852 }, { "epoch": 0.5014383619595566, "grad_norm": 0.19049854576587677, "learning_rate": 0.001, "loss": 2.2153, "step": 11853 }, { "epoch": 0.501480666723073, "grad_norm": 0.1864825040102005, "learning_rate": 0.001, "loss": 2.225, "step": 11854 }, { "epoch": 0.5015229714865894, "grad_norm": 0.20713578164577484, "learning_rate": 0.001, "loss": 2.5584, "step": 11855 }, { "epoch": 0.5015652762501057, "grad_norm": 0.2419637143611908, "learning_rate": 0.001, "loss": 1.9168, "step": 11856 }, { "epoch": 0.5016075810136221, "grad_norm": 0.18344347178936005, "learning_rate": 0.001, "loss": 1.6852, "step": 11857 }, { "epoch": 0.5016498857771385, "grad_norm": 3.606316328048706, "learning_rate": 0.001, "loss": 1.7524, "step": 11858 }, { "epoch": 0.5016921905406548, "grad_norm": 0.1817534863948822, "learning_rate": 0.001, "loss": 1.6807, "step": 11859 }, { "epoch": 0.5017344953041712, "grad_norm": 0.22895671427249908, "learning_rate": 0.001, "loss": 2.1781, "step": 11860 }, { "epoch": 0.5017768000676877, "grad_norm": 0.1826825588941574, "learning_rate": 0.001, "loss": 1.8662, "step": 11861 }, { "epoch": 0.501819104831204, "grad_norm": 0.1994072049856186, "learning_rate": 0.001, "loss": 2.2856, "step": 11862 }, { "epoch": 0.5018614095947204, "grad_norm": 0.1609574556350708, "learning_rate": 0.001, "loss": 2.5509, "step": 11863 }, { "epoch": 0.5019037143582368, "grad_norm": 0.28358885645866394, "learning_rate": 0.001, "loss": 2.2666, "step": 11864 }, { "epoch": 0.5019460191217531, "grad_norm": 0.19718047976493835, "learning_rate": 0.001, "loss": 2.8378, "step": 11865 }, { "epoch": 0.5019883238852695, "grad_norm": 0.17806696891784668, "learning_rate": 0.001, "loss": 2.5277, "step": 11866 }, { "epoch": 0.5020306286487859, "grad_norm": 0.22065989673137665, "learning_rate": 0.001, "loss": 3.4721, "step": 11867 }, { "epoch": 0.5020729334123022, "grad_norm": 1.0695648193359375, "learning_rate": 0.001, "loss": 2.6934, "step": 11868 }, { "epoch": 0.5021152381758186, "grad_norm": 0.34497925639152527, "learning_rate": 0.001, "loss": 2.5293, "step": 11869 }, { "epoch": 0.502157542939335, "grad_norm": 0.2196062207221985, "learning_rate": 0.001, "loss": 1.7822, "step": 11870 }, { "epoch": 0.5021998477028513, "grad_norm": 1.1649377346038818, "learning_rate": 0.001, "loss": 2.6756, "step": 11871 }, { "epoch": 0.5022421524663677, "grad_norm": 0.18098509311676025, "learning_rate": 0.001, "loss": 2.4486, "step": 11872 }, { "epoch": 0.5022844572298841, "grad_norm": 0.21399042010307312, "learning_rate": 0.001, "loss": 2.5243, "step": 11873 }, { "epoch": 0.5023267619934004, "grad_norm": 0.19336672127246857, "learning_rate": 0.001, "loss": 1.3858, "step": 11874 }, { "epoch": 0.5023690667569168, "grad_norm": 1.1209372282028198, "learning_rate": 0.001, "loss": 2.7417, "step": 11875 }, { "epoch": 0.5024113715204332, "grad_norm": 0.1669924110174179, "learning_rate": 0.001, "loss": 1.8934, "step": 11876 }, { "epoch": 0.5024536762839495, "grad_norm": 0.20557548105716705, "learning_rate": 0.001, "loss": 1.9532, "step": 11877 }, { "epoch": 0.502495981047466, "grad_norm": 1.1194974184036255, "learning_rate": 0.001, "loss": 3.5188, "step": 11878 }, { "epoch": 0.5025382858109824, "grad_norm": 1.5623433589935303, "learning_rate": 0.001, "loss": 2.5858, "step": 11879 }, { "epoch": 0.5025805905744987, "grad_norm": 0.32952699065208435, "learning_rate": 0.001, "loss": 2.9636, "step": 11880 }, { "epoch": 0.5026228953380151, "grad_norm": 0.18571840226650238, "learning_rate": 0.001, "loss": 1.854, "step": 11881 }, { "epoch": 0.5026652001015315, "grad_norm": 6.811452865600586, "learning_rate": 0.001, "loss": 1.903, "step": 11882 }, { "epoch": 0.5027075048650478, "grad_norm": 0.1865660697221756, "learning_rate": 0.001, "loss": 2.0409, "step": 11883 }, { "epoch": 0.5027498096285642, "grad_norm": 0.17085236310958862, "learning_rate": 0.001, "loss": 2.2718, "step": 11884 }, { "epoch": 0.5027921143920806, "grad_norm": 0.19499720633029938, "learning_rate": 0.001, "loss": 2.1248, "step": 11885 }, { "epoch": 0.5028344191555969, "grad_norm": 0.1698610633611679, "learning_rate": 0.001, "loss": 1.8143, "step": 11886 }, { "epoch": 0.5028767239191133, "grad_norm": 0.18589156866073608, "learning_rate": 0.001, "loss": 2.1618, "step": 11887 }, { "epoch": 0.5029190286826296, "grad_norm": 0.17679286003112793, "learning_rate": 0.001, "loss": 2.1766, "step": 11888 }, { "epoch": 0.502961333446146, "grad_norm": 0.18507613241672516, "learning_rate": 0.001, "loss": 1.8563, "step": 11889 }, { "epoch": 0.5030036382096624, "grad_norm": 0.1703113168478012, "learning_rate": 0.001, "loss": 1.8142, "step": 11890 }, { "epoch": 0.5030459429731787, "grad_norm": 0.1954764574766159, "learning_rate": 0.001, "loss": 2.0398, "step": 11891 }, { "epoch": 0.5030882477366951, "grad_norm": 0.1964946985244751, "learning_rate": 0.001, "loss": 2.9447, "step": 11892 }, { "epoch": 0.5031305525002115, "grad_norm": 0.23403804004192352, "learning_rate": 0.001, "loss": 2.4022, "step": 11893 }, { "epoch": 0.5031728572637278, "grad_norm": 0.20903852581977844, "learning_rate": 0.001, "loss": 2.6347, "step": 11894 }, { "epoch": 0.5032151620272443, "grad_norm": 0.14716748893260956, "learning_rate": 0.001, "loss": 2.6717, "step": 11895 }, { "epoch": 0.5032574667907607, "grad_norm": 0.1724325567483902, "learning_rate": 0.001, "loss": 2.4629, "step": 11896 }, { "epoch": 0.503299771554277, "grad_norm": 2.00115704536438, "learning_rate": 0.001, "loss": 2.8087, "step": 11897 }, { "epoch": 0.5033420763177934, "grad_norm": 0.3015700876712799, "learning_rate": 0.001, "loss": 1.8208, "step": 11898 }, { "epoch": 0.5033843810813098, "grad_norm": 0.18735451996326447, "learning_rate": 0.001, "loss": 1.9337, "step": 11899 }, { "epoch": 0.5034266858448261, "grad_norm": 0.18726201355457306, "learning_rate": 0.001, "loss": 2.8956, "step": 11900 }, { "epoch": 0.5034689906083425, "grad_norm": 0.22728313505649567, "learning_rate": 0.001, "loss": 2.3571, "step": 11901 }, { "epoch": 0.5035112953718589, "grad_norm": 0.3538459241390228, "learning_rate": 0.001, "loss": 2.0404, "step": 11902 }, { "epoch": 0.5035536001353752, "grad_norm": 0.2191903442144394, "learning_rate": 0.001, "loss": 2.8035, "step": 11903 }, { "epoch": 0.5035959048988916, "grad_norm": 1.2303228378295898, "learning_rate": 0.001, "loss": 2.9012, "step": 11904 }, { "epoch": 0.503638209662408, "grad_norm": 0.6738932132720947, "learning_rate": 0.001, "loss": 2.0589, "step": 11905 }, { "epoch": 0.5036805144259243, "grad_norm": 0.18187807500362396, "learning_rate": 0.001, "loss": 2.1458, "step": 11906 }, { "epoch": 0.5037228191894407, "grad_norm": 0.20437604188919067, "learning_rate": 0.001, "loss": 2.5876, "step": 11907 }, { "epoch": 0.5037651239529571, "grad_norm": 0.17892666161060333, "learning_rate": 0.001, "loss": 2.4489, "step": 11908 }, { "epoch": 0.5038074287164734, "grad_norm": 0.1988033950328827, "learning_rate": 0.001, "loss": 2.935, "step": 11909 }, { "epoch": 0.5038497334799898, "grad_norm": 0.16450054943561554, "learning_rate": 0.001, "loss": 1.6965, "step": 11910 }, { "epoch": 0.5038920382435063, "grad_norm": 0.1781654804944992, "learning_rate": 0.001, "loss": 2.5796, "step": 11911 }, { "epoch": 0.5039343430070226, "grad_norm": 0.17082349956035614, "learning_rate": 0.001, "loss": 2.9056, "step": 11912 }, { "epoch": 0.503976647770539, "grad_norm": 0.23962025344371796, "learning_rate": 0.001, "loss": 2.1747, "step": 11913 }, { "epoch": 0.5040189525340554, "grad_norm": 0.18529817461967468, "learning_rate": 0.001, "loss": 2.4123, "step": 11914 }, { "epoch": 0.5040612572975717, "grad_norm": 0.3135690689086914, "learning_rate": 0.001, "loss": 1.7927, "step": 11915 }, { "epoch": 0.5041035620610881, "grad_norm": 0.7973864674568176, "learning_rate": 0.001, "loss": 1.7801, "step": 11916 }, { "epoch": 0.5041458668246045, "grad_norm": 0.25282907485961914, "learning_rate": 0.001, "loss": 2.5156, "step": 11917 }, { "epoch": 0.5041881715881208, "grad_norm": 0.8695905804634094, "learning_rate": 0.001, "loss": 2.0658, "step": 11918 }, { "epoch": 0.5042304763516372, "grad_norm": 0.1739739030599594, "learning_rate": 0.001, "loss": 1.7482, "step": 11919 }, { "epoch": 0.5042727811151536, "grad_norm": 0.639236330986023, "learning_rate": 0.001, "loss": 1.906, "step": 11920 }, { "epoch": 0.5043150858786699, "grad_norm": 0.2065761834383011, "learning_rate": 0.001, "loss": 1.6717, "step": 11921 }, { "epoch": 0.5043573906421863, "grad_norm": 0.22129188477993011, "learning_rate": 0.001, "loss": 2.7184, "step": 11922 }, { "epoch": 0.5043996954057027, "grad_norm": 0.16457290947437286, "learning_rate": 0.001, "loss": 1.9362, "step": 11923 }, { "epoch": 0.504442000169219, "grad_norm": 0.7113840579986572, "learning_rate": 0.001, "loss": 1.3661, "step": 11924 }, { "epoch": 0.5044843049327354, "grad_norm": 0.16901074349880219, "learning_rate": 0.001, "loss": 2.3126, "step": 11925 }, { "epoch": 0.5045266096962518, "grad_norm": 0.2004851996898651, "learning_rate": 0.001, "loss": 2.6944, "step": 11926 }, { "epoch": 0.5045689144597681, "grad_norm": 0.17636452615261078, "learning_rate": 0.001, "loss": 2.3345, "step": 11927 }, { "epoch": 0.5046112192232846, "grad_norm": 0.16024649143218994, "learning_rate": 0.001, "loss": 1.7696, "step": 11928 }, { "epoch": 0.504653523986801, "grad_norm": 0.18146340548992157, "learning_rate": 0.001, "loss": 2.6534, "step": 11929 }, { "epoch": 0.5046958287503173, "grad_norm": 0.19489388167858124, "learning_rate": 0.001, "loss": 2.5024, "step": 11930 }, { "epoch": 0.5047381335138337, "grad_norm": 0.5140373706817627, "learning_rate": 0.001, "loss": 3.1386, "step": 11931 }, { "epoch": 0.50478043827735, "grad_norm": 1.8529261350631714, "learning_rate": 0.001, "loss": 2.1581, "step": 11932 }, { "epoch": 0.5048227430408664, "grad_norm": 0.16722053289413452, "learning_rate": 0.001, "loss": 2.019, "step": 11933 }, { "epoch": 0.5048650478043828, "grad_norm": 0.16939833760261536, "learning_rate": 0.001, "loss": 1.843, "step": 11934 }, { "epoch": 0.5049073525678991, "grad_norm": 0.3722081780433655, "learning_rate": 0.001, "loss": 2.8935, "step": 11935 }, { "epoch": 0.5049496573314155, "grad_norm": 0.15549683570861816, "learning_rate": 0.001, "loss": 1.5844, "step": 11936 }, { "epoch": 0.5049919620949319, "grad_norm": 0.19663242995738983, "learning_rate": 0.001, "loss": 2.0955, "step": 11937 }, { "epoch": 0.5050342668584482, "grad_norm": 0.557640016078949, "learning_rate": 0.001, "loss": 3.2234, "step": 11938 }, { "epoch": 0.5050765716219646, "grad_norm": 0.24764712154865265, "learning_rate": 0.001, "loss": 2.313, "step": 11939 }, { "epoch": 0.505118876385481, "grad_norm": 0.22433580458164215, "learning_rate": 0.001, "loss": 2.8558, "step": 11940 }, { "epoch": 0.5051611811489973, "grad_norm": 0.29622960090637207, "learning_rate": 0.001, "loss": 2.7506, "step": 11941 }, { "epoch": 0.5052034859125137, "grad_norm": 0.23077332973480225, "learning_rate": 0.001, "loss": 1.8445, "step": 11942 }, { "epoch": 0.5052457906760301, "grad_norm": 0.16419456899166107, "learning_rate": 0.001, "loss": 2.2429, "step": 11943 }, { "epoch": 0.5052880954395464, "grad_norm": 1.3198111057281494, "learning_rate": 0.001, "loss": 2.769, "step": 11944 }, { "epoch": 0.5053304002030629, "grad_norm": 0.32443439960479736, "learning_rate": 0.001, "loss": 2.6581, "step": 11945 }, { "epoch": 0.5053727049665793, "grad_norm": 0.34906241297721863, "learning_rate": 0.001, "loss": 2.769, "step": 11946 }, { "epoch": 0.5054150097300956, "grad_norm": 0.1365213692188263, "learning_rate": 0.001, "loss": 2.729, "step": 11947 }, { "epoch": 0.505457314493612, "grad_norm": 0.3047373294830322, "learning_rate": 0.001, "loss": 1.8934, "step": 11948 }, { "epoch": 0.5054996192571284, "grad_norm": 0.4273305833339691, "learning_rate": 0.001, "loss": 2.3561, "step": 11949 }, { "epoch": 0.5055419240206447, "grad_norm": 0.22199836373329163, "learning_rate": 0.001, "loss": 2.0293, "step": 11950 }, { "epoch": 0.5055842287841611, "grad_norm": 0.3782862424850464, "learning_rate": 0.001, "loss": 1.7579, "step": 11951 }, { "epoch": 0.5056265335476775, "grad_norm": 0.19411024451255798, "learning_rate": 0.001, "loss": 1.4811, "step": 11952 }, { "epoch": 0.5056688383111938, "grad_norm": 0.1856779307126999, "learning_rate": 0.001, "loss": 2.2312, "step": 11953 }, { "epoch": 0.5057111430747102, "grad_norm": 0.17408660054206848, "learning_rate": 0.001, "loss": 2.3105, "step": 11954 }, { "epoch": 0.5057534478382266, "grad_norm": 0.19032998383045197, "learning_rate": 0.001, "loss": 2.088, "step": 11955 }, { "epoch": 0.5057957526017429, "grad_norm": 0.18069171905517578, "learning_rate": 0.001, "loss": 2.6122, "step": 11956 }, { "epoch": 0.5058380573652593, "grad_norm": 0.16551326215267181, "learning_rate": 0.001, "loss": 2.1795, "step": 11957 }, { "epoch": 0.5058803621287757, "grad_norm": 2.8570401668548584, "learning_rate": 0.001, "loss": 1.9708, "step": 11958 }, { "epoch": 0.505922666892292, "grad_norm": 0.20645208656787872, "learning_rate": 0.001, "loss": 2.3922, "step": 11959 }, { "epoch": 0.5059649716558084, "grad_norm": 4.1343817710876465, "learning_rate": 0.001, "loss": 2.1069, "step": 11960 }, { "epoch": 0.5060072764193249, "grad_norm": 0.17171384394168854, "learning_rate": 0.001, "loss": 1.5699, "step": 11961 }, { "epoch": 0.5060495811828412, "grad_norm": 0.4710404574871063, "learning_rate": 0.001, "loss": 1.9224, "step": 11962 }, { "epoch": 0.5060918859463576, "grad_norm": 0.9586241245269775, "learning_rate": 0.001, "loss": 2.1856, "step": 11963 }, { "epoch": 0.506134190709874, "grad_norm": 0.1798468381166458, "learning_rate": 0.001, "loss": 2.836, "step": 11964 }, { "epoch": 0.5061764954733903, "grad_norm": 0.2874303162097931, "learning_rate": 0.001, "loss": 2.2059, "step": 11965 }, { "epoch": 0.5062188002369067, "grad_norm": 0.1568385362625122, "learning_rate": 0.001, "loss": 1.7764, "step": 11966 }, { "epoch": 0.5062611050004231, "grad_norm": 0.17159751057624817, "learning_rate": 0.001, "loss": 2.1703, "step": 11967 }, { "epoch": 0.5063034097639394, "grad_norm": 0.20359356701374054, "learning_rate": 0.001, "loss": 2.4419, "step": 11968 }, { "epoch": 0.5063457145274558, "grad_norm": 0.1408676654100418, "learning_rate": 0.001, "loss": 2.12, "step": 11969 }, { "epoch": 0.5063880192909722, "grad_norm": 0.16593052446842194, "learning_rate": 0.001, "loss": 1.9553, "step": 11970 }, { "epoch": 0.5064303240544885, "grad_norm": 0.12770260870456696, "learning_rate": 0.001, "loss": 2.1708, "step": 11971 }, { "epoch": 0.5064726288180049, "grad_norm": 0.15011616051197052, "learning_rate": 0.001, "loss": 2.1359, "step": 11972 }, { "epoch": 0.5065149335815213, "grad_norm": 0.47157904505729675, "learning_rate": 0.001, "loss": 1.9827, "step": 11973 }, { "epoch": 0.5065572383450376, "grad_norm": 0.15799900889396667, "learning_rate": 0.001, "loss": 2.2315, "step": 11974 }, { "epoch": 0.506599543108554, "grad_norm": 0.20162101089954376, "learning_rate": 0.001, "loss": 2.2226, "step": 11975 }, { "epoch": 0.5066418478720704, "grad_norm": 0.21195806562900543, "learning_rate": 0.001, "loss": 2.3964, "step": 11976 }, { "epoch": 0.5066841526355867, "grad_norm": 0.18306732177734375, "learning_rate": 0.001, "loss": 3.3474, "step": 11977 }, { "epoch": 0.5067264573991032, "grad_norm": 0.15694935619831085, "learning_rate": 0.001, "loss": 2.5094, "step": 11978 }, { "epoch": 0.5067687621626195, "grad_norm": 0.22060388326644897, "learning_rate": 0.001, "loss": 2.6429, "step": 11979 }, { "epoch": 0.5068110669261359, "grad_norm": 0.2225179672241211, "learning_rate": 0.001, "loss": 1.6857, "step": 11980 }, { "epoch": 0.5068533716896523, "grad_norm": 0.19372129440307617, "learning_rate": 0.001, "loss": 2.5101, "step": 11981 }, { "epoch": 0.5068956764531686, "grad_norm": 0.13902443647384644, "learning_rate": 0.001, "loss": 2.4499, "step": 11982 }, { "epoch": 0.506937981216685, "grad_norm": 0.15087106823921204, "learning_rate": 0.001, "loss": 2.1793, "step": 11983 }, { "epoch": 0.5069802859802014, "grad_norm": 0.16260752081871033, "learning_rate": 0.001, "loss": 2.0164, "step": 11984 }, { "epoch": 0.5070225907437177, "grad_norm": 0.159589946269989, "learning_rate": 0.001, "loss": 2.8375, "step": 11985 }, { "epoch": 0.5070648955072341, "grad_norm": 0.21032920479774475, "learning_rate": 0.001, "loss": 4.2796, "step": 11986 }, { "epoch": 0.5071072002707505, "grad_norm": 0.19876354932785034, "learning_rate": 0.001, "loss": 2.9237, "step": 11987 }, { "epoch": 0.5071495050342668, "grad_norm": 0.35270553827285767, "learning_rate": 0.001, "loss": 2.9928, "step": 11988 }, { "epoch": 0.5071918097977832, "grad_norm": 0.16005408763885498, "learning_rate": 0.001, "loss": 2.1871, "step": 11989 }, { "epoch": 0.5072341145612996, "grad_norm": 0.13983991742134094, "learning_rate": 0.001, "loss": 2.6865, "step": 11990 }, { "epoch": 0.5072764193248159, "grad_norm": 0.17701087892055511, "learning_rate": 0.001, "loss": 2.0827, "step": 11991 }, { "epoch": 0.5073187240883323, "grad_norm": 0.15667004883289337, "learning_rate": 0.001, "loss": 1.466, "step": 11992 }, { "epoch": 0.5073610288518487, "grad_norm": 0.22075319290161133, "learning_rate": 0.001, "loss": 2.0947, "step": 11993 }, { "epoch": 0.507403333615365, "grad_norm": 0.14011307060718536, "learning_rate": 0.001, "loss": 2.5497, "step": 11994 }, { "epoch": 0.5074456383788815, "grad_norm": 0.1682954877614975, "learning_rate": 0.001, "loss": 1.8733, "step": 11995 }, { "epoch": 0.5074879431423979, "grad_norm": 30.817886352539062, "learning_rate": 0.001, "loss": 2.5975, "step": 11996 }, { "epoch": 0.5075302479059142, "grad_norm": 0.17083485424518585, "learning_rate": 0.001, "loss": 2.1617, "step": 11997 }, { "epoch": 0.5075725526694306, "grad_norm": 0.23582878708839417, "learning_rate": 0.001, "loss": 2.6255, "step": 11998 }, { "epoch": 0.507614857432947, "grad_norm": 0.16995792090892792, "learning_rate": 0.001, "loss": 2.413, "step": 11999 }, { "epoch": 0.5076571621964633, "grad_norm": 0.1696588695049286, "learning_rate": 0.001, "loss": 2.9008, "step": 12000 }, { "epoch": 0.5076994669599797, "grad_norm": 1.6405519247055054, "learning_rate": 0.001, "loss": 1.7866, "step": 12001 }, { "epoch": 0.5077417717234961, "grad_norm": 0.8979272246360779, "learning_rate": 0.001, "loss": 2.6012, "step": 12002 }, { "epoch": 0.5077840764870124, "grad_norm": 0.20839910209178925, "learning_rate": 0.001, "loss": 2.8517, "step": 12003 }, { "epoch": 0.5078263812505288, "grad_norm": 0.2238219529390335, "learning_rate": 0.001, "loss": 2.2089, "step": 12004 }, { "epoch": 0.5078686860140452, "grad_norm": 0.21982823312282562, "learning_rate": 0.001, "loss": 1.9233, "step": 12005 }, { "epoch": 0.5079109907775615, "grad_norm": 0.20902688801288605, "learning_rate": 0.001, "loss": 2.416, "step": 12006 }, { "epoch": 0.5079532955410779, "grad_norm": 0.4519989788532257, "learning_rate": 0.001, "loss": 1.7383, "step": 12007 }, { "epoch": 0.5079956003045943, "grad_norm": 0.15626175701618195, "learning_rate": 0.001, "loss": 3.0287, "step": 12008 }, { "epoch": 0.5080379050681106, "grad_norm": 0.1568712294101715, "learning_rate": 0.001, "loss": 2.3071, "step": 12009 }, { "epoch": 0.508080209831627, "grad_norm": 0.17470332980155945, "learning_rate": 0.001, "loss": 1.9627, "step": 12010 }, { "epoch": 0.5081225145951435, "grad_norm": 0.29563674330711365, "learning_rate": 0.001, "loss": 2.2517, "step": 12011 }, { "epoch": 0.5081648193586598, "grad_norm": 0.1537560075521469, "learning_rate": 0.001, "loss": 1.5154, "step": 12012 }, { "epoch": 0.5082071241221762, "grad_norm": 0.24751636385917664, "learning_rate": 0.001, "loss": 2.1838, "step": 12013 }, { "epoch": 0.5082494288856926, "grad_norm": 0.2212720662355423, "learning_rate": 0.001, "loss": 2.7298, "step": 12014 }, { "epoch": 0.5082917336492089, "grad_norm": 0.1655520498752594, "learning_rate": 0.001, "loss": 2.1824, "step": 12015 }, { "epoch": 0.5083340384127253, "grad_norm": 0.17084214091300964, "learning_rate": 0.001, "loss": 2.413, "step": 12016 }, { "epoch": 0.5083763431762417, "grad_norm": 0.1613297313451767, "learning_rate": 0.001, "loss": 2.0016, "step": 12017 }, { "epoch": 0.508418647939758, "grad_norm": 0.28114500641822815, "learning_rate": 0.001, "loss": 1.6721, "step": 12018 }, { "epoch": 0.5084609527032744, "grad_norm": 0.19233211874961853, "learning_rate": 0.001, "loss": 1.9725, "step": 12019 }, { "epoch": 0.5085032574667908, "grad_norm": 0.17586541175842285, "learning_rate": 0.001, "loss": 1.7528, "step": 12020 }, { "epoch": 0.5085455622303071, "grad_norm": 0.1447594314813614, "learning_rate": 0.001, "loss": 2.6402, "step": 12021 }, { "epoch": 0.5085878669938235, "grad_norm": 0.14185841381549835, "learning_rate": 0.001, "loss": 1.6817, "step": 12022 }, { "epoch": 0.5086301717573398, "grad_norm": 0.171967551112175, "learning_rate": 0.001, "loss": 2.3004, "step": 12023 }, { "epoch": 0.5086724765208562, "grad_norm": 0.18312491476535797, "learning_rate": 0.001, "loss": 2.5789, "step": 12024 }, { "epoch": 0.5087147812843726, "grad_norm": 0.298218697309494, "learning_rate": 0.001, "loss": 2.98, "step": 12025 }, { "epoch": 0.5087570860478889, "grad_norm": 0.1913619339466095, "learning_rate": 0.001, "loss": 3.5224, "step": 12026 }, { "epoch": 0.5087993908114053, "grad_norm": 0.18118982017040253, "learning_rate": 0.001, "loss": 1.9072, "step": 12027 }, { "epoch": 0.5088416955749218, "grad_norm": 0.6203705072402954, "learning_rate": 0.001, "loss": 2.6366, "step": 12028 }, { "epoch": 0.5088840003384381, "grad_norm": 0.17132526636123657, "learning_rate": 0.001, "loss": 2.0598, "step": 12029 }, { "epoch": 0.5089263051019545, "grad_norm": 0.18120494484901428, "learning_rate": 0.001, "loss": 2.783, "step": 12030 }, { "epoch": 0.5089686098654709, "grad_norm": 0.20556822419166565, "learning_rate": 0.001, "loss": 2.4337, "step": 12031 }, { "epoch": 0.5090109146289872, "grad_norm": 0.16901428997516632, "learning_rate": 0.001, "loss": 1.8192, "step": 12032 }, { "epoch": 0.5090532193925036, "grad_norm": 0.16724789142608643, "learning_rate": 0.001, "loss": 2.1285, "step": 12033 }, { "epoch": 0.50909552415602, "grad_norm": 0.15585707128047943, "learning_rate": 0.001, "loss": 1.7745, "step": 12034 }, { "epoch": 0.5091378289195363, "grad_norm": 0.5937365889549255, "learning_rate": 0.001, "loss": 2.4572, "step": 12035 }, { "epoch": 0.5091801336830527, "grad_norm": 0.17110083997249603, "learning_rate": 0.001, "loss": 2.4326, "step": 12036 }, { "epoch": 0.5092224384465691, "grad_norm": 0.18722589313983917, "learning_rate": 0.001, "loss": 3.232, "step": 12037 }, { "epoch": 0.5092647432100854, "grad_norm": 0.2509753704071045, "learning_rate": 0.001, "loss": 2.1435, "step": 12038 }, { "epoch": 0.5093070479736018, "grad_norm": 0.1581326723098755, "learning_rate": 0.001, "loss": 2.3724, "step": 12039 }, { "epoch": 0.5093493527371182, "grad_norm": 0.2504560053348541, "learning_rate": 0.001, "loss": 2.3174, "step": 12040 }, { "epoch": 0.5093916575006345, "grad_norm": 0.19486159086227417, "learning_rate": 0.001, "loss": 2.1135, "step": 12041 }, { "epoch": 0.5094339622641509, "grad_norm": 0.16291144490242004, "learning_rate": 0.001, "loss": 1.9069, "step": 12042 }, { "epoch": 0.5094762670276674, "grad_norm": 0.17306125164031982, "learning_rate": 0.001, "loss": 2.8266, "step": 12043 }, { "epoch": 0.5095185717911836, "grad_norm": 0.17126202583312988, "learning_rate": 0.001, "loss": 3.3905, "step": 12044 }, { "epoch": 0.5095608765547001, "grad_norm": 0.18580661714076996, "learning_rate": 0.001, "loss": 2.1005, "step": 12045 }, { "epoch": 0.5096031813182165, "grad_norm": 1.9326337575912476, "learning_rate": 0.001, "loss": 1.8201, "step": 12046 }, { "epoch": 0.5096454860817328, "grad_norm": 0.24594885110855103, "learning_rate": 0.001, "loss": 2.2209, "step": 12047 }, { "epoch": 0.5096877908452492, "grad_norm": 0.14384353160858154, "learning_rate": 0.001, "loss": 1.7018, "step": 12048 }, { "epoch": 0.5097300956087656, "grad_norm": 0.1911388784646988, "learning_rate": 0.001, "loss": 2.9794, "step": 12049 }, { "epoch": 0.5097724003722819, "grad_norm": 0.15660209953784943, "learning_rate": 0.001, "loss": 2.4576, "step": 12050 }, { "epoch": 0.5098147051357983, "grad_norm": 0.27428507804870605, "learning_rate": 0.001, "loss": 2.893, "step": 12051 }, { "epoch": 0.5098570098993147, "grad_norm": 0.17285604774951935, "learning_rate": 0.001, "loss": 1.984, "step": 12052 }, { "epoch": 0.509899314662831, "grad_norm": 0.17490775883197784, "learning_rate": 0.001, "loss": 2.0082, "step": 12053 }, { "epoch": 0.5099416194263474, "grad_norm": 0.1558762937784195, "learning_rate": 0.001, "loss": 1.9196, "step": 12054 }, { "epoch": 0.5099839241898638, "grad_norm": 0.42359229922294617, "learning_rate": 0.001, "loss": 2.7122, "step": 12055 }, { "epoch": 0.5100262289533801, "grad_norm": 0.17612966895103455, "learning_rate": 0.001, "loss": 1.7214, "step": 12056 }, { "epoch": 0.5100685337168965, "grad_norm": 0.3643682599067688, "learning_rate": 0.001, "loss": 1.7763, "step": 12057 }, { "epoch": 0.5101108384804129, "grad_norm": 0.205791637301445, "learning_rate": 0.001, "loss": 2.0399, "step": 12058 }, { "epoch": 0.5101531432439292, "grad_norm": 2.9326553344726562, "learning_rate": 0.001, "loss": 1.8045, "step": 12059 }, { "epoch": 0.5101954480074457, "grad_norm": 0.22265328466892242, "learning_rate": 0.001, "loss": 2.1796, "step": 12060 }, { "epoch": 0.5102377527709621, "grad_norm": 0.17063187062740326, "learning_rate": 0.001, "loss": 2.4523, "step": 12061 }, { "epoch": 0.5102800575344784, "grad_norm": 0.18064865469932556, "learning_rate": 0.001, "loss": 1.5731, "step": 12062 }, { "epoch": 0.5103223622979948, "grad_norm": 2.2805631160736084, "learning_rate": 0.001, "loss": 3.7232, "step": 12063 }, { "epoch": 0.5103646670615112, "grad_norm": 0.1840154379606247, "learning_rate": 0.001, "loss": 1.7874, "step": 12064 }, { "epoch": 0.5104069718250275, "grad_norm": 0.16922450065612793, "learning_rate": 0.001, "loss": 1.9633, "step": 12065 }, { "epoch": 0.5104492765885439, "grad_norm": 0.1468036025762558, "learning_rate": 0.001, "loss": 1.8785, "step": 12066 }, { "epoch": 0.5104915813520603, "grad_norm": 0.15584182739257812, "learning_rate": 0.001, "loss": 2.47, "step": 12067 }, { "epoch": 0.5105338861155766, "grad_norm": 0.19544000923633575, "learning_rate": 0.001, "loss": 2.2966, "step": 12068 }, { "epoch": 0.510576190879093, "grad_norm": 0.15952937304973602, "learning_rate": 0.001, "loss": 1.9361, "step": 12069 }, { "epoch": 0.5106184956426093, "grad_norm": 2.266671657562256, "learning_rate": 0.001, "loss": 3.9638, "step": 12070 }, { "epoch": 0.5106608004061257, "grad_norm": 0.20230835676193237, "learning_rate": 0.001, "loss": 2.0883, "step": 12071 }, { "epoch": 0.5107031051696421, "grad_norm": 0.22591619193553925, "learning_rate": 0.001, "loss": 2.8491, "step": 12072 }, { "epoch": 0.5107454099331584, "grad_norm": 0.18922626972198486, "learning_rate": 0.001, "loss": 1.9407, "step": 12073 }, { "epoch": 0.5107877146966748, "grad_norm": 0.17691117525100708, "learning_rate": 0.001, "loss": 1.8264, "step": 12074 }, { "epoch": 0.5108300194601912, "grad_norm": 0.17793789505958557, "learning_rate": 0.001, "loss": 2.0651, "step": 12075 }, { "epoch": 0.5108723242237075, "grad_norm": 0.2747514545917511, "learning_rate": 0.001, "loss": 1.9631, "step": 12076 }, { "epoch": 0.510914628987224, "grad_norm": 0.16500215232372284, "learning_rate": 0.001, "loss": 1.5828, "step": 12077 }, { "epoch": 0.5109569337507404, "grad_norm": 0.2846604883670807, "learning_rate": 0.001, "loss": 3.0458, "step": 12078 }, { "epoch": 0.5109992385142567, "grad_norm": 0.18914783000946045, "learning_rate": 0.001, "loss": 2.501, "step": 12079 }, { "epoch": 0.5110415432777731, "grad_norm": 3.4462015628814697, "learning_rate": 0.001, "loss": 1.8007, "step": 12080 }, { "epoch": 0.5110838480412895, "grad_norm": 0.22221924364566803, "learning_rate": 0.001, "loss": 2.4593, "step": 12081 }, { "epoch": 0.5111261528048058, "grad_norm": 0.3206009268760681, "learning_rate": 0.001, "loss": 2.1852, "step": 12082 }, { "epoch": 0.5111684575683222, "grad_norm": 0.19568482041358948, "learning_rate": 0.001, "loss": 2.0723, "step": 12083 }, { "epoch": 0.5112107623318386, "grad_norm": 0.13651347160339355, "learning_rate": 0.001, "loss": 2.1792, "step": 12084 }, { "epoch": 0.5112530670953549, "grad_norm": 0.9014352560043335, "learning_rate": 0.001, "loss": 2.1313, "step": 12085 }, { "epoch": 0.5112953718588713, "grad_norm": 0.17774808406829834, "learning_rate": 0.001, "loss": 2.194, "step": 12086 }, { "epoch": 0.5113376766223877, "grad_norm": 0.2228837013244629, "learning_rate": 0.001, "loss": 3.0247, "step": 12087 }, { "epoch": 0.511379981385904, "grad_norm": 0.20427215099334717, "learning_rate": 0.001, "loss": 2.176, "step": 12088 }, { "epoch": 0.5114222861494204, "grad_norm": 0.1658029705286026, "learning_rate": 0.001, "loss": 2.0688, "step": 12089 }, { "epoch": 0.5114645909129368, "grad_norm": 0.19632932543754578, "learning_rate": 0.001, "loss": 3.3308, "step": 12090 }, { "epoch": 0.5115068956764531, "grad_norm": 0.17053432762622833, "learning_rate": 0.001, "loss": 2.1211, "step": 12091 }, { "epoch": 0.5115492004399695, "grad_norm": 0.17195382714271545, "learning_rate": 0.001, "loss": 1.7132, "step": 12092 }, { "epoch": 0.511591505203486, "grad_norm": 0.17372830212116241, "learning_rate": 0.001, "loss": 2.7497, "step": 12093 }, { "epoch": 0.5116338099670023, "grad_norm": 0.18535274267196655, "learning_rate": 0.001, "loss": 2.2781, "step": 12094 }, { "epoch": 0.5116761147305187, "grad_norm": 0.1690640151500702, "learning_rate": 0.001, "loss": 1.9845, "step": 12095 }, { "epoch": 0.5117184194940351, "grad_norm": 0.18286339938640594, "learning_rate": 0.001, "loss": 3.2735, "step": 12096 }, { "epoch": 0.5117607242575514, "grad_norm": 0.15377508103847504, "learning_rate": 0.001, "loss": 2.0673, "step": 12097 }, { "epoch": 0.5118030290210678, "grad_norm": 1.7860561609268188, "learning_rate": 0.001, "loss": 2.9895, "step": 12098 }, { "epoch": 0.5118453337845842, "grad_norm": 0.2976422607898712, "learning_rate": 0.001, "loss": 2.5911, "step": 12099 }, { "epoch": 0.5118876385481005, "grad_norm": 0.6552213430404663, "learning_rate": 0.001, "loss": 3.01, "step": 12100 }, { "epoch": 0.5119299433116169, "grad_norm": 0.1674286127090454, "learning_rate": 0.001, "loss": 1.5554, "step": 12101 }, { "epoch": 0.5119722480751333, "grad_norm": 0.17942678928375244, "learning_rate": 0.001, "loss": 2.5887, "step": 12102 }, { "epoch": 0.5120145528386496, "grad_norm": 0.3610702455043793, "learning_rate": 0.001, "loss": 2.0485, "step": 12103 }, { "epoch": 0.512056857602166, "grad_norm": 9.683119773864746, "learning_rate": 0.001, "loss": 1.5999, "step": 12104 }, { "epoch": 0.5120991623656824, "grad_norm": 0.23564742505550385, "learning_rate": 0.001, "loss": 2.6848, "step": 12105 }, { "epoch": 0.5121414671291987, "grad_norm": 0.20375941693782806, "learning_rate": 0.001, "loss": 2.348, "step": 12106 }, { "epoch": 0.5121837718927151, "grad_norm": 0.17690829932689667, "learning_rate": 0.001, "loss": 1.8304, "step": 12107 }, { "epoch": 0.5122260766562315, "grad_norm": 0.3078082501888275, "learning_rate": 0.001, "loss": 2.4127, "step": 12108 }, { "epoch": 0.5122683814197478, "grad_norm": 0.17831426858901978, "learning_rate": 0.001, "loss": 2.4171, "step": 12109 }, { "epoch": 0.5123106861832643, "grad_norm": 0.19849298894405365, "learning_rate": 0.001, "loss": 3.024, "step": 12110 }, { "epoch": 0.5123529909467807, "grad_norm": 0.25595778226852417, "learning_rate": 0.001, "loss": 1.8982, "step": 12111 }, { "epoch": 0.512395295710297, "grad_norm": 0.19416412711143494, "learning_rate": 0.001, "loss": 2.3878, "step": 12112 }, { "epoch": 0.5124376004738134, "grad_norm": 0.5815975666046143, "learning_rate": 0.001, "loss": 2.9711, "step": 12113 }, { "epoch": 0.5124799052373297, "grad_norm": 0.34129148721694946, "learning_rate": 0.001, "loss": 2.6444, "step": 12114 }, { "epoch": 0.5125222100008461, "grad_norm": 0.495937705039978, "learning_rate": 0.001, "loss": 2.1635, "step": 12115 }, { "epoch": 0.5125645147643625, "grad_norm": 0.23061814904212952, "learning_rate": 0.001, "loss": 2.387, "step": 12116 }, { "epoch": 0.5126068195278788, "grad_norm": 0.28698426485061646, "learning_rate": 0.001, "loss": 2.4555, "step": 12117 }, { "epoch": 0.5126491242913952, "grad_norm": 0.3059033453464508, "learning_rate": 0.001, "loss": 2.024, "step": 12118 }, { "epoch": 0.5126914290549116, "grad_norm": 0.24312277138233185, "learning_rate": 0.001, "loss": 2.1431, "step": 12119 }, { "epoch": 0.5127337338184279, "grad_norm": 0.17924846708774567, "learning_rate": 0.001, "loss": 2.322, "step": 12120 }, { "epoch": 0.5127760385819443, "grad_norm": 0.15020184218883514, "learning_rate": 0.001, "loss": 1.4752, "step": 12121 }, { "epoch": 0.5128183433454607, "grad_norm": 3.8008360862731934, "learning_rate": 0.001, "loss": 2.591, "step": 12122 }, { "epoch": 0.512860648108977, "grad_norm": 0.20697496831417084, "learning_rate": 0.001, "loss": 2.5281, "step": 12123 }, { "epoch": 0.5129029528724934, "grad_norm": 0.18046455085277557, "learning_rate": 0.001, "loss": 1.9783, "step": 12124 }, { "epoch": 0.5129452576360098, "grad_norm": 4.528714656829834, "learning_rate": 0.001, "loss": 1.7568, "step": 12125 }, { "epoch": 0.5129875623995261, "grad_norm": 0.520558774471283, "learning_rate": 0.001, "loss": 2.9131, "step": 12126 }, { "epoch": 0.5130298671630426, "grad_norm": 0.529538631439209, "learning_rate": 0.001, "loss": 2.5839, "step": 12127 }, { "epoch": 0.513072171926559, "grad_norm": 0.3380816578865051, "learning_rate": 0.001, "loss": 1.9767, "step": 12128 }, { "epoch": 0.5131144766900753, "grad_norm": 0.5433550477027893, "learning_rate": 0.001, "loss": 2.5798, "step": 12129 }, { "epoch": 0.5131567814535917, "grad_norm": 0.17710618674755096, "learning_rate": 0.001, "loss": 2.614, "step": 12130 }, { "epoch": 0.5131990862171081, "grad_norm": 0.23254965245723724, "learning_rate": 0.001, "loss": 2.8047, "step": 12131 }, { "epoch": 0.5132413909806244, "grad_norm": 0.1852521151304245, "learning_rate": 0.001, "loss": 2.0367, "step": 12132 }, { "epoch": 0.5132836957441408, "grad_norm": 0.19175705313682556, "learning_rate": 0.001, "loss": 2.0662, "step": 12133 }, { "epoch": 0.5133260005076572, "grad_norm": 0.18979766964912415, "learning_rate": 0.001, "loss": 2.3063, "step": 12134 }, { "epoch": 0.5133683052711735, "grad_norm": 0.1920190006494522, "learning_rate": 0.001, "loss": 2.7921, "step": 12135 }, { "epoch": 0.5134106100346899, "grad_norm": 0.5775536298751831, "learning_rate": 0.001, "loss": 2.0244, "step": 12136 }, { "epoch": 0.5134529147982063, "grad_norm": 0.1514301896095276, "learning_rate": 0.001, "loss": 1.7011, "step": 12137 }, { "epoch": 0.5134952195617226, "grad_norm": 0.15868137776851654, "learning_rate": 0.001, "loss": 2.9314, "step": 12138 }, { "epoch": 0.513537524325239, "grad_norm": 0.18454696238040924, "learning_rate": 0.001, "loss": 2.7521, "step": 12139 }, { "epoch": 0.5135798290887554, "grad_norm": 0.21990300714969635, "learning_rate": 0.001, "loss": 1.5834, "step": 12140 }, { "epoch": 0.5136221338522717, "grad_norm": 0.21516460180282593, "learning_rate": 0.001, "loss": 2.0828, "step": 12141 }, { "epoch": 0.5136644386157881, "grad_norm": 0.2636964023113251, "learning_rate": 0.001, "loss": 2.2502, "step": 12142 }, { "epoch": 0.5137067433793046, "grad_norm": 0.21557240188121796, "learning_rate": 0.001, "loss": 2.4408, "step": 12143 }, { "epoch": 0.5137490481428209, "grad_norm": 8.734152793884277, "learning_rate": 0.001, "loss": 2.9185, "step": 12144 }, { "epoch": 0.5137913529063373, "grad_norm": 0.32210806012153625, "learning_rate": 0.001, "loss": 2.2702, "step": 12145 }, { "epoch": 0.5138336576698537, "grad_norm": 0.3446987569332123, "learning_rate": 0.001, "loss": 2.9083, "step": 12146 }, { "epoch": 0.51387596243337, "grad_norm": 3.7084527015686035, "learning_rate": 0.001, "loss": 1.4133, "step": 12147 }, { "epoch": 0.5139182671968864, "grad_norm": 3.1599371433258057, "learning_rate": 0.001, "loss": 2.0267, "step": 12148 }, { "epoch": 0.5139605719604028, "grad_norm": 0.19752854108810425, "learning_rate": 0.001, "loss": 2.0721, "step": 12149 }, { "epoch": 0.5140028767239191, "grad_norm": 0.49823862314224243, "learning_rate": 0.001, "loss": 1.9638, "step": 12150 }, { "epoch": 0.5140451814874355, "grad_norm": 0.1748315989971161, "learning_rate": 0.001, "loss": 3.1363, "step": 12151 }, { "epoch": 0.5140874862509519, "grad_norm": 0.1747720092535019, "learning_rate": 0.001, "loss": 2.1459, "step": 12152 }, { "epoch": 0.5141297910144682, "grad_norm": 2.5184714794158936, "learning_rate": 0.001, "loss": 1.7912, "step": 12153 }, { "epoch": 0.5141720957779846, "grad_norm": 0.30171647667884827, "learning_rate": 0.001, "loss": 2.5741, "step": 12154 }, { "epoch": 0.514214400541501, "grad_norm": 0.19412563741207123, "learning_rate": 0.001, "loss": 1.8221, "step": 12155 }, { "epoch": 0.5142567053050173, "grad_norm": 0.44253069162368774, "learning_rate": 0.001, "loss": 2.7872, "step": 12156 }, { "epoch": 0.5142990100685337, "grad_norm": 0.16020557284355164, "learning_rate": 0.001, "loss": 1.8939, "step": 12157 }, { "epoch": 0.51434131483205, "grad_norm": 0.6392630934715271, "learning_rate": 0.001, "loss": 2.4378, "step": 12158 }, { "epoch": 0.5143836195955664, "grad_norm": 0.15356384217739105, "learning_rate": 0.001, "loss": 2.2201, "step": 12159 }, { "epoch": 0.5144259243590829, "grad_norm": 0.2277487814426422, "learning_rate": 0.001, "loss": 2.1108, "step": 12160 }, { "epoch": 0.5144682291225992, "grad_norm": 0.19493798911571503, "learning_rate": 0.001, "loss": 2.7649, "step": 12161 }, { "epoch": 0.5145105338861156, "grad_norm": 0.8036705851554871, "learning_rate": 0.001, "loss": 2.6234, "step": 12162 }, { "epoch": 0.514552838649632, "grad_norm": 0.23683510720729828, "learning_rate": 0.001, "loss": 3.3676, "step": 12163 }, { "epoch": 0.5145951434131483, "grad_norm": 0.20313666760921478, "learning_rate": 0.001, "loss": 2.2018, "step": 12164 }, { "epoch": 0.5146374481766647, "grad_norm": 0.16080090403556824, "learning_rate": 0.001, "loss": 1.4551, "step": 12165 }, { "epoch": 0.5146797529401811, "grad_norm": 0.19314774870872498, "learning_rate": 0.001, "loss": 2.5364, "step": 12166 }, { "epoch": 0.5147220577036974, "grad_norm": 0.2120058387517929, "learning_rate": 0.001, "loss": 2.1422, "step": 12167 }, { "epoch": 0.5147643624672138, "grad_norm": 0.15870290994644165, "learning_rate": 0.001, "loss": 1.7913, "step": 12168 }, { "epoch": 0.5148066672307302, "grad_norm": 1.8567591905593872, "learning_rate": 0.001, "loss": 1.8428, "step": 12169 }, { "epoch": 0.5148489719942465, "grad_norm": 0.25935545563697815, "learning_rate": 0.001, "loss": 2.1154, "step": 12170 }, { "epoch": 0.5148912767577629, "grad_norm": 0.19411538541316986, "learning_rate": 0.001, "loss": 1.6982, "step": 12171 }, { "epoch": 0.5149335815212793, "grad_norm": 0.1572023630142212, "learning_rate": 0.001, "loss": 1.9115, "step": 12172 }, { "epoch": 0.5149758862847956, "grad_norm": 1.9859943389892578, "learning_rate": 0.001, "loss": 2.9479, "step": 12173 }, { "epoch": 0.515018191048312, "grad_norm": 0.16157686710357666, "learning_rate": 0.001, "loss": 2.1507, "step": 12174 }, { "epoch": 0.5150604958118284, "grad_norm": 1.8809139728546143, "learning_rate": 0.001, "loss": 2.054, "step": 12175 }, { "epoch": 0.5151028005753447, "grad_norm": 0.17030459642410278, "learning_rate": 0.001, "loss": 1.8893, "step": 12176 }, { "epoch": 0.5151451053388612, "grad_norm": 0.16130401194095612, "learning_rate": 0.001, "loss": 2.5668, "step": 12177 }, { "epoch": 0.5151874101023776, "grad_norm": 0.19014528393745422, "learning_rate": 0.001, "loss": 2.4916, "step": 12178 }, { "epoch": 0.5152297148658939, "grad_norm": 0.4937807321548462, "learning_rate": 0.001, "loss": 2.3189, "step": 12179 }, { "epoch": 0.5152720196294103, "grad_norm": 0.24521322548389435, "learning_rate": 0.001, "loss": 1.4842, "step": 12180 }, { "epoch": 0.5153143243929267, "grad_norm": 0.17198513448238373, "learning_rate": 0.001, "loss": 2.2285, "step": 12181 }, { "epoch": 0.515356629156443, "grad_norm": 0.2905202805995941, "learning_rate": 0.001, "loss": 3.2654, "step": 12182 }, { "epoch": 0.5153989339199594, "grad_norm": 0.1737702488899231, "learning_rate": 0.001, "loss": 2.1419, "step": 12183 }, { "epoch": 0.5154412386834758, "grad_norm": 0.23815499246120453, "learning_rate": 0.001, "loss": 1.6129, "step": 12184 }, { "epoch": 0.5154835434469921, "grad_norm": 0.19331566989421844, "learning_rate": 0.001, "loss": 2.0372, "step": 12185 }, { "epoch": 0.5155258482105085, "grad_norm": 0.20304280519485474, "learning_rate": 0.001, "loss": 2.8925, "step": 12186 }, { "epoch": 0.5155681529740249, "grad_norm": 0.16830262541770935, "learning_rate": 0.001, "loss": 2.4063, "step": 12187 }, { "epoch": 0.5156104577375412, "grad_norm": 0.2067635953426361, "learning_rate": 0.001, "loss": 2.4644, "step": 12188 }, { "epoch": 0.5156527625010576, "grad_norm": 0.26112356781959534, "learning_rate": 0.001, "loss": 1.8161, "step": 12189 }, { "epoch": 0.515695067264574, "grad_norm": 0.317443311214447, "learning_rate": 0.001, "loss": 2.3216, "step": 12190 }, { "epoch": 0.5157373720280903, "grad_norm": 0.1734699159860611, "learning_rate": 0.001, "loss": 2.4626, "step": 12191 }, { "epoch": 0.5157796767916067, "grad_norm": 0.21294571459293365, "learning_rate": 0.001, "loss": 3.3039, "step": 12192 }, { "epoch": 0.5158219815551232, "grad_norm": 0.6978985071182251, "learning_rate": 0.001, "loss": 3.0031, "step": 12193 }, { "epoch": 0.5158642863186395, "grad_norm": 0.38809531927108765, "learning_rate": 0.001, "loss": 1.5207, "step": 12194 }, { "epoch": 0.5159065910821559, "grad_norm": 0.17903557419776917, "learning_rate": 0.001, "loss": 3.2565, "step": 12195 }, { "epoch": 0.5159488958456723, "grad_norm": 3.361924648284912, "learning_rate": 0.001, "loss": 2.2003, "step": 12196 }, { "epoch": 0.5159912006091886, "grad_norm": 0.16305619478225708, "learning_rate": 0.001, "loss": 1.7818, "step": 12197 }, { "epoch": 0.516033505372705, "grad_norm": 2.384248733520508, "learning_rate": 0.001, "loss": 2.5049, "step": 12198 }, { "epoch": 0.5160758101362214, "grad_norm": 0.1969674527645111, "learning_rate": 0.001, "loss": 2.1683, "step": 12199 }, { "epoch": 0.5161181148997377, "grad_norm": 0.42641380429267883, "learning_rate": 0.001, "loss": 1.9276, "step": 12200 }, { "epoch": 0.5161604196632541, "grad_norm": 0.18019993603229523, "learning_rate": 0.001, "loss": 2.4077, "step": 12201 }, { "epoch": 0.5162027244267705, "grad_norm": 0.2010054886341095, "learning_rate": 0.001, "loss": 2.147, "step": 12202 }, { "epoch": 0.5162450291902868, "grad_norm": 0.31541866064071655, "learning_rate": 0.001, "loss": 2.5211, "step": 12203 }, { "epoch": 0.5162873339538032, "grad_norm": 0.16396036744117737, "learning_rate": 0.001, "loss": 2.0067, "step": 12204 }, { "epoch": 0.5163296387173195, "grad_norm": 0.1641075164079666, "learning_rate": 0.001, "loss": 1.8241, "step": 12205 }, { "epoch": 0.5163719434808359, "grad_norm": 0.9481202960014343, "learning_rate": 0.001, "loss": 2.4869, "step": 12206 }, { "epoch": 0.5164142482443523, "grad_norm": 0.17808358371257782, "learning_rate": 0.001, "loss": 1.9912, "step": 12207 }, { "epoch": 0.5164565530078686, "grad_norm": 0.15392626821994781, "learning_rate": 0.001, "loss": 2.3439, "step": 12208 }, { "epoch": 0.516498857771385, "grad_norm": 0.1524217277765274, "learning_rate": 0.001, "loss": 1.5912, "step": 12209 }, { "epoch": 0.5165411625349015, "grad_norm": 0.21630914509296417, "learning_rate": 0.001, "loss": 1.8016, "step": 12210 }, { "epoch": 0.5165834672984178, "grad_norm": 0.1926201581954956, "learning_rate": 0.001, "loss": 2.1263, "step": 12211 }, { "epoch": 0.5166257720619342, "grad_norm": 0.4192337691783905, "learning_rate": 0.001, "loss": 2.6866, "step": 12212 }, { "epoch": 0.5166680768254506, "grad_norm": 2.8745205402374268, "learning_rate": 0.001, "loss": 1.7879, "step": 12213 }, { "epoch": 0.5167103815889669, "grad_norm": 0.4913157820701599, "learning_rate": 0.001, "loss": 1.9727, "step": 12214 }, { "epoch": 0.5167526863524833, "grad_norm": 0.2506055533885956, "learning_rate": 0.001, "loss": 2.5039, "step": 12215 }, { "epoch": 0.5167949911159997, "grad_norm": 0.19540032744407654, "learning_rate": 0.001, "loss": 1.7671, "step": 12216 }, { "epoch": 0.516837295879516, "grad_norm": 0.174033522605896, "learning_rate": 0.001, "loss": 2.5238, "step": 12217 }, { "epoch": 0.5168796006430324, "grad_norm": 0.1758839190006256, "learning_rate": 0.001, "loss": 1.882, "step": 12218 }, { "epoch": 0.5169219054065488, "grad_norm": 0.1755172461271286, "learning_rate": 0.001, "loss": 2.0126, "step": 12219 }, { "epoch": 0.5169642101700651, "grad_norm": 0.2143266797065735, "learning_rate": 0.001, "loss": 2.2329, "step": 12220 }, { "epoch": 0.5170065149335815, "grad_norm": 0.20903651416301727, "learning_rate": 0.001, "loss": 2.5899, "step": 12221 }, { "epoch": 0.5170488196970979, "grad_norm": 0.19313296675682068, "learning_rate": 0.001, "loss": 1.7994, "step": 12222 }, { "epoch": 0.5170911244606142, "grad_norm": 0.1702556163072586, "learning_rate": 0.001, "loss": 1.8909, "step": 12223 }, { "epoch": 0.5171334292241306, "grad_norm": 0.17211031913757324, "learning_rate": 0.001, "loss": 2.1869, "step": 12224 }, { "epoch": 0.517175733987647, "grad_norm": 0.18440312147140503, "learning_rate": 0.001, "loss": 2.1057, "step": 12225 }, { "epoch": 0.5172180387511633, "grad_norm": 0.18326327204704285, "learning_rate": 0.001, "loss": 2.9968, "step": 12226 }, { "epoch": 0.5172603435146798, "grad_norm": 0.3492085933685303, "learning_rate": 0.001, "loss": 1.8782, "step": 12227 }, { "epoch": 0.5173026482781962, "grad_norm": 0.20614147186279297, "learning_rate": 0.001, "loss": 1.6796, "step": 12228 }, { "epoch": 0.5173449530417125, "grad_norm": 0.18441130220890045, "learning_rate": 0.001, "loss": 1.968, "step": 12229 }, { "epoch": 0.5173872578052289, "grad_norm": 0.21896757185459137, "learning_rate": 0.001, "loss": 2.4805, "step": 12230 }, { "epoch": 0.5174295625687453, "grad_norm": 0.3186761736869812, "learning_rate": 0.001, "loss": 2.0246, "step": 12231 }, { "epoch": 0.5174718673322616, "grad_norm": 0.24206700921058655, "learning_rate": 0.001, "loss": 2.3907, "step": 12232 }, { "epoch": 0.517514172095778, "grad_norm": 0.19111651182174683, "learning_rate": 0.001, "loss": 1.9669, "step": 12233 }, { "epoch": 0.5175564768592944, "grad_norm": 0.5492585897445679, "learning_rate": 0.001, "loss": 1.8345, "step": 12234 }, { "epoch": 0.5175987816228107, "grad_norm": 0.4502498209476471, "learning_rate": 0.001, "loss": 2.326, "step": 12235 }, { "epoch": 0.5176410863863271, "grad_norm": 0.22445209324359894, "learning_rate": 0.001, "loss": 1.5903, "step": 12236 }, { "epoch": 0.5176833911498435, "grad_norm": 0.24608655273914337, "learning_rate": 0.001, "loss": 3.3088, "step": 12237 }, { "epoch": 0.5177256959133598, "grad_norm": 0.25341930985450745, "learning_rate": 0.001, "loss": 2.5506, "step": 12238 }, { "epoch": 0.5177680006768762, "grad_norm": 0.18000063300132751, "learning_rate": 0.001, "loss": 2.4819, "step": 12239 }, { "epoch": 0.5178103054403926, "grad_norm": 0.21232323348522186, "learning_rate": 0.001, "loss": 2.9278, "step": 12240 }, { "epoch": 0.5178526102039089, "grad_norm": 0.14666417241096497, "learning_rate": 0.001, "loss": 2.0134, "step": 12241 }, { "epoch": 0.5178949149674253, "grad_norm": 0.15804670751094818, "learning_rate": 0.001, "loss": 1.7012, "step": 12242 }, { "epoch": 0.5179372197309418, "grad_norm": 0.25225335359573364, "learning_rate": 0.001, "loss": 2.1588, "step": 12243 }, { "epoch": 0.517979524494458, "grad_norm": 0.18607112765312195, "learning_rate": 0.001, "loss": 1.8788, "step": 12244 }, { "epoch": 0.5180218292579745, "grad_norm": 0.20208168029785156, "learning_rate": 0.001, "loss": 1.9131, "step": 12245 }, { "epoch": 0.5180641340214909, "grad_norm": 1.9069957733154297, "learning_rate": 0.001, "loss": 1.4981, "step": 12246 }, { "epoch": 0.5181064387850072, "grad_norm": 1.0007692575454712, "learning_rate": 0.001, "loss": 1.9453, "step": 12247 }, { "epoch": 0.5181487435485236, "grad_norm": 0.1494317203760147, "learning_rate": 0.001, "loss": 1.5441, "step": 12248 }, { "epoch": 0.5181910483120399, "grad_norm": 0.19041891396045685, "learning_rate": 0.001, "loss": 1.7273, "step": 12249 }, { "epoch": 0.5182333530755563, "grad_norm": 0.4171985983848572, "learning_rate": 0.001, "loss": 1.4179, "step": 12250 }, { "epoch": 0.5182756578390727, "grad_norm": 0.19176238775253296, "learning_rate": 0.001, "loss": 2.2115, "step": 12251 }, { "epoch": 0.518317962602589, "grad_norm": 1.5357853174209595, "learning_rate": 0.001, "loss": 2.3413, "step": 12252 }, { "epoch": 0.5183602673661054, "grad_norm": 0.19013294577598572, "learning_rate": 0.001, "loss": 2.2362, "step": 12253 }, { "epoch": 0.5184025721296218, "grad_norm": 0.19564557075500488, "learning_rate": 0.001, "loss": 1.971, "step": 12254 }, { "epoch": 0.5184448768931381, "grad_norm": 0.21926617622375488, "learning_rate": 0.001, "loss": 2.6159, "step": 12255 }, { "epoch": 0.5184871816566545, "grad_norm": 0.17380297183990479, "learning_rate": 0.001, "loss": 2.8202, "step": 12256 }, { "epoch": 0.5185294864201709, "grad_norm": 0.5753722190856934, "learning_rate": 0.001, "loss": 3.7983, "step": 12257 }, { "epoch": 0.5185717911836872, "grad_norm": 0.25849559903144836, "learning_rate": 0.001, "loss": 2.8161, "step": 12258 }, { "epoch": 0.5186140959472036, "grad_norm": 0.18519815802574158, "learning_rate": 0.001, "loss": 2.1676, "step": 12259 }, { "epoch": 0.5186564007107201, "grad_norm": 0.19013738632202148, "learning_rate": 0.001, "loss": 1.5707, "step": 12260 }, { "epoch": 0.5186987054742364, "grad_norm": 0.17344141006469727, "learning_rate": 0.001, "loss": 2.1209, "step": 12261 }, { "epoch": 0.5187410102377528, "grad_norm": 1.2750663757324219, "learning_rate": 0.001, "loss": 1.9932, "step": 12262 }, { "epoch": 0.5187833150012692, "grad_norm": 0.16777728497982025, "learning_rate": 0.001, "loss": 1.5308, "step": 12263 }, { "epoch": 0.5188256197647855, "grad_norm": 0.1650819033384323, "learning_rate": 0.001, "loss": 2.7328, "step": 12264 }, { "epoch": 0.5188679245283019, "grad_norm": 0.14448492228984833, "learning_rate": 0.001, "loss": 2.4757, "step": 12265 }, { "epoch": 0.5189102292918183, "grad_norm": 0.18940666317939758, "learning_rate": 0.001, "loss": 1.5308, "step": 12266 }, { "epoch": 0.5189525340553346, "grad_norm": 0.19672076404094696, "learning_rate": 0.001, "loss": 1.9485, "step": 12267 }, { "epoch": 0.518994838818851, "grad_norm": 0.16662020981311798, "learning_rate": 0.001, "loss": 1.9421, "step": 12268 }, { "epoch": 0.5190371435823674, "grad_norm": 0.20337186753749847, "learning_rate": 0.001, "loss": 2.6949, "step": 12269 }, { "epoch": 0.5190794483458837, "grad_norm": 3.0193428993225098, "learning_rate": 0.001, "loss": 1.9591, "step": 12270 }, { "epoch": 0.5191217531094001, "grad_norm": 0.2497682422399521, "learning_rate": 0.001, "loss": 2.3117, "step": 12271 }, { "epoch": 0.5191640578729165, "grad_norm": 0.16508491337299347, "learning_rate": 0.001, "loss": 2.6591, "step": 12272 }, { "epoch": 0.5192063626364328, "grad_norm": 0.17598336935043335, "learning_rate": 0.001, "loss": 1.634, "step": 12273 }, { "epoch": 0.5192486673999492, "grad_norm": 0.22913537919521332, "learning_rate": 0.001, "loss": 1.7489, "step": 12274 }, { "epoch": 0.5192909721634656, "grad_norm": 0.19798220694065094, "learning_rate": 0.001, "loss": 1.8771, "step": 12275 }, { "epoch": 0.519333276926982, "grad_norm": 0.181406632065773, "learning_rate": 0.001, "loss": 1.9688, "step": 12276 }, { "epoch": 0.5193755816904984, "grad_norm": 0.19419100880622864, "learning_rate": 0.001, "loss": 3.1894, "step": 12277 }, { "epoch": 0.5194178864540148, "grad_norm": 0.17426498234272003, "learning_rate": 0.001, "loss": 2.2271, "step": 12278 }, { "epoch": 0.5194601912175311, "grad_norm": 0.2602224349975586, "learning_rate": 0.001, "loss": 1.8576, "step": 12279 }, { "epoch": 0.5195024959810475, "grad_norm": 0.16941271722316742, "learning_rate": 0.001, "loss": 2.652, "step": 12280 }, { "epoch": 0.5195448007445639, "grad_norm": 2.4823379516601562, "learning_rate": 0.001, "loss": 3.1498, "step": 12281 }, { "epoch": 0.5195871055080802, "grad_norm": 0.16280913352966309, "learning_rate": 0.001, "loss": 2.2317, "step": 12282 }, { "epoch": 0.5196294102715966, "grad_norm": 0.2174593210220337, "learning_rate": 0.001, "loss": 2.4013, "step": 12283 }, { "epoch": 0.519671715035113, "grad_norm": 0.2575055956840515, "learning_rate": 0.001, "loss": 3.4478, "step": 12284 }, { "epoch": 0.5197140197986293, "grad_norm": 0.18891672790050507, "learning_rate": 0.001, "loss": 3.3149, "step": 12285 }, { "epoch": 0.5197563245621457, "grad_norm": 0.17338892817497253, "learning_rate": 0.001, "loss": 1.7199, "step": 12286 }, { "epoch": 0.5197986293256621, "grad_norm": 0.4391571283340454, "learning_rate": 0.001, "loss": 1.8649, "step": 12287 }, { "epoch": 0.5198409340891784, "grad_norm": 0.17235347628593445, "learning_rate": 0.001, "loss": 2.0456, "step": 12288 }, { "epoch": 0.5198832388526948, "grad_norm": 0.2375982701778412, "learning_rate": 0.001, "loss": 3.4219, "step": 12289 }, { "epoch": 0.5199255436162112, "grad_norm": 0.201025128364563, "learning_rate": 0.001, "loss": 1.732, "step": 12290 }, { "epoch": 0.5199678483797275, "grad_norm": 0.3689859211444855, "learning_rate": 0.001, "loss": 3.6554, "step": 12291 }, { "epoch": 0.520010153143244, "grad_norm": 0.4115980267524719, "learning_rate": 0.001, "loss": 3.1115, "step": 12292 }, { "epoch": 0.5200524579067602, "grad_norm": 0.20673608779907227, "learning_rate": 0.001, "loss": 2.0847, "step": 12293 }, { "epoch": 0.5200947626702767, "grad_norm": 0.18936006724834442, "learning_rate": 0.001, "loss": 1.8575, "step": 12294 }, { "epoch": 0.5201370674337931, "grad_norm": 0.18387524783611298, "learning_rate": 0.001, "loss": 2.73, "step": 12295 }, { "epoch": 0.5201793721973094, "grad_norm": 0.1719491332769394, "learning_rate": 0.001, "loss": 2.1344, "step": 12296 }, { "epoch": 0.5202216769608258, "grad_norm": 0.18453410267829895, "learning_rate": 0.001, "loss": 1.9734, "step": 12297 }, { "epoch": 0.5202639817243422, "grad_norm": 0.1657096892595291, "learning_rate": 0.001, "loss": 2.1985, "step": 12298 }, { "epoch": 0.5203062864878585, "grad_norm": 0.16189830005168915, "learning_rate": 0.001, "loss": 1.5063, "step": 12299 }, { "epoch": 0.5203485912513749, "grad_norm": 1.7175111770629883, "learning_rate": 0.001, "loss": 2.564, "step": 12300 }, { "epoch": 0.5203908960148913, "grad_norm": 0.5387628674507141, "learning_rate": 0.001, "loss": 2.2738, "step": 12301 }, { "epoch": 0.5204332007784076, "grad_norm": 0.16023603081703186, "learning_rate": 0.001, "loss": 2.5196, "step": 12302 }, { "epoch": 0.520475505541924, "grad_norm": 0.21019423007965088, "learning_rate": 0.001, "loss": 1.763, "step": 12303 }, { "epoch": 0.5205178103054404, "grad_norm": 0.5185931921005249, "learning_rate": 0.001, "loss": 1.9254, "step": 12304 }, { "epoch": 0.5205601150689567, "grad_norm": 0.16839724779129028, "learning_rate": 0.001, "loss": 1.5031, "step": 12305 }, { "epoch": 0.5206024198324731, "grad_norm": 0.22643537819385529, "learning_rate": 0.001, "loss": 2.4948, "step": 12306 }, { "epoch": 0.5206447245959895, "grad_norm": 0.40977147221565247, "learning_rate": 0.001, "loss": 2.2601, "step": 12307 }, { "epoch": 0.5206870293595058, "grad_norm": 0.1929093897342682, "learning_rate": 0.001, "loss": 1.6832, "step": 12308 }, { "epoch": 0.5207293341230222, "grad_norm": 0.1705029159784317, "learning_rate": 0.001, "loss": 2.3835, "step": 12309 }, { "epoch": 0.5207716388865387, "grad_norm": 0.17721432447433472, "learning_rate": 0.001, "loss": 2.0197, "step": 12310 }, { "epoch": 0.520813943650055, "grad_norm": 0.17962828278541565, "learning_rate": 0.001, "loss": 2.0358, "step": 12311 }, { "epoch": 0.5208562484135714, "grad_norm": 0.17918899655342102, "learning_rate": 0.001, "loss": 2.6547, "step": 12312 }, { "epoch": 0.5208985531770878, "grad_norm": 0.18718057870864868, "learning_rate": 0.001, "loss": 2.2771, "step": 12313 }, { "epoch": 0.5209408579406041, "grad_norm": 0.21974588930606842, "learning_rate": 0.001, "loss": 1.9895, "step": 12314 }, { "epoch": 0.5209831627041205, "grad_norm": 0.2035195678472519, "learning_rate": 0.001, "loss": 2.0884, "step": 12315 }, { "epoch": 0.5210254674676369, "grad_norm": 0.8761468529701233, "learning_rate": 0.001, "loss": 3.3274, "step": 12316 }, { "epoch": 0.5210677722311532, "grad_norm": 0.20555326342582703, "learning_rate": 0.001, "loss": 2.0817, "step": 12317 }, { "epoch": 0.5211100769946696, "grad_norm": 0.18637920916080475, "learning_rate": 0.001, "loss": 2.6589, "step": 12318 }, { "epoch": 0.521152381758186, "grad_norm": 0.1818656623363495, "learning_rate": 0.001, "loss": 2.0427, "step": 12319 }, { "epoch": 0.5211946865217023, "grad_norm": 2.5001955032348633, "learning_rate": 0.001, "loss": 1.729, "step": 12320 }, { "epoch": 0.5212369912852187, "grad_norm": 0.3406553566455841, "learning_rate": 0.001, "loss": 1.5588, "step": 12321 }, { "epoch": 0.5212792960487351, "grad_norm": 0.2558114528656006, "learning_rate": 0.001, "loss": 2.0768, "step": 12322 }, { "epoch": 0.5213216008122514, "grad_norm": 7.105432033538818, "learning_rate": 0.001, "loss": 2.6181, "step": 12323 }, { "epoch": 0.5213639055757678, "grad_norm": 0.18078719079494476, "learning_rate": 0.001, "loss": 2.1346, "step": 12324 }, { "epoch": 0.5214062103392842, "grad_norm": 0.2121170312166214, "learning_rate": 0.001, "loss": 2.2811, "step": 12325 }, { "epoch": 0.5214485151028005, "grad_norm": 0.21996238827705383, "learning_rate": 0.001, "loss": 2.6754, "step": 12326 }, { "epoch": 0.521490819866317, "grad_norm": 0.1870933473110199, "learning_rate": 0.001, "loss": 2.6883, "step": 12327 }, { "epoch": 0.5215331246298334, "grad_norm": 0.2098461240530014, "learning_rate": 0.001, "loss": 2.1624, "step": 12328 }, { "epoch": 0.5215754293933497, "grad_norm": 0.1886197030544281, "learning_rate": 0.001, "loss": 1.961, "step": 12329 }, { "epoch": 0.5216177341568661, "grad_norm": 0.2189781665802002, "learning_rate": 0.001, "loss": 2.2462, "step": 12330 }, { "epoch": 0.5216600389203825, "grad_norm": 0.1763230264186859, "learning_rate": 0.001, "loss": 2.1573, "step": 12331 }, { "epoch": 0.5217023436838988, "grad_norm": 1.2748078107833862, "learning_rate": 0.001, "loss": 2.7384, "step": 12332 }, { "epoch": 0.5217446484474152, "grad_norm": 0.18424801528453827, "learning_rate": 0.001, "loss": 2.4212, "step": 12333 }, { "epoch": 0.5217869532109316, "grad_norm": 0.8379740715026855, "learning_rate": 0.001, "loss": 2.8512, "step": 12334 }, { "epoch": 0.5218292579744479, "grad_norm": 0.19673675298690796, "learning_rate": 0.001, "loss": 1.5325, "step": 12335 }, { "epoch": 0.5218715627379643, "grad_norm": 1.2396926879882812, "learning_rate": 0.001, "loss": 3.2144, "step": 12336 }, { "epoch": 0.5219138675014807, "grad_norm": 0.1718030571937561, "learning_rate": 0.001, "loss": 2.4473, "step": 12337 }, { "epoch": 0.521956172264997, "grad_norm": 0.1788032203912735, "learning_rate": 0.001, "loss": 3.1653, "step": 12338 }, { "epoch": 0.5219984770285134, "grad_norm": 0.17052021622657776, "learning_rate": 0.001, "loss": 3.2152, "step": 12339 }, { "epoch": 0.5220407817920297, "grad_norm": 2.6638343334198, "learning_rate": 0.001, "loss": 2.5477, "step": 12340 }, { "epoch": 0.5220830865555461, "grad_norm": 0.20238353312015533, "learning_rate": 0.001, "loss": 2.1136, "step": 12341 }, { "epoch": 0.5221253913190625, "grad_norm": 0.17800480127334595, "learning_rate": 0.001, "loss": 1.9329, "step": 12342 }, { "epoch": 0.5221676960825788, "grad_norm": 0.1630057841539383, "learning_rate": 0.001, "loss": 2.7349, "step": 12343 }, { "epoch": 0.5222100008460953, "grad_norm": 0.20257359743118286, "learning_rate": 0.001, "loss": 2.3163, "step": 12344 }, { "epoch": 0.5222523056096117, "grad_norm": 0.2002708911895752, "learning_rate": 0.001, "loss": 1.8127, "step": 12345 }, { "epoch": 0.522294610373128, "grad_norm": 0.6013284921646118, "learning_rate": 0.001, "loss": 1.9079, "step": 12346 }, { "epoch": 0.5223369151366444, "grad_norm": 0.1917499452829361, "learning_rate": 0.001, "loss": 2.3516, "step": 12347 }, { "epoch": 0.5223792199001608, "grad_norm": 0.27086690068244934, "learning_rate": 0.001, "loss": 1.9564, "step": 12348 }, { "epoch": 0.5224215246636771, "grad_norm": 0.22160665690898895, "learning_rate": 0.001, "loss": 2.3388, "step": 12349 }, { "epoch": 0.5224638294271935, "grad_norm": 0.17383019626140594, "learning_rate": 0.001, "loss": 2.104, "step": 12350 }, { "epoch": 0.5225061341907099, "grad_norm": 12.725953102111816, "learning_rate": 0.001, "loss": 1.8924, "step": 12351 }, { "epoch": 0.5225484389542262, "grad_norm": 0.20004230737686157, "learning_rate": 0.001, "loss": 1.8242, "step": 12352 }, { "epoch": 0.5225907437177426, "grad_norm": 0.20954293012619019, "learning_rate": 0.001, "loss": 2.2262, "step": 12353 }, { "epoch": 0.522633048481259, "grad_norm": 0.2141389548778534, "learning_rate": 0.001, "loss": 2.7329, "step": 12354 }, { "epoch": 0.5226753532447753, "grad_norm": 0.21340107917785645, "learning_rate": 0.001, "loss": 2.121, "step": 12355 }, { "epoch": 0.5227176580082917, "grad_norm": 0.17848850786685944, "learning_rate": 0.001, "loss": 1.7691, "step": 12356 }, { "epoch": 0.5227599627718081, "grad_norm": 0.21579593420028687, "learning_rate": 0.001, "loss": 4.1183, "step": 12357 }, { "epoch": 0.5228022675353244, "grad_norm": 0.3494706451892853, "learning_rate": 0.001, "loss": 2.0613, "step": 12358 }, { "epoch": 0.5228445722988408, "grad_norm": 0.1930229514837265, "learning_rate": 0.001, "loss": 1.9758, "step": 12359 }, { "epoch": 0.5228868770623573, "grad_norm": 0.18557684123516083, "learning_rate": 0.001, "loss": 1.7799, "step": 12360 }, { "epoch": 0.5229291818258736, "grad_norm": 0.6549309492111206, "learning_rate": 0.001, "loss": 2.3321, "step": 12361 }, { "epoch": 0.52297148658939, "grad_norm": 0.4236035645008087, "learning_rate": 0.001, "loss": 1.793, "step": 12362 }, { "epoch": 0.5230137913529064, "grad_norm": 0.21107986569404602, "learning_rate": 0.001, "loss": 2.1026, "step": 12363 }, { "epoch": 0.5230560961164227, "grad_norm": 0.2254447042942047, "learning_rate": 0.001, "loss": 3.0301, "step": 12364 }, { "epoch": 0.5230984008799391, "grad_norm": 0.19465897977352142, "learning_rate": 0.001, "loss": 2.5741, "step": 12365 }, { "epoch": 0.5231407056434555, "grad_norm": 0.2160634696483612, "learning_rate": 0.001, "loss": 2.5059, "step": 12366 }, { "epoch": 0.5231830104069718, "grad_norm": 0.20978963375091553, "learning_rate": 0.001, "loss": 2.9964, "step": 12367 }, { "epoch": 0.5232253151704882, "grad_norm": 0.22652214765548706, "learning_rate": 0.001, "loss": 2.0517, "step": 12368 }, { "epoch": 0.5232676199340046, "grad_norm": 0.20549549162387848, "learning_rate": 0.001, "loss": 2.668, "step": 12369 }, { "epoch": 0.5233099246975209, "grad_norm": 0.16967639327049255, "learning_rate": 0.001, "loss": 2.1324, "step": 12370 }, { "epoch": 0.5233522294610373, "grad_norm": 0.21468955278396606, "learning_rate": 0.001, "loss": 2.0383, "step": 12371 }, { "epoch": 0.5233945342245537, "grad_norm": 0.1573602259159088, "learning_rate": 0.001, "loss": 1.9794, "step": 12372 }, { "epoch": 0.52343683898807, "grad_norm": 0.8761491775512695, "learning_rate": 0.001, "loss": 2.5732, "step": 12373 }, { "epoch": 0.5234791437515864, "grad_norm": 0.3122228980064392, "learning_rate": 0.001, "loss": 3.2694, "step": 12374 }, { "epoch": 0.5235214485151028, "grad_norm": 0.19162549078464508, "learning_rate": 0.001, "loss": 2.2557, "step": 12375 }, { "epoch": 0.5235637532786191, "grad_norm": 0.22822991013526917, "learning_rate": 0.001, "loss": 2.1365, "step": 12376 }, { "epoch": 0.5236060580421356, "grad_norm": 0.16961225867271423, "learning_rate": 0.001, "loss": 1.7234, "step": 12377 }, { "epoch": 0.523648362805652, "grad_norm": 0.1936628669500351, "learning_rate": 0.001, "loss": 1.4112, "step": 12378 }, { "epoch": 0.5236906675691683, "grad_norm": 0.1944187730550766, "learning_rate": 0.001, "loss": 1.7775, "step": 12379 }, { "epoch": 0.5237329723326847, "grad_norm": 0.2097603678703308, "learning_rate": 0.001, "loss": 2.2723, "step": 12380 }, { "epoch": 0.5237752770962011, "grad_norm": 0.4582602381706238, "learning_rate": 0.001, "loss": 2.1067, "step": 12381 }, { "epoch": 0.5238175818597174, "grad_norm": 0.15353432297706604, "learning_rate": 0.001, "loss": 2.1572, "step": 12382 }, { "epoch": 0.5238598866232338, "grad_norm": 0.2179485708475113, "learning_rate": 0.001, "loss": 2.1966, "step": 12383 }, { "epoch": 0.5239021913867501, "grad_norm": 0.2067425549030304, "learning_rate": 0.001, "loss": 1.6399, "step": 12384 }, { "epoch": 0.5239444961502665, "grad_norm": 0.19505983591079712, "learning_rate": 0.001, "loss": 2.661, "step": 12385 }, { "epoch": 0.5239868009137829, "grad_norm": 0.17970894277095795, "learning_rate": 0.001, "loss": 1.9452, "step": 12386 }, { "epoch": 0.5240291056772992, "grad_norm": 1.098179817199707, "learning_rate": 0.001, "loss": 2.2812, "step": 12387 }, { "epoch": 0.5240714104408156, "grad_norm": 6.329110622406006, "learning_rate": 0.001, "loss": 3.141, "step": 12388 }, { "epoch": 0.524113715204332, "grad_norm": 0.18318776786327362, "learning_rate": 0.001, "loss": 2.4143, "step": 12389 }, { "epoch": 0.5241560199678483, "grad_norm": 0.22181005775928497, "learning_rate": 0.001, "loss": 2.1807, "step": 12390 }, { "epoch": 0.5241983247313647, "grad_norm": 0.17904549837112427, "learning_rate": 0.001, "loss": 1.9226, "step": 12391 }, { "epoch": 0.5242406294948811, "grad_norm": 0.21022500097751617, "learning_rate": 0.001, "loss": 2.4073, "step": 12392 }, { "epoch": 0.5242829342583974, "grad_norm": 0.17983204126358032, "learning_rate": 0.001, "loss": 2.1061, "step": 12393 }, { "epoch": 0.5243252390219139, "grad_norm": 0.29281267523765564, "learning_rate": 0.001, "loss": 3.1756, "step": 12394 }, { "epoch": 0.5243675437854303, "grad_norm": 1.9023278951644897, "learning_rate": 0.001, "loss": 2.1109, "step": 12395 }, { "epoch": 0.5244098485489466, "grad_norm": 0.2698245942592621, "learning_rate": 0.001, "loss": 1.7295, "step": 12396 }, { "epoch": 0.524452153312463, "grad_norm": 0.22318899631500244, "learning_rate": 0.001, "loss": 2.1475, "step": 12397 }, { "epoch": 0.5244944580759794, "grad_norm": 0.1895742565393448, "learning_rate": 0.001, "loss": 2.3827, "step": 12398 }, { "epoch": 0.5245367628394957, "grad_norm": 0.22232870757579803, "learning_rate": 0.001, "loss": 2.3418, "step": 12399 }, { "epoch": 0.5245790676030121, "grad_norm": 0.2738935947418213, "learning_rate": 0.001, "loss": 4.0447, "step": 12400 }, { "epoch": 0.5246213723665285, "grad_norm": 0.18201978504657745, "learning_rate": 0.001, "loss": 2.4426, "step": 12401 }, { "epoch": 0.5246636771300448, "grad_norm": 0.26819124817848206, "learning_rate": 0.001, "loss": 2.2201, "step": 12402 }, { "epoch": 0.5247059818935612, "grad_norm": 0.40238019824028015, "learning_rate": 0.001, "loss": 1.9465, "step": 12403 }, { "epoch": 0.5247482866570776, "grad_norm": 0.18625056743621826, "learning_rate": 0.001, "loss": 2.6789, "step": 12404 }, { "epoch": 0.5247905914205939, "grad_norm": 0.19995659589767456, "learning_rate": 0.001, "loss": 1.4283, "step": 12405 }, { "epoch": 0.5248328961841103, "grad_norm": 0.19528824090957642, "learning_rate": 0.001, "loss": 1.6882, "step": 12406 }, { "epoch": 0.5248752009476267, "grad_norm": 0.17695745825767517, "learning_rate": 0.001, "loss": 1.8934, "step": 12407 }, { "epoch": 0.524917505711143, "grad_norm": 0.1534528285264969, "learning_rate": 0.001, "loss": 1.7225, "step": 12408 }, { "epoch": 0.5249598104746594, "grad_norm": 0.21268439292907715, "learning_rate": 0.001, "loss": 2.0355, "step": 12409 }, { "epoch": 0.5250021152381759, "grad_norm": 0.18775634467601776, "learning_rate": 0.001, "loss": 2.0029, "step": 12410 }, { "epoch": 0.5250444200016922, "grad_norm": 1.877071738243103, "learning_rate": 0.001, "loss": 2.0333, "step": 12411 }, { "epoch": 0.5250867247652086, "grad_norm": 0.19529949128627777, "learning_rate": 0.001, "loss": 2.5546, "step": 12412 }, { "epoch": 0.525129029528725, "grad_norm": 0.16856685280799866, "learning_rate": 0.001, "loss": 2.258, "step": 12413 }, { "epoch": 0.5251713342922413, "grad_norm": 0.16465047001838684, "learning_rate": 0.001, "loss": 2.2241, "step": 12414 }, { "epoch": 0.5252136390557577, "grad_norm": 0.16923588514328003, "learning_rate": 0.001, "loss": 1.7148, "step": 12415 }, { "epoch": 0.5252559438192741, "grad_norm": 0.15065397322177887, "learning_rate": 0.001, "loss": 1.7362, "step": 12416 }, { "epoch": 0.5252982485827904, "grad_norm": 0.18638429045677185, "learning_rate": 0.001, "loss": 2.3777, "step": 12417 }, { "epoch": 0.5253405533463068, "grad_norm": 0.16178961098194122, "learning_rate": 0.001, "loss": 2.0762, "step": 12418 }, { "epoch": 0.5253828581098232, "grad_norm": 0.1650550216436386, "learning_rate": 0.001, "loss": 2.1552, "step": 12419 }, { "epoch": 0.5254251628733395, "grad_norm": 1.2916520833969116, "learning_rate": 0.001, "loss": 3.1018, "step": 12420 }, { "epoch": 0.5254674676368559, "grad_norm": 0.17819340527057648, "learning_rate": 0.001, "loss": 2.2599, "step": 12421 }, { "epoch": 0.5255097724003723, "grad_norm": 0.18574517965316772, "learning_rate": 0.001, "loss": 2.1839, "step": 12422 }, { "epoch": 0.5255520771638886, "grad_norm": 0.15550824999809265, "learning_rate": 0.001, "loss": 1.4498, "step": 12423 }, { "epoch": 0.525594381927405, "grad_norm": 0.2876248061656952, "learning_rate": 0.001, "loss": 2.4418, "step": 12424 }, { "epoch": 0.5256366866909215, "grad_norm": 0.5879155397415161, "learning_rate": 0.001, "loss": 2.641, "step": 12425 }, { "epoch": 0.5256789914544377, "grad_norm": 0.29421496391296387, "learning_rate": 0.001, "loss": 3.3343, "step": 12426 }, { "epoch": 0.5257212962179542, "grad_norm": 0.17267128825187683, "learning_rate": 0.001, "loss": 1.9214, "step": 12427 }, { "epoch": 0.5257636009814705, "grad_norm": 0.24222494661808014, "learning_rate": 0.001, "loss": 2.4036, "step": 12428 }, { "epoch": 0.5258059057449869, "grad_norm": 0.171603262424469, "learning_rate": 0.001, "loss": 2.1059, "step": 12429 }, { "epoch": 0.5258482105085033, "grad_norm": 0.2517464756965637, "learning_rate": 0.001, "loss": 2.3314, "step": 12430 }, { "epoch": 0.5258905152720196, "grad_norm": 0.19529391825199127, "learning_rate": 0.001, "loss": 2.3075, "step": 12431 }, { "epoch": 0.525932820035536, "grad_norm": 3.9220962524414062, "learning_rate": 0.001, "loss": 2.0059, "step": 12432 }, { "epoch": 0.5259751247990524, "grad_norm": 0.2157333493232727, "learning_rate": 0.001, "loss": 2.0792, "step": 12433 }, { "epoch": 0.5260174295625687, "grad_norm": 0.7744253277778625, "learning_rate": 0.001, "loss": 1.6057, "step": 12434 }, { "epoch": 0.5260597343260851, "grad_norm": 0.18592911958694458, "learning_rate": 0.001, "loss": 1.5647, "step": 12435 }, { "epoch": 0.5261020390896015, "grad_norm": 0.15915216505527496, "learning_rate": 0.001, "loss": 1.7973, "step": 12436 }, { "epoch": 0.5261443438531178, "grad_norm": 26.635772705078125, "learning_rate": 0.001, "loss": 2.6575, "step": 12437 }, { "epoch": 0.5261866486166342, "grad_norm": 0.17832812666893005, "learning_rate": 0.001, "loss": 1.701, "step": 12438 }, { "epoch": 0.5262289533801506, "grad_norm": 0.3404991328716278, "learning_rate": 0.001, "loss": 1.7622, "step": 12439 }, { "epoch": 0.5262712581436669, "grad_norm": 0.17686522006988525, "learning_rate": 0.001, "loss": 1.5594, "step": 12440 }, { "epoch": 0.5263135629071833, "grad_norm": 0.17130149900913239, "learning_rate": 0.001, "loss": 2.038, "step": 12441 }, { "epoch": 0.5263558676706998, "grad_norm": 0.7331603169441223, "learning_rate": 0.001, "loss": 1.3103, "step": 12442 }, { "epoch": 0.526398172434216, "grad_norm": 0.1968250870704651, "learning_rate": 0.001, "loss": 1.8041, "step": 12443 }, { "epoch": 0.5264404771977325, "grad_norm": 0.2132941633462906, "learning_rate": 0.001, "loss": 1.9928, "step": 12444 }, { "epoch": 0.5264827819612489, "grad_norm": 0.36532631516456604, "learning_rate": 0.001, "loss": 2.7363, "step": 12445 }, { "epoch": 0.5265250867247652, "grad_norm": 0.2068646103143692, "learning_rate": 0.001, "loss": 1.6601, "step": 12446 }, { "epoch": 0.5265673914882816, "grad_norm": 0.20064789056777954, "learning_rate": 0.001, "loss": 2.081, "step": 12447 }, { "epoch": 0.526609696251798, "grad_norm": 0.23239631950855255, "learning_rate": 0.001, "loss": 1.8456, "step": 12448 }, { "epoch": 0.5266520010153143, "grad_norm": 0.186894029378891, "learning_rate": 0.001, "loss": 2.5725, "step": 12449 }, { "epoch": 0.5266943057788307, "grad_norm": 0.16730260848999023, "learning_rate": 0.001, "loss": 1.9904, "step": 12450 }, { "epoch": 0.5267366105423471, "grad_norm": 0.2060299515724182, "learning_rate": 0.001, "loss": 2.014, "step": 12451 }, { "epoch": 0.5267789153058634, "grad_norm": 0.5314529538154602, "learning_rate": 0.001, "loss": 2.1433, "step": 12452 }, { "epoch": 0.5268212200693798, "grad_norm": 0.18238548934459686, "learning_rate": 0.001, "loss": 1.9902, "step": 12453 }, { "epoch": 0.5268635248328962, "grad_norm": 0.235755056142807, "learning_rate": 0.001, "loss": 1.8839, "step": 12454 }, { "epoch": 0.5269058295964125, "grad_norm": 0.19867976009845734, "learning_rate": 0.001, "loss": 3.061, "step": 12455 }, { "epoch": 0.5269481343599289, "grad_norm": 0.1549888551235199, "learning_rate": 0.001, "loss": 2.0597, "step": 12456 }, { "epoch": 0.5269904391234453, "grad_norm": 0.1728786826133728, "learning_rate": 0.001, "loss": 2.26, "step": 12457 }, { "epoch": 0.5270327438869616, "grad_norm": 0.20742489397525787, "learning_rate": 0.001, "loss": 2.246, "step": 12458 }, { "epoch": 0.527075048650478, "grad_norm": 0.15079474449157715, "learning_rate": 0.001, "loss": 1.3225, "step": 12459 }, { "epoch": 0.5271173534139945, "grad_norm": 0.18330729007720947, "learning_rate": 0.001, "loss": 1.6184, "step": 12460 }, { "epoch": 0.5271596581775108, "grad_norm": 0.17771340906620026, "learning_rate": 0.001, "loss": 2.4973, "step": 12461 }, { "epoch": 0.5272019629410272, "grad_norm": 6.8827996253967285, "learning_rate": 0.001, "loss": 2.6216, "step": 12462 }, { "epoch": 0.5272442677045436, "grad_norm": 0.1736050844192505, "learning_rate": 0.001, "loss": 1.9857, "step": 12463 }, { "epoch": 0.5272865724680599, "grad_norm": 0.17495718598365784, "learning_rate": 0.001, "loss": 1.9657, "step": 12464 }, { "epoch": 0.5273288772315763, "grad_norm": 0.16420039534568787, "learning_rate": 0.001, "loss": 1.7354, "step": 12465 }, { "epoch": 0.5273711819950927, "grad_norm": 0.4817570447921753, "learning_rate": 0.001, "loss": 1.7285, "step": 12466 }, { "epoch": 0.527413486758609, "grad_norm": 0.17338165640830994, "learning_rate": 0.001, "loss": 2.617, "step": 12467 }, { "epoch": 0.5274557915221254, "grad_norm": 0.19620858132839203, "learning_rate": 0.001, "loss": 3.0908, "step": 12468 }, { "epoch": 0.5274980962856418, "grad_norm": 0.2547888457775116, "learning_rate": 0.001, "loss": 2.1735, "step": 12469 }, { "epoch": 0.5275404010491581, "grad_norm": 0.20097443461418152, "learning_rate": 0.001, "loss": 2.0749, "step": 12470 }, { "epoch": 0.5275827058126745, "grad_norm": 0.16553282737731934, "learning_rate": 0.001, "loss": 2.3331, "step": 12471 }, { "epoch": 0.5276250105761909, "grad_norm": 0.3828006982803345, "learning_rate": 0.001, "loss": 1.2995, "step": 12472 }, { "epoch": 0.5276673153397072, "grad_norm": 0.1714460253715515, "learning_rate": 0.001, "loss": 1.5497, "step": 12473 }, { "epoch": 0.5277096201032236, "grad_norm": 0.38405609130859375, "learning_rate": 0.001, "loss": 2.9096, "step": 12474 }, { "epoch": 0.5277519248667399, "grad_norm": 0.19936303794384003, "learning_rate": 0.001, "loss": 1.9383, "step": 12475 }, { "epoch": 0.5277942296302564, "grad_norm": 0.20733779668807983, "learning_rate": 0.001, "loss": 2.1244, "step": 12476 }, { "epoch": 0.5278365343937728, "grad_norm": 0.19483071565628052, "learning_rate": 0.001, "loss": 2.6053, "step": 12477 }, { "epoch": 0.5278788391572891, "grad_norm": 0.1924906224012375, "learning_rate": 0.001, "loss": 2.3413, "step": 12478 }, { "epoch": 0.5279211439208055, "grad_norm": 0.15672799944877625, "learning_rate": 0.001, "loss": 2.4487, "step": 12479 }, { "epoch": 0.5279634486843219, "grad_norm": 4.736910820007324, "learning_rate": 0.001, "loss": 1.852, "step": 12480 }, { "epoch": 0.5280057534478382, "grad_norm": 0.17773562669754028, "learning_rate": 0.001, "loss": 2.2705, "step": 12481 }, { "epoch": 0.5280480582113546, "grad_norm": 0.17934168875217438, "learning_rate": 0.001, "loss": 2.0574, "step": 12482 }, { "epoch": 0.528090362974871, "grad_norm": 0.5093991160392761, "learning_rate": 0.001, "loss": 3.1038, "step": 12483 }, { "epoch": 0.5281326677383873, "grad_norm": 0.18139603734016418, "learning_rate": 0.001, "loss": 1.9695, "step": 12484 }, { "epoch": 0.5281749725019037, "grad_norm": 0.17985884845256805, "learning_rate": 0.001, "loss": 2.5194, "step": 12485 }, { "epoch": 0.5282172772654201, "grad_norm": 0.3463243842124939, "learning_rate": 0.001, "loss": 2.4281, "step": 12486 }, { "epoch": 0.5282595820289364, "grad_norm": 0.21633853018283844, "learning_rate": 0.001, "loss": 3.2592, "step": 12487 }, { "epoch": 0.5283018867924528, "grad_norm": 0.16759130358695984, "learning_rate": 0.001, "loss": 2.6158, "step": 12488 }, { "epoch": 0.5283441915559692, "grad_norm": 0.20517773926258087, "learning_rate": 0.001, "loss": 1.9567, "step": 12489 }, { "epoch": 0.5283864963194855, "grad_norm": 1.2722563743591309, "learning_rate": 0.001, "loss": 2.1083, "step": 12490 }, { "epoch": 0.5284288010830019, "grad_norm": 0.21564523875713348, "learning_rate": 0.001, "loss": 2.731, "step": 12491 }, { "epoch": 0.5284711058465184, "grad_norm": 0.18117859959602356, "learning_rate": 0.001, "loss": 1.7917, "step": 12492 }, { "epoch": 0.5285134106100347, "grad_norm": 0.18480683863162994, "learning_rate": 0.001, "loss": 1.7639, "step": 12493 }, { "epoch": 0.5285557153735511, "grad_norm": 0.18521404266357422, "learning_rate": 0.001, "loss": 1.7627, "step": 12494 }, { "epoch": 0.5285980201370675, "grad_norm": 0.15031592547893524, "learning_rate": 0.001, "loss": 1.4082, "step": 12495 }, { "epoch": 0.5286403249005838, "grad_norm": 0.2322620302438736, "learning_rate": 0.001, "loss": 2.1222, "step": 12496 }, { "epoch": 0.5286826296641002, "grad_norm": 0.21710190176963806, "learning_rate": 0.001, "loss": 2.4748, "step": 12497 }, { "epoch": 0.5287249344276166, "grad_norm": 2.9545514583587646, "learning_rate": 0.001, "loss": 2.2093, "step": 12498 }, { "epoch": 0.5287672391911329, "grad_norm": 0.20980048179626465, "learning_rate": 0.001, "loss": 2.3927, "step": 12499 }, { "epoch": 0.5288095439546493, "grad_norm": 0.2512090802192688, "learning_rate": 0.001, "loss": 1.5181, "step": 12500 }, { "epoch": 0.5288518487181657, "grad_norm": 0.19579851627349854, "learning_rate": 0.001, "loss": 2.7172, "step": 12501 }, { "epoch": 0.528894153481682, "grad_norm": 0.16972453892230988, "learning_rate": 0.001, "loss": 2.4574, "step": 12502 }, { "epoch": 0.5289364582451984, "grad_norm": 2.3002378940582275, "learning_rate": 0.001, "loss": 2.8155, "step": 12503 }, { "epoch": 0.5289787630087148, "grad_norm": 0.15065890550613403, "learning_rate": 0.001, "loss": 1.7237, "step": 12504 }, { "epoch": 0.5290210677722311, "grad_norm": 0.1834668517112732, "learning_rate": 0.001, "loss": 2.3786, "step": 12505 }, { "epoch": 0.5290633725357475, "grad_norm": 0.37765857577323914, "learning_rate": 0.001, "loss": 2.7428, "step": 12506 }, { "epoch": 0.5291056772992639, "grad_norm": 0.27641263604164124, "learning_rate": 0.001, "loss": 2.5696, "step": 12507 }, { "epoch": 0.5291479820627802, "grad_norm": 0.2046346813440323, "learning_rate": 0.001, "loss": 2.0424, "step": 12508 }, { "epoch": 0.5291902868262967, "grad_norm": 2.7164547443389893, "learning_rate": 0.001, "loss": 3.0047, "step": 12509 }, { "epoch": 0.5292325915898131, "grad_norm": 0.24318911135196686, "learning_rate": 0.001, "loss": 2.8722, "step": 12510 }, { "epoch": 0.5292748963533294, "grad_norm": 0.20161695778369904, "learning_rate": 0.001, "loss": 3.1377, "step": 12511 }, { "epoch": 0.5293172011168458, "grad_norm": 0.18776634335517883, "learning_rate": 0.001, "loss": 3.193, "step": 12512 }, { "epoch": 0.5293595058803622, "grad_norm": 0.17002810537815094, "learning_rate": 0.001, "loss": 2.015, "step": 12513 }, { "epoch": 0.5294018106438785, "grad_norm": 0.16018223762512207, "learning_rate": 0.001, "loss": 2.0451, "step": 12514 }, { "epoch": 0.5294441154073949, "grad_norm": 0.2891288101673126, "learning_rate": 0.001, "loss": 1.7617, "step": 12515 }, { "epoch": 0.5294864201709113, "grad_norm": 0.19036170840263367, "learning_rate": 0.001, "loss": 2.1966, "step": 12516 }, { "epoch": 0.5295287249344276, "grad_norm": 0.16855932772159576, "learning_rate": 0.001, "loss": 3.1857, "step": 12517 }, { "epoch": 0.529571029697944, "grad_norm": 0.1747637689113617, "learning_rate": 0.001, "loss": 1.8404, "step": 12518 }, { "epoch": 0.5296133344614603, "grad_norm": 0.572921633720398, "learning_rate": 0.001, "loss": 2.1041, "step": 12519 }, { "epoch": 0.5296556392249767, "grad_norm": 0.17884446680545807, "learning_rate": 0.001, "loss": 2.4167, "step": 12520 }, { "epoch": 0.5296979439884931, "grad_norm": 0.15950831770896912, "learning_rate": 0.001, "loss": 1.9228, "step": 12521 }, { "epoch": 0.5297402487520094, "grad_norm": 0.18645338714122772, "learning_rate": 0.001, "loss": 1.7911, "step": 12522 }, { "epoch": 0.5297825535155258, "grad_norm": 0.1739381104707718, "learning_rate": 0.001, "loss": 2.4198, "step": 12523 }, { "epoch": 0.5298248582790422, "grad_norm": 0.2655837833881378, "learning_rate": 0.001, "loss": 3.0543, "step": 12524 }, { "epoch": 0.5298671630425585, "grad_norm": 0.16320541501045227, "learning_rate": 0.001, "loss": 1.8807, "step": 12525 }, { "epoch": 0.529909467806075, "grad_norm": 0.6725846529006958, "learning_rate": 0.001, "loss": 2.298, "step": 12526 }, { "epoch": 0.5299517725695914, "grad_norm": 0.18599937856197357, "learning_rate": 0.001, "loss": 3.1388, "step": 12527 }, { "epoch": 0.5299940773331077, "grad_norm": 0.17061804234981537, "learning_rate": 0.001, "loss": 1.4874, "step": 12528 }, { "epoch": 0.5300363820966241, "grad_norm": 0.15029898285865784, "learning_rate": 0.001, "loss": 1.9614, "step": 12529 }, { "epoch": 0.5300786868601405, "grad_norm": 0.16090351343154907, "learning_rate": 0.001, "loss": 2.6116, "step": 12530 }, { "epoch": 0.5301209916236568, "grad_norm": 0.19355669617652893, "learning_rate": 0.001, "loss": 1.8321, "step": 12531 }, { "epoch": 0.5301632963871732, "grad_norm": 0.15436525642871857, "learning_rate": 0.001, "loss": 2.2976, "step": 12532 }, { "epoch": 0.5302056011506896, "grad_norm": 0.20881301164627075, "learning_rate": 0.001, "loss": 3.927, "step": 12533 }, { "epoch": 0.5302479059142059, "grad_norm": 1.6116613149642944, "learning_rate": 0.001, "loss": 2.1104, "step": 12534 }, { "epoch": 0.5302902106777223, "grad_norm": 0.4252428710460663, "learning_rate": 0.001, "loss": 2.0893, "step": 12535 }, { "epoch": 0.5303325154412387, "grad_norm": 3.8983938694000244, "learning_rate": 0.001, "loss": 2.5195, "step": 12536 }, { "epoch": 0.530374820204755, "grad_norm": 0.23319566249847412, "learning_rate": 0.001, "loss": 2.5801, "step": 12537 }, { "epoch": 0.5304171249682714, "grad_norm": 2.187912940979004, "learning_rate": 0.001, "loss": 1.8214, "step": 12538 }, { "epoch": 0.5304594297317878, "grad_norm": 0.18814197182655334, "learning_rate": 0.001, "loss": 2.2121, "step": 12539 }, { "epoch": 0.5305017344953041, "grad_norm": 0.22966532409191132, "learning_rate": 0.001, "loss": 1.7224, "step": 12540 }, { "epoch": 0.5305440392588205, "grad_norm": 0.9131950736045837, "learning_rate": 0.001, "loss": 1.7284, "step": 12541 }, { "epoch": 0.530586344022337, "grad_norm": 0.2272689938545227, "learning_rate": 0.001, "loss": 2.2831, "step": 12542 }, { "epoch": 0.5306286487858533, "grad_norm": 0.23645274341106415, "learning_rate": 0.001, "loss": 1.8332, "step": 12543 }, { "epoch": 0.5306709535493697, "grad_norm": 0.2579519748687744, "learning_rate": 0.001, "loss": 2.0468, "step": 12544 }, { "epoch": 0.5307132583128861, "grad_norm": 0.2276112288236618, "learning_rate": 0.001, "loss": 2.3614, "step": 12545 }, { "epoch": 0.5307555630764024, "grad_norm": 0.23741212487220764, "learning_rate": 0.001, "loss": 2.4542, "step": 12546 }, { "epoch": 0.5307978678399188, "grad_norm": 0.2526029050350189, "learning_rate": 0.001, "loss": 2.5269, "step": 12547 }, { "epoch": 0.5308401726034352, "grad_norm": 0.20093029737472534, "learning_rate": 0.001, "loss": 1.6386, "step": 12548 }, { "epoch": 0.5308824773669515, "grad_norm": 0.20511755347251892, "learning_rate": 0.001, "loss": 2.6388, "step": 12549 }, { "epoch": 0.5309247821304679, "grad_norm": 0.2155929058790207, "learning_rate": 0.001, "loss": 2.2045, "step": 12550 }, { "epoch": 0.5309670868939843, "grad_norm": 0.20179694890975952, "learning_rate": 0.001, "loss": 2.0887, "step": 12551 }, { "epoch": 0.5310093916575006, "grad_norm": 0.7059963941574097, "learning_rate": 0.001, "loss": 1.7561, "step": 12552 }, { "epoch": 0.531051696421017, "grad_norm": 0.2252509593963623, "learning_rate": 0.001, "loss": 3.4456, "step": 12553 }, { "epoch": 0.5310940011845334, "grad_norm": 0.15286624431610107, "learning_rate": 0.001, "loss": 2.5038, "step": 12554 }, { "epoch": 0.5311363059480497, "grad_norm": 0.17845967411994934, "learning_rate": 0.001, "loss": 1.9147, "step": 12555 }, { "epoch": 0.5311786107115661, "grad_norm": 0.2328542023897171, "learning_rate": 0.001, "loss": 2.0514, "step": 12556 }, { "epoch": 0.5312209154750825, "grad_norm": 0.661899745464325, "learning_rate": 0.001, "loss": 2.1049, "step": 12557 }, { "epoch": 0.5312632202385988, "grad_norm": 0.5505738258361816, "learning_rate": 0.001, "loss": 2.063, "step": 12558 }, { "epoch": 0.5313055250021153, "grad_norm": 0.14693965017795563, "learning_rate": 0.001, "loss": 2.2266, "step": 12559 }, { "epoch": 0.5313478297656317, "grad_norm": 0.14494706690311432, "learning_rate": 0.001, "loss": 2.0662, "step": 12560 }, { "epoch": 0.531390134529148, "grad_norm": 0.4453897178173065, "learning_rate": 0.001, "loss": 2.1152, "step": 12561 }, { "epoch": 0.5314324392926644, "grad_norm": 0.23559346795082092, "learning_rate": 0.001, "loss": 1.9954, "step": 12562 }, { "epoch": 0.5314747440561808, "grad_norm": 0.17883579432964325, "learning_rate": 0.001, "loss": 2.1284, "step": 12563 }, { "epoch": 0.5315170488196971, "grad_norm": 0.16119042038917542, "learning_rate": 0.001, "loss": 2.1427, "step": 12564 }, { "epoch": 0.5315593535832135, "grad_norm": 0.16220824420452118, "learning_rate": 0.001, "loss": 1.5702, "step": 12565 }, { "epoch": 0.5316016583467298, "grad_norm": 0.8208723068237305, "learning_rate": 0.001, "loss": 2.2647, "step": 12566 }, { "epoch": 0.5316439631102462, "grad_norm": 0.17101448774337769, "learning_rate": 0.001, "loss": 1.8805, "step": 12567 }, { "epoch": 0.5316862678737626, "grad_norm": 0.18567869067192078, "learning_rate": 0.001, "loss": 1.8957, "step": 12568 }, { "epoch": 0.5317285726372789, "grad_norm": 0.15984505414962769, "learning_rate": 0.001, "loss": 2.0752, "step": 12569 }, { "epoch": 0.5317708774007953, "grad_norm": 0.1836562305688858, "learning_rate": 0.001, "loss": 2.0664, "step": 12570 }, { "epoch": 0.5318131821643117, "grad_norm": 0.2100599855184555, "learning_rate": 0.001, "loss": 3.0159, "step": 12571 }, { "epoch": 0.531855486927828, "grad_norm": 0.16221459209918976, "learning_rate": 0.001, "loss": 1.8475, "step": 12572 }, { "epoch": 0.5318977916913444, "grad_norm": 0.18409012258052826, "learning_rate": 0.001, "loss": 1.9189, "step": 12573 }, { "epoch": 0.5319400964548608, "grad_norm": 0.2355443686246872, "learning_rate": 0.001, "loss": 2.4434, "step": 12574 }, { "epoch": 0.5319824012183771, "grad_norm": 1.7131577730178833, "learning_rate": 0.001, "loss": 2.3271, "step": 12575 }, { "epoch": 0.5320247059818936, "grad_norm": 0.16390523314476013, "learning_rate": 0.001, "loss": 3.1412, "step": 12576 }, { "epoch": 0.53206701074541, "grad_norm": 0.17767739295959473, "learning_rate": 0.001, "loss": 3.1423, "step": 12577 }, { "epoch": 0.5321093155089263, "grad_norm": 0.28065043687820435, "learning_rate": 0.001, "loss": 1.9327, "step": 12578 }, { "epoch": 0.5321516202724427, "grad_norm": 0.15590040385723114, "learning_rate": 0.001, "loss": 3.3144, "step": 12579 }, { "epoch": 0.5321939250359591, "grad_norm": 0.18487408757209778, "learning_rate": 0.001, "loss": 2.1503, "step": 12580 }, { "epoch": 0.5322362297994754, "grad_norm": 0.4396058917045593, "learning_rate": 0.001, "loss": 2.4607, "step": 12581 }, { "epoch": 0.5322785345629918, "grad_norm": 1.2496229410171509, "learning_rate": 0.001, "loss": 2.3387, "step": 12582 }, { "epoch": 0.5323208393265082, "grad_norm": 0.19142375886440277, "learning_rate": 0.001, "loss": 1.9137, "step": 12583 }, { "epoch": 0.5323631440900245, "grad_norm": 1.4793001413345337, "learning_rate": 0.001, "loss": 2.1602, "step": 12584 }, { "epoch": 0.5324054488535409, "grad_norm": 0.4164004921913147, "learning_rate": 0.001, "loss": 2.9867, "step": 12585 }, { "epoch": 0.5324477536170573, "grad_norm": 0.17239639163017273, "learning_rate": 0.001, "loss": 2.5442, "step": 12586 }, { "epoch": 0.5324900583805736, "grad_norm": 0.19600702822208405, "learning_rate": 0.001, "loss": 1.9799, "step": 12587 }, { "epoch": 0.53253236314409, "grad_norm": 0.1782451719045639, "learning_rate": 0.001, "loss": 2.5812, "step": 12588 }, { "epoch": 0.5325746679076064, "grad_norm": 0.18545618653297424, "learning_rate": 0.001, "loss": 2.3101, "step": 12589 }, { "epoch": 0.5326169726711227, "grad_norm": 0.21294978260993958, "learning_rate": 0.001, "loss": 1.6784, "step": 12590 }, { "epoch": 0.5326592774346391, "grad_norm": 0.16218768060207367, "learning_rate": 0.001, "loss": 2.438, "step": 12591 }, { "epoch": 0.5327015821981556, "grad_norm": 1.644671082496643, "learning_rate": 0.001, "loss": 2.0642, "step": 12592 }, { "epoch": 0.5327438869616719, "grad_norm": 0.2141799032688141, "learning_rate": 0.001, "loss": 3.1325, "step": 12593 }, { "epoch": 0.5327861917251883, "grad_norm": 0.17733773589134216, "learning_rate": 0.001, "loss": 2.3464, "step": 12594 }, { "epoch": 0.5328284964887047, "grad_norm": 0.1840038150548935, "learning_rate": 0.001, "loss": 1.9205, "step": 12595 }, { "epoch": 0.532870801252221, "grad_norm": 0.3170630931854248, "learning_rate": 0.001, "loss": 2.405, "step": 12596 }, { "epoch": 0.5329131060157374, "grad_norm": 0.24258072674274445, "learning_rate": 0.001, "loss": 1.855, "step": 12597 }, { "epoch": 0.5329554107792538, "grad_norm": 0.9366814494132996, "learning_rate": 0.001, "loss": 2.0033, "step": 12598 }, { "epoch": 0.5329977155427701, "grad_norm": 0.8258916139602661, "learning_rate": 0.001, "loss": 2.5754, "step": 12599 }, { "epoch": 0.5330400203062865, "grad_norm": 1.7923530340194702, "learning_rate": 0.001, "loss": 2.3026, "step": 12600 }, { "epoch": 0.5330823250698029, "grad_norm": 0.5301435589790344, "learning_rate": 0.001, "loss": 2.2184, "step": 12601 }, { "epoch": 0.5331246298333192, "grad_norm": 0.1785147339105606, "learning_rate": 0.001, "loss": 2.0238, "step": 12602 }, { "epoch": 0.5331669345968356, "grad_norm": 0.15722912549972534, "learning_rate": 0.001, "loss": 1.7873, "step": 12603 }, { "epoch": 0.533209239360352, "grad_norm": 0.17702187597751617, "learning_rate": 0.001, "loss": 3.0993, "step": 12604 }, { "epoch": 0.5332515441238683, "grad_norm": 0.21016950905323029, "learning_rate": 0.001, "loss": 2.2791, "step": 12605 }, { "epoch": 0.5332938488873847, "grad_norm": 0.19180504977703094, "learning_rate": 0.001, "loss": 1.7831, "step": 12606 }, { "epoch": 0.5333361536509011, "grad_norm": 0.17286181449890137, "learning_rate": 0.001, "loss": 1.94, "step": 12607 }, { "epoch": 0.5333784584144174, "grad_norm": 0.25898101925849915, "learning_rate": 0.001, "loss": 2.2634, "step": 12608 }, { "epoch": 0.5334207631779339, "grad_norm": 0.2569015324115753, "learning_rate": 0.001, "loss": 2.2876, "step": 12609 }, { "epoch": 0.5334630679414502, "grad_norm": 0.32984477281570435, "learning_rate": 0.001, "loss": 2.9021, "step": 12610 }, { "epoch": 0.5335053727049666, "grad_norm": 0.2177520990371704, "learning_rate": 0.001, "loss": 2.2496, "step": 12611 }, { "epoch": 0.533547677468483, "grad_norm": 0.18992137908935547, "learning_rate": 0.001, "loss": 2.0016, "step": 12612 }, { "epoch": 0.5335899822319993, "grad_norm": 0.20471316576004028, "learning_rate": 0.001, "loss": 2.4037, "step": 12613 }, { "epoch": 0.5336322869955157, "grad_norm": 0.21054227650165558, "learning_rate": 0.001, "loss": 1.9332, "step": 12614 }, { "epoch": 0.5336745917590321, "grad_norm": 0.20003412663936615, "learning_rate": 0.001, "loss": 2.4779, "step": 12615 }, { "epoch": 0.5337168965225484, "grad_norm": 0.17985932528972626, "learning_rate": 0.001, "loss": 1.5322, "step": 12616 }, { "epoch": 0.5337592012860648, "grad_norm": 2.2871792316436768, "learning_rate": 0.001, "loss": 2.4277, "step": 12617 }, { "epoch": 0.5338015060495812, "grad_norm": 0.16966228187084198, "learning_rate": 0.001, "loss": 2.4963, "step": 12618 }, { "epoch": 0.5338438108130975, "grad_norm": 0.19583900272846222, "learning_rate": 0.001, "loss": 2.4291, "step": 12619 }, { "epoch": 0.5338861155766139, "grad_norm": 0.18535350263118744, "learning_rate": 0.001, "loss": 2.1341, "step": 12620 }, { "epoch": 0.5339284203401303, "grad_norm": 0.8092800378799438, "learning_rate": 0.001, "loss": 2.5207, "step": 12621 }, { "epoch": 0.5339707251036466, "grad_norm": 0.19231988489627838, "learning_rate": 0.001, "loss": 2.378, "step": 12622 }, { "epoch": 0.534013029867163, "grad_norm": 0.22486920654773712, "learning_rate": 0.001, "loss": 2.1152, "step": 12623 }, { "epoch": 0.5340553346306794, "grad_norm": 0.32526126503944397, "learning_rate": 0.001, "loss": 3.1697, "step": 12624 }, { "epoch": 0.5340976393941957, "grad_norm": 0.20092487335205078, "learning_rate": 0.001, "loss": 2.8525, "step": 12625 }, { "epoch": 0.5341399441577122, "grad_norm": 0.2700296640396118, "learning_rate": 0.001, "loss": 2.0498, "step": 12626 }, { "epoch": 0.5341822489212286, "grad_norm": 0.48722365498542786, "learning_rate": 0.001, "loss": 1.8687, "step": 12627 }, { "epoch": 0.5342245536847449, "grad_norm": 0.16998623311519623, "learning_rate": 0.001, "loss": 2.2888, "step": 12628 }, { "epoch": 0.5342668584482613, "grad_norm": 0.15864147245883942, "learning_rate": 0.001, "loss": 1.7734, "step": 12629 }, { "epoch": 0.5343091632117777, "grad_norm": 0.31583741307258606, "learning_rate": 0.001, "loss": 2.4233, "step": 12630 }, { "epoch": 0.534351467975294, "grad_norm": 0.19196642935276031, "learning_rate": 0.001, "loss": 1.7955, "step": 12631 }, { "epoch": 0.5343937727388104, "grad_norm": 0.1647808700799942, "learning_rate": 0.001, "loss": 2.5027, "step": 12632 }, { "epoch": 0.5344360775023268, "grad_norm": 0.5048976540565491, "learning_rate": 0.001, "loss": 2.1264, "step": 12633 }, { "epoch": 0.5344783822658431, "grad_norm": 0.22664372622966766, "learning_rate": 0.001, "loss": 2.9306, "step": 12634 }, { "epoch": 0.5345206870293595, "grad_norm": 0.20378440618515015, "learning_rate": 0.001, "loss": 2.8485, "step": 12635 }, { "epoch": 0.5345629917928759, "grad_norm": 0.4030454158782959, "learning_rate": 0.001, "loss": 2.3177, "step": 12636 }, { "epoch": 0.5346052965563922, "grad_norm": 0.7921542525291443, "learning_rate": 0.001, "loss": 3.1555, "step": 12637 }, { "epoch": 0.5346476013199086, "grad_norm": 0.7897359728813171, "learning_rate": 0.001, "loss": 2.6089, "step": 12638 }, { "epoch": 0.534689906083425, "grad_norm": 0.2273802012205124, "learning_rate": 0.001, "loss": 2.2542, "step": 12639 }, { "epoch": 0.5347322108469413, "grad_norm": 0.27411505579948425, "learning_rate": 0.001, "loss": 2.8019, "step": 12640 }, { "epoch": 0.5347745156104577, "grad_norm": 0.26895782351493835, "learning_rate": 0.001, "loss": 2.6258, "step": 12641 }, { "epoch": 0.5348168203739742, "grad_norm": 0.17039963603019714, "learning_rate": 0.001, "loss": 2.5587, "step": 12642 }, { "epoch": 0.5348591251374905, "grad_norm": 0.2543392777442932, "learning_rate": 0.001, "loss": 2.3699, "step": 12643 }, { "epoch": 0.5349014299010069, "grad_norm": 0.19515450298786163, "learning_rate": 0.001, "loss": 2.1001, "step": 12644 }, { "epoch": 0.5349437346645233, "grad_norm": 0.16606023907661438, "learning_rate": 0.001, "loss": 1.7925, "step": 12645 }, { "epoch": 0.5349860394280396, "grad_norm": 0.4345617890357971, "learning_rate": 0.001, "loss": 1.9236, "step": 12646 }, { "epoch": 0.535028344191556, "grad_norm": 0.18390463292598724, "learning_rate": 0.001, "loss": 2.1396, "step": 12647 }, { "epoch": 0.5350706489550724, "grad_norm": 0.17461326718330383, "learning_rate": 0.001, "loss": 1.7379, "step": 12648 }, { "epoch": 0.5351129537185887, "grad_norm": 0.17248356342315674, "learning_rate": 0.001, "loss": 2.4956, "step": 12649 }, { "epoch": 0.5351552584821051, "grad_norm": 0.17978371679782867, "learning_rate": 0.001, "loss": 2.7611, "step": 12650 }, { "epoch": 0.5351975632456215, "grad_norm": 0.4586908221244812, "learning_rate": 0.001, "loss": 1.4555, "step": 12651 }, { "epoch": 0.5352398680091378, "grad_norm": 0.233989417552948, "learning_rate": 0.001, "loss": 1.9321, "step": 12652 }, { "epoch": 0.5352821727726542, "grad_norm": 0.1826629787683487, "learning_rate": 0.001, "loss": 2.3415, "step": 12653 }, { "epoch": 0.5353244775361705, "grad_norm": 0.15267974138259888, "learning_rate": 0.001, "loss": 1.4848, "step": 12654 }, { "epoch": 0.5353667822996869, "grad_norm": 4.387269020080566, "learning_rate": 0.001, "loss": 2.716, "step": 12655 }, { "epoch": 0.5354090870632033, "grad_norm": 0.26923081278800964, "learning_rate": 0.001, "loss": 3.246, "step": 12656 }, { "epoch": 0.5354513918267196, "grad_norm": 0.15483248233795166, "learning_rate": 0.001, "loss": 1.5195, "step": 12657 }, { "epoch": 0.535493696590236, "grad_norm": 0.18110813200473785, "learning_rate": 0.001, "loss": 1.7063, "step": 12658 }, { "epoch": 0.5355360013537525, "grad_norm": 0.1986570507287979, "learning_rate": 0.001, "loss": 2.1137, "step": 12659 }, { "epoch": 0.5355783061172688, "grad_norm": 0.16241492331027985, "learning_rate": 0.001, "loss": 1.7866, "step": 12660 }, { "epoch": 0.5356206108807852, "grad_norm": 0.22161731123924255, "learning_rate": 0.001, "loss": 2.3589, "step": 12661 }, { "epoch": 0.5356629156443016, "grad_norm": 0.17555823922157288, "learning_rate": 0.001, "loss": 1.9163, "step": 12662 }, { "epoch": 0.5357052204078179, "grad_norm": 0.20892333984375, "learning_rate": 0.001, "loss": 2.7506, "step": 12663 }, { "epoch": 0.5357475251713343, "grad_norm": 0.1600860357284546, "learning_rate": 0.001, "loss": 1.9907, "step": 12664 }, { "epoch": 0.5357898299348507, "grad_norm": 0.20017333328723907, "learning_rate": 0.001, "loss": 2.6324, "step": 12665 }, { "epoch": 0.535832134698367, "grad_norm": 0.18923135101795197, "learning_rate": 0.001, "loss": 2.1908, "step": 12666 }, { "epoch": 0.5358744394618834, "grad_norm": 0.24769580364227295, "learning_rate": 0.001, "loss": 1.9219, "step": 12667 }, { "epoch": 0.5359167442253998, "grad_norm": 0.43653157353401184, "learning_rate": 0.001, "loss": 2.1292, "step": 12668 }, { "epoch": 0.5359590489889161, "grad_norm": 0.18919190764427185, "learning_rate": 0.001, "loss": 2.4091, "step": 12669 }, { "epoch": 0.5360013537524325, "grad_norm": 16.16994857788086, "learning_rate": 0.001, "loss": 2.1238, "step": 12670 }, { "epoch": 0.5360436585159489, "grad_norm": 0.16744960844516754, "learning_rate": 0.001, "loss": 2.7491, "step": 12671 }, { "epoch": 0.5360859632794652, "grad_norm": 0.1780613660812378, "learning_rate": 0.001, "loss": 1.724, "step": 12672 }, { "epoch": 0.5361282680429816, "grad_norm": 0.16598299145698547, "learning_rate": 0.001, "loss": 1.8861, "step": 12673 }, { "epoch": 0.536170572806498, "grad_norm": 0.38400644063949585, "learning_rate": 0.001, "loss": 2.247, "step": 12674 }, { "epoch": 0.5362128775700143, "grad_norm": 0.24128788709640503, "learning_rate": 0.001, "loss": 2.6687, "step": 12675 }, { "epoch": 0.5362551823335308, "grad_norm": 0.17561288177967072, "learning_rate": 0.001, "loss": 1.8747, "step": 12676 }, { "epoch": 0.5362974870970472, "grad_norm": 0.2160193771123886, "learning_rate": 0.001, "loss": 1.8482, "step": 12677 }, { "epoch": 0.5363397918605635, "grad_norm": 0.2257080227136612, "learning_rate": 0.001, "loss": 2.167, "step": 12678 }, { "epoch": 0.5363820966240799, "grad_norm": 0.16601672768592834, "learning_rate": 0.001, "loss": 1.2827, "step": 12679 }, { "epoch": 0.5364244013875963, "grad_norm": 0.20976103842258453, "learning_rate": 0.001, "loss": 2.4844, "step": 12680 }, { "epoch": 0.5364667061511126, "grad_norm": 0.18916988372802734, "learning_rate": 0.001, "loss": 1.8666, "step": 12681 }, { "epoch": 0.536509010914629, "grad_norm": 0.20311769843101501, "learning_rate": 0.001, "loss": 2.84, "step": 12682 }, { "epoch": 0.5365513156781454, "grad_norm": 0.21866144239902496, "learning_rate": 0.001, "loss": 2.5776, "step": 12683 }, { "epoch": 0.5365936204416617, "grad_norm": 0.2240811288356781, "learning_rate": 0.001, "loss": 2.7133, "step": 12684 }, { "epoch": 0.5366359252051781, "grad_norm": 0.13089242577552795, "learning_rate": 0.001, "loss": 1.3683, "step": 12685 }, { "epoch": 0.5366782299686945, "grad_norm": 0.16819548606872559, "learning_rate": 0.001, "loss": 2.8725, "step": 12686 }, { "epoch": 0.5367205347322108, "grad_norm": 0.2232055813074112, "learning_rate": 0.001, "loss": 2.0103, "step": 12687 }, { "epoch": 0.5367628394957272, "grad_norm": 0.1957560032606125, "learning_rate": 0.001, "loss": 2.4051, "step": 12688 }, { "epoch": 0.5368051442592436, "grad_norm": 0.18882183730602264, "learning_rate": 0.001, "loss": 2.382, "step": 12689 }, { "epoch": 0.5368474490227599, "grad_norm": 0.211518332362175, "learning_rate": 0.001, "loss": 2.4219, "step": 12690 }, { "epoch": 0.5368897537862763, "grad_norm": 0.4028577506542206, "learning_rate": 0.001, "loss": 1.6842, "step": 12691 }, { "epoch": 0.5369320585497928, "grad_norm": 0.5534846782684326, "learning_rate": 0.001, "loss": 2.8452, "step": 12692 }, { "epoch": 0.536974363313309, "grad_norm": 0.6162401437759399, "learning_rate": 0.001, "loss": 1.8964, "step": 12693 }, { "epoch": 0.5370166680768255, "grad_norm": 0.2093658596277237, "learning_rate": 0.001, "loss": 2.2681, "step": 12694 }, { "epoch": 0.5370589728403419, "grad_norm": 0.17829060554504395, "learning_rate": 0.001, "loss": 1.7986, "step": 12695 }, { "epoch": 0.5371012776038582, "grad_norm": 0.29665249586105347, "learning_rate": 0.001, "loss": 2.194, "step": 12696 }, { "epoch": 0.5371435823673746, "grad_norm": 0.19808493554592133, "learning_rate": 0.001, "loss": 2.5451, "step": 12697 }, { "epoch": 0.537185887130891, "grad_norm": 0.14953912794589996, "learning_rate": 0.001, "loss": 1.6499, "step": 12698 }, { "epoch": 0.5372281918944073, "grad_norm": 0.25920218229293823, "learning_rate": 0.001, "loss": 2.035, "step": 12699 }, { "epoch": 0.5372704966579237, "grad_norm": 0.2867739200592041, "learning_rate": 0.001, "loss": 2.2908, "step": 12700 }, { "epoch": 0.53731280142144, "grad_norm": 0.17412187159061432, "learning_rate": 0.001, "loss": 1.7965, "step": 12701 }, { "epoch": 0.5373551061849564, "grad_norm": 0.19530300796031952, "learning_rate": 0.001, "loss": 2.733, "step": 12702 }, { "epoch": 0.5373974109484728, "grad_norm": 0.8992458581924438, "learning_rate": 0.001, "loss": 1.9335, "step": 12703 }, { "epoch": 0.5374397157119891, "grad_norm": 0.16157057881355286, "learning_rate": 0.001, "loss": 1.4152, "step": 12704 }, { "epoch": 0.5374820204755055, "grad_norm": 0.7205559611320496, "learning_rate": 0.001, "loss": 1.7546, "step": 12705 }, { "epoch": 0.5375243252390219, "grad_norm": 0.16520078480243683, "learning_rate": 0.001, "loss": 1.9752, "step": 12706 }, { "epoch": 0.5375666300025382, "grad_norm": 1.2817127704620361, "learning_rate": 0.001, "loss": 2.4836, "step": 12707 }, { "epoch": 0.5376089347660546, "grad_norm": 0.576266348361969, "learning_rate": 0.001, "loss": 1.898, "step": 12708 }, { "epoch": 0.5376512395295711, "grad_norm": 0.25358226895332336, "learning_rate": 0.001, "loss": 2.1221, "step": 12709 }, { "epoch": 0.5376935442930874, "grad_norm": 0.23752950131893158, "learning_rate": 0.001, "loss": 2.4297, "step": 12710 }, { "epoch": 0.5377358490566038, "grad_norm": 0.17121052742004395, "learning_rate": 0.001, "loss": 2.2977, "step": 12711 }, { "epoch": 0.5377781538201202, "grad_norm": 5.213639736175537, "learning_rate": 0.001, "loss": 1.5537, "step": 12712 }, { "epoch": 0.5378204585836365, "grad_norm": 0.15050987899303436, "learning_rate": 0.001, "loss": 2.1932, "step": 12713 }, { "epoch": 0.5378627633471529, "grad_norm": 0.2271626889705658, "learning_rate": 0.001, "loss": 2.2467, "step": 12714 }, { "epoch": 0.5379050681106693, "grad_norm": 0.20240074396133423, "learning_rate": 0.001, "loss": 2.7468, "step": 12715 }, { "epoch": 0.5379473728741856, "grad_norm": 4.134634971618652, "learning_rate": 0.001, "loss": 1.7431, "step": 12716 }, { "epoch": 0.537989677637702, "grad_norm": 0.23283101618289948, "learning_rate": 0.001, "loss": 1.8593, "step": 12717 }, { "epoch": 0.5380319824012184, "grad_norm": 0.1952308714389801, "learning_rate": 0.001, "loss": 2.5033, "step": 12718 }, { "epoch": 0.5380742871647347, "grad_norm": 0.17305073142051697, "learning_rate": 0.001, "loss": 2.3579, "step": 12719 }, { "epoch": 0.5381165919282511, "grad_norm": 0.18969833850860596, "learning_rate": 0.001, "loss": 1.8591, "step": 12720 }, { "epoch": 0.5381588966917675, "grad_norm": 0.19060935080051422, "learning_rate": 0.001, "loss": 1.6669, "step": 12721 }, { "epoch": 0.5382012014552838, "grad_norm": 10.8906888961792, "learning_rate": 0.001, "loss": 1.7601, "step": 12722 }, { "epoch": 0.5382435062188002, "grad_norm": 0.2497670203447342, "learning_rate": 0.001, "loss": 2.2148, "step": 12723 }, { "epoch": 0.5382858109823166, "grad_norm": 0.2164415568113327, "learning_rate": 0.001, "loss": 2.146, "step": 12724 }, { "epoch": 0.538328115745833, "grad_norm": 0.2605139911174774, "learning_rate": 0.001, "loss": 1.9347, "step": 12725 }, { "epoch": 0.5383704205093494, "grad_norm": 23.029006958007812, "learning_rate": 0.001, "loss": 1.8033, "step": 12726 }, { "epoch": 0.5384127252728658, "grad_norm": 0.16982685029506683, "learning_rate": 0.001, "loss": 2.4865, "step": 12727 }, { "epoch": 0.5384550300363821, "grad_norm": 0.525914192199707, "learning_rate": 0.001, "loss": 1.8129, "step": 12728 }, { "epoch": 0.5384973347998985, "grad_norm": 0.15495622158050537, "learning_rate": 0.001, "loss": 2.5025, "step": 12729 }, { "epoch": 0.5385396395634149, "grad_norm": 0.19135193526744843, "learning_rate": 0.001, "loss": 1.598, "step": 12730 }, { "epoch": 0.5385819443269312, "grad_norm": 1.3947174549102783, "learning_rate": 0.001, "loss": 2.7353, "step": 12731 }, { "epoch": 0.5386242490904476, "grad_norm": 0.16519546508789062, "learning_rate": 0.001, "loss": 1.5702, "step": 12732 }, { "epoch": 0.538666553853964, "grad_norm": 0.19213086366653442, "learning_rate": 0.001, "loss": 2.4648, "step": 12733 }, { "epoch": 0.5387088586174803, "grad_norm": 1.1840908527374268, "learning_rate": 0.001, "loss": 2.0265, "step": 12734 }, { "epoch": 0.5387511633809967, "grad_norm": 0.32493284344673157, "learning_rate": 0.001, "loss": 2.6341, "step": 12735 }, { "epoch": 0.5387934681445131, "grad_norm": 0.1699162721633911, "learning_rate": 0.001, "loss": 2.3637, "step": 12736 }, { "epoch": 0.5388357729080294, "grad_norm": 0.40613803267478943, "learning_rate": 0.001, "loss": 1.9464, "step": 12737 }, { "epoch": 0.5388780776715458, "grad_norm": 0.2300586849451065, "learning_rate": 0.001, "loss": 1.9922, "step": 12738 }, { "epoch": 0.5389203824350622, "grad_norm": 0.23353518545627594, "learning_rate": 0.001, "loss": 1.4807, "step": 12739 }, { "epoch": 0.5389626871985785, "grad_norm": 0.20910601317882538, "learning_rate": 0.001, "loss": 2.0366, "step": 12740 }, { "epoch": 0.539004991962095, "grad_norm": 0.17505596578121185, "learning_rate": 0.001, "loss": 1.9472, "step": 12741 }, { "epoch": 0.5390472967256114, "grad_norm": 0.2388429343700409, "learning_rate": 0.001, "loss": 1.8028, "step": 12742 }, { "epoch": 0.5390896014891277, "grad_norm": 0.211119145154953, "learning_rate": 0.001, "loss": 3.0593, "step": 12743 }, { "epoch": 0.5391319062526441, "grad_norm": 0.1894591599702835, "learning_rate": 0.001, "loss": 3.2891, "step": 12744 }, { "epoch": 0.5391742110161604, "grad_norm": 0.20445877313613892, "learning_rate": 0.001, "loss": 1.9584, "step": 12745 }, { "epoch": 0.5392165157796768, "grad_norm": 0.2004939168691635, "learning_rate": 0.001, "loss": 2.1161, "step": 12746 }, { "epoch": 0.5392588205431932, "grad_norm": 0.20584918558597565, "learning_rate": 0.001, "loss": 2.124, "step": 12747 }, { "epoch": 0.5393011253067095, "grad_norm": 0.4080961048603058, "learning_rate": 0.001, "loss": 2.1308, "step": 12748 }, { "epoch": 0.5393434300702259, "grad_norm": 0.261042982339859, "learning_rate": 0.001, "loss": 2.3502, "step": 12749 }, { "epoch": 0.5393857348337423, "grad_norm": 0.18766193091869354, "learning_rate": 0.001, "loss": 1.9792, "step": 12750 }, { "epoch": 0.5394280395972586, "grad_norm": 0.178992360830307, "learning_rate": 0.001, "loss": 1.9826, "step": 12751 }, { "epoch": 0.539470344360775, "grad_norm": 0.22362402081489563, "learning_rate": 0.001, "loss": 2.6882, "step": 12752 }, { "epoch": 0.5395126491242914, "grad_norm": 0.6612523198127747, "learning_rate": 0.001, "loss": 2.3889, "step": 12753 }, { "epoch": 0.5395549538878077, "grad_norm": 0.24479064345359802, "learning_rate": 0.001, "loss": 2.4542, "step": 12754 }, { "epoch": 0.5395972586513241, "grad_norm": 0.3169039189815521, "learning_rate": 0.001, "loss": 2.1717, "step": 12755 }, { "epoch": 0.5396395634148405, "grad_norm": 0.22589461505413055, "learning_rate": 0.001, "loss": 3.304, "step": 12756 }, { "epoch": 0.5396818681783568, "grad_norm": 0.22644683718681335, "learning_rate": 0.001, "loss": 1.7265, "step": 12757 }, { "epoch": 0.5397241729418732, "grad_norm": 0.19094765186309814, "learning_rate": 0.001, "loss": 1.681, "step": 12758 }, { "epoch": 0.5397664777053897, "grad_norm": 0.17020857334136963, "learning_rate": 0.001, "loss": 2.3251, "step": 12759 }, { "epoch": 0.539808782468906, "grad_norm": 9.065228462219238, "learning_rate": 0.001, "loss": 2.7484, "step": 12760 }, { "epoch": 0.5398510872324224, "grad_norm": 0.32355996966362, "learning_rate": 0.001, "loss": 2.1063, "step": 12761 }, { "epoch": 0.5398933919959388, "grad_norm": 0.22074800729751587, "learning_rate": 0.001, "loss": 2.3476, "step": 12762 }, { "epoch": 0.5399356967594551, "grad_norm": 1.8396928310394287, "learning_rate": 0.001, "loss": 2.3019, "step": 12763 }, { "epoch": 0.5399780015229715, "grad_norm": 0.18278691172599792, "learning_rate": 0.001, "loss": 2.0778, "step": 12764 }, { "epoch": 0.5400203062864879, "grad_norm": 0.2510671019554138, "learning_rate": 0.001, "loss": 1.8773, "step": 12765 }, { "epoch": 0.5400626110500042, "grad_norm": 0.17996838688850403, "learning_rate": 0.001, "loss": 2.517, "step": 12766 }, { "epoch": 0.5401049158135206, "grad_norm": 0.15889018774032593, "learning_rate": 0.001, "loss": 1.7955, "step": 12767 }, { "epoch": 0.540147220577037, "grad_norm": 0.18059539794921875, "learning_rate": 0.001, "loss": 2.4617, "step": 12768 }, { "epoch": 0.5401895253405533, "grad_norm": 1.934308648109436, "learning_rate": 0.001, "loss": 2.4551, "step": 12769 }, { "epoch": 0.5402318301040697, "grad_norm": 0.764201283454895, "learning_rate": 0.001, "loss": 2.2693, "step": 12770 }, { "epoch": 0.5402741348675861, "grad_norm": 0.16094563901424408, "learning_rate": 0.001, "loss": 3.8751, "step": 12771 }, { "epoch": 0.5403164396311024, "grad_norm": 0.20989876985549927, "learning_rate": 0.001, "loss": 2.4648, "step": 12772 }, { "epoch": 0.5403587443946188, "grad_norm": 0.4561876058578491, "learning_rate": 0.001, "loss": 2.0358, "step": 12773 }, { "epoch": 0.5404010491581352, "grad_norm": 0.19272848963737488, "learning_rate": 0.001, "loss": 2.3598, "step": 12774 }, { "epoch": 0.5404433539216515, "grad_norm": 0.1573980748653412, "learning_rate": 0.001, "loss": 3.2602, "step": 12775 }, { "epoch": 0.540485658685168, "grad_norm": 0.20343995094299316, "learning_rate": 0.001, "loss": 2.9864, "step": 12776 }, { "epoch": 0.5405279634486844, "grad_norm": 1.2975257635116577, "learning_rate": 0.001, "loss": 2.7595, "step": 12777 }, { "epoch": 0.5405702682122007, "grad_norm": 0.20765641331672668, "learning_rate": 0.001, "loss": 2.8354, "step": 12778 }, { "epoch": 0.5406125729757171, "grad_norm": 0.1908862143754959, "learning_rate": 0.001, "loss": 1.833, "step": 12779 }, { "epoch": 0.5406548777392335, "grad_norm": 0.15055324137210846, "learning_rate": 0.001, "loss": 1.4764, "step": 12780 }, { "epoch": 0.5406971825027498, "grad_norm": 0.19887371361255646, "learning_rate": 0.001, "loss": 2.6805, "step": 12781 }, { "epoch": 0.5407394872662662, "grad_norm": 0.19075161218643188, "learning_rate": 0.001, "loss": 2.7236, "step": 12782 }, { "epoch": 0.5407817920297826, "grad_norm": 0.14533232152462006, "learning_rate": 0.001, "loss": 1.5061, "step": 12783 }, { "epoch": 0.5408240967932989, "grad_norm": 0.26192668080329895, "learning_rate": 0.001, "loss": 2.8406, "step": 12784 }, { "epoch": 0.5408664015568153, "grad_norm": 0.1515062302350998, "learning_rate": 0.001, "loss": 2.997, "step": 12785 }, { "epoch": 0.5409087063203317, "grad_norm": 4.155677795410156, "learning_rate": 0.001, "loss": 2.7793, "step": 12786 }, { "epoch": 0.540951011083848, "grad_norm": 0.24522151052951813, "learning_rate": 0.001, "loss": 1.8206, "step": 12787 }, { "epoch": 0.5409933158473644, "grad_norm": 1.2809727191925049, "learning_rate": 0.001, "loss": 3.2282, "step": 12788 }, { "epoch": 0.5410356206108807, "grad_norm": 0.21048471331596375, "learning_rate": 0.001, "loss": 2.7134, "step": 12789 }, { "epoch": 0.5410779253743971, "grad_norm": 0.1910332590341568, "learning_rate": 0.001, "loss": 1.9444, "step": 12790 }, { "epoch": 0.5411202301379135, "grad_norm": 0.20288752019405365, "learning_rate": 0.001, "loss": 1.7156, "step": 12791 }, { "epoch": 0.5411625349014298, "grad_norm": 0.1752224564552307, "learning_rate": 0.001, "loss": 2.0678, "step": 12792 }, { "epoch": 0.5412048396649463, "grad_norm": 0.18332380056381226, "learning_rate": 0.001, "loss": 1.6786, "step": 12793 }, { "epoch": 0.5412471444284627, "grad_norm": 0.16607828438282013, "learning_rate": 0.001, "loss": 2.0846, "step": 12794 }, { "epoch": 0.541289449191979, "grad_norm": 0.2198660671710968, "learning_rate": 0.001, "loss": 2.083, "step": 12795 }, { "epoch": 0.5413317539554954, "grad_norm": 0.19361701607704163, "learning_rate": 0.001, "loss": 1.8434, "step": 12796 }, { "epoch": 0.5413740587190118, "grad_norm": 0.17056864500045776, "learning_rate": 0.001, "loss": 1.7502, "step": 12797 }, { "epoch": 0.5414163634825281, "grad_norm": 0.2116164267063141, "learning_rate": 0.001, "loss": 2.3516, "step": 12798 }, { "epoch": 0.5414586682460445, "grad_norm": 0.1802276223897934, "learning_rate": 0.001, "loss": 2.6387, "step": 12799 }, { "epoch": 0.5415009730095609, "grad_norm": 0.1857595592737198, "learning_rate": 0.001, "loss": 2.052, "step": 12800 }, { "epoch": 0.5415432777730772, "grad_norm": 0.16181547939777374, "learning_rate": 0.001, "loss": 2.9238, "step": 12801 }, { "epoch": 0.5415855825365936, "grad_norm": 0.17999279499053955, "learning_rate": 0.001, "loss": 2.1358, "step": 12802 }, { "epoch": 0.54162788730011, "grad_norm": 0.17646092176437378, "learning_rate": 0.001, "loss": 1.9901, "step": 12803 }, { "epoch": 0.5416701920636263, "grad_norm": 0.19860303401947021, "learning_rate": 0.001, "loss": 2.5992, "step": 12804 }, { "epoch": 0.5417124968271427, "grad_norm": 0.43836483359336853, "learning_rate": 0.001, "loss": 2.0902, "step": 12805 }, { "epoch": 0.5417548015906591, "grad_norm": 0.32996633648872375, "learning_rate": 0.001, "loss": 1.8199, "step": 12806 }, { "epoch": 0.5417971063541754, "grad_norm": 0.16782180964946747, "learning_rate": 0.001, "loss": 1.8684, "step": 12807 }, { "epoch": 0.5418394111176918, "grad_norm": 0.29726442694664, "learning_rate": 0.001, "loss": 2.1423, "step": 12808 }, { "epoch": 0.5418817158812083, "grad_norm": 0.19097116589546204, "learning_rate": 0.001, "loss": 3.4195, "step": 12809 }, { "epoch": 0.5419240206447246, "grad_norm": 0.41679999232292175, "learning_rate": 0.001, "loss": 1.9064, "step": 12810 }, { "epoch": 0.541966325408241, "grad_norm": 0.15182755887508392, "learning_rate": 0.001, "loss": 2.4639, "step": 12811 }, { "epoch": 0.5420086301717574, "grad_norm": 0.16130530834197998, "learning_rate": 0.001, "loss": 1.777, "step": 12812 }, { "epoch": 0.5420509349352737, "grad_norm": 0.15703986585140228, "learning_rate": 0.001, "loss": 2.4515, "step": 12813 }, { "epoch": 0.5420932396987901, "grad_norm": 22.553680419921875, "learning_rate": 0.001, "loss": 2.077, "step": 12814 }, { "epoch": 0.5421355444623065, "grad_norm": 0.17222054302692413, "learning_rate": 0.001, "loss": 1.8372, "step": 12815 }, { "epoch": 0.5421778492258228, "grad_norm": 0.21091894805431366, "learning_rate": 0.001, "loss": 2.3193, "step": 12816 }, { "epoch": 0.5422201539893392, "grad_norm": 0.17758257687091827, "learning_rate": 0.001, "loss": 2.2889, "step": 12817 }, { "epoch": 0.5422624587528556, "grad_norm": 0.2192198932170868, "learning_rate": 0.001, "loss": 2.8897, "step": 12818 }, { "epoch": 0.5423047635163719, "grad_norm": 0.16406521201133728, "learning_rate": 0.001, "loss": 2.1214, "step": 12819 }, { "epoch": 0.5423470682798883, "grad_norm": 0.29036909341812134, "learning_rate": 0.001, "loss": 1.6994, "step": 12820 }, { "epoch": 0.5423893730434047, "grad_norm": 0.16406984627246857, "learning_rate": 0.001, "loss": 2.4504, "step": 12821 }, { "epoch": 0.542431677806921, "grad_norm": 0.310654878616333, "learning_rate": 0.001, "loss": 2.7693, "step": 12822 }, { "epoch": 0.5424739825704374, "grad_norm": 0.3856427073478699, "learning_rate": 0.001, "loss": 1.8619, "step": 12823 }, { "epoch": 0.5425162873339539, "grad_norm": 0.15692728757858276, "learning_rate": 0.001, "loss": 2.5887, "step": 12824 }, { "epoch": 0.5425585920974701, "grad_norm": 0.1917227804660797, "learning_rate": 0.001, "loss": 2.4362, "step": 12825 }, { "epoch": 0.5426008968609866, "grad_norm": 0.16880708932876587, "learning_rate": 0.001, "loss": 2.6024, "step": 12826 }, { "epoch": 0.542643201624503, "grad_norm": 0.22414904832839966, "learning_rate": 0.001, "loss": 1.8351, "step": 12827 }, { "epoch": 0.5426855063880193, "grad_norm": 0.19142870604991913, "learning_rate": 0.001, "loss": 2.1463, "step": 12828 }, { "epoch": 0.5427278111515357, "grad_norm": 0.9796238541603088, "learning_rate": 0.001, "loss": 2.4191, "step": 12829 }, { "epoch": 0.5427701159150521, "grad_norm": 0.18335986137390137, "learning_rate": 0.001, "loss": 2.8576, "step": 12830 }, { "epoch": 0.5428124206785684, "grad_norm": 0.162856325507164, "learning_rate": 0.001, "loss": 1.473, "step": 12831 }, { "epoch": 0.5428547254420848, "grad_norm": 0.162824347615242, "learning_rate": 0.001, "loss": 1.9001, "step": 12832 }, { "epoch": 0.5428970302056012, "grad_norm": 0.1477918028831482, "learning_rate": 0.001, "loss": 1.5069, "step": 12833 }, { "epoch": 0.5429393349691175, "grad_norm": 0.21265248954296112, "learning_rate": 0.001, "loss": 2.1842, "step": 12834 }, { "epoch": 0.5429816397326339, "grad_norm": 0.19099631905555725, "learning_rate": 0.001, "loss": 1.1751, "step": 12835 }, { "epoch": 0.5430239444961502, "grad_norm": 8.667723655700684, "learning_rate": 0.001, "loss": 1.6845, "step": 12836 }, { "epoch": 0.5430662492596666, "grad_norm": 0.22022154927253723, "learning_rate": 0.001, "loss": 2.5801, "step": 12837 }, { "epoch": 0.543108554023183, "grad_norm": 0.17374145984649658, "learning_rate": 0.001, "loss": 2.4922, "step": 12838 }, { "epoch": 0.5431508587866993, "grad_norm": 0.16898944973945618, "learning_rate": 0.001, "loss": 1.8262, "step": 12839 }, { "epoch": 0.5431931635502157, "grad_norm": 0.18354932963848114, "learning_rate": 0.001, "loss": 2.2251, "step": 12840 }, { "epoch": 0.5432354683137322, "grad_norm": 0.16196799278259277, "learning_rate": 0.001, "loss": 2.6641, "step": 12841 }, { "epoch": 0.5432777730772484, "grad_norm": 0.2222384810447693, "learning_rate": 0.001, "loss": 2.1457, "step": 12842 }, { "epoch": 0.5433200778407649, "grad_norm": 0.19985118508338928, "learning_rate": 0.001, "loss": 2.302, "step": 12843 }, { "epoch": 0.5433623826042813, "grad_norm": 0.1899694949388504, "learning_rate": 0.001, "loss": 3.4242, "step": 12844 }, { "epoch": 0.5434046873677976, "grad_norm": 0.19545935094356537, "learning_rate": 0.001, "loss": 1.9181, "step": 12845 }, { "epoch": 0.543446992131314, "grad_norm": 0.16950981318950653, "learning_rate": 0.001, "loss": 2.4149, "step": 12846 }, { "epoch": 0.5434892968948304, "grad_norm": 0.19809971749782562, "learning_rate": 0.001, "loss": 1.6052, "step": 12847 }, { "epoch": 0.5435316016583467, "grad_norm": 0.2023109793663025, "learning_rate": 0.001, "loss": 2.1515, "step": 12848 }, { "epoch": 0.5435739064218631, "grad_norm": 0.889703631401062, "learning_rate": 0.001, "loss": 2.0461, "step": 12849 }, { "epoch": 0.5436162111853795, "grad_norm": 0.18343132734298706, "learning_rate": 0.001, "loss": 2.4869, "step": 12850 }, { "epoch": 0.5436585159488958, "grad_norm": 0.16023565828800201, "learning_rate": 0.001, "loss": 2.1514, "step": 12851 }, { "epoch": 0.5437008207124122, "grad_norm": 0.16939814388751984, "learning_rate": 0.001, "loss": 1.9151, "step": 12852 }, { "epoch": 0.5437431254759286, "grad_norm": 0.16399548947811127, "learning_rate": 0.001, "loss": 2.4465, "step": 12853 }, { "epoch": 0.5437854302394449, "grad_norm": 0.1430288702249527, "learning_rate": 0.001, "loss": 1.7558, "step": 12854 }, { "epoch": 0.5438277350029613, "grad_norm": 0.5551788210868835, "learning_rate": 0.001, "loss": 1.9211, "step": 12855 }, { "epoch": 0.5438700397664777, "grad_norm": 0.15507280826568604, "learning_rate": 0.001, "loss": 1.4507, "step": 12856 }, { "epoch": 0.543912344529994, "grad_norm": 0.27142950892448425, "learning_rate": 0.001, "loss": 2.3643, "step": 12857 }, { "epoch": 0.5439546492935105, "grad_norm": 0.7331579327583313, "learning_rate": 0.001, "loss": 3.3138, "step": 12858 }, { "epoch": 0.5439969540570269, "grad_norm": 1.1282596588134766, "learning_rate": 0.001, "loss": 2.4492, "step": 12859 }, { "epoch": 0.5440392588205432, "grad_norm": 0.1771765947341919, "learning_rate": 0.001, "loss": 2.3039, "step": 12860 }, { "epoch": 0.5440815635840596, "grad_norm": 0.24000783264636993, "learning_rate": 0.001, "loss": 1.6899, "step": 12861 }, { "epoch": 0.544123868347576, "grad_norm": 0.23145051300525665, "learning_rate": 0.001, "loss": 2.4085, "step": 12862 }, { "epoch": 0.5441661731110923, "grad_norm": 0.21521174907684326, "learning_rate": 0.001, "loss": 3.2071, "step": 12863 }, { "epoch": 0.5442084778746087, "grad_norm": 0.16820016503334045, "learning_rate": 0.001, "loss": 2.267, "step": 12864 }, { "epoch": 0.5442507826381251, "grad_norm": 0.21583041548728943, "learning_rate": 0.001, "loss": 2.1649, "step": 12865 }, { "epoch": 0.5442930874016414, "grad_norm": 0.15462155640125275, "learning_rate": 0.001, "loss": 2.2514, "step": 12866 }, { "epoch": 0.5443353921651578, "grad_norm": 0.30019253492355347, "learning_rate": 0.001, "loss": 2.4805, "step": 12867 }, { "epoch": 0.5443776969286742, "grad_norm": 0.17040373384952545, "learning_rate": 0.001, "loss": 2.1466, "step": 12868 }, { "epoch": 0.5444200016921905, "grad_norm": 0.2993798553943634, "learning_rate": 0.001, "loss": 1.5786, "step": 12869 }, { "epoch": 0.5444623064557069, "grad_norm": 0.17798520624637604, "learning_rate": 0.001, "loss": 1.7204, "step": 12870 }, { "epoch": 0.5445046112192233, "grad_norm": 0.18030913174152374, "learning_rate": 0.001, "loss": 2.9747, "step": 12871 }, { "epoch": 0.5445469159827396, "grad_norm": 0.22262489795684814, "learning_rate": 0.001, "loss": 2.4943, "step": 12872 }, { "epoch": 0.544589220746256, "grad_norm": 0.16953717172145844, "learning_rate": 0.001, "loss": 3.0568, "step": 12873 }, { "epoch": 0.5446315255097725, "grad_norm": 0.34046247601509094, "learning_rate": 0.001, "loss": 2.8593, "step": 12874 }, { "epoch": 0.5446738302732888, "grad_norm": 0.1570599377155304, "learning_rate": 0.001, "loss": 2.5897, "step": 12875 }, { "epoch": 0.5447161350368052, "grad_norm": 0.17176446318626404, "learning_rate": 0.001, "loss": 1.9241, "step": 12876 }, { "epoch": 0.5447584398003216, "grad_norm": 0.17753027379512787, "learning_rate": 0.001, "loss": 2.161, "step": 12877 }, { "epoch": 0.5448007445638379, "grad_norm": 0.1912051886320114, "learning_rate": 0.001, "loss": 1.8007, "step": 12878 }, { "epoch": 0.5448430493273543, "grad_norm": 0.49159884452819824, "learning_rate": 0.001, "loss": 1.7072, "step": 12879 }, { "epoch": 0.5448853540908706, "grad_norm": 0.23397985100746155, "learning_rate": 0.001, "loss": 1.9658, "step": 12880 }, { "epoch": 0.544927658854387, "grad_norm": 0.17725947499275208, "learning_rate": 0.001, "loss": 1.8988, "step": 12881 }, { "epoch": 0.5449699636179034, "grad_norm": 0.14953695237636566, "learning_rate": 0.001, "loss": 2.4378, "step": 12882 }, { "epoch": 0.5450122683814197, "grad_norm": 0.1612466275691986, "learning_rate": 0.001, "loss": 2.0907, "step": 12883 }, { "epoch": 0.5450545731449361, "grad_norm": 0.2178923636674881, "learning_rate": 0.001, "loss": 2.9344, "step": 12884 }, { "epoch": 0.5450968779084525, "grad_norm": 0.19458921253681183, "learning_rate": 0.001, "loss": 2.2585, "step": 12885 }, { "epoch": 0.5451391826719688, "grad_norm": 0.24837756156921387, "learning_rate": 0.001, "loss": 2.0981, "step": 12886 }, { "epoch": 0.5451814874354852, "grad_norm": 0.16423627734184265, "learning_rate": 0.001, "loss": 1.4668, "step": 12887 }, { "epoch": 0.5452237921990016, "grad_norm": 0.17279313504695892, "learning_rate": 0.001, "loss": 1.8586, "step": 12888 }, { "epoch": 0.5452660969625179, "grad_norm": 0.1528485268354416, "learning_rate": 0.001, "loss": 2.2937, "step": 12889 }, { "epoch": 0.5453084017260343, "grad_norm": 0.14856481552124023, "learning_rate": 0.001, "loss": 2.3376, "step": 12890 }, { "epoch": 0.5453507064895508, "grad_norm": 0.1969471573829651, "learning_rate": 0.001, "loss": 2.8098, "step": 12891 }, { "epoch": 0.545393011253067, "grad_norm": 0.17648574709892273, "learning_rate": 0.001, "loss": 2.3516, "step": 12892 }, { "epoch": 0.5454353160165835, "grad_norm": 0.2679540812969208, "learning_rate": 0.001, "loss": 2.5151, "step": 12893 }, { "epoch": 0.5454776207800999, "grad_norm": 0.17323081195354462, "learning_rate": 0.001, "loss": 1.472, "step": 12894 }, { "epoch": 0.5455199255436162, "grad_norm": 0.7942979335784912, "learning_rate": 0.001, "loss": 2.2294, "step": 12895 }, { "epoch": 0.5455622303071326, "grad_norm": 0.2997882664203644, "learning_rate": 0.001, "loss": 2.7284, "step": 12896 }, { "epoch": 0.545604535070649, "grad_norm": 0.1583070456981659, "learning_rate": 0.001, "loss": 2.6989, "step": 12897 }, { "epoch": 0.5456468398341653, "grad_norm": 0.19625523686408997, "learning_rate": 0.001, "loss": 1.7012, "step": 12898 }, { "epoch": 0.5456891445976817, "grad_norm": 0.16871203482151031, "learning_rate": 0.001, "loss": 1.7467, "step": 12899 }, { "epoch": 0.5457314493611981, "grad_norm": 0.17078301310539246, "learning_rate": 0.001, "loss": 2.7648, "step": 12900 }, { "epoch": 0.5457737541247144, "grad_norm": 0.1832430213689804, "learning_rate": 0.001, "loss": 2.109, "step": 12901 }, { "epoch": 0.5458160588882308, "grad_norm": 0.18720830976963043, "learning_rate": 0.001, "loss": 2.4266, "step": 12902 }, { "epoch": 0.5458583636517472, "grad_norm": 0.7275272011756897, "learning_rate": 0.001, "loss": 2.3496, "step": 12903 }, { "epoch": 0.5459006684152635, "grad_norm": 0.1972072273492813, "learning_rate": 0.001, "loss": 1.8186, "step": 12904 }, { "epoch": 0.5459429731787799, "grad_norm": 0.17827208340168, "learning_rate": 0.001, "loss": 2.7438, "step": 12905 }, { "epoch": 0.5459852779422963, "grad_norm": 7.450066089630127, "learning_rate": 0.001, "loss": 2.246, "step": 12906 }, { "epoch": 0.5460275827058126, "grad_norm": 0.15848639607429504, "learning_rate": 0.001, "loss": 1.7138, "step": 12907 }, { "epoch": 0.546069887469329, "grad_norm": 0.1964915245771408, "learning_rate": 0.001, "loss": 1.9687, "step": 12908 }, { "epoch": 0.5461121922328455, "grad_norm": 0.1862536370754242, "learning_rate": 0.001, "loss": 1.9072, "step": 12909 }, { "epoch": 0.5461544969963618, "grad_norm": 0.18136167526245117, "learning_rate": 0.001, "loss": 1.7342, "step": 12910 }, { "epoch": 0.5461968017598782, "grad_norm": 0.16859842836856842, "learning_rate": 0.001, "loss": 2.6862, "step": 12911 }, { "epoch": 0.5462391065233946, "grad_norm": 0.4886167049407959, "learning_rate": 0.001, "loss": 2.0441, "step": 12912 }, { "epoch": 0.5462814112869109, "grad_norm": 0.25893867015838623, "learning_rate": 0.001, "loss": 3.6045, "step": 12913 }, { "epoch": 0.5463237160504273, "grad_norm": 0.2573549747467041, "learning_rate": 0.001, "loss": 1.9222, "step": 12914 }, { "epoch": 0.5463660208139437, "grad_norm": 0.17247262597084045, "learning_rate": 0.001, "loss": 2.6157, "step": 12915 }, { "epoch": 0.54640832557746, "grad_norm": 2.4677224159240723, "learning_rate": 0.001, "loss": 2.0424, "step": 12916 }, { "epoch": 0.5464506303409764, "grad_norm": 0.20727023482322693, "learning_rate": 0.001, "loss": 1.9061, "step": 12917 }, { "epoch": 0.5464929351044928, "grad_norm": 0.1556202620267868, "learning_rate": 0.001, "loss": 2.4613, "step": 12918 }, { "epoch": 0.5465352398680091, "grad_norm": 0.18038994073867798, "learning_rate": 0.001, "loss": 2.001, "step": 12919 }, { "epoch": 0.5465775446315255, "grad_norm": 0.20001617074012756, "learning_rate": 0.001, "loss": 2.1099, "step": 12920 }, { "epoch": 0.5466198493950419, "grad_norm": 0.15489211678504944, "learning_rate": 0.001, "loss": 1.7065, "step": 12921 }, { "epoch": 0.5466621541585582, "grad_norm": 0.41907620429992676, "learning_rate": 0.001, "loss": 2.915, "step": 12922 }, { "epoch": 0.5467044589220746, "grad_norm": 0.15193894505500793, "learning_rate": 0.001, "loss": 2.4002, "step": 12923 }, { "epoch": 0.546746763685591, "grad_norm": 0.16042934358119965, "learning_rate": 0.001, "loss": 1.7455, "step": 12924 }, { "epoch": 0.5467890684491074, "grad_norm": 8.326644897460938, "learning_rate": 0.001, "loss": 2.3913, "step": 12925 }, { "epoch": 0.5468313732126238, "grad_norm": 0.22436167299747467, "learning_rate": 0.001, "loss": 2.9849, "step": 12926 }, { "epoch": 0.5468736779761401, "grad_norm": 0.17266489565372467, "learning_rate": 0.001, "loss": 1.7618, "step": 12927 }, { "epoch": 0.5469159827396565, "grad_norm": 0.21502335369586945, "learning_rate": 0.001, "loss": 1.632, "step": 12928 }, { "epoch": 0.5469582875031729, "grad_norm": 0.18782548606395721, "learning_rate": 0.001, "loss": 2.7248, "step": 12929 }, { "epoch": 0.5470005922666892, "grad_norm": 0.44364723563194275, "learning_rate": 0.001, "loss": 2.0572, "step": 12930 }, { "epoch": 0.5470428970302056, "grad_norm": 0.22250796854496002, "learning_rate": 0.001, "loss": 1.8936, "step": 12931 }, { "epoch": 0.547085201793722, "grad_norm": 0.19908872246742249, "learning_rate": 0.001, "loss": 2.5246, "step": 12932 }, { "epoch": 0.5471275065572383, "grad_norm": 0.15950053930282593, "learning_rate": 0.001, "loss": 2.6949, "step": 12933 }, { "epoch": 0.5471698113207547, "grad_norm": 0.45760655403137207, "learning_rate": 0.001, "loss": 1.5259, "step": 12934 }, { "epoch": 0.5472121160842711, "grad_norm": 1.588517427444458, "learning_rate": 0.001, "loss": 3.1214, "step": 12935 }, { "epoch": 0.5472544208477874, "grad_norm": 0.22262723743915558, "learning_rate": 0.001, "loss": 3.0762, "step": 12936 }, { "epoch": 0.5472967256113038, "grad_norm": 0.1947306990623474, "learning_rate": 0.001, "loss": 2.2629, "step": 12937 }, { "epoch": 0.5473390303748202, "grad_norm": 0.31676188111305237, "learning_rate": 0.001, "loss": 1.8793, "step": 12938 }, { "epoch": 0.5473813351383365, "grad_norm": 0.2185761034488678, "learning_rate": 0.001, "loss": 2.5543, "step": 12939 }, { "epoch": 0.5474236399018529, "grad_norm": 0.21403570473194122, "learning_rate": 0.001, "loss": 2.5567, "step": 12940 }, { "epoch": 0.5474659446653694, "grad_norm": 0.23452875018119812, "learning_rate": 0.001, "loss": 2.3212, "step": 12941 }, { "epoch": 0.5475082494288857, "grad_norm": 0.1822385936975479, "learning_rate": 0.001, "loss": 1.6779, "step": 12942 }, { "epoch": 0.5475505541924021, "grad_norm": 1.615887999534607, "learning_rate": 0.001, "loss": 1.8927, "step": 12943 }, { "epoch": 0.5475928589559185, "grad_norm": 0.2374815195798874, "learning_rate": 0.001, "loss": 1.8798, "step": 12944 }, { "epoch": 0.5476351637194348, "grad_norm": 0.3294655680656433, "learning_rate": 0.001, "loss": 3.5583, "step": 12945 }, { "epoch": 0.5476774684829512, "grad_norm": 0.17319463193416595, "learning_rate": 0.001, "loss": 2.3341, "step": 12946 }, { "epoch": 0.5477197732464676, "grad_norm": 0.18420511484146118, "learning_rate": 0.001, "loss": 2.6946, "step": 12947 }, { "epoch": 0.5477620780099839, "grad_norm": 0.5846876502037048, "learning_rate": 0.001, "loss": 2.2856, "step": 12948 }, { "epoch": 0.5478043827735003, "grad_norm": 0.23985953629016876, "learning_rate": 0.001, "loss": 2.0719, "step": 12949 }, { "epoch": 0.5478466875370167, "grad_norm": 0.1828928142786026, "learning_rate": 0.001, "loss": 2.1034, "step": 12950 }, { "epoch": 0.547888992300533, "grad_norm": 0.2998861074447632, "learning_rate": 0.001, "loss": 1.9081, "step": 12951 }, { "epoch": 0.5479312970640494, "grad_norm": 0.22397345304489136, "learning_rate": 0.001, "loss": 2.3908, "step": 12952 }, { "epoch": 0.5479736018275658, "grad_norm": 0.19218890368938446, "learning_rate": 0.001, "loss": 2.7397, "step": 12953 }, { "epoch": 0.5480159065910821, "grad_norm": 0.1939002424478531, "learning_rate": 0.001, "loss": 2.3556, "step": 12954 }, { "epoch": 0.5480582113545985, "grad_norm": 0.18285121023654938, "learning_rate": 0.001, "loss": 2.6213, "step": 12955 }, { "epoch": 0.5481005161181149, "grad_norm": 0.21203845739364624, "learning_rate": 0.001, "loss": 2.7509, "step": 12956 }, { "epoch": 0.5481428208816312, "grad_norm": 0.306901752948761, "learning_rate": 0.001, "loss": 2.286, "step": 12957 }, { "epoch": 0.5481851256451477, "grad_norm": 0.18208357691764832, "learning_rate": 0.001, "loss": 2.0693, "step": 12958 }, { "epoch": 0.5482274304086641, "grad_norm": 0.22899079322814941, "learning_rate": 0.001, "loss": 2.8397, "step": 12959 }, { "epoch": 0.5482697351721804, "grad_norm": 0.16618044674396515, "learning_rate": 0.001, "loss": 2.9579, "step": 12960 }, { "epoch": 0.5483120399356968, "grad_norm": 0.17978453636169434, "learning_rate": 0.001, "loss": 1.903, "step": 12961 }, { "epoch": 0.5483543446992132, "grad_norm": 0.18377989530563354, "learning_rate": 0.001, "loss": 2.4393, "step": 12962 }, { "epoch": 0.5483966494627295, "grad_norm": 0.21771173179149628, "learning_rate": 0.001, "loss": 3.4764, "step": 12963 }, { "epoch": 0.5484389542262459, "grad_norm": 0.16839885711669922, "learning_rate": 0.001, "loss": 1.7648, "step": 12964 }, { "epoch": 0.5484812589897623, "grad_norm": 0.17164848744869232, "learning_rate": 0.001, "loss": 1.8755, "step": 12965 }, { "epoch": 0.5485235637532786, "grad_norm": 0.25105589628219604, "learning_rate": 0.001, "loss": 2.2976, "step": 12966 }, { "epoch": 0.548565868516795, "grad_norm": 0.17218393087387085, "learning_rate": 0.001, "loss": 1.842, "step": 12967 }, { "epoch": 0.5486081732803114, "grad_norm": 0.2835632264614105, "learning_rate": 0.001, "loss": 2.5885, "step": 12968 }, { "epoch": 0.5486504780438277, "grad_norm": 0.2169799655675888, "learning_rate": 0.001, "loss": 2.1249, "step": 12969 }, { "epoch": 0.5486927828073441, "grad_norm": 0.28637203574180603, "learning_rate": 0.001, "loss": 1.6262, "step": 12970 }, { "epoch": 0.5487350875708604, "grad_norm": 0.2180042862892151, "learning_rate": 0.001, "loss": 1.9191, "step": 12971 }, { "epoch": 0.5487773923343768, "grad_norm": 0.20677872002124786, "learning_rate": 0.001, "loss": 3.2069, "step": 12972 }, { "epoch": 0.5488196970978932, "grad_norm": 2.7703301906585693, "learning_rate": 0.001, "loss": 3.6456, "step": 12973 }, { "epoch": 0.5488620018614095, "grad_norm": 0.4277131259441376, "learning_rate": 0.001, "loss": 3.3904, "step": 12974 }, { "epoch": 0.548904306624926, "grad_norm": 0.19730469584465027, "learning_rate": 0.001, "loss": 2.4932, "step": 12975 }, { "epoch": 0.5489466113884424, "grad_norm": 7.952031135559082, "learning_rate": 0.001, "loss": 1.7476, "step": 12976 }, { "epoch": 0.5489889161519587, "grad_norm": 0.3109131157398224, "learning_rate": 0.001, "loss": 2.152, "step": 12977 }, { "epoch": 0.5490312209154751, "grad_norm": 0.36644262075424194, "learning_rate": 0.001, "loss": 1.6802, "step": 12978 }, { "epoch": 0.5490735256789915, "grad_norm": 0.24093574285507202, "learning_rate": 0.001, "loss": 1.4931, "step": 12979 }, { "epoch": 0.5491158304425078, "grad_norm": 0.22954730689525604, "learning_rate": 0.001, "loss": 1.6246, "step": 12980 }, { "epoch": 0.5491581352060242, "grad_norm": 0.21042773127555847, "learning_rate": 0.001, "loss": 2.835, "step": 12981 }, { "epoch": 0.5492004399695406, "grad_norm": 0.1865551769733429, "learning_rate": 0.001, "loss": 2.28, "step": 12982 }, { "epoch": 0.5492427447330569, "grad_norm": 0.15189926326274872, "learning_rate": 0.001, "loss": 1.629, "step": 12983 }, { "epoch": 0.5492850494965733, "grad_norm": 0.3665443956851959, "learning_rate": 0.001, "loss": 2.1248, "step": 12984 }, { "epoch": 0.5493273542600897, "grad_norm": 0.13915681838989258, "learning_rate": 0.001, "loss": 3.073, "step": 12985 }, { "epoch": 0.549369659023606, "grad_norm": 0.1796284019947052, "learning_rate": 0.001, "loss": 1.1191, "step": 12986 }, { "epoch": 0.5494119637871224, "grad_norm": 0.3686416745185852, "learning_rate": 0.001, "loss": 2.0302, "step": 12987 }, { "epoch": 0.5494542685506388, "grad_norm": 0.685794472694397, "learning_rate": 0.001, "loss": 2.1283, "step": 12988 }, { "epoch": 0.5494965733141551, "grad_norm": 0.20203375816345215, "learning_rate": 0.001, "loss": 3.4477, "step": 12989 }, { "epoch": 0.5495388780776715, "grad_norm": 0.23336085677146912, "learning_rate": 0.001, "loss": 2.3422, "step": 12990 }, { "epoch": 0.549581182841188, "grad_norm": 0.1661679744720459, "learning_rate": 0.001, "loss": 2.7796, "step": 12991 }, { "epoch": 0.5496234876047043, "grad_norm": 0.17486825585365295, "learning_rate": 0.001, "loss": 2.0278, "step": 12992 }, { "epoch": 0.5496657923682207, "grad_norm": 0.1599670946598053, "learning_rate": 0.001, "loss": 2.1133, "step": 12993 }, { "epoch": 0.5497080971317371, "grad_norm": 0.17942267656326294, "learning_rate": 0.001, "loss": 2.227, "step": 12994 }, { "epoch": 0.5497504018952534, "grad_norm": 0.18072116374969482, "learning_rate": 0.001, "loss": 1.7973, "step": 12995 }, { "epoch": 0.5497927066587698, "grad_norm": 0.17400150001049042, "learning_rate": 0.001, "loss": 1.676, "step": 12996 }, { "epoch": 0.5498350114222862, "grad_norm": 0.2297709584236145, "learning_rate": 0.001, "loss": 2.1502, "step": 12997 }, { "epoch": 0.5498773161858025, "grad_norm": 0.2160550206899643, "learning_rate": 0.001, "loss": 2.0131, "step": 12998 }, { "epoch": 0.5499196209493189, "grad_norm": 0.22192339599132538, "learning_rate": 0.001, "loss": 3.1413, "step": 12999 }, { "epoch": 0.5499619257128353, "grad_norm": 0.2530875504016876, "learning_rate": 0.001, "loss": 2.6736, "step": 13000 }, { "epoch": 0.5500042304763516, "grad_norm": 0.17102450132369995, "learning_rate": 0.001, "loss": 2.3392, "step": 13001 }, { "epoch": 0.550046535239868, "grad_norm": 0.17439766228199005, "learning_rate": 0.001, "loss": 2.1533, "step": 13002 }, { "epoch": 0.5500888400033844, "grad_norm": 0.21379339694976807, "learning_rate": 0.001, "loss": 2.4908, "step": 13003 }, { "epoch": 0.5501311447669007, "grad_norm": 0.19133144617080688, "learning_rate": 0.001, "loss": 1.8931, "step": 13004 }, { "epoch": 0.5501734495304171, "grad_norm": 0.1691633015871048, "learning_rate": 0.001, "loss": 1.3374, "step": 13005 }, { "epoch": 0.5502157542939335, "grad_norm": 0.24116800725460052, "learning_rate": 0.001, "loss": 2.4278, "step": 13006 }, { "epoch": 0.5502580590574498, "grad_norm": 0.19122330844402313, "learning_rate": 0.001, "loss": 2.0226, "step": 13007 }, { "epoch": 0.5503003638209663, "grad_norm": 0.15570446848869324, "learning_rate": 0.001, "loss": 1.7573, "step": 13008 }, { "epoch": 0.5503426685844827, "grad_norm": 0.20239442586898804, "learning_rate": 0.001, "loss": 1.9359, "step": 13009 }, { "epoch": 0.550384973347999, "grad_norm": 0.1555495709180832, "learning_rate": 0.001, "loss": 3.2426, "step": 13010 }, { "epoch": 0.5504272781115154, "grad_norm": 0.16427169740200043, "learning_rate": 0.001, "loss": 1.7691, "step": 13011 }, { "epoch": 0.5504695828750318, "grad_norm": 0.16692587733268738, "learning_rate": 0.001, "loss": 2.0761, "step": 13012 }, { "epoch": 0.5505118876385481, "grad_norm": 0.1881280094385147, "learning_rate": 0.001, "loss": 2.1607, "step": 13013 }, { "epoch": 0.5505541924020645, "grad_norm": 0.1543162316083908, "learning_rate": 0.001, "loss": 2.4762, "step": 13014 }, { "epoch": 0.5505964971655808, "grad_norm": 0.1498258113861084, "learning_rate": 0.001, "loss": 2.1568, "step": 13015 }, { "epoch": 0.5506388019290972, "grad_norm": 0.15180440247058868, "learning_rate": 0.001, "loss": 2.5071, "step": 13016 }, { "epoch": 0.5506811066926136, "grad_norm": 0.16765782237052917, "learning_rate": 0.001, "loss": 2.642, "step": 13017 }, { "epoch": 0.5507234114561299, "grad_norm": 0.16857920587062836, "learning_rate": 0.001, "loss": 2.0443, "step": 13018 }, { "epoch": 0.5507657162196463, "grad_norm": 0.1566455215215683, "learning_rate": 0.001, "loss": 2.5403, "step": 13019 }, { "epoch": 0.5508080209831627, "grad_norm": 0.1734788417816162, "learning_rate": 0.001, "loss": 2.1921, "step": 13020 }, { "epoch": 0.550850325746679, "grad_norm": 0.15811264514923096, "learning_rate": 0.001, "loss": 1.4409, "step": 13021 }, { "epoch": 0.5508926305101954, "grad_norm": 0.25248953700065613, "learning_rate": 0.001, "loss": 1.9164, "step": 13022 }, { "epoch": 0.5509349352737118, "grad_norm": 0.19516262412071228, "learning_rate": 0.001, "loss": 2.1415, "step": 13023 }, { "epoch": 0.5509772400372281, "grad_norm": 0.16840478777885437, "learning_rate": 0.001, "loss": 2.2028, "step": 13024 }, { "epoch": 0.5510195448007446, "grad_norm": 6.247500896453857, "learning_rate": 0.001, "loss": 2.3796, "step": 13025 }, { "epoch": 0.551061849564261, "grad_norm": 2.1479499340057373, "learning_rate": 0.001, "loss": 2.4607, "step": 13026 }, { "epoch": 0.5511041543277773, "grad_norm": 0.1944126933813095, "learning_rate": 0.001, "loss": 1.8729, "step": 13027 }, { "epoch": 0.5511464590912937, "grad_norm": 0.16638629138469696, "learning_rate": 0.001, "loss": 1.7599, "step": 13028 }, { "epoch": 0.5511887638548101, "grad_norm": 0.17286452651023865, "learning_rate": 0.001, "loss": 3.7786, "step": 13029 }, { "epoch": 0.5512310686183264, "grad_norm": 0.18785692751407623, "learning_rate": 0.001, "loss": 1.624, "step": 13030 }, { "epoch": 0.5512733733818428, "grad_norm": 0.2681181728839874, "learning_rate": 0.001, "loss": 2.4162, "step": 13031 }, { "epoch": 0.5513156781453592, "grad_norm": 1.382765531539917, "learning_rate": 0.001, "loss": 2.8352, "step": 13032 }, { "epoch": 0.5513579829088755, "grad_norm": 0.1791725754737854, "learning_rate": 0.001, "loss": 1.5965, "step": 13033 }, { "epoch": 0.5514002876723919, "grad_norm": 0.22276152670383453, "learning_rate": 0.001, "loss": 2.2998, "step": 13034 }, { "epoch": 0.5514425924359083, "grad_norm": 0.21398457884788513, "learning_rate": 0.001, "loss": 1.873, "step": 13035 }, { "epoch": 0.5514848971994246, "grad_norm": 0.19248297810554504, "learning_rate": 0.001, "loss": 2.0772, "step": 13036 }, { "epoch": 0.551527201962941, "grad_norm": 0.19134873151779175, "learning_rate": 0.001, "loss": 2.2879, "step": 13037 }, { "epoch": 0.5515695067264574, "grad_norm": 0.2077552080154419, "learning_rate": 0.001, "loss": 2.2834, "step": 13038 }, { "epoch": 0.5516118114899737, "grad_norm": 0.18088361620903015, "learning_rate": 0.001, "loss": 1.7441, "step": 13039 }, { "epoch": 0.5516541162534901, "grad_norm": 0.19229553639888763, "learning_rate": 0.001, "loss": 2.3027, "step": 13040 }, { "epoch": 0.5516964210170066, "grad_norm": 0.20166753232479095, "learning_rate": 0.001, "loss": 2.6727, "step": 13041 }, { "epoch": 0.5517387257805229, "grad_norm": 0.24196840822696686, "learning_rate": 0.001, "loss": 2.5962, "step": 13042 }, { "epoch": 0.5517810305440393, "grad_norm": 0.18725115060806274, "learning_rate": 0.001, "loss": 2.1676, "step": 13043 }, { "epoch": 0.5518233353075557, "grad_norm": 0.20191504061222076, "learning_rate": 0.001, "loss": 2.0362, "step": 13044 }, { "epoch": 0.551865640071072, "grad_norm": 0.1696188747882843, "learning_rate": 0.001, "loss": 2.5537, "step": 13045 }, { "epoch": 0.5519079448345884, "grad_norm": 0.28044790029525757, "learning_rate": 0.001, "loss": 1.6491, "step": 13046 }, { "epoch": 0.5519502495981048, "grad_norm": 0.15965206921100616, "learning_rate": 0.001, "loss": 2.5324, "step": 13047 }, { "epoch": 0.5519925543616211, "grad_norm": 0.16759942471981049, "learning_rate": 0.001, "loss": 2.6668, "step": 13048 }, { "epoch": 0.5520348591251375, "grad_norm": 0.16884449124336243, "learning_rate": 0.001, "loss": 1.9035, "step": 13049 }, { "epoch": 0.5520771638886539, "grad_norm": 0.39007458090782166, "learning_rate": 0.001, "loss": 1.7417, "step": 13050 }, { "epoch": 0.5521194686521702, "grad_norm": 0.15368978679180145, "learning_rate": 0.001, "loss": 3.4138, "step": 13051 }, { "epoch": 0.5521617734156866, "grad_norm": 0.443830281496048, "learning_rate": 0.001, "loss": 1.5381, "step": 13052 }, { "epoch": 0.552204078179203, "grad_norm": 0.29350072145462036, "learning_rate": 0.001, "loss": 2.4967, "step": 13053 }, { "epoch": 0.5522463829427193, "grad_norm": 0.14899539947509766, "learning_rate": 0.001, "loss": 2.6561, "step": 13054 }, { "epoch": 0.5522886877062357, "grad_norm": 0.18092837929725647, "learning_rate": 0.001, "loss": 1.9984, "step": 13055 }, { "epoch": 0.5523309924697521, "grad_norm": 0.21982833743095398, "learning_rate": 0.001, "loss": 3.1229, "step": 13056 }, { "epoch": 0.5523732972332684, "grad_norm": 1.6211110353469849, "learning_rate": 0.001, "loss": 1.9592, "step": 13057 }, { "epoch": 0.5524156019967849, "grad_norm": 0.16090841591358185, "learning_rate": 0.001, "loss": 2.6147, "step": 13058 }, { "epoch": 0.5524579067603013, "grad_norm": 0.20968280732631683, "learning_rate": 0.001, "loss": 2.6148, "step": 13059 }, { "epoch": 0.5525002115238176, "grad_norm": 0.16014662384986877, "learning_rate": 0.001, "loss": 2.2449, "step": 13060 }, { "epoch": 0.552542516287334, "grad_norm": 0.4493609070777893, "learning_rate": 0.001, "loss": 1.5071, "step": 13061 }, { "epoch": 0.5525848210508503, "grad_norm": 0.20664609968662262, "learning_rate": 0.001, "loss": 2.2177, "step": 13062 }, { "epoch": 0.5526271258143667, "grad_norm": 0.1800384223461151, "learning_rate": 0.001, "loss": 1.9051, "step": 13063 }, { "epoch": 0.5526694305778831, "grad_norm": 0.16999885439872742, "learning_rate": 0.001, "loss": 2.2448, "step": 13064 }, { "epoch": 0.5527117353413994, "grad_norm": 0.1717803031206131, "learning_rate": 0.001, "loss": 1.679, "step": 13065 }, { "epoch": 0.5527540401049158, "grad_norm": 0.3044344186782837, "learning_rate": 0.001, "loss": 2.5296, "step": 13066 }, { "epoch": 0.5527963448684322, "grad_norm": 0.15595632791519165, "learning_rate": 0.001, "loss": 1.7021, "step": 13067 }, { "epoch": 0.5528386496319485, "grad_norm": 0.15000808238983154, "learning_rate": 0.001, "loss": 1.5892, "step": 13068 }, { "epoch": 0.5528809543954649, "grad_norm": 1.946815848350525, "learning_rate": 0.001, "loss": 1.4456, "step": 13069 }, { "epoch": 0.5529232591589813, "grad_norm": 0.16129404306411743, "learning_rate": 0.001, "loss": 1.5363, "step": 13070 }, { "epoch": 0.5529655639224976, "grad_norm": 0.22747275233268738, "learning_rate": 0.001, "loss": 1.9507, "step": 13071 }, { "epoch": 0.553007868686014, "grad_norm": 0.16564899682998657, "learning_rate": 0.001, "loss": 2.298, "step": 13072 }, { "epoch": 0.5530501734495304, "grad_norm": 0.1645679473876953, "learning_rate": 0.001, "loss": 2.7037, "step": 13073 }, { "epoch": 0.5530924782130467, "grad_norm": 0.18701903522014618, "learning_rate": 0.001, "loss": 1.9455, "step": 13074 }, { "epoch": 0.5531347829765632, "grad_norm": 0.16058051586151123, "learning_rate": 0.001, "loss": 2.1071, "step": 13075 }, { "epoch": 0.5531770877400796, "grad_norm": 0.16248644888401031, "learning_rate": 0.001, "loss": 2.7373, "step": 13076 }, { "epoch": 0.5532193925035959, "grad_norm": 0.20399704575538635, "learning_rate": 0.001, "loss": 2.0442, "step": 13077 }, { "epoch": 0.5532616972671123, "grad_norm": 0.14781156182289124, "learning_rate": 0.001, "loss": 1.7802, "step": 13078 }, { "epoch": 0.5533040020306287, "grad_norm": 0.3947334587574005, "learning_rate": 0.001, "loss": 1.9844, "step": 13079 }, { "epoch": 0.553346306794145, "grad_norm": 0.16816608607769012, "learning_rate": 0.001, "loss": 2.2254, "step": 13080 }, { "epoch": 0.5533886115576614, "grad_norm": 0.548812210559845, "learning_rate": 0.001, "loss": 2.331, "step": 13081 }, { "epoch": 0.5534309163211778, "grad_norm": 0.16253416240215302, "learning_rate": 0.001, "loss": 2.1435, "step": 13082 }, { "epoch": 0.5534732210846941, "grad_norm": 0.33334457874298096, "learning_rate": 0.001, "loss": 1.7767, "step": 13083 }, { "epoch": 0.5535155258482105, "grad_norm": 0.7046263813972473, "learning_rate": 0.001, "loss": 2.3232, "step": 13084 }, { "epoch": 0.5535578306117269, "grad_norm": 0.18305683135986328, "learning_rate": 0.001, "loss": 2.4841, "step": 13085 }, { "epoch": 0.5536001353752432, "grad_norm": 0.9522390365600586, "learning_rate": 0.001, "loss": 3.2129, "step": 13086 }, { "epoch": 0.5536424401387596, "grad_norm": 0.16383132338523865, "learning_rate": 0.001, "loss": 1.8029, "step": 13087 }, { "epoch": 0.553684744902276, "grad_norm": 0.16644108295440674, "learning_rate": 0.001, "loss": 1.9539, "step": 13088 }, { "epoch": 0.5537270496657923, "grad_norm": 0.16594970226287842, "learning_rate": 0.001, "loss": 2.1514, "step": 13089 }, { "epoch": 0.5537693544293087, "grad_norm": 0.19843560457229614, "learning_rate": 0.001, "loss": 2.751, "step": 13090 }, { "epoch": 0.5538116591928252, "grad_norm": 0.166713684797287, "learning_rate": 0.001, "loss": 1.9202, "step": 13091 }, { "epoch": 0.5538539639563415, "grad_norm": 0.18431521952152252, "learning_rate": 0.001, "loss": 2.3885, "step": 13092 }, { "epoch": 0.5538962687198579, "grad_norm": 0.16817611455917358, "learning_rate": 0.001, "loss": 2.1286, "step": 13093 }, { "epoch": 0.5539385734833743, "grad_norm": 0.15801174938678741, "learning_rate": 0.001, "loss": 1.8729, "step": 13094 }, { "epoch": 0.5539808782468906, "grad_norm": 0.3943849802017212, "learning_rate": 0.001, "loss": 2.5361, "step": 13095 }, { "epoch": 0.554023183010407, "grad_norm": 0.14820386469364166, "learning_rate": 0.001, "loss": 1.7892, "step": 13096 }, { "epoch": 0.5540654877739234, "grad_norm": 0.15670961141586304, "learning_rate": 0.001, "loss": 2.8515, "step": 13097 }, { "epoch": 0.5541077925374397, "grad_norm": 0.1757040172815323, "learning_rate": 0.001, "loss": 2.1255, "step": 13098 }, { "epoch": 0.5541500973009561, "grad_norm": 0.5804983377456665, "learning_rate": 0.001, "loss": 1.6095, "step": 13099 }, { "epoch": 0.5541924020644725, "grad_norm": 0.18276961147785187, "learning_rate": 0.001, "loss": 1.8125, "step": 13100 }, { "epoch": 0.5542347068279888, "grad_norm": 0.24642734229564667, "learning_rate": 0.001, "loss": 2.0911, "step": 13101 }, { "epoch": 0.5542770115915052, "grad_norm": 0.16965211927890778, "learning_rate": 0.001, "loss": 1.8867, "step": 13102 }, { "epoch": 0.5543193163550216, "grad_norm": 0.1579209566116333, "learning_rate": 0.001, "loss": 1.7393, "step": 13103 }, { "epoch": 0.5543616211185379, "grad_norm": 0.2219139188528061, "learning_rate": 0.001, "loss": 1.8149, "step": 13104 }, { "epoch": 0.5544039258820543, "grad_norm": 0.17188802361488342, "learning_rate": 0.001, "loss": 1.8126, "step": 13105 }, { "epoch": 0.5544462306455706, "grad_norm": 0.14392749965190887, "learning_rate": 0.001, "loss": 2.0954, "step": 13106 }, { "epoch": 0.554488535409087, "grad_norm": 0.19999627768993378, "learning_rate": 0.001, "loss": 1.8572, "step": 13107 }, { "epoch": 0.5545308401726035, "grad_norm": 0.20926088094711304, "learning_rate": 0.001, "loss": 3.1452, "step": 13108 }, { "epoch": 0.5545731449361198, "grad_norm": 0.19249233603477478, "learning_rate": 0.001, "loss": 2.3985, "step": 13109 }, { "epoch": 0.5546154496996362, "grad_norm": 0.1850895881652832, "learning_rate": 0.001, "loss": 1.7581, "step": 13110 }, { "epoch": 0.5546577544631526, "grad_norm": 0.1713310182094574, "learning_rate": 0.001, "loss": 3.1031, "step": 13111 }, { "epoch": 0.5547000592266689, "grad_norm": 0.21461325883865356, "learning_rate": 0.001, "loss": 1.462, "step": 13112 }, { "epoch": 0.5547423639901853, "grad_norm": 0.19272612035274506, "learning_rate": 0.001, "loss": 2.0605, "step": 13113 }, { "epoch": 0.5547846687537017, "grad_norm": 0.5416992902755737, "learning_rate": 0.001, "loss": 2.4492, "step": 13114 }, { "epoch": 0.554826973517218, "grad_norm": 0.16244624555110931, "learning_rate": 0.001, "loss": 1.9301, "step": 13115 }, { "epoch": 0.5548692782807344, "grad_norm": 0.1623430848121643, "learning_rate": 0.001, "loss": 1.8153, "step": 13116 }, { "epoch": 0.5549115830442508, "grad_norm": 0.15452533960342407, "learning_rate": 0.001, "loss": 2.2119, "step": 13117 }, { "epoch": 0.5549538878077671, "grad_norm": 0.1652129739522934, "learning_rate": 0.001, "loss": 2.0223, "step": 13118 }, { "epoch": 0.5549961925712835, "grad_norm": 0.19139006733894348, "learning_rate": 0.001, "loss": 2.2582, "step": 13119 }, { "epoch": 0.5550384973347999, "grad_norm": 0.21414639055728912, "learning_rate": 0.001, "loss": 1.978, "step": 13120 }, { "epoch": 0.5550808020983162, "grad_norm": 0.18957240879535675, "learning_rate": 0.001, "loss": 2.3123, "step": 13121 }, { "epoch": 0.5551231068618326, "grad_norm": 0.16492891311645508, "learning_rate": 0.001, "loss": 2.2614, "step": 13122 }, { "epoch": 0.555165411625349, "grad_norm": 0.17315563559532166, "learning_rate": 0.001, "loss": 1.4405, "step": 13123 }, { "epoch": 0.5552077163888653, "grad_norm": 0.18722382187843323, "learning_rate": 0.001, "loss": 2.4344, "step": 13124 }, { "epoch": 0.5552500211523818, "grad_norm": 0.13166561722755432, "learning_rate": 0.001, "loss": 2.4014, "step": 13125 }, { "epoch": 0.5552923259158982, "grad_norm": 0.22480559349060059, "learning_rate": 0.001, "loss": 1.9176, "step": 13126 }, { "epoch": 0.5553346306794145, "grad_norm": 0.15770910680294037, "learning_rate": 0.001, "loss": 2.2783, "step": 13127 }, { "epoch": 0.5553769354429309, "grad_norm": 0.17504677176475525, "learning_rate": 0.001, "loss": 1.8905, "step": 13128 }, { "epoch": 0.5554192402064473, "grad_norm": 0.18048499524593353, "learning_rate": 0.001, "loss": 2.557, "step": 13129 }, { "epoch": 0.5554615449699636, "grad_norm": 0.19142648577690125, "learning_rate": 0.001, "loss": 1.8773, "step": 13130 }, { "epoch": 0.55550384973348, "grad_norm": 0.16051210463047028, "learning_rate": 0.001, "loss": 1.8186, "step": 13131 }, { "epoch": 0.5555461544969964, "grad_norm": 0.21467453241348267, "learning_rate": 0.001, "loss": 2.09, "step": 13132 }, { "epoch": 0.5555884592605127, "grad_norm": 0.7346461415290833, "learning_rate": 0.001, "loss": 2.0679, "step": 13133 }, { "epoch": 0.5556307640240291, "grad_norm": 0.1747002899646759, "learning_rate": 0.001, "loss": 1.8083, "step": 13134 }, { "epoch": 0.5556730687875455, "grad_norm": 0.17838482558727264, "learning_rate": 0.001, "loss": 2.428, "step": 13135 }, { "epoch": 0.5557153735510618, "grad_norm": 0.15864208340644836, "learning_rate": 0.001, "loss": 2.0233, "step": 13136 }, { "epoch": 0.5557576783145782, "grad_norm": 0.27574223279953003, "learning_rate": 0.001, "loss": 2.9788, "step": 13137 }, { "epoch": 0.5557999830780946, "grad_norm": 0.18483395874500275, "learning_rate": 0.001, "loss": 2.0938, "step": 13138 }, { "epoch": 0.5558422878416109, "grad_norm": 0.1585381031036377, "learning_rate": 0.001, "loss": 1.4932, "step": 13139 }, { "epoch": 0.5558845926051273, "grad_norm": 0.19436490535736084, "learning_rate": 0.001, "loss": 2.827, "step": 13140 }, { "epoch": 0.5559268973686438, "grad_norm": 0.9730831384658813, "learning_rate": 0.001, "loss": 1.8887, "step": 13141 }, { "epoch": 0.5559692021321601, "grad_norm": 0.16187086701393127, "learning_rate": 0.001, "loss": 2.15, "step": 13142 }, { "epoch": 0.5560115068956765, "grad_norm": 0.17080490291118622, "learning_rate": 0.001, "loss": 2.7272, "step": 13143 }, { "epoch": 0.5560538116591929, "grad_norm": 0.4234422445297241, "learning_rate": 0.001, "loss": 2.6932, "step": 13144 }, { "epoch": 0.5560961164227092, "grad_norm": 0.16992883384227753, "learning_rate": 0.001, "loss": 1.6274, "step": 13145 }, { "epoch": 0.5561384211862256, "grad_norm": 0.1784851998090744, "learning_rate": 0.001, "loss": 1.9868, "step": 13146 }, { "epoch": 0.556180725949742, "grad_norm": 0.18666186928749084, "learning_rate": 0.001, "loss": 2.8875, "step": 13147 }, { "epoch": 0.5562230307132583, "grad_norm": 0.17807726562023163, "learning_rate": 0.001, "loss": 1.6625, "step": 13148 }, { "epoch": 0.5562653354767747, "grad_norm": 0.14291433990001678, "learning_rate": 0.001, "loss": 2.1721, "step": 13149 }, { "epoch": 0.556307640240291, "grad_norm": 0.18871572613716125, "learning_rate": 0.001, "loss": 1.5161, "step": 13150 }, { "epoch": 0.5563499450038074, "grad_norm": 0.2232998013496399, "learning_rate": 0.001, "loss": 1.7724, "step": 13151 }, { "epoch": 0.5563922497673238, "grad_norm": 0.21239492297172546, "learning_rate": 0.001, "loss": 2.0644, "step": 13152 }, { "epoch": 0.5564345545308401, "grad_norm": 0.177618145942688, "learning_rate": 0.001, "loss": 2.4754, "step": 13153 }, { "epoch": 0.5564768592943565, "grad_norm": 0.2010020613670349, "learning_rate": 0.001, "loss": 1.8863, "step": 13154 }, { "epoch": 0.5565191640578729, "grad_norm": 0.15301641821861267, "learning_rate": 0.001, "loss": 2.6225, "step": 13155 }, { "epoch": 0.5565614688213892, "grad_norm": 0.16343499720096588, "learning_rate": 0.001, "loss": 2.5127, "step": 13156 }, { "epoch": 0.5566037735849056, "grad_norm": 0.15531091392040253, "learning_rate": 0.001, "loss": 2.0179, "step": 13157 }, { "epoch": 0.5566460783484221, "grad_norm": 0.18499448895454407, "learning_rate": 0.001, "loss": 1.4872, "step": 13158 }, { "epoch": 0.5566883831119384, "grad_norm": 0.21716418862342834, "learning_rate": 0.001, "loss": 1.7633, "step": 13159 }, { "epoch": 0.5567306878754548, "grad_norm": 0.3633074462413788, "learning_rate": 0.001, "loss": 1.7639, "step": 13160 }, { "epoch": 0.5567729926389712, "grad_norm": 0.20026670396327972, "learning_rate": 0.001, "loss": 1.4217, "step": 13161 }, { "epoch": 0.5568152974024875, "grad_norm": 0.18606965243816376, "learning_rate": 0.001, "loss": 1.8908, "step": 13162 }, { "epoch": 0.5568576021660039, "grad_norm": 0.17996345460414886, "learning_rate": 0.001, "loss": 1.7892, "step": 13163 }, { "epoch": 0.5568999069295203, "grad_norm": 0.1515737622976303, "learning_rate": 0.001, "loss": 1.8319, "step": 13164 }, { "epoch": 0.5569422116930366, "grad_norm": 0.21763929724693298, "learning_rate": 0.001, "loss": 3.2495, "step": 13165 }, { "epoch": 0.556984516456553, "grad_norm": 0.18336108326911926, "learning_rate": 0.001, "loss": 1.6638, "step": 13166 }, { "epoch": 0.5570268212200694, "grad_norm": 0.1941053867340088, "learning_rate": 0.001, "loss": 2.2389, "step": 13167 }, { "epoch": 0.5570691259835857, "grad_norm": 0.2293504923582077, "learning_rate": 0.001, "loss": 1.7683, "step": 13168 }, { "epoch": 0.5571114307471021, "grad_norm": 0.18686215579509735, "learning_rate": 0.001, "loss": 1.6258, "step": 13169 }, { "epoch": 0.5571537355106185, "grad_norm": 0.17129628360271454, "learning_rate": 0.001, "loss": 1.8111, "step": 13170 }, { "epoch": 0.5571960402741348, "grad_norm": 0.20387300848960876, "learning_rate": 0.001, "loss": 2.2676, "step": 13171 }, { "epoch": 0.5572383450376512, "grad_norm": 0.15360292792320251, "learning_rate": 0.001, "loss": 1.8776, "step": 13172 }, { "epoch": 0.5572806498011676, "grad_norm": 0.20021335780620575, "learning_rate": 0.001, "loss": 1.9061, "step": 13173 }, { "epoch": 0.557322954564684, "grad_norm": 0.19312505424022675, "learning_rate": 0.001, "loss": 1.8143, "step": 13174 }, { "epoch": 0.5573652593282004, "grad_norm": 0.19307121634483337, "learning_rate": 0.001, "loss": 4.1747, "step": 13175 }, { "epoch": 0.5574075640917168, "grad_norm": 0.16115133464336395, "learning_rate": 0.001, "loss": 2.7506, "step": 13176 }, { "epoch": 0.5574498688552331, "grad_norm": 0.1843666285276413, "learning_rate": 0.001, "loss": 2.6517, "step": 13177 }, { "epoch": 0.5574921736187495, "grad_norm": 0.21303428709506989, "learning_rate": 0.001, "loss": 2.7585, "step": 13178 }, { "epoch": 0.5575344783822659, "grad_norm": 0.16230140626430511, "learning_rate": 0.001, "loss": 1.822, "step": 13179 }, { "epoch": 0.5575767831457822, "grad_norm": 0.20180179178714752, "learning_rate": 0.001, "loss": 2.4642, "step": 13180 }, { "epoch": 0.5576190879092986, "grad_norm": 0.1860133558511734, "learning_rate": 0.001, "loss": 2.4435, "step": 13181 }, { "epoch": 0.557661392672815, "grad_norm": 0.33960339426994324, "learning_rate": 0.001, "loss": 3.8256, "step": 13182 }, { "epoch": 0.5577036974363313, "grad_norm": 0.1435151845216751, "learning_rate": 0.001, "loss": 1.4867, "step": 13183 }, { "epoch": 0.5577460021998477, "grad_norm": 0.1442885398864746, "learning_rate": 0.001, "loss": 3.0296, "step": 13184 }, { "epoch": 0.5577883069633641, "grad_norm": 0.3879304528236389, "learning_rate": 0.001, "loss": 2.4309, "step": 13185 }, { "epoch": 0.5578306117268804, "grad_norm": 0.15823771059513092, "learning_rate": 0.001, "loss": 2.0006, "step": 13186 }, { "epoch": 0.5578729164903968, "grad_norm": 0.2137526571750641, "learning_rate": 0.001, "loss": 2.8213, "step": 13187 }, { "epoch": 0.5579152212539132, "grad_norm": 0.2220454066991806, "learning_rate": 0.001, "loss": 1.751, "step": 13188 }, { "epoch": 0.5579575260174295, "grad_norm": 0.15580366551876068, "learning_rate": 0.001, "loss": 3.3767, "step": 13189 }, { "epoch": 0.557999830780946, "grad_norm": 0.1863831877708435, "learning_rate": 0.001, "loss": 2.8947, "step": 13190 }, { "epoch": 0.5580421355444624, "grad_norm": 0.17505452036857605, "learning_rate": 0.001, "loss": 2.7388, "step": 13191 }, { "epoch": 0.5580844403079787, "grad_norm": 0.5289129018783569, "learning_rate": 0.001, "loss": 1.7719, "step": 13192 }, { "epoch": 0.5581267450714951, "grad_norm": 0.15774008631706238, "learning_rate": 0.001, "loss": 1.9727, "step": 13193 }, { "epoch": 0.5581690498350115, "grad_norm": 0.16783976554870605, "learning_rate": 0.001, "loss": 1.9035, "step": 13194 }, { "epoch": 0.5582113545985278, "grad_norm": 0.19274777173995972, "learning_rate": 0.001, "loss": 2.3897, "step": 13195 }, { "epoch": 0.5582536593620442, "grad_norm": 0.15515726804733276, "learning_rate": 0.001, "loss": 2.1614, "step": 13196 }, { "epoch": 0.5582959641255605, "grad_norm": 0.20335707068443298, "learning_rate": 0.001, "loss": 2.5368, "step": 13197 }, { "epoch": 0.5583382688890769, "grad_norm": 0.17299920320510864, "learning_rate": 0.001, "loss": 1.9999, "step": 13198 }, { "epoch": 0.5583805736525933, "grad_norm": 0.167136088013649, "learning_rate": 0.001, "loss": 1.841, "step": 13199 }, { "epoch": 0.5584228784161096, "grad_norm": 0.19228960573673248, "learning_rate": 0.001, "loss": 3.3171, "step": 13200 }, { "epoch": 0.558465183179626, "grad_norm": 0.13816307485103607, "learning_rate": 0.001, "loss": 3.1786, "step": 13201 }, { "epoch": 0.5585074879431424, "grad_norm": 1.117954969406128, "learning_rate": 0.001, "loss": 2.6146, "step": 13202 }, { "epoch": 0.5585497927066587, "grad_norm": 0.1563483625650406, "learning_rate": 0.001, "loss": 2.3211, "step": 13203 }, { "epoch": 0.5585920974701751, "grad_norm": 0.15023623406887054, "learning_rate": 0.001, "loss": 1.7047, "step": 13204 }, { "epoch": 0.5586344022336915, "grad_norm": 0.21180753409862518, "learning_rate": 0.001, "loss": 3.0361, "step": 13205 }, { "epoch": 0.5586767069972078, "grad_norm": 1.6476328372955322, "learning_rate": 0.001, "loss": 2.0756, "step": 13206 }, { "epoch": 0.5587190117607242, "grad_norm": 0.1770959347486496, "learning_rate": 0.001, "loss": 2.0876, "step": 13207 }, { "epoch": 0.5587613165242407, "grad_norm": 0.16517281532287598, "learning_rate": 0.001, "loss": 2.9517, "step": 13208 }, { "epoch": 0.558803621287757, "grad_norm": 0.24339810013771057, "learning_rate": 0.001, "loss": 2.227, "step": 13209 }, { "epoch": 0.5588459260512734, "grad_norm": 0.2662486732006073, "learning_rate": 0.001, "loss": 1.6548, "step": 13210 }, { "epoch": 0.5588882308147898, "grad_norm": 2.719054698944092, "learning_rate": 0.001, "loss": 2.0465, "step": 13211 }, { "epoch": 0.5589305355783061, "grad_norm": 0.4585936367511749, "learning_rate": 0.001, "loss": 2.4569, "step": 13212 }, { "epoch": 0.5589728403418225, "grad_norm": 0.2304200530052185, "learning_rate": 0.001, "loss": 2.2496, "step": 13213 }, { "epoch": 0.5590151451053389, "grad_norm": 0.4977359175682068, "learning_rate": 0.001, "loss": 2.5334, "step": 13214 }, { "epoch": 0.5590574498688552, "grad_norm": 0.1636125147342682, "learning_rate": 0.001, "loss": 2.0662, "step": 13215 }, { "epoch": 0.5590997546323716, "grad_norm": 0.18210989236831665, "learning_rate": 0.001, "loss": 1.9016, "step": 13216 }, { "epoch": 0.559142059395888, "grad_norm": 0.5983842611312866, "learning_rate": 0.001, "loss": 3.717, "step": 13217 }, { "epoch": 0.5591843641594043, "grad_norm": 0.20817320048809052, "learning_rate": 0.001, "loss": 1.5124, "step": 13218 }, { "epoch": 0.5592266689229207, "grad_norm": 0.16508392989635468, "learning_rate": 0.001, "loss": 1.7414, "step": 13219 }, { "epoch": 0.5592689736864371, "grad_norm": 0.19145143032073975, "learning_rate": 0.001, "loss": 2.4444, "step": 13220 }, { "epoch": 0.5593112784499534, "grad_norm": 0.2849757969379425, "learning_rate": 0.001, "loss": 4.1417, "step": 13221 }, { "epoch": 0.5593535832134698, "grad_norm": 0.20053541660308838, "learning_rate": 0.001, "loss": 2.7225, "step": 13222 }, { "epoch": 0.5593958879769863, "grad_norm": 0.19015778601169586, "learning_rate": 0.001, "loss": 1.9326, "step": 13223 }, { "epoch": 0.5594381927405025, "grad_norm": 0.22139129042625427, "learning_rate": 0.001, "loss": 2.0288, "step": 13224 }, { "epoch": 0.559480497504019, "grad_norm": 0.18793657422065735, "learning_rate": 0.001, "loss": 2.234, "step": 13225 }, { "epoch": 0.5595228022675354, "grad_norm": 0.3711003363132477, "learning_rate": 0.001, "loss": 2.8216, "step": 13226 }, { "epoch": 0.5595651070310517, "grad_norm": 0.35137489438056946, "learning_rate": 0.001, "loss": 2.3047, "step": 13227 }, { "epoch": 0.5596074117945681, "grad_norm": 0.3833855092525482, "learning_rate": 0.001, "loss": 2.2983, "step": 13228 }, { "epoch": 0.5596497165580845, "grad_norm": 0.19461855292320251, "learning_rate": 0.001, "loss": 2.4572, "step": 13229 }, { "epoch": 0.5596920213216008, "grad_norm": 0.22493231296539307, "learning_rate": 0.001, "loss": 2.1651, "step": 13230 }, { "epoch": 0.5597343260851172, "grad_norm": 0.19130484759807587, "learning_rate": 0.001, "loss": 2.1431, "step": 13231 }, { "epoch": 0.5597766308486336, "grad_norm": 0.1570371687412262, "learning_rate": 0.001, "loss": 1.8944, "step": 13232 }, { "epoch": 0.5598189356121499, "grad_norm": 0.16099748015403748, "learning_rate": 0.001, "loss": 2.277, "step": 13233 }, { "epoch": 0.5598612403756663, "grad_norm": 0.18128852546215057, "learning_rate": 0.001, "loss": 1.8642, "step": 13234 }, { "epoch": 0.5599035451391827, "grad_norm": 0.2672935426235199, "learning_rate": 0.001, "loss": 1.5749, "step": 13235 }, { "epoch": 0.559945849902699, "grad_norm": 0.1762719750404358, "learning_rate": 0.001, "loss": 2.1202, "step": 13236 }, { "epoch": 0.5599881546662154, "grad_norm": 0.17224456369876862, "learning_rate": 0.001, "loss": 2.2378, "step": 13237 }, { "epoch": 0.5600304594297318, "grad_norm": 0.15539632737636566, "learning_rate": 0.001, "loss": 2.3729, "step": 13238 }, { "epoch": 0.5600727641932481, "grad_norm": 0.16019120812416077, "learning_rate": 0.001, "loss": 2.0313, "step": 13239 }, { "epoch": 0.5601150689567646, "grad_norm": 0.21855637431144714, "learning_rate": 0.001, "loss": 2.3144, "step": 13240 }, { "epoch": 0.5601573737202808, "grad_norm": 0.20648352801799774, "learning_rate": 0.001, "loss": 1.9184, "step": 13241 }, { "epoch": 0.5601996784837973, "grad_norm": 0.1866380274295807, "learning_rate": 0.001, "loss": 2.8954, "step": 13242 }, { "epoch": 0.5602419832473137, "grad_norm": 4.305062294006348, "learning_rate": 0.001, "loss": 2.8966, "step": 13243 }, { "epoch": 0.56028428801083, "grad_norm": 0.18245016038417816, "learning_rate": 0.001, "loss": 2.472, "step": 13244 }, { "epoch": 0.5603265927743464, "grad_norm": 0.15738457441329956, "learning_rate": 0.001, "loss": 1.5529, "step": 13245 }, { "epoch": 0.5603688975378628, "grad_norm": 0.1629572957754135, "learning_rate": 0.001, "loss": 1.5193, "step": 13246 }, { "epoch": 0.5604112023013791, "grad_norm": 0.46464836597442627, "learning_rate": 0.001, "loss": 3.0418, "step": 13247 }, { "epoch": 0.5604535070648955, "grad_norm": 0.2348056435585022, "learning_rate": 0.001, "loss": 2.3288, "step": 13248 }, { "epoch": 0.5604958118284119, "grad_norm": 0.9263020753860474, "learning_rate": 0.001, "loss": 2.1417, "step": 13249 }, { "epoch": 0.5605381165919282, "grad_norm": 0.32688605785369873, "learning_rate": 0.001, "loss": 2.0048, "step": 13250 }, { "epoch": 0.5605804213554446, "grad_norm": 0.226731076836586, "learning_rate": 0.001, "loss": 2.6983, "step": 13251 }, { "epoch": 0.560622726118961, "grad_norm": 0.40565672516822815, "learning_rate": 0.001, "loss": 2.6863, "step": 13252 }, { "epoch": 0.5606650308824773, "grad_norm": 0.21357344090938568, "learning_rate": 0.001, "loss": 1.5538, "step": 13253 }, { "epoch": 0.5607073356459937, "grad_norm": 0.20866970717906952, "learning_rate": 0.001, "loss": 1.9627, "step": 13254 }, { "epoch": 0.5607496404095101, "grad_norm": 0.20830655097961426, "learning_rate": 0.001, "loss": 2.1551, "step": 13255 }, { "epoch": 0.5607919451730264, "grad_norm": 0.19615496695041656, "learning_rate": 0.001, "loss": 2.1498, "step": 13256 }, { "epoch": 0.5608342499365429, "grad_norm": 0.32191988825798035, "learning_rate": 0.001, "loss": 3.5976, "step": 13257 }, { "epoch": 0.5608765547000593, "grad_norm": 0.22609533369541168, "learning_rate": 0.001, "loss": 3.2048, "step": 13258 }, { "epoch": 0.5609188594635756, "grad_norm": 0.45731955766677856, "learning_rate": 0.001, "loss": 1.5562, "step": 13259 }, { "epoch": 0.560961164227092, "grad_norm": 0.19741539657115936, "learning_rate": 0.001, "loss": 2.3421, "step": 13260 }, { "epoch": 0.5610034689906084, "grad_norm": 0.24196267127990723, "learning_rate": 0.001, "loss": 3.386, "step": 13261 }, { "epoch": 0.5610457737541247, "grad_norm": 0.24868476390838623, "learning_rate": 0.001, "loss": 1.7664, "step": 13262 }, { "epoch": 0.5610880785176411, "grad_norm": 0.17724378407001495, "learning_rate": 0.001, "loss": 2.038, "step": 13263 }, { "epoch": 0.5611303832811575, "grad_norm": 0.17900416254997253, "learning_rate": 0.001, "loss": 2.5304, "step": 13264 }, { "epoch": 0.5611726880446738, "grad_norm": 0.2126455008983612, "learning_rate": 0.001, "loss": 2.1127, "step": 13265 }, { "epoch": 0.5612149928081902, "grad_norm": 0.20829962193965912, "learning_rate": 0.001, "loss": 2.4063, "step": 13266 }, { "epoch": 0.5612572975717066, "grad_norm": 0.22738134860992432, "learning_rate": 0.001, "loss": 1.7318, "step": 13267 }, { "epoch": 0.5612996023352229, "grad_norm": 0.18738828599452972, "learning_rate": 0.001, "loss": 2.0383, "step": 13268 }, { "epoch": 0.5613419070987393, "grad_norm": 0.2207876741886139, "learning_rate": 0.001, "loss": 2.9998, "step": 13269 }, { "epoch": 0.5613842118622557, "grad_norm": 0.1774303913116455, "learning_rate": 0.001, "loss": 2.2946, "step": 13270 }, { "epoch": 0.561426516625772, "grad_norm": 0.16445757448673248, "learning_rate": 0.001, "loss": 2.1301, "step": 13271 }, { "epoch": 0.5614688213892884, "grad_norm": 0.1810140758752823, "learning_rate": 0.001, "loss": 1.7674, "step": 13272 }, { "epoch": 0.5615111261528049, "grad_norm": 0.2058965414762497, "learning_rate": 0.001, "loss": 1.5423, "step": 13273 }, { "epoch": 0.5615534309163212, "grad_norm": 0.20573607087135315, "learning_rate": 0.001, "loss": 3.1317, "step": 13274 }, { "epoch": 0.5615957356798376, "grad_norm": 0.2003343552350998, "learning_rate": 0.001, "loss": 3.3971, "step": 13275 }, { "epoch": 0.561638040443354, "grad_norm": 0.16482414305210114, "learning_rate": 0.001, "loss": 2.5649, "step": 13276 }, { "epoch": 0.5616803452068703, "grad_norm": 0.16140007972717285, "learning_rate": 0.001, "loss": 2.7057, "step": 13277 }, { "epoch": 0.5617226499703867, "grad_norm": 2.4391520023345947, "learning_rate": 0.001, "loss": 1.9083, "step": 13278 }, { "epoch": 0.5617649547339031, "grad_norm": 0.17537763714790344, "learning_rate": 0.001, "loss": 2.0083, "step": 13279 }, { "epoch": 0.5618072594974194, "grad_norm": 0.17112372815608978, "learning_rate": 0.001, "loss": 1.7442, "step": 13280 }, { "epoch": 0.5618495642609358, "grad_norm": 0.17031919956207275, "learning_rate": 0.001, "loss": 1.8762, "step": 13281 }, { "epoch": 0.5618918690244522, "grad_norm": 0.15143336355686188, "learning_rate": 0.001, "loss": 2.3816, "step": 13282 }, { "epoch": 0.5619341737879685, "grad_norm": 0.15341715514659882, "learning_rate": 0.001, "loss": 2.9387, "step": 13283 }, { "epoch": 0.5619764785514849, "grad_norm": 0.18952025473117828, "learning_rate": 0.001, "loss": 2.3547, "step": 13284 }, { "epoch": 0.5620187833150013, "grad_norm": 0.18509429693222046, "learning_rate": 0.001, "loss": 2.503, "step": 13285 }, { "epoch": 0.5620610880785176, "grad_norm": 0.16015632450580597, "learning_rate": 0.001, "loss": 2.008, "step": 13286 }, { "epoch": 0.562103392842034, "grad_norm": 0.1822974979877472, "learning_rate": 0.001, "loss": 1.8725, "step": 13287 }, { "epoch": 0.5621456976055503, "grad_norm": 0.20895791053771973, "learning_rate": 0.001, "loss": 3.7383, "step": 13288 }, { "epoch": 0.5621880023690667, "grad_norm": 0.2861064076423645, "learning_rate": 0.001, "loss": 1.7759, "step": 13289 }, { "epoch": 0.5622303071325832, "grad_norm": 0.1581949144601822, "learning_rate": 0.001, "loss": 2.9572, "step": 13290 }, { "epoch": 0.5622726118960995, "grad_norm": 0.1519918292760849, "learning_rate": 0.001, "loss": 1.911, "step": 13291 }, { "epoch": 0.5623149166596159, "grad_norm": 1.3050894737243652, "learning_rate": 0.001, "loss": 1.6955, "step": 13292 }, { "epoch": 0.5623572214231323, "grad_norm": 0.16207565367221832, "learning_rate": 0.001, "loss": 1.9604, "step": 13293 }, { "epoch": 0.5623995261866486, "grad_norm": 0.21494990587234497, "learning_rate": 0.001, "loss": 1.8584, "step": 13294 }, { "epoch": 0.562441830950165, "grad_norm": 0.1507209986448288, "learning_rate": 0.001, "loss": 2.1614, "step": 13295 }, { "epoch": 0.5624841357136814, "grad_norm": 0.5043923854827881, "learning_rate": 0.001, "loss": 2.4189, "step": 13296 }, { "epoch": 0.5625264404771977, "grad_norm": 0.15872474014759064, "learning_rate": 0.001, "loss": 2.1197, "step": 13297 }, { "epoch": 0.5625687452407141, "grad_norm": 0.18348495662212372, "learning_rate": 0.001, "loss": 1.9253, "step": 13298 }, { "epoch": 0.5626110500042305, "grad_norm": 0.22080762684345245, "learning_rate": 0.001, "loss": 2.7428, "step": 13299 }, { "epoch": 0.5626533547677468, "grad_norm": 0.21116049587726593, "learning_rate": 0.001, "loss": 2.4198, "step": 13300 }, { "epoch": 0.5626956595312632, "grad_norm": 0.26892927289009094, "learning_rate": 0.001, "loss": 2.1072, "step": 13301 }, { "epoch": 0.5627379642947796, "grad_norm": 0.17305731773376465, "learning_rate": 0.001, "loss": 2.569, "step": 13302 }, { "epoch": 0.5627802690582959, "grad_norm": 0.18649710714817047, "learning_rate": 0.001, "loss": 3.0606, "step": 13303 }, { "epoch": 0.5628225738218123, "grad_norm": 0.15958692133426666, "learning_rate": 0.001, "loss": 2.2803, "step": 13304 }, { "epoch": 0.5628648785853287, "grad_norm": 0.20183780789375305, "learning_rate": 0.001, "loss": 3.1339, "step": 13305 }, { "epoch": 0.562907183348845, "grad_norm": 0.23811282217502594, "learning_rate": 0.001, "loss": 2.0551, "step": 13306 }, { "epoch": 0.5629494881123615, "grad_norm": 0.2011602222919464, "learning_rate": 0.001, "loss": 2.1267, "step": 13307 }, { "epoch": 0.5629917928758779, "grad_norm": 0.18026520311832428, "learning_rate": 0.001, "loss": 2.5384, "step": 13308 }, { "epoch": 0.5630340976393942, "grad_norm": 0.19247384369373322, "learning_rate": 0.001, "loss": 2.0309, "step": 13309 }, { "epoch": 0.5630764024029106, "grad_norm": 0.23704922199249268, "learning_rate": 0.001, "loss": 2.4209, "step": 13310 }, { "epoch": 0.563118707166427, "grad_norm": 0.19546206295490265, "learning_rate": 0.001, "loss": 2.3817, "step": 13311 }, { "epoch": 0.5631610119299433, "grad_norm": 0.1445315182209015, "learning_rate": 0.001, "loss": 1.8728, "step": 13312 }, { "epoch": 0.5632033166934597, "grad_norm": 0.14706316590309143, "learning_rate": 0.001, "loss": 1.7439, "step": 13313 }, { "epoch": 0.5632456214569761, "grad_norm": 0.19955837726593018, "learning_rate": 0.001, "loss": 2.1125, "step": 13314 }, { "epoch": 0.5632879262204924, "grad_norm": 0.19193534553050995, "learning_rate": 0.001, "loss": 3.1798, "step": 13315 }, { "epoch": 0.5633302309840088, "grad_norm": 0.19683898985385895, "learning_rate": 0.001, "loss": 1.9713, "step": 13316 }, { "epoch": 0.5633725357475252, "grad_norm": 0.182073175907135, "learning_rate": 0.001, "loss": 3.2368, "step": 13317 }, { "epoch": 0.5634148405110415, "grad_norm": 0.3637740910053253, "learning_rate": 0.001, "loss": 2.0353, "step": 13318 }, { "epoch": 0.5634571452745579, "grad_norm": 0.1640586256980896, "learning_rate": 0.001, "loss": 1.5906, "step": 13319 }, { "epoch": 0.5634994500380743, "grad_norm": 0.2922709584236145, "learning_rate": 0.001, "loss": 2.5071, "step": 13320 }, { "epoch": 0.5635417548015906, "grad_norm": 0.1738395392894745, "learning_rate": 0.001, "loss": 1.973, "step": 13321 }, { "epoch": 0.563584059565107, "grad_norm": 0.18015141785144806, "learning_rate": 0.001, "loss": 1.9577, "step": 13322 }, { "epoch": 0.5636263643286235, "grad_norm": 0.17254801094532013, "learning_rate": 0.001, "loss": 2.3277, "step": 13323 }, { "epoch": 0.5636686690921398, "grad_norm": 0.20676442980766296, "learning_rate": 0.001, "loss": 1.898, "step": 13324 }, { "epoch": 0.5637109738556562, "grad_norm": 0.19672460854053497, "learning_rate": 0.001, "loss": 2.5557, "step": 13325 }, { "epoch": 0.5637532786191726, "grad_norm": 0.16016629338264465, "learning_rate": 0.001, "loss": 1.7529, "step": 13326 }, { "epoch": 0.5637955833826889, "grad_norm": 0.1732088327407837, "learning_rate": 0.001, "loss": 1.8662, "step": 13327 }, { "epoch": 0.5638378881462053, "grad_norm": 0.1827797144651413, "learning_rate": 0.001, "loss": 1.7933, "step": 13328 }, { "epoch": 0.5638801929097217, "grad_norm": 0.16698625683784485, "learning_rate": 0.001, "loss": 2.7892, "step": 13329 }, { "epoch": 0.563922497673238, "grad_norm": 0.1709325909614563, "learning_rate": 0.001, "loss": 2.5153, "step": 13330 }, { "epoch": 0.5639648024367544, "grad_norm": 0.25048211216926575, "learning_rate": 0.001, "loss": 2.6259, "step": 13331 }, { "epoch": 0.5640071072002707, "grad_norm": 0.17622262239456177, "learning_rate": 0.001, "loss": 2.1055, "step": 13332 }, { "epoch": 0.5640494119637871, "grad_norm": 0.166434183716774, "learning_rate": 0.001, "loss": 1.8662, "step": 13333 }, { "epoch": 0.5640917167273035, "grad_norm": 0.7903834581375122, "learning_rate": 0.001, "loss": 1.9703, "step": 13334 }, { "epoch": 0.5641340214908198, "grad_norm": 0.15010859072208405, "learning_rate": 0.001, "loss": 1.7071, "step": 13335 }, { "epoch": 0.5641763262543362, "grad_norm": 0.1890597641468048, "learning_rate": 0.001, "loss": 2.995, "step": 13336 }, { "epoch": 0.5642186310178526, "grad_norm": 0.19205980002880096, "learning_rate": 0.001, "loss": 2.0444, "step": 13337 }, { "epoch": 0.5642609357813689, "grad_norm": 0.1756296306848526, "learning_rate": 0.001, "loss": 2.3472, "step": 13338 }, { "epoch": 0.5643032405448853, "grad_norm": 0.22155724465847015, "learning_rate": 0.001, "loss": 3.1698, "step": 13339 }, { "epoch": 0.5643455453084018, "grad_norm": 0.18253694474697113, "learning_rate": 0.001, "loss": 1.7616, "step": 13340 }, { "epoch": 0.564387850071918, "grad_norm": 0.22829587757587433, "learning_rate": 0.001, "loss": 1.5545, "step": 13341 }, { "epoch": 0.5644301548354345, "grad_norm": 2.0366973876953125, "learning_rate": 0.001, "loss": 1.7702, "step": 13342 }, { "epoch": 0.5644724595989509, "grad_norm": 0.7380493879318237, "learning_rate": 0.001, "loss": 2.1662, "step": 13343 }, { "epoch": 0.5645147643624672, "grad_norm": 0.19614994525909424, "learning_rate": 0.001, "loss": 2.5599, "step": 13344 }, { "epoch": 0.5645570691259836, "grad_norm": 0.39196836948394775, "learning_rate": 0.001, "loss": 1.6906, "step": 13345 }, { "epoch": 0.5645993738895, "grad_norm": 0.3456530272960663, "learning_rate": 0.001, "loss": 1.9598, "step": 13346 }, { "epoch": 0.5646416786530163, "grad_norm": 0.20277409255504608, "learning_rate": 0.001, "loss": 2.0331, "step": 13347 }, { "epoch": 0.5646839834165327, "grad_norm": 0.4468050003051758, "learning_rate": 0.001, "loss": 1.8206, "step": 13348 }, { "epoch": 0.5647262881800491, "grad_norm": 0.3284309208393097, "learning_rate": 0.001, "loss": 2.2595, "step": 13349 }, { "epoch": 0.5647685929435654, "grad_norm": 0.16732022166252136, "learning_rate": 0.001, "loss": 1.9984, "step": 13350 }, { "epoch": 0.5648108977070818, "grad_norm": 3.110480785369873, "learning_rate": 0.001, "loss": 2.6128, "step": 13351 }, { "epoch": 0.5648532024705982, "grad_norm": 0.18067099153995514, "learning_rate": 0.001, "loss": 2.0611, "step": 13352 }, { "epoch": 0.5648955072341145, "grad_norm": 0.17390722036361694, "learning_rate": 0.001, "loss": 2.0928, "step": 13353 }, { "epoch": 0.5649378119976309, "grad_norm": 0.18560457229614258, "learning_rate": 0.001, "loss": 3.1778, "step": 13354 }, { "epoch": 0.5649801167611473, "grad_norm": 0.18010523915290833, "learning_rate": 0.001, "loss": 2.5076, "step": 13355 }, { "epoch": 0.5650224215246636, "grad_norm": 0.18280434608459473, "learning_rate": 0.001, "loss": 2.2038, "step": 13356 }, { "epoch": 0.56506472628818, "grad_norm": 0.9968026280403137, "learning_rate": 0.001, "loss": 2.715, "step": 13357 }, { "epoch": 0.5651070310516965, "grad_norm": 1.5389236211776733, "learning_rate": 0.001, "loss": 2.5267, "step": 13358 }, { "epoch": 0.5651493358152128, "grad_norm": 0.8743659257888794, "learning_rate": 0.001, "loss": 2.5618, "step": 13359 }, { "epoch": 0.5651916405787292, "grad_norm": 0.20591340959072113, "learning_rate": 0.001, "loss": 2.3677, "step": 13360 }, { "epoch": 0.5652339453422456, "grad_norm": 0.20366302132606506, "learning_rate": 0.001, "loss": 2.3497, "step": 13361 }, { "epoch": 0.5652762501057619, "grad_norm": 0.18198545277118683, "learning_rate": 0.001, "loss": 2.1656, "step": 13362 }, { "epoch": 0.5653185548692783, "grad_norm": 0.1899123340845108, "learning_rate": 0.001, "loss": 1.8184, "step": 13363 }, { "epoch": 0.5653608596327947, "grad_norm": 0.21096497774124146, "learning_rate": 0.001, "loss": 2.3129, "step": 13364 }, { "epoch": 0.565403164396311, "grad_norm": 0.17829644680023193, "learning_rate": 0.001, "loss": 2.4071, "step": 13365 }, { "epoch": 0.5654454691598274, "grad_norm": 0.1842295229434967, "learning_rate": 0.001, "loss": 3.0018, "step": 13366 }, { "epoch": 0.5654877739233438, "grad_norm": 0.2008989155292511, "learning_rate": 0.001, "loss": 2.1184, "step": 13367 }, { "epoch": 0.5655300786868601, "grad_norm": 0.22309176623821259, "learning_rate": 0.001, "loss": 2.2204, "step": 13368 }, { "epoch": 0.5655723834503765, "grad_norm": 0.4550027847290039, "learning_rate": 0.001, "loss": 2.8608, "step": 13369 }, { "epoch": 0.5656146882138929, "grad_norm": 0.16846232116222382, "learning_rate": 0.001, "loss": 1.7115, "step": 13370 }, { "epoch": 0.5656569929774092, "grad_norm": 0.9170604944229126, "learning_rate": 0.001, "loss": 2.3162, "step": 13371 }, { "epoch": 0.5656992977409256, "grad_norm": 0.21079829335212708, "learning_rate": 0.001, "loss": 2.2757, "step": 13372 }, { "epoch": 0.565741602504442, "grad_norm": 0.15155363082885742, "learning_rate": 0.001, "loss": 1.9875, "step": 13373 }, { "epoch": 0.5657839072679584, "grad_norm": 0.5003759860992432, "learning_rate": 0.001, "loss": 2.126, "step": 13374 }, { "epoch": 0.5658262120314748, "grad_norm": 0.2025512456893921, "learning_rate": 0.001, "loss": 1.8468, "step": 13375 }, { "epoch": 0.5658685167949911, "grad_norm": 0.45572540163993835, "learning_rate": 0.001, "loss": 2.3299, "step": 13376 }, { "epoch": 0.5659108215585075, "grad_norm": 0.17576457560062408, "learning_rate": 0.001, "loss": 2.3811, "step": 13377 }, { "epoch": 0.5659531263220239, "grad_norm": 0.28992700576782227, "learning_rate": 0.001, "loss": 2.4098, "step": 13378 }, { "epoch": 0.5659954310855402, "grad_norm": 0.6396428942680359, "learning_rate": 0.001, "loss": 2.3187, "step": 13379 }, { "epoch": 0.5660377358490566, "grad_norm": 0.18657970428466797, "learning_rate": 0.001, "loss": 2.3154, "step": 13380 }, { "epoch": 0.566080040612573, "grad_norm": 0.4546518623828888, "learning_rate": 0.001, "loss": 1.7812, "step": 13381 }, { "epoch": 0.5661223453760893, "grad_norm": 0.4422479271888733, "learning_rate": 0.001, "loss": 2.0153, "step": 13382 }, { "epoch": 0.5661646501396057, "grad_norm": 0.19912591576576233, "learning_rate": 0.001, "loss": 1.6749, "step": 13383 }, { "epoch": 0.5662069549031221, "grad_norm": 0.20137451589107513, "learning_rate": 0.001, "loss": 2.3742, "step": 13384 }, { "epoch": 0.5662492596666384, "grad_norm": 0.22245512902736664, "learning_rate": 0.001, "loss": 3.0519, "step": 13385 }, { "epoch": 0.5662915644301548, "grad_norm": 0.179260715842247, "learning_rate": 0.001, "loss": 2.305, "step": 13386 }, { "epoch": 0.5663338691936712, "grad_norm": 0.19476951658725739, "learning_rate": 0.001, "loss": 2.0913, "step": 13387 }, { "epoch": 0.5663761739571875, "grad_norm": 0.1848563849925995, "learning_rate": 0.001, "loss": 2.4601, "step": 13388 }, { "epoch": 0.5664184787207039, "grad_norm": 0.26256316900253296, "learning_rate": 0.001, "loss": 2.8465, "step": 13389 }, { "epoch": 0.5664607834842204, "grad_norm": 0.1553989052772522, "learning_rate": 0.001, "loss": 1.6542, "step": 13390 }, { "epoch": 0.5665030882477367, "grad_norm": 0.17743252217769623, "learning_rate": 0.001, "loss": 1.7768, "step": 13391 }, { "epoch": 0.5665453930112531, "grad_norm": 0.19140616059303284, "learning_rate": 0.001, "loss": 1.461, "step": 13392 }, { "epoch": 0.5665876977747695, "grad_norm": 0.19345830380916595, "learning_rate": 0.001, "loss": 2.0676, "step": 13393 }, { "epoch": 0.5666300025382858, "grad_norm": 0.2207154482603073, "learning_rate": 0.001, "loss": 2.4146, "step": 13394 }, { "epoch": 0.5666723073018022, "grad_norm": 0.1724683791399002, "learning_rate": 0.001, "loss": 2.4437, "step": 13395 }, { "epoch": 0.5667146120653186, "grad_norm": 0.16311654448509216, "learning_rate": 0.001, "loss": 3.1914, "step": 13396 }, { "epoch": 0.5667569168288349, "grad_norm": 0.16985690593719482, "learning_rate": 0.001, "loss": 2.3727, "step": 13397 }, { "epoch": 0.5667992215923513, "grad_norm": 0.1899961233139038, "learning_rate": 0.001, "loss": 2.2874, "step": 13398 }, { "epoch": 0.5668415263558677, "grad_norm": 0.18617494404315948, "learning_rate": 0.001, "loss": 1.4395, "step": 13399 }, { "epoch": 0.566883831119384, "grad_norm": 0.18079866468906403, "learning_rate": 0.001, "loss": 3.0455, "step": 13400 }, { "epoch": 0.5669261358829004, "grad_norm": 0.18550261855125427, "learning_rate": 0.001, "loss": 3.1314, "step": 13401 }, { "epoch": 0.5669684406464168, "grad_norm": 0.17485053837299347, "learning_rate": 0.001, "loss": 2.1399, "step": 13402 }, { "epoch": 0.5670107454099331, "grad_norm": 0.15552884340286255, "learning_rate": 0.001, "loss": 1.5707, "step": 13403 }, { "epoch": 0.5670530501734495, "grad_norm": 0.14861054718494415, "learning_rate": 0.001, "loss": 1.7765, "step": 13404 }, { "epoch": 0.567095354936966, "grad_norm": 0.4289642572402954, "learning_rate": 0.001, "loss": 1.7516, "step": 13405 }, { "epoch": 0.5671376597004822, "grad_norm": 0.14437785744667053, "learning_rate": 0.001, "loss": 2.3381, "step": 13406 }, { "epoch": 0.5671799644639987, "grad_norm": 0.1592496633529663, "learning_rate": 0.001, "loss": 1.7272, "step": 13407 }, { "epoch": 0.5672222692275151, "grad_norm": 0.2676011323928833, "learning_rate": 0.001, "loss": 2.5001, "step": 13408 }, { "epoch": 0.5672645739910314, "grad_norm": 0.1811024695634842, "learning_rate": 0.001, "loss": 2.3746, "step": 13409 }, { "epoch": 0.5673068787545478, "grad_norm": 0.16653041541576385, "learning_rate": 0.001, "loss": 1.9266, "step": 13410 }, { "epoch": 0.5673491835180642, "grad_norm": 0.18817272782325745, "learning_rate": 0.001, "loss": 2.2231, "step": 13411 }, { "epoch": 0.5673914882815805, "grad_norm": 0.381883442401886, "learning_rate": 0.001, "loss": 1.8577, "step": 13412 }, { "epoch": 0.5674337930450969, "grad_norm": 0.1743682324886322, "learning_rate": 0.001, "loss": 2.6966, "step": 13413 }, { "epoch": 0.5674760978086133, "grad_norm": 0.18651573359966278, "learning_rate": 0.001, "loss": 2.3024, "step": 13414 }, { "epoch": 0.5675184025721296, "grad_norm": 2.339829206466675, "learning_rate": 0.001, "loss": 2.0363, "step": 13415 }, { "epoch": 0.567560707335646, "grad_norm": 0.28542375564575195, "learning_rate": 0.001, "loss": 2.1393, "step": 13416 }, { "epoch": 0.5676030120991624, "grad_norm": 0.17619076371192932, "learning_rate": 0.001, "loss": 1.4569, "step": 13417 }, { "epoch": 0.5676453168626787, "grad_norm": 0.171681746840477, "learning_rate": 0.001, "loss": 2.2856, "step": 13418 }, { "epoch": 0.5676876216261951, "grad_norm": 0.23256142437458038, "learning_rate": 0.001, "loss": 1.973, "step": 13419 }, { "epoch": 0.5677299263897115, "grad_norm": 0.22075031697750092, "learning_rate": 0.001, "loss": 1.9528, "step": 13420 }, { "epoch": 0.5677722311532278, "grad_norm": 0.12937584519386292, "learning_rate": 0.001, "loss": 2.196, "step": 13421 }, { "epoch": 0.5678145359167442, "grad_norm": 0.17965421080589294, "learning_rate": 0.001, "loss": 1.5309, "step": 13422 }, { "epoch": 0.5678568406802605, "grad_norm": 2.4008188247680664, "learning_rate": 0.001, "loss": 2.3106, "step": 13423 }, { "epoch": 0.567899145443777, "grad_norm": 0.16604921221733093, "learning_rate": 0.001, "loss": 2.2181, "step": 13424 }, { "epoch": 0.5679414502072934, "grad_norm": 0.2204952985048294, "learning_rate": 0.001, "loss": 2.9946, "step": 13425 }, { "epoch": 0.5679837549708097, "grad_norm": 0.2380129098892212, "learning_rate": 0.001, "loss": 1.8916, "step": 13426 }, { "epoch": 0.5680260597343261, "grad_norm": 0.20674805343151093, "learning_rate": 0.001, "loss": 2.224, "step": 13427 }, { "epoch": 0.5680683644978425, "grad_norm": 0.2611851990222931, "learning_rate": 0.001, "loss": 2.041, "step": 13428 }, { "epoch": 0.5681106692613588, "grad_norm": 4.238038063049316, "learning_rate": 0.001, "loss": 3.423, "step": 13429 }, { "epoch": 0.5681529740248752, "grad_norm": 0.26452019810676575, "learning_rate": 0.001, "loss": 2.9858, "step": 13430 }, { "epoch": 0.5681952787883916, "grad_norm": 0.2289876937866211, "learning_rate": 0.001, "loss": 2.6998, "step": 13431 }, { "epoch": 0.5682375835519079, "grad_norm": 0.3570123314857483, "learning_rate": 0.001, "loss": 2.1574, "step": 13432 }, { "epoch": 0.5682798883154243, "grad_norm": 0.33323732018470764, "learning_rate": 0.001, "loss": 1.9346, "step": 13433 }, { "epoch": 0.5683221930789407, "grad_norm": 0.2315545380115509, "learning_rate": 0.001, "loss": 1.9166, "step": 13434 }, { "epoch": 0.568364497842457, "grad_norm": 0.42233508825302124, "learning_rate": 0.001, "loss": 2.1505, "step": 13435 }, { "epoch": 0.5684068026059734, "grad_norm": 0.28535693883895874, "learning_rate": 0.001, "loss": 2.0267, "step": 13436 }, { "epoch": 0.5684491073694898, "grad_norm": 0.6629549264907837, "learning_rate": 0.001, "loss": 2.3038, "step": 13437 }, { "epoch": 0.5684914121330061, "grad_norm": 0.18485631048679352, "learning_rate": 0.001, "loss": 2.8315, "step": 13438 }, { "epoch": 0.5685337168965225, "grad_norm": 1.2029775381088257, "learning_rate": 0.001, "loss": 2.7915, "step": 13439 }, { "epoch": 0.568576021660039, "grad_norm": 0.16780279576778412, "learning_rate": 0.001, "loss": 2.0964, "step": 13440 }, { "epoch": 0.5686183264235553, "grad_norm": 0.4147586524486542, "learning_rate": 0.001, "loss": 2.8658, "step": 13441 }, { "epoch": 0.5686606311870717, "grad_norm": 0.21042154729366302, "learning_rate": 0.001, "loss": 2.2641, "step": 13442 }, { "epoch": 0.5687029359505881, "grad_norm": 0.2242194414138794, "learning_rate": 0.001, "loss": 2.0489, "step": 13443 }, { "epoch": 0.5687452407141044, "grad_norm": 0.27875545620918274, "learning_rate": 0.001, "loss": 1.592, "step": 13444 }, { "epoch": 0.5687875454776208, "grad_norm": 0.1999659240245819, "learning_rate": 0.001, "loss": 2.1463, "step": 13445 }, { "epoch": 0.5688298502411372, "grad_norm": 0.15846888720989227, "learning_rate": 0.001, "loss": 2.1381, "step": 13446 }, { "epoch": 0.5688721550046535, "grad_norm": 0.21760839223861694, "learning_rate": 0.001, "loss": 2.4506, "step": 13447 }, { "epoch": 0.5689144597681699, "grad_norm": 0.15946030616760254, "learning_rate": 0.001, "loss": 2.2288, "step": 13448 }, { "epoch": 0.5689567645316863, "grad_norm": 0.21263648569583893, "learning_rate": 0.001, "loss": 1.999, "step": 13449 }, { "epoch": 0.5689990692952026, "grad_norm": 0.1755702942609787, "learning_rate": 0.001, "loss": 1.845, "step": 13450 }, { "epoch": 0.569041374058719, "grad_norm": 0.14761324226856232, "learning_rate": 0.001, "loss": 1.5729, "step": 13451 }, { "epoch": 0.5690836788222354, "grad_norm": 0.16154663264751434, "learning_rate": 0.001, "loss": 1.4929, "step": 13452 }, { "epoch": 0.5691259835857517, "grad_norm": 0.15619386732578278, "learning_rate": 0.001, "loss": 2.8627, "step": 13453 }, { "epoch": 0.5691682883492681, "grad_norm": 0.2875896394252777, "learning_rate": 0.001, "loss": 3.1241, "step": 13454 }, { "epoch": 0.5692105931127845, "grad_norm": 0.20529961585998535, "learning_rate": 0.001, "loss": 2.3149, "step": 13455 }, { "epoch": 0.5692528978763008, "grad_norm": 0.134473517537117, "learning_rate": 0.001, "loss": 1.909, "step": 13456 }, { "epoch": 0.5692952026398173, "grad_norm": 0.15408681333065033, "learning_rate": 0.001, "loss": 2.2475, "step": 13457 }, { "epoch": 0.5693375074033337, "grad_norm": 0.17341335117816925, "learning_rate": 0.001, "loss": 2.7923, "step": 13458 }, { "epoch": 0.56937981216685, "grad_norm": 0.1393430083990097, "learning_rate": 0.001, "loss": 2.5653, "step": 13459 }, { "epoch": 0.5694221169303664, "grad_norm": 0.20593148469924927, "learning_rate": 0.001, "loss": 2.7352, "step": 13460 }, { "epoch": 0.5694644216938828, "grad_norm": 0.19775381684303284, "learning_rate": 0.001, "loss": 1.8592, "step": 13461 }, { "epoch": 0.5695067264573991, "grad_norm": 0.17005833983421326, "learning_rate": 0.001, "loss": 1.5063, "step": 13462 }, { "epoch": 0.5695490312209155, "grad_norm": 0.1618228405714035, "learning_rate": 0.001, "loss": 2.7133, "step": 13463 }, { "epoch": 0.5695913359844319, "grad_norm": 0.19688403606414795, "learning_rate": 0.001, "loss": 3.1128, "step": 13464 }, { "epoch": 0.5696336407479482, "grad_norm": 0.184058278799057, "learning_rate": 0.001, "loss": 2.203, "step": 13465 }, { "epoch": 0.5696759455114646, "grad_norm": 0.4234599769115448, "learning_rate": 0.001, "loss": 1.4275, "step": 13466 }, { "epoch": 0.5697182502749809, "grad_norm": 0.610440194606781, "learning_rate": 0.001, "loss": 1.8219, "step": 13467 }, { "epoch": 0.5697605550384973, "grad_norm": 0.6861364841461182, "learning_rate": 0.001, "loss": 1.8298, "step": 13468 }, { "epoch": 0.5698028598020137, "grad_norm": 0.17145289480686188, "learning_rate": 0.001, "loss": 2.5483, "step": 13469 }, { "epoch": 0.56984516456553, "grad_norm": 0.2071324735879898, "learning_rate": 0.001, "loss": 1.5715, "step": 13470 }, { "epoch": 0.5698874693290464, "grad_norm": 0.1772976666688919, "learning_rate": 0.001, "loss": 2.9208, "step": 13471 }, { "epoch": 0.5699297740925628, "grad_norm": 0.14574064314365387, "learning_rate": 0.001, "loss": 2.1653, "step": 13472 }, { "epoch": 0.5699720788560791, "grad_norm": 0.16666589677333832, "learning_rate": 0.001, "loss": 2.4713, "step": 13473 }, { "epoch": 0.5700143836195956, "grad_norm": 0.19138103723526, "learning_rate": 0.001, "loss": 2.0317, "step": 13474 }, { "epoch": 0.570056688383112, "grad_norm": 0.21919706463813782, "learning_rate": 0.001, "loss": 1.5688, "step": 13475 }, { "epoch": 0.5700989931466283, "grad_norm": 0.6911723613739014, "learning_rate": 0.001, "loss": 2.5346, "step": 13476 }, { "epoch": 0.5701412979101447, "grad_norm": 1.9358234405517578, "learning_rate": 0.001, "loss": 2.5255, "step": 13477 }, { "epoch": 0.5701836026736611, "grad_norm": 0.43052294850349426, "learning_rate": 0.001, "loss": 2.2194, "step": 13478 }, { "epoch": 0.5702259074371774, "grad_norm": 0.1567905694246292, "learning_rate": 0.001, "loss": 1.5455, "step": 13479 }, { "epoch": 0.5702682122006938, "grad_norm": 0.16955433785915375, "learning_rate": 0.001, "loss": 2.2044, "step": 13480 }, { "epoch": 0.5703105169642102, "grad_norm": 0.21759982407093048, "learning_rate": 0.001, "loss": 1.6764, "step": 13481 }, { "epoch": 0.5703528217277265, "grad_norm": 0.26957741379737854, "learning_rate": 0.001, "loss": 3.0424, "step": 13482 }, { "epoch": 0.5703951264912429, "grad_norm": 0.23791447281837463, "learning_rate": 0.001, "loss": 2.1794, "step": 13483 }, { "epoch": 0.5704374312547593, "grad_norm": 0.17288313806056976, "learning_rate": 0.001, "loss": 2.8003, "step": 13484 }, { "epoch": 0.5704797360182756, "grad_norm": 0.186492919921875, "learning_rate": 0.001, "loss": 2.5023, "step": 13485 }, { "epoch": 0.570522040781792, "grad_norm": 0.17396779358386993, "learning_rate": 0.001, "loss": 2.3465, "step": 13486 }, { "epoch": 0.5705643455453084, "grad_norm": 0.20612338185310364, "learning_rate": 0.001, "loss": 1.9371, "step": 13487 }, { "epoch": 0.5706066503088247, "grad_norm": 0.16367042064666748, "learning_rate": 0.001, "loss": 1.9677, "step": 13488 }, { "epoch": 0.5706489550723411, "grad_norm": 1.0515310764312744, "learning_rate": 0.001, "loss": 1.7665, "step": 13489 }, { "epoch": 0.5706912598358576, "grad_norm": 0.4732486307621002, "learning_rate": 0.001, "loss": 2.2618, "step": 13490 }, { "epoch": 0.5707335645993739, "grad_norm": 0.14785124361515045, "learning_rate": 0.001, "loss": 1.9295, "step": 13491 }, { "epoch": 0.5707758693628903, "grad_norm": 0.1713053435087204, "learning_rate": 0.001, "loss": 1.8555, "step": 13492 }, { "epoch": 0.5708181741264067, "grad_norm": 0.17723537981510162, "learning_rate": 0.001, "loss": 2.1308, "step": 13493 }, { "epoch": 0.570860478889923, "grad_norm": 1.3990591764450073, "learning_rate": 0.001, "loss": 2.5823, "step": 13494 }, { "epoch": 0.5709027836534394, "grad_norm": 0.6561245918273926, "learning_rate": 0.001, "loss": 2.6915, "step": 13495 }, { "epoch": 0.5709450884169558, "grad_norm": 1.7256704568862915, "learning_rate": 0.001, "loss": 1.4257, "step": 13496 }, { "epoch": 0.5709873931804721, "grad_norm": 0.22391989827156067, "learning_rate": 0.001, "loss": 2.3335, "step": 13497 }, { "epoch": 0.5710296979439885, "grad_norm": 0.1710107922554016, "learning_rate": 0.001, "loss": 1.8255, "step": 13498 }, { "epoch": 0.5710720027075049, "grad_norm": 0.21606822311878204, "learning_rate": 0.001, "loss": 2.894, "step": 13499 }, { "epoch": 0.5711143074710212, "grad_norm": 0.19133202731609344, "learning_rate": 0.001, "loss": 1.5382, "step": 13500 }, { "epoch": 0.5711566122345376, "grad_norm": 0.2007291465997696, "learning_rate": 0.001, "loss": 2.0458, "step": 13501 }, { "epoch": 0.571198916998054, "grad_norm": 0.21422770619392395, "learning_rate": 0.001, "loss": 2.758, "step": 13502 }, { "epoch": 0.5712412217615703, "grad_norm": 0.1938318908214569, "learning_rate": 0.001, "loss": 2.7117, "step": 13503 }, { "epoch": 0.5712835265250867, "grad_norm": 0.26207903027534485, "learning_rate": 0.001, "loss": 1.8618, "step": 13504 }, { "epoch": 0.5713258312886031, "grad_norm": 0.2131361961364746, "learning_rate": 0.001, "loss": 1.9149, "step": 13505 }, { "epoch": 0.5713681360521194, "grad_norm": 0.1706126481294632, "learning_rate": 0.001, "loss": 2.3218, "step": 13506 }, { "epoch": 0.5714104408156359, "grad_norm": 0.20523306727409363, "learning_rate": 0.001, "loss": 2.2526, "step": 13507 }, { "epoch": 0.5714527455791523, "grad_norm": 0.25251221656799316, "learning_rate": 0.001, "loss": 2.3534, "step": 13508 }, { "epoch": 0.5714950503426686, "grad_norm": 0.3562532663345337, "learning_rate": 0.001, "loss": 1.6221, "step": 13509 }, { "epoch": 0.571537355106185, "grad_norm": 0.19563472270965576, "learning_rate": 0.001, "loss": 2.2984, "step": 13510 }, { "epoch": 0.5715796598697013, "grad_norm": 0.19130732119083405, "learning_rate": 0.001, "loss": 3.6665, "step": 13511 }, { "epoch": 0.5716219646332177, "grad_norm": 0.2187919318675995, "learning_rate": 0.001, "loss": 2.7956, "step": 13512 }, { "epoch": 0.5716642693967341, "grad_norm": 0.7064900994300842, "learning_rate": 0.001, "loss": 1.7473, "step": 13513 }, { "epoch": 0.5717065741602504, "grad_norm": 0.17418193817138672, "learning_rate": 0.001, "loss": 2.1856, "step": 13514 }, { "epoch": 0.5717488789237668, "grad_norm": 0.1919695883989334, "learning_rate": 0.001, "loss": 2.6144, "step": 13515 }, { "epoch": 0.5717911836872832, "grad_norm": 0.1946498155593872, "learning_rate": 0.001, "loss": 1.8036, "step": 13516 }, { "epoch": 0.5718334884507995, "grad_norm": 0.18168438971042633, "learning_rate": 0.001, "loss": 2.7927, "step": 13517 }, { "epoch": 0.5718757932143159, "grad_norm": 0.410906583070755, "learning_rate": 0.001, "loss": 2.8412, "step": 13518 }, { "epoch": 0.5719180979778323, "grad_norm": 0.3952532708644867, "learning_rate": 0.001, "loss": 1.6598, "step": 13519 }, { "epoch": 0.5719604027413486, "grad_norm": 0.7694887518882751, "learning_rate": 0.001, "loss": 2.2268, "step": 13520 }, { "epoch": 0.572002707504865, "grad_norm": 0.2468140423297882, "learning_rate": 0.001, "loss": 2.8834, "step": 13521 }, { "epoch": 0.5720450122683814, "grad_norm": 0.23892976343631744, "learning_rate": 0.001, "loss": 1.4797, "step": 13522 }, { "epoch": 0.5720873170318977, "grad_norm": 0.2801859974861145, "learning_rate": 0.001, "loss": 2.2426, "step": 13523 }, { "epoch": 0.5721296217954142, "grad_norm": 0.1985364854335785, "learning_rate": 0.001, "loss": 2.5463, "step": 13524 }, { "epoch": 0.5721719265589306, "grad_norm": 0.24735181033611298, "learning_rate": 0.001, "loss": 2.4437, "step": 13525 }, { "epoch": 0.5722142313224469, "grad_norm": 0.18506476283073425, "learning_rate": 0.001, "loss": 1.8858, "step": 13526 }, { "epoch": 0.5722565360859633, "grad_norm": 0.2307346612215042, "learning_rate": 0.001, "loss": 2.1989, "step": 13527 }, { "epoch": 0.5722988408494797, "grad_norm": 0.16953586041927338, "learning_rate": 0.001, "loss": 2.0845, "step": 13528 }, { "epoch": 0.572341145612996, "grad_norm": 0.18418249487876892, "learning_rate": 0.001, "loss": 2.2964, "step": 13529 }, { "epoch": 0.5723834503765124, "grad_norm": 0.2580213248729706, "learning_rate": 0.001, "loss": 2.1883, "step": 13530 }, { "epoch": 0.5724257551400288, "grad_norm": 2.6409804821014404, "learning_rate": 0.001, "loss": 2.2781, "step": 13531 }, { "epoch": 0.5724680599035451, "grad_norm": 0.15479686856269836, "learning_rate": 0.001, "loss": 2.2066, "step": 13532 }, { "epoch": 0.5725103646670615, "grad_norm": 0.8504530787467957, "learning_rate": 0.001, "loss": 1.5421, "step": 13533 }, { "epoch": 0.5725526694305779, "grad_norm": 0.17100943624973297, "learning_rate": 0.001, "loss": 3.2645, "step": 13534 }, { "epoch": 0.5725949741940942, "grad_norm": 0.6614644527435303, "learning_rate": 0.001, "loss": 3.2304, "step": 13535 }, { "epoch": 0.5726372789576106, "grad_norm": 1.6653027534484863, "learning_rate": 0.001, "loss": 2.8675, "step": 13536 }, { "epoch": 0.572679583721127, "grad_norm": 0.16103801131248474, "learning_rate": 0.001, "loss": 1.9109, "step": 13537 }, { "epoch": 0.5727218884846433, "grad_norm": 0.16726185381412506, "learning_rate": 0.001, "loss": 3.5696, "step": 13538 }, { "epoch": 0.5727641932481597, "grad_norm": 0.3862936496734619, "learning_rate": 0.001, "loss": 2.0125, "step": 13539 }, { "epoch": 0.5728064980116762, "grad_norm": 1.980115532875061, "learning_rate": 0.001, "loss": 2.5362, "step": 13540 }, { "epoch": 0.5728488027751925, "grad_norm": 0.1421581506729126, "learning_rate": 0.001, "loss": 2.1459, "step": 13541 }, { "epoch": 0.5728911075387089, "grad_norm": 0.18831764161586761, "learning_rate": 0.001, "loss": 2.523, "step": 13542 }, { "epoch": 0.5729334123022253, "grad_norm": 0.15375645458698273, "learning_rate": 0.001, "loss": 1.82, "step": 13543 }, { "epoch": 0.5729757170657416, "grad_norm": 0.14795835316181183, "learning_rate": 0.001, "loss": 2.3571, "step": 13544 }, { "epoch": 0.573018021829258, "grad_norm": 0.15851891040802002, "learning_rate": 0.001, "loss": 2.5942, "step": 13545 }, { "epoch": 0.5730603265927744, "grad_norm": 0.15028858184814453, "learning_rate": 0.001, "loss": 3.2621, "step": 13546 }, { "epoch": 0.5731026313562907, "grad_norm": 0.15741689503192902, "learning_rate": 0.001, "loss": 1.9997, "step": 13547 }, { "epoch": 0.5731449361198071, "grad_norm": 0.1891627013683319, "learning_rate": 0.001, "loss": 2.2628, "step": 13548 }, { "epoch": 0.5731872408833235, "grad_norm": 0.16026534140110016, "learning_rate": 0.001, "loss": 2.6502, "step": 13549 }, { "epoch": 0.5732295456468398, "grad_norm": 0.18979184329509735, "learning_rate": 0.001, "loss": 2.1462, "step": 13550 }, { "epoch": 0.5732718504103562, "grad_norm": 0.17325901985168457, "learning_rate": 0.001, "loss": 1.8367, "step": 13551 }, { "epoch": 0.5733141551738726, "grad_norm": 0.2838257849216461, "learning_rate": 0.001, "loss": 2.2564, "step": 13552 }, { "epoch": 0.5733564599373889, "grad_norm": 0.4795995056629181, "learning_rate": 0.001, "loss": 1.9584, "step": 13553 }, { "epoch": 0.5733987647009053, "grad_norm": 1.5541175603866577, "learning_rate": 0.001, "loss": 3.0344, "step": 13554 }, { "epoch": 0.5734410694644217, "grad_norm": 0.1850278079509735, "learning_rate": 0.001, "loss": 2.1873, "step": 13555 }, { "epoch": 0.573483374227938, "grad_norm": 0.20095832645893097, "learning_rate": 0.001, "loss": 2.0309, "step": 13556 }, { "epoch": 0.5735256789914545, "grad_norm": 0.212057963013649, "learning_rate": 0.001, "loss": 2.0179, "step": 13557 }, { "epoch": 0.5735679837549708, "grad_norm": 0.18375861644744873, "learning_rate": 0.001, "loss": 1.7909, "step": 13558 }, { "epoch": 0.5736102885184872, "grad_norm": 0.1902046501636505, "learning_rate": 0.001, "loss": 1.7385, "step": 13559 }, { "epoch": 0.5736525932820036, "grad_norm": 0.1839946061372757, "learning_rate": 0.001, "loss": 1.9316, "step": 13560 }, { "epoch": 0.5736948980455199, "grad_norm": 0.16849452257156372, "learning_rate": 0.001, "loss": 2.4155, "step": 13561 }, { "epoch": 0.5737372028090363, "grad_norm": 0.18684113025665283, "learning_rate": 0.001, "loss": 2.9065, "step": 13562 }, { "epoch": 0.5737795075725527, "grad_norm": 0.325171560049057, "learning_rate": 0.001, "loss": 3.7306, "step": 13563 }, { "epoch": 0.573821812336069, "grad_norm": 0.1625150591135025, "learning_rate": 0.001, "loss": 1.9709, "step": 13564 }, { "epoch": 0.5738641170995854, "grad_norm": 0.5446557402610779, "learning_rate": 0.001, "loss": 2.5266, "step": 13565 }, { "epoch": 0.5739064218631018, "grad_norm": 0.1913897693157196, "learning_rate": 0.001, "loss": 2.5541, "step": 13566 }, { "epoch": 0.5739487266266181, "grad_norm": 0.23677568137645721, "learning_rate": 0.001, "loss": 3.3124, "step": 13567 }, { "epoch": 0.5739910313901345, "grad_norm": 0.18259602785110474, "learning_rate": 0.001, "loss": 2.398, "step": 13568 }, { "epoch": 0.5740333361536509, "grad_norm": 0.166291281580925, "learning_rate": 0.001, "loss": 1.7677, "step": 13569 }, { "epoch": 0.5740756409171672, "grad_norm": 0.19032956659793854, "learning_rate": 0.001, "loss": 2.0508, "step": 13570 }, { "epoch": 0.5741179456806836, "grad_norm": 0.303861528635025, "learning_rate": 0.001, "loss": 1.8316, "step": 13571 }, { "epoch": 0.5741602504442, "grad_norm": 0.41305580735206604, "learning_rate": 0.001, "loss": 2.0014, "step": 13572 }, { "epoch": 0.5742025552077163, "grad_norm": 0.1796369105577469, "learning_rate": 0.001, "loss": 2.7648, "step": 13573 }, { "epoch": 0.5742448599712328, "grad_norm": 0.15992271900177002, "learning_rate": 0.001, "loss": 1.8547, "step": 13574 }, { "epoch": 0.5742871647347492, "grad_norm": 0.195455402135849, "learning_rate": 0.001, "loss": 1.5503, "step": 13575 }, { "epoch": 0.5743294694982655, "grad_norm": 0.17384329438209534, "learning_rate": 0.001, "loss": 2.1787, "step": 13576 }, { "epoch": 0.5743717742617819, "grad_norm": 0.3714596629142761, "learning_rate": 0.001, "loss": 2.782, "step": 13577 }, { "epoch": 0.5744140790252983, "grad_norm": 0.9288785457611084, "learning_rate": 0.001, "loss": 2.5938, "step": 13578 }, { "epoch": 0.5744563837888146, "grad_norm": 0.28560522198677063, "learning_rate": 0.001, "loss": 2.3935, "step": 13579 }, { "epoch": 0.574498688552331, "grad_norm": 0.23299963772296906, "learning_rate": 0.001, "loss": 2.4586, "step": 13580 }, { "epoch": 0.5745409933158474, "grad_norm": 0.19017741084098816, "learning_rate": 0.001, "loss": 2.3019, "step": 13581 }, { "epoch": 0.5745832980793637, "grad_norm": 0.18954819440841675, "learning_rate": 0.001, "loss": 1.8131, "step": 13582 }, { "epoch": 0.5746256028428801, "grad_norm": 0.1803303062915802, "learning_rate": 0.001, "loss": 1.9121, "step": 13583 }, { "epoch": 0.5746679076063965, "grad_norm": 4.713398456573486, "learning_rate": 0.001, "loss": 1.9541, "step": 13584 }, { "epoch": 0.5747102123699128, "grad_norm": 0.2334275096654892, "learning_rate": 0.001, "loss": 1.7124, "step": 13585 }, { "epoch": 0.5747525171334292, "grad_norm": 0.1691213995218277, "learning_rate": 0.001, "loss": 1.6664, "step": 13586 }, { "epoch": 0.5747948218969456, "grad_norm": 0.21941591799259186, "learning_rate": 0.001, "loss": 2.0986, "step": 13587 }, { "epoch": 0.5748371266604619, "grad_norm": 0.30102136731147766, "learning_rate": 0.001, "loss": 2.5321, "step": 13588 }, { "epoch": 0.5748794314239783, "grad_norm": 0.21132823824882507, "learning_rate": 0.001, "loss": 2.5585, "step": 13589 }, { "epoch": 0.5749217361874948, "grad_norm": 3.1371676921844482, "learning_rate": 0.001, "loss": 3.0142, "step": 13590 }, { "epoch": 0.5749640409510111, "grad_norm": 0.3118325471878052, "learning_rate": 0.001, "loss": 1.7528, "step": 13591 }, { "epoch": 0.5750063457145275, "grad_norm": 0.26828131079673767, "learning_rate": 0.001, "loss": 2.4065, "step": 13592 }, { "epoch": 0.5750486504780439, "grad_norm": 0.21891672909259796, "learning_rate": 0.001, "loss": 1.8595, "step": 13593 }, { "epoch": 0.5750909552415602, "grad_norm": 0.20417436957359314, "learning_rate": 0.001, "loss": 1.8951, "step": 13594 }, { "epoch": 0.5751332600050766, "grad_norm": 0.4515736997127533, "learning_rate": 0.001, "loss": 2.1664, "step": 13595 }, { "epoch": 0.575175564768593, "grad_norm": 0.17579565942287445, "learning_rate": 0.001, "loss": 2.4499, "step": 13596 }, { "epoch": 0.5752178695321093, "grad_norm": 0.18956947326660156, "learning_rate": 0.001, "loss": 1.7775, "step": 13597 }, { "epoch": 0.5752601742956257, "grad_norm": 0.18791532516479492, "learning_rate": 0.001, "loss": 1.858, "step": 13598 }, { "epoch": 0.5753024790591421, "grad_norm": 0.18148577213287354, "learning_rate": 0.001, "loss": 1.8695, "step": 13599 }, { "epoch": 0.5753447838226584, "grad_norm": 0.16978557407855988, "learning_rate": 0.001, "loss": 2.0121, "step": 13600 }, { "epoch": 0.5753870885861748, "grad_norm": 0.1755099594593048, "learning_rate": 0.001, "loss": 1.9807, "step": 13601 }, { "epoch": 0.5754293933496911, "grad_norm": 0.22287164628505707, "learning_rate": 0.001, "loss": 2.9232, "step": 13602 }, { "epoch": 0.5754716981132075, "grad_norm": 0.2872997224330902, "learning_rate": 0.001, "loss": 2.2583, "step": 13603 }, { "epoch": 0.5755140028767239, "grad_norm": 0.43780237436294556, "learning_rate": 0.001, "loss": 1.995, "step": 13604 }, { "epoch": 0.5755563076402402, "grad_norm": 0.20227646827697754, "learning_rate": 0.001, "loss": 2.8379, "step": 13605 }, { "epoch": 0.5755986124037566, "grad_norm": 1.0031191110610962, "learning_rate": 0.001, "loss": 2.8675, "step": 13606 }, { "epoch": 0.5756409171672731, "grad_norm": 0.1701931655406952, "learning_rate": 0.001, "loss": 2.2968, "step": 13607 }, { "epoch": 0.5756832219307894, "grad_norm": 0.7706162929534912, "learning_rate": 0.001, "loss": 2.6568, "step": 13608 }, { "epoch": 0.5757255266943058, "grad_norm": 0.22482161223888397, "learning_rate": 0.001, "loss": 1.7453, "step": 13609 }, { "epoch": 0.5757678314578222, "grad_norm": 1.2364606857299805, "learning_rate": 0.001, "loss": 1.8909, "step": 13610 }, { "epoch": 0.5758101362213385, "grad_norm": 0.154103621840477, "learning_rate": 0.001, "loss": 1.7456, "step": 13611 }, { "epoch": 0.5758524409848549, "grad_norm": 0.15440334379673004, "learning_rate": 0.001, "loss": 2.0969, "step": 13612 }, { "epoch": 0.5758947457483713, "grad_norm": 0.21539804339408875, "learning_rate": 0.001, "loss": 2.4492, "step": 13613 }, { "epoch": 0.5759370505118876, "grad_norm": 0.17441478371620178, "learning_rate": 0.001, "loss": 1.9286, "step": 13614 }, { "epoch": 0.575979355275404, "grad_norm": 0.16577987372875214, "learning_rate": 0.001, "loss": 1.7983, "step": 13615 }, { "epoch": 0.5760216600389204, "grad_norm": 0.6091001033782959, "learning_rate": 0.001, "loss": 2.4681, "step": 13616 }, { "epoch": 0.5760639648024367, "grad_norm": 0.7010548114776611, "learning_rate": 0.001, "loss": 2.991, "step": 13617 }, { "epoch": 0.5761062695659531, "grad_norm": 0.18760816752910614, "learning_rate": 0.001, "loss": 2.7225, "step": 13618 }, { "epoch": 0.5761485743294695, "grad_norm": 0.2499050348997116, "learning_rate": 0.001, "loss": 1.889, "step": 13619 }, { "epoch": 0.5761908790929858, "grad_norm": 0.19262200593948364, "learning_rate": 0.001, "loss": 1.9694, "step": 13620 }, { "epoch": 0.5762331838565022, "grad_norm": 0.2874251902103424, "learning_rate": 0.001, "loss": 2.0702, "step": 13621 }, { "epoch": 0.5762754886200186, "grad_norm": 0.2215861827135086, "learning_rate": 0.001, "loss": 3.0974, "step": 13622 }, { "epoch": 0.576317793383535, "grad_norm": 1.4504274129867554, "learning_rate": 0.001, "loss": 2.6667, "step": 13623 }, { "epoch": 0.5763600981470514, "grad_norm": 0.3368513286113739, "learning_rate": 0.001, "loss": 2.2946, "step": 13624 }, { "epoch": 0.5764024029105678, "grad_norm": 0.5865402817726135, "learning_rate": 0.001, "loss": 2.1963, "step": 13625 }, { "epoch": 0.5764447076740841, "grad_norm": 0.16736286878585815, "learning_rate": 0.001, "loss": 2.4853, "step": 13626 }, { "epoch": 0.5764870124376005, "grad_norm": 0.18231581151485443, "learning_rate": 0.001, "loss": 1.9809, "step": 13627 }, { "epoch": 0.5765293172011169, "grad_norm": 0.16837336122989655, "learning_rate": 0.001, "loss": 3.887, "step": 13628 }, { "epoch": 0.5765716219646332, "grad_norm": 0.4993027150630951, "learning_rate": 0.001, "loss": 2.3137, "step": 13629 }, { "epoch": 0.5766139267281496, "grad_norm": 0.2104516625404358, "learning_rate": 0.001, "loss": 1.6916, "step": 13630 }, { "epoch": 0.576656231491666, "grad_norm": 0.20687484741210938, "learning_rate": 0.001, "loss": 2.2164, "step": 13631 }, { "epoch": 0.5766985362551823, "grad_norm": 0.18873409926891327, "learning_rate": 0.001, "loss": 2.7869, "step": 13632 }, { "epoch": 0.5767408410186987, "grad_norm": 0.16937416791915894, "learning_rate": 0.001, "loss": 1.9121, "step": 13633 }, { "epoch": 0.5767831457822151, "grad_norm": 0.15023858845233917, "learning_rate": 0.001, "loss": 2.6665, "step": 13634 }, { "epoch": 0.5768254505457314, "grad_norm": 0.15277251601219177, "learning_rate": 0.001, "loss": 2.0511, "step": 13635 }, { "epoch": 0.5768677553092478, "grad_norm": 0.19467462599277496, "learning_rate": 0.001, "loss": 1.6275, "step": 13636 }, { "epoch": 0.5769100600727642, "grad_norm": 0.1640617549419403, "learning_rate": 0.001, "loss": 1.331, "step": 13637 }, { "epoch": 0.5769523648362805, "grad_norm": 0.17704744637012482, "learning_rate": 0.001, "loss": 2.0327, "step": 13638 }, { "epoch": 0.576994669599797, "grad_norm": 0.16801083087921143, "learning_rate": 0.001, "loss": 2.6699, "step": 13639 }, { "epoch": 0.5770369743633134, "grad_norm": 0.22247737646102905, "learning_rate": 0.001, "loss": 1.7275, "step": 13640 }, { "epoch": 0.5770792791268297, "grad_norm": 0.2886200249195099, "learning_rate": 0.001, "loss": 2.255, "step": 13641 }, { "epoch": 0.5771215838903461, "grad_norm": 2.0032057762145996, "learning_rate": 0.001, "loss": 1.4168, "step": 13642 }, { "epoch": 0.5771638886538625, "grad_norm": 0.1862766593694687, "learning_rate": 0.001, "loss": 3.6991, "step": 13643 }, { "epoch": 0.5772061934173788, "grad_norm": 0.27933868765830994, "learning_rate": 0.001, "loss": 2.5766, "step": 13644 }, { "epoch": 0.5772484981808952, "grad_norm": 0.15503652393817902, "learning_rate": 0.001, "loss": 1.4434, "step": 13645 }, { "epoch": 0.5772908029444115, "grad_norm": 0.1637222021818161, "learning_rate": 0.001, "loss": 1.8226, "step": 13646 }, { "epoch": 0.5773331077079279, "grad_norm": 0.1900179535150528, "learning_rate": 0.001, "loss": 2.8638, "step": 13647 }, { "epoch": 0.5773754124714443, "grad_norm": 5.069095134735107, "learning_rate": 0.001, "loss": 2.3441, "step": 13648 }, { "epoch": 0.5774177172349606, "grad_norm": 0.21330921351909637, "learning_rate": 0.001, "loss": 2.6057, "step": 13649 }, { "epoch": 0.577460021998477, "grad_norm": 0.3153631389141083, "learning_rate": 0.001, "loss": 3.2548, "step": 13650 }, { "epoch": 0.5775023267619934, "grad_norm": 0.18381884694099426, "learning_rate": 0.001, "loss": 1.7146, "step": 13651 }, { "epoch": 0.5775446315255097, "grad_norm": 0.2848173677921295, "learning_rate": 0.001, "loss": 1.6744, "step": 13652 }, { "epoch": 0.5775869362890261, "grad_norm": 0.16625621914863586, "learning_rate": 0.001, "loss": 2.2554, "step": 13653 }, { "epoch": 0.5776292410525425, "grad_norm": 0.14194847643375397, "learning_rate": 0.001, "loss": 1.6789, "step": 13654 }, { "epoch": 0.5776715458160588, "grad_norm": 0.24383410811424255, "learning_rate": 0.001, "loss": 2.5261, "step": 13655 }, { "epoch": 0.5777138505795752, "grad_norm": 0.18297232687473297, "learning_rate": 0.001, "loss": 2.0025, "step": 13656 }, { "epoch": 0.5777561553430917, "grad_norm": 0.16804887354373932, "learning_rate": 0.001, "loss": 2.0048, "step": 13657 }, { "epoch": 0.577798460106608, "grad_norm": 0.20523540675640106, "learning_rate": 0.001, "loss": 3.0154, "step": 13658 }, { "epoch": 0.5778407648701244, "grad_norm": 0.9247041344642639, "learning_rate": 0.001, "loss": 2.0177, "step": 13659 }, { "epoch": 0.5778830696336408, "grad_norm": 0.27602866291999817, "learning_rate": 0.001, "loss": 1.8518, "step": 13660 }, { "epoch": 0.5779253743971571, "grad_norm": 0.1811681091785431, "learning_rate": 0.001, "loss": 2.2486, "step": 13661 }, { "epoch": 0.5779676791606735, "grad_norm": 1.9261819124221802, "learning_rate": 0.001, "loss": 2.2829, "step": 13662 }, { "epoch": 0.5780099839241899, "grad_norm": 0.23115941882133484, "learning_rate": 0.001, "loss": 2.0086, "step": 13663 }, { "epoch": 0.5780522886877062, "grad_norm": 0.20311318337917328, "learning_rate": 0.001, "loss": 2.3334, "step": 13664 }, { "epoch": 0.5780945934512226, "grad_norm": 0.1666504442691803, "learning_rate": 0.001, "loss": 2.0804, "step": 13665 }, { "epoch": 0.578136898214739, "grad_norm": 0.20418089628219604, "learning_rate": 0.001, "loss": 2.2807, "step": 13666 }, { "epoch": 0.5781792029782553, "grad_norm": 0.1729002743959427, "learning_rate": 0.001, "loss": 1.6245, "step": 13667 }, { "epoch": 0.5782215077417717, "grad_norm": 0.1773560643196106, "learning_rate": 0.001, "loss": 1.6606, "step": 13668 }, { "epoch": 0.5782638125052881, "grad_norm": 0.17324134707450867, "learning_rate": 0.001, "loss": 1.5963, "step": 13669 }, { "epoch": 0.5783061172688044, "grad_norm": 0.503576397895813, "learning_rate": 0.001, "loss": 2.0075, "step": 13670 }, { "epoch": 0.5783484220323208, "grad_norm": 0.15948913991451263, "learning_rate": 0.001, "loss": 1.7914, "step": 13671 }, { "epoch": 0.5783907267958373, "grad_norm": 0.2122723013162613, "learning_rate": 0.001, "loss": 2.4304, "step": 13672 }, { "epoch": 0.5784330315593535, "grad_norm": 0.9828127026557922, "learning_rate": 0.001, "loss": 2.3654, "step": 13673 }, { "epoch": 0.57847533632287, "grad_norm": 0.1700863391160965, "learning_rate": 0.001, "loss": 2.0107, "step": 13674 }, { "epoch": 0.5785176410863864, "grad_norm": 0.3214263617992401, "learning_rate": 0.001, "loss": 2.5081, "step": 13675 }, { "epoch": 0.5785599458499027, "grad_norm": 0.21781599521636963, "learning_rate": 0.001, "loss": 2.0888, "step": 13676 }, { "epoch": 0.5786022506134191, "grad_norm": 0.16866712272167206, "learning_rate": 0.001, "loss": 2.407, "step": 13677 }, { "epoch": 0.5786445553769355, "grad_norm": 0.15926438570022583, "learning_rate": 0.001, "loss": 1.3379, "step": 13678 }, { "epoch": 0.5786868601404518, "grad_norm": 0.23510025441646576, "learning_rate": 0.001, "loss": 1.7714, "step": 13679 }, { "epoch": 0.5787291649039682, "grad_norm": 0.14508269727230072, "learning_rate": 0.001, "loss": 2.1146, "step": 13680 }, { "epoch": 0.5787714696674846, "grad_norm": 2.956667423248291, "learning_rate": 0.001, "loss": 2.8911, "step": 13681 }, { "epoch": 0.5788137744310009, "grad_norm": 0.24240057170391083, "learning_rate": 0.001, "loss": 3.1488, "step": 13682 }, { "epoch": 0.5788560791945173, "grad_norm": 0.16761580109596252, "learning_rate": 0.001, "loss": 3.1758, "step": 13683 }, { "epoch": 0.5788983839580337, "grad_norm": 0.15230026841163635, "learning_rate": 0.001, "loss": 1.5823, "step": 13684 }, { "epoch": 0.57894068872155, "grad_norm": 0.14217911660671234, "learning_rate": 0.001, "loss": 1.3058, "step": 13685 }, { "epoch": 0.5789829934850664, "grad_norm": 0.23708081245422363, "learning_rate": 0.001, "loss": 3.0425, "step": 13686 }, { "epoch": 0.5790252982485828, "grad_norm": 0.19518715143203735, "learning_rate": 0.001, "loss": 2.1443, "step": 13687 }, { "epoch": 0.5790676030120991, "grad_norm": 0.2842262089252472, "learning_rate": 0.001, "loss": 2.324, "step": 13688 }, { "epoch": 0.5791099077756156, "grad_norm": 0.2659071683883667, "learning_rate": 0.001, "loss": 2.3996, "step": 13689 }, { "epoch": 0.579152212539132, "grad_norm": 0.16496817767620087, "learning_rate": 0.001, "loss": 1.9989, "step": 13690 }, { "epoch": 0.5791945173026483, "grad_norm": 0.19301360845565796, "learning_rate": 0.001, "loss": 3.5109, "step": 13691 }, { "epoch": 0.5792368220661647, "grad_norm": 0.3519555628299713, "learning_rate": 0.001, "loss": 3.3803, "step": 13692 }, { "epoch": 0.579279126829681, "grad_norm": 0.26763901114463806, "learning_rate": 0.001, "loss": 2.4265, "step": 13693 }, { "epoch": 0.5793214315931974, "grad_norm": 0.1800110638141632, "learning_rate": 0.001, "loss": 1.8322, "step": 13694 }, { "epoch": 0.5793637363567138, "grad_norm": 0.16732731461524963, "learning_rate": 0.001, "loss": 1.8233, "step": 13695 }, { "epoch": 0.5794060411202301, "grad_norm": 0.17104704678058624, "learning_rate": 0.001, "loss": 2.2859, "step": 13696 }, { "epoch": 0.5794483458837465, "grad_norm": 0.18005624413490295, "learning_rate": 0.001, "loss": 3.1283, "step": 13697 }, { "epoch": 0.5794906506472629, "grad_norm": 0.15262584388256073, "learning_rate": 0.001, "loss": 2.445, "step": 13698 }, { "epoch": 0.5795329554107792, "grad_norm": 0.13767917454242706, "learning_rate": 0.001, "loss": 1.5765, "step": 13699 }, { "epoch": 0.5795752601742956, "grad_norm": 0.18696613609790802, "learning_rate": 0.001, "loss": 2.0157, "step": 13700 }, { "epoch": 0.579617564937812, "grad_norm": 0.21265771985054016, "learning_rate": 0.001, "loss": 2.1951, "step": 13701 }, { "epoch": 0.5796598697013283, "grad_norm": 0.154225155711174, "learning_rate": 0.001, "loss": 2.6487, "step": 13702 }, { "epoch": 0.5797021744648447, "grad_norm": 0.3607092797756195, "learning_rate": 0.001, "loss": 1.9679, "step": 13703 }, { "epoch": 0.5797444792283611, "grad_norm": 0.30119165778160095, "learning_rate": 0.001, "loss": 2.7281, "step": 13704 }, { "epoch": 0.5797867839918774, "grad_norm": 0.14737650752067566, "learning_rate": 0.001, "loss": 1.6039, "step": 13705 }, { "epoch": 0.5798290887553939, "grad_norm": 0.1710781455039978, "learning_rate": 0.001, "loss": 1.7936, "step": 13706 }, { "epoch": 0.5798713935189103, "grad_norm": 0.21716709434986115, "learning_rate": 0.001, "loss": 2.3436, "step": 13707 }, { "epoch": 0.5799136982824266, "grad_norm": 0.13513155281543732, "learning_rate": 0.001, "loss": 2.2419, "step": 13708 }, { "epoch": 0.579956003045943, "grad_norm": 0.14529554545879364, "learning_rate": 0.001, "loss": 2.1327, "step": 13709 }, { "epoch": 0.5799983078094594, "grad_norm": 0.14698375761508942, "learning_rate": 0.001, "loss": 1.6831, "step": 13710 }, { "epoch": 0.5800406125729757, "grad_norm": 0.1601586788892746, "learning_rate": 0.001, "loss": 2.5013, "step": 13711 }, { "epoch": 0.5800829173364921, "grad_norm": 2.6871955394744873, "learning_rate": 0.001, "loss": 2.3585, "step": 13712 }, { "epoch": 0.5801252221000085, "grad_norm": 1.6209683418273926, "learning_rate": 0.001, "loss": 2.7192, "step": 13713 }, { "epoch": 0.5801675268635248, "grad_norm": 0.4082275927066803, "learning_rate": 0.001, "loss": 2.3748, "step": 13714 }, { "epoch": 0.5802098316270412, "grad_norm": 0.8170319199562073, "learning_rate": 0.001, "loss": 1.9599, "step": 13715 }, { "epoch": 0.5802521363905576, "grad_norm": 0.15276405215263367, "learning_rate": 0.001, "loss": 3.2077, "step": 13716 }, { "epoch": 0.5802944411540739, "grad_norm": 0.16727223992347717, "learning_rate": 0.001, "loss": 3.1438, "step": 13717 }, { "epoch": 0.5803367459175903, "grad_norm": 0.1725165992975235, "learning_rate": 0.001, "loss": 2.3128, "step": 13718 }, { "epoch": 0.5803790506811067, "grad_norm": 0.16121014952659607, "learning_rate": 0.001, "loss": 2.9876, "step": 13719 }, { "epoch": 0.580421355444623, "grad_norm": 0.27277088165283203, "learning_rate": 0.001, "loss": 2.1281, "step": 13720 }, { "epoch": 0.5804636602081394, "grad_norm": 0.2123079150915146, "learning_rate": 0.001, "loss": 2.2785, "step": 13721 }, { "epoch": 0.5805059649716559, "grad_norm": 0.7211734056472778, "learning_rate": 0.001, "loss": 2.1592, "step": 13722 }, { "epoch": 0.5805482697351722, "grad_norm": 0.5063629746437073, "learning_rate": 0.001, "loss": 1.605, "step": 13723 }, { "epoch": 0.5805905744986886, "grad_norm": 0.15953439474105835, "learning_rate": 0.001, "loss": 2.3575, "step": 13724 }, { "epoch": 0.580632879262205, "grad_norm": 0.2004745602607727, "learning_rate": 0.001, "loss": 2.5765, "step": 13725 }, { "epoch": 0.5806751840257213, "grad_norm": 0.17336571216583252, "learning_rate": 0.001, "loss": 2.242, "step": 13726 }, { "epoch": 0.5807174887892377, "grad_norm": 0.15672898292541504, "learning_rate": 0.001, "loss": 1.6338, "step": 13727 }, { "epoch": 0.5807597935527541, "grad_norm": 0.16324591636657715, "learning_rate": 0.001, "loss": 2.2554, "step": 13728 }, { "epoch": 0.5808020983162704, "grad_norm": 0.8709842562675476, "learning_rate": 0.001, "loss": 1.8762, "step": 13729 }, { "epoch": 0.5808444030797868, "grad_norm": 0.16312599182128906, "learning_rate": 0.001, "loss": 1.8523, "step": 13730 }, { "epoch": 0.5808867078433032, "grad_norm": 0.6291779279708862, "learning_rate": 0.001, "loss": 3.0103, "step": 13731 }, { "epoch": 0.5809290126068195, "grad_norm": 0.18601246178150177, "learning_rate": 0.001, "loss": 2.3727, "step": 13732 }, { "epoch": 0.5809713173703359, "grad_norm": 0.22245633602142334, "learning_rate": 0.001, "loss": 1.926, "step": 13733 }, { "epoch": 0.5810136221338523, "grad_norm": 0.1883118599653244, "learning_rate": 0.001, "loss": 2.1428, "step": 13734 }, { "epoch": 0.5810559268973686, "grad_norm": 0.15982557833194733, "learning_rate": 0.001, "loss": 2.1728, "step": 13735 }, { "epoch": 0.581098231660885, "grad_norm": 0.18497587740421295, "learning_rate": 0.001, "loss": 1.7981, "step": 13736 }, { "epoch": 0.5811405364244013, "grad_norm": 0.1280389130115509, "learning_rate": 0.001, "loss": 1.5147, "step": 13737 }, { "epoch": 0.5811828411879177, "grad_norm": 0.2510825991630554, "learning_rate": 0.001, "loss": 1.5612, "step": 13738 }, { "epoch": 0.5812251459514342, "grad_norm": 0.1742805689573288, "learning_rate": 0.001, "loss": 2.0385, "step": 13739 }, { "epoch": 0.5812674507149505, "grad_norm": 0.14577478170394897, "learning_rate": 0.001, "loss": 2.1657, "step": 13740 }, { "epoch": 0.5813097554784669, "grad_norm": 0.17133548855781555, "learning_rate": 0.001, "loss": 2.2744, "step": 13741 }, { "epoch": 0.5813520602419833, "grad_norm": 0.16263654828071594, "learning_rate": 0.001, "loss": 1.8197, "step": 13742 }, { "epoch": 0.5813943650054996, "grad_norm": 0.19162042438983917, "learning_rate": 0.001, "loss": 1.8052, "step": 13743 }, { "epoch": 0.581436669769016, "grad_norm": 0.17110483348369598, "learning_rate": 0.001, "loss": 2.1316, "step": 13744 }, { "epoch": 0.5814789745325324, "grad_norm": 0.4235318601131439, "learning_rate": 0.001, "loss": 2.5278, "step": 13745 }, { "epoch": 0.5815212792960487, "grad_norm": 0.15516316890716553, "learning_rate": 0.001, "loss": 2.2714, "step": 13746 }, { "epoch": 0.5815635840595651, "grad_norm": 1.130749225616455, "learning_rate": 0.001, "loss": 1.8972, "step": 13747 }, { "epoch": 0.5816058888230815, "grad_norm": 0.41215425729751587, "learning_rate": 0.001, "loss": 3.452, "step": 13748 }, { "epoch": 0.5816481935865978, "grad_norm": 4.2232441902160645, "learning_rate": 0.001, "loss": 1.7344, "step": 13749 }, { "epoch": 0.5816904983501142, "grad_norm": 0.19343996047973633, "learning_rate": 0.001, "loss": 2.1452, "step": 13750 }, { "epoch": 0.5817328031136306, "grad_norm": 36.49468231201172, "learning_rate": 0.001, "loss": 1.8784, "step": 13751 }, { "epoch": 0.5817751078771469, "grad_norm": 0.2389318197965622, "learning_rate": 0.001, "loss": 2.6143, "step": 13752 }, { "epoch": 0.5818174126406633, "grad_norm": 0.24423779547214508, "learning_rate": 0.001, "loss": 2.6543, "step": 13753 }, { "epoch": 0.5818597174041797, "grad_norm": 0.2429848611354828, "learning_rate": 0.001, "loss": 2.2033, "step": 13754 }, { "epoch": 0.581902022167696, "grad_norm": 0.2506393790245056, "learning_rate": 0.001, "loss": 2.3673, "step": 13755 }, { "epoch": 0.5819443269312125, "grad_norm": 0.1692809909582138, "learning_rate": 0.001, "loss": 2.2108, "step": 13756 }, { "epoch": 0.5819866316947289, "grad_norm": 5.47391939163208, "learning_rate": 0.001, "loss": 2.5862, "step": 13757 }, { "epoch": 0.5820289364582452, "grad_norm": 0.17192316055297852, "learning_rate": 0.001, "loss": 2.4285, "step": 13758 }, { "epoch": 0.5820712412217616, "grad_norm": 0.22438155114650726, "learning_rate": 0.001, "loss": 2.4762, "step": 13759 }, { "epoch": 0.582113545985278, "grad_norm": 1.2624096870422363, "learning_rate": 0.001, "loss": 2.1616, "step": 13760 }, { "epoch": 0.5821558507487943, "grad_norm": 0.202859029173851, "learning_rate": 0.001, "loss": 2.0482, "step": 13761 }, { "epoch": 0.5821981555123107, "grad_norm": 0.22284016013145447, "learning_rate": 0.001, "loss": 2.1997, "step": 13762 }, { "epoch": 0.5822404602758271, "grad_norm": 0.20142649114131927, "learning_rate": 0.001, "loss": 1.8527, "step": 13763 }, { "epoch": 0.5822827650393434, "grad_norm": 0.1934656798839569, "learning_rate": 0.001, "loss": 2.7891, "step": 13764 }, { "epoch": 0.5823250698028598, "grad_norm": 0.25359898805618286, "learning_rate": 0.001, "loss": 2.1556, "step": 13765 }, { "epoch": 0.5823673745663762, "grad_norm": 0.43819230794906616, "learning_rate": 0.001, "loss": 2.4958, "step": 13766 }, { "epoch": 0.5824096793298925, "grad_norm": 0.17032918334007263, "learning_rate": 0.001, "loss": 1.8058, "step": 13767 }, { "epoch": 0.5824519840934089, "grad_norm": 2.558115005493164, "learning_rate": 0.001, "loss": 3.1201, "step": 13768 }, { "epoch": 0.5824942888569253, "grad_norm": 0.20715180039405823, "learning_rate": 0.001, "loss": 2.2601, "step": 13769 }, { "epoch": 0.5825365936204416, "grad_norm": 0.5378583669662476, "learning_rate": 0.001, "loss": 2.006, "step": 13770 }, { "epoch": 0.582578898383958, "grad_norm": 0.28567495942115784, "learning_rate": 0.001, "loss": 1.7223, "step": 13771 }, { "epoch": 0.5826212031474745, "grad_norm": 0.18178150057792664, "learning_rate": 0.001, "loss": 1.3784, "step": 13772 }, { "epoch": 0.5826635079109908, "grad_norm": 0.23406149446964264, "learning_rate": 0.001, "loss": 4.444, "step": 13773 }, { "epoch": 0.5827058126745072, "grad_norm": 0.18092291057109833, "learning_rate": 0.001, "loss": 3.0016, "step": 13774 }, { "epoch": 0.5827481174380236, "grad_norm": 0.3098083436489105, "learning_rate": 0.001, "loss": 2.7088, "step": 13775 }, { "epoch": 0.5827904222015399, "grad_norm": 0.18877199292182922, "learning_rate": 0.001, "loss": 1.6503, "step": 13776 }, { "epoch": 0.5828327269650563, "grad_norm": 0.2062119096517563, "learning_rate": 0.001, "loss": 2.1285, "step": 13777 }, { "epoch": 0.5828750317285727, "grad_norm": 0.3584443926811218, "learning_rate": 0.001, "loss": 1.7564, "step": 13778 }, { "epoch": 0.582917336492089, "grad_norm": 0.17341378331184387, "learning_rate": 0.001, "loss": 1.9815, "step": 13779 }, { "epoch": 0.5829596412556054, "grad_norm": 0.17184588313102722, "learning_rate": 0.001, "loss": 1.8823, "step": 13780 }, { "epoch": 0.5830019460191218, "grad_norm": 0.16942469775676727, "learning_rate": 0.001, "loss": 1.9993, "step": 13781 }, { "epoch": 0.5830442507826381, "grad_norm": 0.17933522164821625, "learning_rate": 0.001, "loss": 2.1188, "step": 13782 }, { "epoch": 0.5830865555461545, "grad_norm": 0.15469792485237122, "learning_rate": 0.001, "loss": 2.1212, "step": 13783 }, { "epoch": 0.5831288603096708, "grad_norm": 0.1891220211982727, "learning_rate": 0.001, "loss": 3.1775, "step": 13784 }, { "epoch": 0.5831711650731872, "grad_norm": 0.1707414835691452, "learning_rate": 0.001, "loss": 1.991, "step": 13785 }, { "epoch": 0.5832134698367036, "grad_norm": 1.5627321004867554, "learning_rate": 0.001, "loss": 1.999, "step": 13786 }, { "epoch": 0.5832557746002199, "grad_norm": 0.15185093879699707, "learning_rate": 0.001, "loss": 2.8022, "step": 13787 }, { "epoch": 0.5832980793637363, "grad_norm": 0.1897820383310318, "learning_rate": 0.001, "loss": 2.0066, "step": 13788 }, { "epoch": 0.5833403841272528, "grad_norm": 0.4447472393512726, "learning_rate": 0.001, "loss": 3.3133, "step": 13789 }, { "epoch": 0.583382688890769, "grad_norm": 0.15921324491500854, "learning_rate": 0.001, "loss": 2.5172, "step": 13790 }, { "epoch": 0.5834249936542855, "grad_norm": 0.22110402584075928, "learning_rate": 0.001, "loss": 2.0695, "step": 13791 }, { "epoch": 0.5834672984178019, "grad_norm": 0.34887969493865967, "learning_rate": 0.001, "loss": 2.6305, "step": 13792 }, { "epoch": 0.5835096031813182, "grad_norm": 0.16820944845676422, "learning_rate": 0.001, "loss": 2.5623, "step": 13793 }, { "epoch": 0.5835519079448346, "grad_norm": 0.4705934226512909, "learning_rate": 0.001, "loss": 2.2158, "step": 13794 }, { "epoch": 0.583594212708351, "grad_norm": 0.24807946383953094, "learning_rate": 0.001, "loss": 1.9532, "step": 13795 }, { "epoch": 0.5836365174718673, "grad_norm": 1.3096846342086792, "learning_rate": 0.001, "loss": 1.9417, "step": 13796 }, { "epoch": 0.5836788222353837, "grad_norm": 0.1883528232574463, "learning_rate": 0.001, "loss": 2.1403, "step": 13797 }, { "epoch": 0.5837211269989001, "grad_norm": 0.20582067966461182, "learning_rate": 0.001, "loss": 2.3299, "step": 13798 }, { "epoch": 0.5837634317624164, "grad_norm": 2.3853557109832764, "learning_rate": 0.001, "loss": 1.6175, "step": 13799 }, { "epoch": 0.5838057365259328, "grad_norm": 0.21598750352859497, "learning_rate": 0.001, "loss": 2.3283, "step": 13800 }, { "epoch": 0.5838480412894492, "grad_norm": 0.41916608810424805, "learning_rate": 0.001, "loss": 2.8298, "step": 13801 }, { "epoch": 0.5838903460529655, "grad_norm": 0.18645887076854706, "learning_rate": 0.001, "loss": 1.4992, "step": 13802 }, { "epoch": 0.5839326508164819, "grad_norm": 0.15576979517936707, "learning_rate": 0.001, "loss": 1.6169, "step": 13803 }, { "epoch": 0.5839749555799983, "grad_norm": 0.1782311350107193, "learning_rate": 0.001, "loss": 1.7768, "step": 13804 }, { "epoch": 0.5840172603435146, "grad_norm": 0.8121781349182129, "learning_rate": 0.001, "loss": 2.2347, "step": 13805 }, { "epoch": 0.584059565107031, "grad_norm": 0.20430848002433777, "learning_rate": 0.001, "loss": 2.1171, "step": 13806 }, { "epoch": 0.5841018698705475, "grad_norm": 0.5235466361045837, "learning_rate": 0.001, "loss": 1.7292, "step": 13807 }, { "epoch": 0.5841441746340638, "grad_norm": 0.17465122044086456, "learning_rate": 0.001, "loss": 1.7302, "step": 13808 }, { "epoch": 0.5841864793975802, "grad_norm": 0.18919281661510468, "learning_rate": 0.001, "loss": 1.8968, "step": 13809 }, { "epoch": 0.5842287841610966, "grad_norm": 0.17904122173786163, "learning_rate": 0.001, "loss": 1.8885, "step": 13810 }, { "epoch": 0.5842710889246129, "grad_norm": 3.1780359745025635, "learning_rate": 0.001, "loss": 2.5823, "step": 13811 }, { "epoch": 0.5843133936881293, "grad_norm": 0.4842691123485565, "learning_rate": 0.001, "loss": 3.0183, "step": 13812 }, { "epoch": 0.5843556984516457, "grad_norm": 0.21552209556102753, "learning_rate": 0.001, "loss": 2.5317, "step": 13813 }, { "epoch": 0.584398003215162, "grad_norm": 2.484487533569336, "learning_rate": 0.001, "loss": 1.738, "step": 13814 }, { "epoch": 0.5844403079786784, "grad_norm": 0.3187903165817261, "learning_rate": 0.001, "loss": 3.056, "step": 13815 }, { "epoch": 0.5844826127421948, "grad_norm": 2.3998584747314453, "learning_rate": 0.001, "loss": 1.7838, "step": 13816 }, { "epoch": 0.5845249175057111, "grad_norm": 0.17576216161251068, "learning_rate": 0.001, "loss": 2.3258, "step": 13817 }, { "epoch": 0.5845672222692275, "grad_norm": 0.6371609568595886, "learning_rate": 0.001, "loss": 2.2197, "step": 13818 }, { "epoch": 0.5846095270327439, "grad_norm": 0.20169895887374878, "learning_rate": 0.001, "loss": 2.5768, "step": 13819 }, { "epoch": 0.5846518317962602, "grad_norm": 0.20052120089530945, "learning_rate": 0.001, "loss": 1.7194, "step": 13820 }, { "epoch": 0.5846941365597766, "grad_norm": 0.1984112709760666, "learning_rate": 0.001, "loss": 2.0484, "step": 13821 }, { "epoch": 0.584736441323293, "grad_norm": 0.16683201491832733, "learning_rate": 0.001, "loss": 1.9458, "step": 13822 }, { "epoch": 0.5847787460868094, "grad_norm": 0.19755448400974274, "learning_rate": 0.001, "loss": 2.6561, "step": 13823 }, { "epoch": 0.5848210508503258, "grad_norm": 0.18185539543628693, "learning_rate": 0.001, "loss": 2.2397, "step": 13824 }, { "epoch": 0.5848633556138422, "grad_norm": 0.17746347188949585, "learning_rate": 0.001, "loss": 2.9947, "step": 13825 }, { "epoch": 0.5849056603773585, "grad_norm": 0.22636628150939941, "learning_rate": 0.001, "loss": 2.5098, "step": 13826 }, { "epoch": 0.5849479651408749, "grad_norm": 0.5232028365135193, "learning_rate": 0.001, "loss": 3.0739, "step": 13827 }, { "epoch": 0.5849902699043912, "grad_norm": 0.20873093605041504, "learning_rate": 0.001, "loss": 2.4017, "step": 13828 }, { "epoch": 0.5850325746679076, "grad_norm": 0.2317352294921875, "learning_rate": 0.001, "loss": 1.5189, "step": 13829 }, { "epoch": 0.585074879431424, "grad_norm": 2.4872546195983887, "learning_rate": 0.001, "loss": 1.9005, "step": 13830 }, { "epoch": 0.5851171841949403, "grad_norm": 0.20402368903160095, "learning_rate": 0.001, "loss": 1.6404, "step": 13831 }, { "epoch": 0.5851594889584567, "grad_norm": 0.16960665583610535, "learning_rate": 0.001, "loss": 1.7397, "step": 13832 }, { "epoch": 0.5852017937219731, "grad_norm": 0.16686907410621643, "learning_rate": 0.001, "loss": 2.6307, "step": 13833 }, { "epoch": 0.5852440984854894, "grad_norm": 0.20668341219425201, "learning_rate": 0.001, "loss": 2.2207, "step": 13834 }, { "epoch": 0.5852864032490058, "grad_norm": 0.17005237936973572, "learning_rate": 0.001, "loss": 2.3992, "step": 13835 }, { "epoch": 0.5853287080125222, "grad_norm": 0.16195209324359894, "learning_rate": 0.001, "loss": 1.6082, "step": 13836 }, { "epoch": 0.5853710127760385, "grad_norm": 0.3230375349521637, "learning_rate": 0.001, "loss": 2.2868, "step": 13837 }, { "epoch": 0.585413317539555, "grad_norm": 0.16415375471115112, "learning_rate": 0.001, "loss": 2.2974, "step": 13838 }, { "epoch": 0.5854556223030714, "grad_norm": 0.1570240557193756, "learning_rate": 0.001, "loss": 2.3329, "step": 13839 }, { "epoch": 0.5854979270665877, "grad_norm": 0.17124520242214203, "learning_rate": 0.001, "loss": 1.725, "step": 13840 }, { "epoch": 0.5855402318301041, "grad_norm": 0.2079612761735916, "learning_rate": 0.001, "loss": 2.2798, "step": 13841 }, { "epoch": 0.5855825365936205, "grad_norm": 0.20434881746768951, "learning_rate": 0.001, "loss": 2.7584, "step": 13842 }, { "epoch": 0.5856248413571368, "grad_norm": 0.17397215962409973, "learning_rate": 0.001, "loss": 2.5361, "step": 13843 }, { "epoch": 0.5856671461206532, "grad_norm": 0.14827987551689148, "learning_rate": 0.001, "loss": 3.5882, "step": 13844 }, { "epoch": 0.5857094508841696, "grad_norm": 0.16601242125034332, "learning_rate": 0.001, "loss": 1.7631, "step": 13845 }, { "epoch": 0.5857517556476859, "grad_norm": 0.1840457022190094, "learning_rate": 0.001, "loss": 3.2754, "step": 13846 }, { "epoch": 0.5857940604112023, "grad_norm": 0.19872917234897614, "learning_rate": 0.001, "loss": 2.24, "step": 13847 }, { "epoch": 0.5858363651747187, "grad_norm": 0.18587267398834229, "learning_rate": 0.001, "loss": 2.8884, "step": 13848 }, { "epoch": 0.585878669938235, "grad_norm": 0.5582289695739746, "learning_rate": 0.001, "loss": 2.3321, "step": 13849 }, { "epoch": 0.5859209747017514, "grad_norm": 0.21963296830654144, "learning_rate": 0.001, "loss": 2.3329, "step": 13850 }, { "epoch": 0.5859632794652678, "grad_norm": 0.1582070291042328, "learning_rate": 0.001, "loss": 2.5041, "step": 13851 }, { "epoch": 0.5860055842287841, "grad_norm": 0.1862238198518753, "learning_rate": 0.001, "loss": 1.9722, "step": 13852 }, { "epoch": 0.5860478889923005, "grad_norm": 0.24395392835140228, "learning_rate": 0.001, "loss": 2.1698, "step": 13853 }, { "epoch": 0.586090193755817, "grad_norm": 0.19407311081886292, "learning_rate": 0.001, "loss": 2.0222, "step": 13854 }, { "epoch": 0.5861324985193332, "grad_norm": 0.18697842955589294, "learning_rate": 0.001, "loss": 2.0685, "step": 13855 }, { "epoch": 0.5861748032828497, "grad_norm": 0.15743017196655273, "learning_rate": 0.001, "loss": 2.9768, "step": 13856 }, { "epoch": 0.5862171080463661, "grad_norm": 0.1737719476222992, "learning_rate": 0.001, "loss": 1.6942, "step": 13857 }, { "epoch": 0.5862594128098824, "grad_norm": 0.17090411484241486, "learning_rate": 0.001, "loss": 1.5764, "step": 13858 }, { "epoch": 0.5863017175733988, "grad_norm": 0.21270331740379333, "learning_rate": 0.001, "loss": 1.7628, "step": 13859 }, { "epoch": 0.5863440223369152, "grad_norm": 0.16474924981594086, "learning_rate": 0.001, "loss": 2.702, "step": 13860 }, { "epoch": 0.5863863271004315, "grad_norm": 0.18944130837917328, "learning_rate": 0.001, "loss": 2.5051, "step": 13861 }, { "epoch": 0.5864286318639479, "grad_norm": 0.1647646725177765, "learning_rate": 0.001, "loss": 2.3672, "step": 13862 }, { "epoch": 0.5864709366274643, "grad_norm": 0.2273533195257187, "learning_rate": 0.001, "loss": 2.4584, "step": 13863 }, { "epoch": 0.5865132413909806, "grad_norm": 0.14887486398220062, "learning_rate": 0.001, "loss": 2.4992, "step": 13864 }, { "epoch": 0.586555546154497, "grad_norm": 0.20371198654174805, "learning_rate": 0.001, "loss": 2.2056, "step": 13865 }, { "epoch": 0.5865978509180134, "grad_norm": 0.9710887670516968, "learning_rate": 0.001, "loss": 1.9499, "step": 13866 }, { "epoch": 0.5866401556815297, "grad_norm": 0.16241279244422913, "learning_rate": 0.001, "loss": 1.9251, "step": 13867 }, { "epoch": 0.5866824604450461, "grad_norm": 0.401077002286911, "learning_rate": 0.001, "loss": 2.3491, "step": 13868 }, { "epoch": 0.5867247652085625, "grad_norm": 0.16556090116500854, "learning_rate": 0.001, "loss": 1.7719, "step": 13869 }, { "epoch": 0.5867670699720788, "grad_norm": 0.14938072860240936, "learning_rate": 0.001, "loss": 1.8205, "step": 13870 }, { "epoch": 0.5868093747355952, "grad_norm": 0.1616201102733612, "learning_rate": 0.001, "loss": 2.3958, "step": 13871 }, { "epoch": 0.5868516794991115, "grad_norm": 0.19175079464912415, "learning_rate": 0.001, "loss": 1.9595, "step": 13872 }, { "epoch": 0.586893984262628, "grad_norm": 0.21373534202575684, "learning_rate": 0.001, "loss": 2.2095, "step": 13873 }, { "epoch": 0.5869362890261444, "grad_norm": 0.20551612973213196, "learning_rate": 0.001, "loss": 2.4731, "step": 13874 }, { "epoch": 0.5869785937896607, "grad_norm": 0.2638375759124756, "learning_rate": 0.001, "loss": 2.8643, "step": 13875 }, { "epoch": 0.5870208985531771, "grad_norm": 0.1855308711528778, "learning_rate": 0.001, "loss": 1.8964, "step": 13876 }, { "epoch": 0.5870632033166935, "grad_norm": 0.2503952383995056, "learning_rate": 0.001, "loss": 2.9562, "step": 13877 }, { "epoch": 0.5871055080802098, "grad_norm": 0.15639376640319824, "learning_rate": 0.001, "loss": 2.6518, "step": 13878 }, { "epoch": 0.5871478128437262, "grad_norm": 0.3068271279335022, "learning_rate": 0.001, "loss": 2.9139, "step": 13879 }, { "epoch": 0.5871901176072426, "grad_norm": 2.3085885047912598, "learning_rate": 0.001, "loss": 1.9774, "step": 13880 }, { "epoch": 0.5872324223707589, "grad_norm": 0.17612521350383759, "learning_rate": 0.001, "loss": 2.3076, "step": 13881 }, { "epoch": 0.5872747271342753, "grad_norm": 0.21990317106246948, "learning_rate": 0.001, "loss": 2.7583, "step": 13882 }, { "epoch": 0.5873170318977917, "grad_norm": 0.15076623857021332, "learning_rate": 0.001, "loss": 1.9021, "step": 13883 }, { "epoch": 0.587359336661308, "grad_norm": 0.17355187237262726, "learning_rate": 0.001, "loss": 2.1959, "step": 13884 }, { "epoch": 0.5874016414248244, "grad_norm": 0.16059938073158264, "learning_rate": 0.001, "loss": 2.5057, "step": 13885 }, { "epoch": 0.5874439461883408, "grad_norm": 0.17203792929649353, "learning_rate": 0.001, "loss": 2.2947, "step": 13886 }, { "epoch": 0.5874862509518571, "grad_norm": 0.15990357100963593, "learning_rate": 0.001, "loss": 1.7769, "step": 13887 }, { "epoch": 0.5875285557153735, "grad_norm": 0.1762009710073471, "learning_rate": 0.001, "loss": 3.1737, "step": 13888 }, { "epoch": 0.58757086047889, "grad_norm": 0.14753255248069763, "learning_rate": 0.001, "loss": 2.3686, "step": 13889 }, { "epoch": 0.5876131652424063, "grad_norm": 0.20030535757541656, "learning_rate": 0.001, "loss": 2.094, "step": 13890 }, { "epoch": 0.5876554700059227, "grad_norm": 0.1795814037322998, "learning_rate": 0.001, "loss": 2.438, "step": 13891 }, { "epoch": 0.5876977747694391, "grad_norm": 0.2042204886674881, "learning_rate": 0.001, "loss": 1.9378, "step": 13892 }, { "epoch": 0.5877400795329554, "grad_norm": 0.18718861043453217, "learning_rate": 0.001, "loss": 3.3314, "step": 13893 }, { "epoch": 0.5877823842964718, "grad_norm": 0.18534649908542633, "learning_rate": 0.001, "loss": 1.9572, "step": 13894 }, { "epoch": 0.5878246890599882, "grad_norm": 0.14680497348308563, "learning_rate": 0.001, "loss": 1.6453, "step": 13895 }, { "epoch": 0.5878669938235045, "grad_norm": 0.22413283586502075, "learning_rate": 0.001, "loss": 2.7833, "step": 13896 }, { "epoch": 0.5879092985870209, "grad_norm": 0.20671682059764862, "learning_rate": 0.001, "loss": 3.5421, "step": 13897 }, { "epoch": 0.5879516033505373, "grad_norm": 0.3078271448612213, "learning_rate": 0.001, "loss": 3.6566, "step": 13898 }, { "epoch": 0.5879939081140536, "grad_norm": 0.3358747661113739, "learning_rate": 0.001, "loss": 1.9084, "step": 13899 }, { "epoch": 0.58803621287757, "grad_norm": 0.15469489991664886, "learning_rate": 0.001, "loss": 1.4976, "step": 13900 }, { "epoch": 0.5880785176410864, "grad_norm": 0.19083096086978912, "learning_rate": 0.001, "loss": 2.5376, "step": 13901 }, { "epoch": 0.5881208224046027, "grad_norm": 0.16537901759147644, "learning_rate": 0.001, "loss": 1.4644, "step": 13902 }, { "epoch": 0.5881631271681191, "grad_norm": 0.1859930157661438, "learning_rate": 0.001, "loss": 2.2167, "step": 13903 }, { "epoch": 0.5882054319316355, "grad_norm": 0.17841875553131104, "learning_rate": 0.001, "loss": 1.7491, "step": 13904 }, { "epoch": 0.5882477366951518, "grad_norm": 0.17977175116539001, "learning_rate": 0.001, "loss": 2.1064, "step": 13905 }, { "epoch": 0.5882900414586683, "grad_norm": 0.33689969778060913, "learning_rate": 0.001, "loss": 2.7081, "step": 13906 }, { "epoch": 0.5883323462221847, "grad_norm": 0.19339759647846222, "learning_rate": 0.001, "loss": 2.108, "step": 13907 }, { "epoch": 0.588374650985701, "grad_norm": 0.14537258446216583, "learning_rate": 0.001, "loss": 2.1058, "step": 13908 }, { "epoch": 0.5884169557492174, "grad_norm": 0.19922007620334625, "learning_rate": 0.001, "loss": 1.4782, "step": 13909 }, { "epoch": 0.5884592605127338, "grad_norm": 0.1968841850757599, "learning_rate": 0.001, "loss": 2.6562, "step": 13910 }, { "epoch": 0.5885015652762501, "grad_norm": 0.15195704996585846, "learning_rate": 0.001, "loss": 2.0136, "step": 13911 }, { "epoch": 0.5885438700397665, "grad_norm": 0.17894305288791656, "learning_rate": 0.001, "loss": 3.5737, "step": 13912 }, { "epoch": 0.5885861748032829, "grad_norm": 0.2585073411464691, "learning_rate": 0.001, "loss": 2.9576, "step": 13913 }, { "epoch": 0.5886284795667992, "grad_norm": 0.240971177816391, "learning_rate": 0.001, "loss": 2.4537, "step": 13914 }, { "epoch": 0.5886707843303156, "grad_norm": 0.1822543740272522, "learning_rate": 0.001, "loss": 2.2145, "step": 13915 }, { "epoch": 0.588713089093832, "grad_norm": 0.1564810425043106, "learning_rate": 0.001, "loss": 2.5292, "step": 13916 }, { "epoch": 0.5887553938573483, "grad_norm": 0.16015750169754028, "learning_rate": 0.001, "loss": 2.2064, "step": 13917 }, { "epoch": 0.5887976986208647, "grad_norm": 0.2196504920721054, "learning_rate": 0.001, "loss": 2.7055, "step": 13918 }, { "epoch": 0.588840003384381, "grad_norm": 0.19382864236831665, "learning_rate": 0.001, "loss": 3.4921, "step": 13919 }, { "epoch": 0.5888823081478974, "grad_norm": 0.1581316739320755, "learning_rate": 0.001, "loss": 2.4825, "step": 13920 }, { "epoch": 0.5889246129114138, "grad_norm": 0.168742835521698, "learning_rate": 0.001, "loss": 1.8284, "step": 13921 }, { "epoch": 0.5889669176749301, "grad_norm": 0.17295318841934204, "learning_rate": 0.001, "loss": 2.2552, "step": 13922 }, { "epoch": 0.5890092224384466, "grad_norm": 0.14867152273654938, "learning_rate": 0.001, "loss": 1.4105, "step": 13923 }, { "epoch": 0.589051527201963, "grad_norm": 0.19675691425800323, "learning_rate": 0.001, "loss": 1.9656, "step": 13924 }, { "epoch": 0.5890938319654793, "grad_norm": 0.17681030929088593, "learning_rate": 0.001, "loss": 3.0477, "step": 13925 }, { "epoch": 0.5891361367289957, "grad_norm": 0.5007891654968262, "learning_rate": 0.001, "loss": 2.2316, "step": 13926 }, { "epoch": 0.5891784414925121, "grad_norm": 0.6708447337150574, "learning_rate": 0.001, "loss": 1.6344, "step": 13927 }, { "epoch": 0.5892207462560284, "grad_norm": 0.19077329337596893, "learning_rate": 0.001, "loss": 2.792, "step": 13928 }, { "epoch": 0.5892630510195448, "grad_norm": 0.28526973724365234, "learning_rate": 0.001, "loss": 2.514, "step": 13929 }, { "epoch": 0.5893053557830612, "grad_norm": 0.34212541580200195, "learning_rate": 0.001, "loss": 1.6885, "step": 13930 }, { "epoch": 0.5893476605465775, "grad_norm": 0.17507655918598175, "learning_rate": 0.001, "loss": 2.1797, "step": 13931 }, { "epoch": 0.5893899653100939, "grad_norm": 1.8926448822021484, "learning_rate": 0.001, "loss": 1.746, "step": 13932 }, { "epoch": 0.5894322700736103, "grad_norm": 0.1368320882320404, "learning_rate": 0.001, "loss": 1.5929, "step": 13933 }, { "epoch": 0.5894745748371266, "grad_norm": 0.20840121805667877, "learning_rate": 0.001, "loss": 2.0687, "step": 13934 }, { "epoch": 0.589516879600643, "grad_norm": 0.3458685576915741, "learning_rate": 0.001, "loss": 1.7513, "step": 13935 }, { "epoch": 0.5895591843641594, "grad_norm": 0.17107515037059784, "learning_rate": 0.001, "loss": 1.8047, "step": 13936 }, { "epoch": 0.5896014891276757, "grad_norm": 0.37925609946250916, "learning_rate": 0.001, "loss": 2.0818, "step": 13937 }, { "epoch": 0.5896437938911921, "grad_norm": 0.31357237696647644, "learning_rate": 0.001, "loss": 2.9934, "step": 13938 }, { "epoch": 0.5896860986547086, "grad_norm": 0.16676288843154907, "learning_rate": 0.001, "loss": 2.134, "step": 13939 }, { "epoch": 0.5897284034182249, "grad_norm": 0.17158526182174683, "learning_rate": 0.001, "loss": 2.1719, "step": 13940 }, { "epoch": 0.5897707081817413, "grad_norm": 0.2076321244239807, "learning_rate": 0.001, "loss": 2.6721, "step": 13941 }, { "epoch": 0.5898130129452577, "grad_norm": 0.14922165870666504, "learning_rate": 0.001, "loss": 2.6749, "step": 13942 }, { "epoch": 0.589855317708774, "grad_norm": 0.3216572403907776, "learning_rate": 0.001, "loss": 2.0285, "step": 13943 }, { "epoch": 0.5898976224722904, "grad_norm": 0.2103135734796524, "learning_rate": 0.001, "loss": 1.9216, "step": 13944 }, { "epoch": 0.5899399272358068, "grad_norm": 0.16740508377552032, "learning_rate": 0.001, "loss": 2.4028, "step": 13945 }, { "epoch": 0.5899822319993231, "grad_norm": 0.2918654680252075, "learning_rate": 0.001, "loss": 2.7537, "step": 13946 }, { "epoch": 0.5900245367628395, "grad_norm": 0.18662185966968536, "learning_rate": 0.001, "loss": 1.6938, "step": 13947 }, { "epoch": 0.5900668415263559, "grad_norm": 0.19983160495758057, "learning_rate": 0.001, "loss": 2.2847, "step": 13948 }, { "epoch": 0.5901091462898722, "grad_norm": 0.21256953477859497, "learning_rate": 0.001, "loss": 3.3585, "step": 13949 }, { "epoch": 0.5901514510533886, "grad_norm": 0.1589345932006836, "learning_rate": 0.001, "loss": 3.151, "step": 13950 }, { "epoch": 0.590193755816905, "grad_norm": 0.5611705183982849, "learning_rate": 0.001, "loss": 2.2215, "step": 13951 }, { "epoch": 0.5902360605804213, "grad_norm": 0.19331638514995575, "learning_rate": 0.001, "loss": 3.0221, "step": 13952 }, { "epoch": 0.5902783653439377, "grad_norm": 0.17588597536087036, "learning_rate": 0.001, "loss": 1.8878, "step": 13953 }, { "epoch": 0.5903206701074541, "grad_norm": 0.14944405853748322, "learning_rate": 0.001, "loss": 2.3562, "step": 13954 }, { "epoch": 0.5903629748709704, "grad_norm": 0.15163281559944153, "learning_rate": 0.001, "loss": 1.2766, "step": 13955 }, { "epoch": 0.5904052796344869, "grad_norm": 0.1893131136894226, "learning_rate": 0.001, "loss": 2.3026, "step": 13956 }, { "epoch": 0.5904475843980033, "grad_norm": 0.17503425478935242, "learning_rate": 0.001, "loss": 2.9459, "step": 13957 }, { "epoch": 0.5904898891615196, "grad_norm": 0.2079678177833557, "learning_rate": 0.001, "loss": 1.8288, "step": 13958 }, { "epoch": 0.590532193925036, "grad_norm": 0.4229949116706848, "learning_rate": 0.001, "loss": 2.3859, "step": 13959 }, { "epoch": 0.5905744986885524, "grad_norm": 0.1723388284444809, "learning_rate": 0.001, "loss": 2.6868, "step": 13960 }, { "epoch": 0.5906168034520687, "grad_norm": 0.16546562314033508, "learning_rate": 0.001, "loss": 1.7051, "step": 13961 }, { "epoch": 0.5906591082155851, "grad_norm": 0.1712961196899414, "learning_rate": 0.001, "loss": 1.9134, "step": 13962 }, { "epoch": 0.5907014129791014, "grad_norm": 0.18814264237880707, "learning_rate": 0.001, "loss": 2.4536, "step": 13963 }, { "epoch": 0.5907437177426178, "grad_norm": 1.0132697820663452, "learning_rate": 0.001, "loss": 2.2008, "step": 13964 }, { "epoch": 0.5907860225061342, "grad_norm": 0.16948258876800537, "learning_rate": 0.001, "loss": 1.9644, "step": 13965 }, { "epoch": 0.5908283272696505, "grad_norm": 0.17865702509880066, "learning_rate": 0.001, "loss": 3.5171, "step": 13966 }, { "epoch": 0.5908706320331669, "grad_norm": 0.1568206250667572, "learning_rate": 0.001, "loss": 2.2852, "step": 13967 }, { "epoch": 0.5909129367966833, "grad_norm": 0.18641190230846405, "learning_rate": 0.001, "loss": 1.5695, "step": 13968 }, { "epoch": 0.5909552415601996, "grad_norm": 0.484352707862854, "learning_rate": 0.001, "loss": 2.8019, "step": 13969 }, { "epoch": 0.590997546323716, "grad_norm": 0.1573459804058075, "learning_rate": 0.001, "loss": 1.5844, "step": 13970 }, { "epoch": 0.5910398510872324, "grad_norm": 0.236774280667305, "learning_rate": 0.001, "loss": 2.7873, "step": 13971 }, { "epoch": 0.5910821558507487, "grad_norm": 1.0169886350631714, "learning_rate": 0.001, "loss": 1.9265, "step": 13972 }, { "epoch": 0.5911244606142652, "grad_norm": 0.19387690722942352, "learning_rate": 0.001, "loss": 1.6925, "step": 13973 }, { "epoch": 0.5911667653777816, "grad_norm": 0.17645257711410522, "learning_rate": 0.001, "loss": 2.2239, "step": 13974 }, { "epoch": 0.5912090701412979, "grad_norm": 0.1641792207956314, "learning_rate": 0.001, "loss": 2.3921, "step": 13975 }, { "epoch": 0.5912513749048143, "grad_norm": 0.20244446396827698, "learning_rate": 0.001, "loss": 2.133, "step": 13976 }, { "epoch": 0.5912936796683307, "grad_norm": 0.2241334766149521, "learning_rate": 0.001, "loss": 2.7806, "step": 13977 }, { "epoch": 0.591335984431847, "grad_norm": 0.16886314749717712, "learning_rate": 0.001, "loss": 2.3937, "step": 13978 }, { "epoch": 0.5913782891953634, "grad_norm": 0.17606092989444733, "learning_rate": 0.001, "loss": 1.5095, "step": 13979 }, { "epoch": 0.5914205939588798, "grad_norm": 0.16961275041103363, "learning_rate": 0.001, "loss": 2.3644, "step": 13980 }, { "epoch": 0.5914628987223961, "grad_norm": 0.16681556403636932, "learning_rate": 0.001, "loss": 2.0659, "step": 13981 }, { "epoch": 0.5915052034859125, "grad_norm": 0.2830229699611664, "learning_rate": 0.001, "loss": 1.8581, "step": 13982 }, { "epoch": 0.5915475082494289, "grad_norm": 0.6050325036048889, "learning_rate": 0.001, "loss": 2.2993, "step": 13983 }, { "epoch": 0.5915898130129452, "grad_norm": 0.17912669479846954, "learning_rate": 0.001, "loss": 2.4188, "step": 13984 }, { "epoch": 0.5916321177764616, "grad_norm": 0.19286015629768372, "learning_rate": 0.001, "loss": 2.7553, "step": 13985 }, { "epoch": 0.591674422539978, "grad_norm": 0.1747298538684845, "learning_rate": 0.001, "loss": 1.9951, "step": 13986 }, { "epoch": 0.5917167273034943, "grad_norm": 0.24552187323570251, "learning_rate": 0.001, "loss": 2.9972, "step": 13987 }, { "epoch": 0.5917590320670107, "grad_norm": 0.3392343521118164, "learning_rate": 0.001, "loss": 2.7581, "step": 13988 }, { "epoch": 0.5918013368305272, "grad_norm": 0.22997502982616425, "learning_rate": 0.001, "loss": 2.4476, "step": 13989 }, { "epoch": 0.5918436415940435, "grad_norm": 0.17258018255233765, "learning_rate": 0.001, "loss": 1.8377, "step": 13990 }, { "epoch": 0.5918859463575599, "grad_norm": 0.6078843474388123, "learning_rate": 0.001, "loss": 2.6106, "step": 13991 }, { "epoch": 0.5919282511210763, "grad_norm": 0.19402997195720673, "learning_rate": 0.001, "loss": 2.5247, "step": 13992 }, { "epoch": 0.5919705558845926, "grad_norm": 0.1636909693479538, "learning_rate": 0.001, "loss": 2.0067, "step": 13993 }, { "epoch": 0.592012860648109, "grad_norm": 0.18842186033725739, "learning_rate": 0.001, "loss": 1.6082, "step": 13994 }, { "epoch": 0.5920551654116254, "grad_norm": 0.1705595999956131, "learning_rate": 0.001, "loss": 1.6876, "step": 13995 }, { "epoch": 0.5920974701751417, "grad_norm": 0.23243148624897003, "learning_rate": 0.001, "loss": 1.9521, "step": 13996 }, { "epoch": 0.5921397749386581, "grad_norm": 0.18063674867153168, "learning_rate": 0.001, "loss": 2.2572, "step": 13997 }, { "epoch": 0.5921820797021745, "grad_norm": 0.21057967841625214, "learning_rate": 0.001, "loss": 1.7763, "step": 13998 }, { "epoch": 0.5922243844656908, "grad_norm": 0.19632886350154877, "learning_rate": 0.001, "loss": 1.9372, "step": 13999 }, { "epoch": 0.5922666892292072, "grad_norm": 0.16427557170391083, "learning_rate": 0.001, "loss": 1.974, "step": 14000 }, { "epoch": 0.5923089939927236, "grad_norm": 0.15664273500442505, "learning_rate": 0.001, "loss": 1.7737, "step": 14001 }, { "epoch": 0.5923512987562399, "grad_norm": 15.016242027282715, "learning_rate": 0.001, "loss": 2.0841, "step": 14002 }, { "epoch": 0.5923936035197563, "grad_norm": 0.14031845331192017, "learning_rate": 0.001, "loss": 2.2924, "step": 14003 }, { "epoch": 0.5924359082832727, "grad_norm": 0.17053398489952087, "learning_rate": 0.001, "loss": 2.4202, "step": 14004 }, { "epoch": 0.592478213046789, "grad_norm": 0.1829700618982315, "learning_rate": 0.001, "loss": 2.0318, "step": 14005 }, { "epoch": 0.5925205178103055, "grad_norm": 0.16647842526435852, "learning_rate": 0.001, "loss": 2.2545, "step": 14006 }, { "epoch": 0.5925628225738218, "grad_norm": 0.1678646355867386, "learning_rate": 0.001, "loss": 1.8475, "step": 14007 }, { "epoch": 0.5926051273373382, "grad_norm": 0.2268977016210556, "learning_rate": 0.001, "loss": 1.7835, "step": 14008 }, { "epoch": 0.5926474321008546, "grad_norm": 0.16663521528244019, "learning_rate": 0.001, "loss": 1.7448, "step": 14009 }, { "epoch": 0.5926897368643709, "grad_norm": 0.33862343430519104, "learning_rate": 0.001, "loss": 1.9905, "step": 14010 }, { "epoch": 0.5927320416278873, "grad_norm": 0.5181235671043396, "learning_rate": 0.001, "loss": 2.3188, "step": 14011 }, { "epoch": 0.5927743463914037, "grad_norm": 0.15368396043777466, "learning_rate": 0.001, "loss": 2.6473, "step": 14012 }, { "epoch": 0.59281665115492, "grad_norm": 0.18206775188446045, "learning_rate": 0.001, "loss": 1.3312, "step": 14013 }, { "epoch": 0.5928589559184364, "grad_norm": 0.21230578422546387, "learning_rate": 0.001, "loss": 1.8037, "step": 14014 }, { "epoch": 0.5929012606819528, "grad_norm": 0.1920211762189865, "learning_rate": 0.001, "loss": 2.3031, "step": 14015 }, { "epoch": 0.5929435654454691, "grad_norm": 0.1721087396144867, "learning_rate": 0.001, "loss": 2.0063, "step": 14016 }, { "epoch": 0.5929858702089855, "grad_norm": 0.17139104008674622, "learning_rate": 0.001, "loss": 1.8707, "step": 14017 }, { "epoch": 0.5930281749725019, "grad_norm": 0.2000657021999359, "learning_rate": 0.001, "loss": 1.9258, "step": 14018 }, { "epoch": 0.5930704797360182, "grad_norm": 2.25368595123291, "learning_rate": 0.001, "loss": 2.2605, "step": 14019 }, { "epoch": 0.5931127844995346, "grad_norm": 0.17565739154815674, "learning_rate": 0.001, "loss": 2.4521, "step": 14020 }, { "epoch": 0.593155089263051, "grad_norm": 0.7381449341773987, "learning_rate": 0.001, "loss": 2.9677, "step": 14021 }, { "epoch": 0.5931973940265673, "grad_norm": 0.17680025100708008, "learning_rate": 0.001, "loss": 2.6527, "step": 14022 }, { "epoch": 0.5932396987900838, "grad_norm": 0.6374967694282532, "learning_rate": 0.001, "loss": 2.7785, "step": 14023 }, { "epoch": 0.5932820035536002, "grad_norm": 0.18539991974830627, "learning_rate": 0.001, "loss": 2.1156, "step": 14024 }, { "epoch": 0.5933243083171165, "grad_norm": 0.17498497664928436, "learning_rate": 0.001, "loss": 1.9663, "step": 14025 }, { "epoch": 0.5933666130806329, "grad_norm": 0.16935625672340393, "learning_rate": 0.001, "loss": 1.7092, "step": 14026 }, { "epoch": 0.5934089178441493, "grad_norm": 0.1708284169435501, "learning_rate": 0.001, "loss": 1.619, "step": 14027 }, { "epoch": 0.5934512226076656, "grad_norm": 0.1765231490135193, "learning_rate": 0.001, "loss": 1.8157, "step": 14028 }, { "epoch": 0.593493527371182, "grad_norm": 0.21221309900283813, "learning_rate": 0.001, "loss": 1.911, "step": 14029 }, { "epoch": 0.5935358321346984, "grad_norm": 16.430587768554688, "learning_rate": 0.001, "loss": 2.7351, "step": 14030 }, { "epoch": 0.5935781368982147, "grad_norm": 0.15817879140377045, "learning_rate": 0.001, "loss": 2.0115, "step": 14031 }, { "epoch": 0.5936204416617311, "grad_norm": 0.2005200982093811, "learning_rate": 0.001, "loss": 1.8476, "step": 14032 }, { "epoch": 0.5936627464252475, "grad_norm": 0.18343733251094818, "learning_rate": 0.001, "loss": 1.9199, "step": 14033 }, { "epoch": 0.5937050511887638, "grad_norm": 13.979016304016113, "learning_rate": 0.001, "loss": 1.6957, "step": 14034 }, { "epoch": 0.5937473559522802, "grad_norm": 1.4430853128433228, "learning_rate": 0.001, "loss": 3.6849, "step": 14035 }, { "epoch": 0.5937896607157966, "grad_norm": 0.22609612345695496, "learning_rate": 0.001, "loss": 3.7324, "step": 14036 }, { "epoch": 0.5938319654793129, "grad_norm": 0.3302842080593109, "learning_rate": 0.001, "loss": 2.7874, "step": 14037 }, { "epoch": 0.5938742702428293, "grad_norm": 0.39011886715888977, "learning_rate": 0.001, "loss": 2.6145, "step": 14038 }, { "epoch": 0.5939165750063458, "grad_norm": 0.21780306100845337, "learning_rate": 0.001, "loss": 1.8593, "step": 14039 }, { "epoch": 0.5939588797698621, "grad_norm": 0.2653607130050659, "learning_rate": 0.001, "loss": 2.3802, "step": 14040 }, { "epoch": 0.5940011845333785, "grad_norm": 0.18377161026000977, "learning_rate": 0.001, "loss": 2.3686, "step": 14041 }, { "epoch": 0.5940434892968949, "grad_norm": 0.22068673372268677, "learning_rate": 0.001, "loss": 3.5105, "step": 14042 }, { "epoch": 0.5940857940604112, "grad_norm": 0.2928449213504791, "learning_rate": 0.001, "loss": 2.2201, "step": 14043 }, { "epoch": 0.5941280988239276, "grad_norm": 5.771327495574951, "learning_rate": 0.001, "loss": 3.2931, "step": 14044 }, { "epoch": 0.594170403587444, "grad_norm": 0.24995769560337067, "learning_rate": 0.001, "loss": 3.6736, "step": 14045 }, { "epoch": 0.5942127083509603, "grad_norm": 1.1308993101119995, "learning_rate": 0.001, "loss": 2.1198, "step": 14046 }, { "epoch": 0.5942550131144767, "grad_norm": 0.16081666946411133, "learning_rate": 0.001, "loss": 1.9559, "step": 14047 }, { "epoch": 0.5942973178779931, "grad_norm": 0.8098224997520447, "learning_rate": 0.001, "loss": 3.7266, "step": 14048 }, { "epoch": 0.5943396226415094, "grad_norm": 6.794950008392334, "learning_rate": 0.001, "loss": 2.869, "step": 14049 }, { "epoch": 0.5943819274050258, "grad_norm": 0.2231823056936264, "learning_rate": 0.001, "loss": 2.455, "step": 14050 }, { "epoch": 0.5944242321685422, "grad_norm": 0.23370391130447388, "learning_rate": 0.001, "loss": 2.4393, "step": 14051 }, { "epoch": 0.5944665369320585, "grad_norm": 0.300771564245224, "learning_rate": 0.001, "loss": 2.7126, "step": 14052 }, { "epoch": 0.5945088416955749, "grad_norm": 0.9046683311462402, "learning_rate": 0.001, "loss": 3.1607, "step": 14053 }, { "epoch": 0.5945511464590912, "grad_norm": 0.2239486575126648, "learning_rate": 0.001, "loss": 2.566, "step": 14054 }, { "epoch": 0.5945934512226076, "grad_norm": 0.20858706533908844, "learning_rate": 0.001, "loss": 1.9689, "step": 14055 }, { "epoch": 0.5946357559861241, "grad_norm": 0.24165865778923035, "learning_rate": 0.001, "loss": 2.2796, "step": 14056 }, { "epoch": 0.5946780607496404, "grad_norm": 0.18632391095161438, "learning_rate": 0.001, "loss": 2.541, "step": 14057 }, { "epoch": 0.5947203655131568, "grad_norm": 0.21690063178539276, "learning_rate": 0.001, "loss": 2.8571, "step": 14058 }, { "epoch": 0.5947626702766732, "grad_norm": 0.2051665484905243, "learning_rate": 0.001, "loss": 3.3445, "step": 14059 }, { "epoch": 0.5948049750401895, "grad_norm": 0.2277742475271225, "learning_rate": 0.001, "loss": 2.1127, "step": 14060 }, { "epoch": 0.5948472798037059, "grad_norm": 0.16131742298603058, "learning_rate": 0.001, "loss": 2.1966, "step": 14061 }, { "epoch": 0.5948895845672223, "grad_norm": 0.20708271861076355, "learning_rate": 0.001, "loss": 1.7722, "step": 14062 }, { "epoch": 0.5949318893307386, "grad_norm": 0.17444756627082825, "learning_rate": 0.001, "loss": 2.1499, "step": 14063 }, { "epoch": 0.594974194094255, "grad_norm": 0.8531075119972229, "learning_rate": 0.001, "loss": 2.6364, "step": 14064 }, { "epoch": 0.5950164988577714, "grad_norm": 0.3456467092037201, "learning_rate": 0.001, "loss": 2.8462, "step": 14065 }, { "epoch": 0.5950588036212877, "grad_norm": 0.2058996707201004, "learning_rate": 0.001, "loss": 1.8034, "step": 14066 }, { "epoch": 0.5951011083848041, "grad_norm": 0.22642020881175995, "learning_rate": 0.001, "loss": 2.8038, "step": 14067 }, { "epoch": 0.5951434131483205, "grad_norm": 4.5695719718933105, "learning_rate": 0.001, "loss": 2.0407, "step": 14068 }, { "epoch": 0.5951857179118368, "grad_norm": 0.23832251131534576, "learning_rate": 0.001, "loss": 2.3427, "step": 14069 }, { "epoch": 0.5952280226753532, "grad_norm": 0.19538003206253052, "learning_rate": 0.001, "loss": 3.9116, "step": 14070 }, { "epoch": 0.5952703274388697, "grad_norm": 0.53928542137146, "learning_rate": 0.001, "loss": 1.8973, "step": 14071 }, { "epoch": 0.595312632202386, "grad_norm": 0.26821160316467285, "learning_rate": 0.001, "loss": 2.5537, "step": 14072 }, { "epoch": 0.5953549369659024, "grad_norm": 0.20947696268558502, "learning_rate": 0.001, "loss": 2.5992, "step": 14073 }, { "epoch": 0.5953972417294188, "grad_norm": 0.2498164176940918, "learning_rate": 0.001, "loss": 2.0545, "step": 14074 }, { "epoch": 0.5954395464929351, "grad_norm": 0.22430068254470825, "learning_rate": 0.001, "loss": 2.6415, "step": 14075 }, { "epoch": 0.5954818512564515, "grad_norm": 0.39491817355155945, "learning_rate": 0.001, "loss": 2.3454, "step": 14076 }, { "epoch": 0.5955241560199679, "grad_norm": 0.21011623740196228, "learning_rate": 0.001, "loss": 3.1634, "step": 14077 }, { "epoch": 0.5955664607834842, "grad_norm": 0.20442980527877808, "learning_rate": 0.001, "loss": 2.2365, "step": 14078 }, { "epoch": 0.5956087655470006, "grad_norm": 0.18820206820964813, "learning_rate": 0.001, "loss": 2.0868, "step": 14079 }, { "epoch": 0.595651070310517, "grad_norm": 0.21796266734600067, "learning_rate": 0.001, "loss": 2.4075, "step": 14080 }, { "epoch": 0.5956933750740333, "grad_norm": 0.16265445947647095, "learning_rate": 0.001, "loss": 2.2618, "step": 14081 }, { "epoch": 0.5957356798375497, "grad_norm": 0.17943935096263885, "learning_rate": 0.001, "loss": 1.7516, "step": 14082 }, { "epoch": 0.5957779846010661, "grad_norm": 0.3553915321826935, "learning_rate": 0.001, "loss": 1.9473, "step": 14083 }, { "epoch": 0.5958202893645824, "grad_norm": 0.1593002825975418, "learning_rate": 0.001, "loss": 1.6759, "step": 14084 }, { "epoch": 0.5958625941280988, "grad_norm": 0.18188300728797913, "learning_rate": 0.001, "loss": 2.2562, "step": 14085 }, { "epoch": 0.5959048988916152, "grad_norm": 0.16794385015964508, "learning_rate": 0.001, "loss": 2.2645, "step": 14086 }, { "epoch": 0.5959472036551315, "grad_norm": 0.16501161456108093, "learning_rate": 0.001, "loss": 2.7828, "step": 14087 }, { "epoch": 0.595989508418648, "grad_norm": 0.22263823449611664, "learning_rate": 0.001, "loss": 2.0619, "step": 14088 }, { "epoch": 0.5960318131821644, "grad_norm": 0.4712727963924408, "learning_rate": 0.001, "loss": 3.4105, "step": 14089 }, { "epoch": 0.5960741179456807, "grad_norm": 0.19112852215766907, "learning_rate": 0.001, "loss": 2.1327, "step": 14090 }, { "epoch": 0.5961164227091971, "grad_norm": 0.1422937512397766, "learning_rate": 0.001, "loss": 2.0298, "step": 14091 }, { "epoch": 0.5961587274727135, "grad_norm": 0.14151273667812347, "learning_rate": 0.001, "loss": 1.967, "step": 14092 }, { "epoch": 0.5962010322362298, "grad_norm": 0.1558687388896942, "learning_rate": 0.001, "loss": 2.4991, "step": 14093 }, { "epoch": 0.5962433369997462, "grad_norm": 0.18624737858772278, "learning_rate": 0.001, "loss": 3.1451, "step": 14094 }, { "epoch": 0.5962856417632626, "grad_norm": 0.15436244010925293, "learning_rate": 0.001, "loss": 2.2889, "step": 14095 }, { "epoch": 0.5963279465267789, "grad_norm": 0.15433412790298462, "learning_rate": 0.001, "loss": 1.7584, "step": 14096 }, { "epoch": 0.5963702512902953, "grad_norm": 0.24146470427513123, "learning_rate": 0.001, "loss": 3.3554, "step": 14097 }, { "epoch": 0.5964125560538116, "grad_norm": 0.15554895997047424, "learning_rate": 0.001, "loss": 1.8556, "step": 14098 }, { "epoch": 0.596454860817328, "grad_norm": 0.17149004340171814, "learning_rate": 0.001, "loss": 2.3022, "step": 14099 }, { "epoch": 0.5964971655808444, "grad_norm": 0.2116393893957138, "learning_rate": 0.001, "loss": 2.0259, "step": 14100 }, { "epoch": 0.5965394703443607, "grad_norm": 0.15278328955173492, "learning_rate": 0.001, "loss": 2.0596, "step": 14101 }, { "epoch": 0.5965817751078771, "grad_norm": 0.14205598831176758, "learning_rate": 0.001, "loss": 2.5957, "step": 14102 }, { "epoch": 0.5966240798713935, "grad_norm": 0.14854857325553894, "learning_rate": 0.001, "loss": 2.006, "step": 14103 }, { "epoch": 0.5966663846349098, "grad_norm": 0.5435006022453308, "learning_rate": 0.001, "loss": 2.9184, "step": 14104 }, { "epoch": 0.5967086893984263, "grad_norm": 0.15318304300308228, "learning_rate": 0.001, "loss": 2.3277, "step": 14105 }, { "epoch": 0.5967509941619427, "grad_norm": 0.3546973168849945, "learning_rate": 0.001, "loss": 1.9591, "step": 14106 }, { "epoch": 0.596793298925459, "grad_norm": 0.15295492112636566, "learning_rate": 0.001, "loss": 2.7593, "step": 14107 }, { "epoch": 0.5968356036889754, "grad_norm": 0.13746538758277893, "learning_rate": 0.001, "loss": 2.6047, "step": 14108 }, { "epoch": 0.5968779084524918, "grad_norm": 0.18742793798446655, "learning_rate": 0.001, "loss": 1.4895, "step": 14109 }, { "epoch": 0.5969202132160081, "grad_norm": 0.28040406107902527, "learning_rate": 0.001, "loss": 3.4037, "step": 14110 }, { "epoch": 0.5969625179795245, "grad_norm": 0.15165936946868896, "learning_rate": 0.001, "loss": 2.2845, "step": 14111 }, { "epoch": 0.5970048227430409, "grad_norm": 0.1565185934305191, "learning_rate": 0.001, "loss": 2.1824, "step": 14112 }, { "epoch": 0.5970471275065572, "grad_norm": 0.16770948469638824, "learning_rate": 0.001, "loss": 2.4891, "step": 14113 }, { "epoch": 0.5970894322700736, "grad_norm": 0.14534291625022888, "learning_rate": 0.001, "loss": 2.0148, "step": 14114 }, { "epoch": 0.59713173703359, "grad_norm": 0.157785564661026, "learning_rate": 0.001, "loss": 2.0671, "step": 14115 }, { "epoch": 0.5971740417971063, "grad_norm": 0.15590552985668182, "learning_rate": 0.001, "loss": 3.3419, "step": 14116 }, { "epoch": 0.5972163465606227, "grad_norm": 0.24124379456043243, "learning_rate": 0.001, "loss": 3.057, "step": 14117 }, { "epoch": 0.5972586513241391, "grad_norm": 4.952909469604492, "learning_rate": 0.001, "loss": 2.278, "step": 14118 }, { "epoch": 0.5973009560876554, "grad_norm": 0.3283090889453888, "learning_rate": 0.001, "loss": 2.4805, "step": 14119 }, { "epoch": 0.5973432608511718, "grad_norm": 0.14943337440490723, "learning_rate": 0.001, "loss": 1.8459, "step": 14120 }, { "epoch": 0.5973855656146883, "grad_norm": 0.1828925907611847, "learning_rate": 0.001, "loss": 2.2262, "step": 14121 }, { "epoch": 0.5974278703782046, "grad_norm": 0.21698467433452606, "learning_rate": 0.001, "loss": 1.5742, "step": 14122 }, { "epoch": 0.597470175141721, "grad_norm": 0.17111243307590485, "learning_rate": 0.001, "loss": 1.6804, "step": 14123 }, { "epoch": 0.5975124799052374, "grad_norm": 0.1583365499973297, "learning_rate": 0.001, "loss": 1.6393, "step": 14124 }, { "epoch": 0.5975547846687537, "grad_norm": 0.18235273659229279, "learning_rate": 0.001, "loss": 2.8478, "step": 14125 }, { "epoch": 0.5975970894322701, "grad_norm": 0.16986776888370514, "learning_rate": 0.001, "loss": 1.6388, "step": 14126 }, { "epoch": 0.5976393941957865, "grad_norm": 0.39957574009895325, "learning_rate": 0.001, "loss": 2.1505, "step": 14127 }, { "epoch": 0.5976816989593028, "grad_norm": 0.15922756493091583, "learning_rate": 0.001, "loss": 1.7598, "step": 14128 }, { "epoch": 0.5977240037228192, "grad_norm": 6.040116786956787, "learning_rate": 0.001, "loss": 3.9252, "step": 14129 }, { "epoch": 0.5977663084863356, "grad_norm": 0.2572100758552551, "learning_rate": 0.001, "loss": 2.1899, "step": 14130 }, { "epoch": 0.5978086132498519, "grad_norm": 7.241512298583984, "learning_rate": 0.001, "loss": 2.238, "step": 14131 }, { "epoch": 0.5978509180133683, "grad_norm": 0.21074838936328888, "learning_rate": 0.001, "loss": 1.8177, "step": 14132 }, { "epoch": 0.5978932227768847, "grad_norm": 0.20617999136447906, "learning_rate": 0.001, "loss": 2.7263, "step": 14133 }, { "epoch": 0.597935527540401, "grad_norm": 8.84471321105957, "learning_rate": 0.001, "loss": 2.3485, "step": 14134 }, { "epoch": 0.5979778323039174, "grad_norm": 0.21541795134544373, "learning_rate": 0.001, "loss": 2.4785, "step": 14135 }, { "epoch": 0.5980201370674338, "grad_norm": 0.17976322770118713, "learning_rate": 0.001, "loss": 1.9674, "step": 14136 }, { "epoch": 0.5980624418309501, "grad_norm": 0.18282859027385712, "learning_rate": 0.001, "loss": 2.8773, "step": 14137 }, { "epoch": 0.5981047465944666, "grad_norm": 0.29796525835990906, "learning_rate": 0.001, "loss": 2.8154, "step": 14138 }, { "epoch": 0.598147051357983, "grad_norm": 0.19228790700435638, "learning_rate": 0.001, "loss": 2.666, "step": 14139 }, { "epoch": 0.5981893561214993, "grad_norm": 0.3605715334415436, "learning_rate": 0.001, "loss": 2.8384, "step": 14140 }, { "epoch": 0.5982316608850157, "grad_norm": 0.28930771350860596, "learning_rate": 0.001, "loss": 1.6602, "step": 14141 }, { "epoch": 0.5982739656485321, "grad_norm": 0.21113309264183044, "learning_rate": 0.001, "loss": 1.586, "step": 14142 }, { "epoch": 0.5983162704120484, "grad_norm": 0.16728419065475464, "learning_rate": 0.001, "loss": 3.1558, "step": 14143 }, { "epoch": 0.5983585751755648, "grad_norm": 0.19090363383293152, "learning_rate": 0.001, "loss": 2.0699, "step": 14144 }, { "epoch": 0.5984008799390811, "grad_norm": 0.23593130707740784, "learning_rate": 0.001, "loss": 2.9676, "step": 14145 }, { "epoch": 0.5984431847025975, "grad_norm": 0.1566046178340912, "learning_rate": 0.001, "loss": 1.873, "step": 14146 }, { "epoch": 0.5984854894661139, "grad_norm": 0.14671561121940613, "learning_rate": 0.001, "loss": 1.6314, "step": 14147 }, { "epoch": 0.5985277942296302, "grad_norm": 0.17109639942646027, "learning_rate": 0.001, "loss": 1.7268, "step": 14148 }, { "epoch": 0.5985700989931466, "grad_norm": 0.13763944804668427, "learning_rate": 0.001, "loss": 2.2571, "step": 14149 }, { "epoch": 0.598612403756663, "grad_norm": 0.14401958882808685, "learning_rate": 0.001, "loss": 1.5655, "step": 14150 }, { "epoch": 0.5986547085201793, "grad_norm": 0.21010830998420715, "learning_rate": 0.001, "loss": 1.9039, "step": 14151 }, { "epoch": 0.5986970132836957, "grad_norm": 1.928266167640686, "learning_rate": 0.001, "loss": 2.4406, "step": 14152 }, { "epoch": 0.5987393180472121, "grad_norm": 0.1703515201807022, "learning_rate": 0.001, "loss": 2.6037, "step": 14153 }, { "epoch": 0.5987816228107284, "grad_norm": 0.2860715687274933, "learning_rate": 0.001, "loss": 2.0231, "step": 14154 }, { "epoch": 0.5988239275742449, "grad_norm": 0.19897747039794922, "learning_rate": 0.001, "loss": 2.8199, "step": 14155 }, { "epoch": 0.5988662323377613, "grad_norm": 0.18251550197601318, "learning_rate": 0.001, "loss": 1.7923, "step": 14156 }, { "epoch": 0.5989085371012776, "grad_norm": 1.5346297025680542, "learning_rate": 0.001, "loss": 1.7232, "step": 14157 }, { "epoch": 0.598950841864794, "grad_norm": 0.16617514193058014, "learning_rate": 0.001, "loss": 1.4424, "step": 14158 }, { "epoch": 0.5989931466283104, "grad_norm": 0.14995898306369781, "learning_rate": 0.001, "loss": 1.8629, "step": 14159 }, { "epoch": 0.5990354513918267, "grad_norm": 0.1728651225566864, "learning_rate": 0.001, "loss": 2.0475, "step": 14160 }, { "epoch": 0.5990777561553431, "grad_norm": 0.27386367321014404, "learning_rate": 0.001, "loss": 1.6863, "step": 14161 }, { "epoch": 0.5991200609188595, "grad_norm": 0.21479547023773193, "learning_rate": 0.001, "loss": 1.8521, "step": 14162 }, { "epoch": 0.5991623656823758, "grad_norm": 0.2000337839126587, "learning_rate": 0.001, "loss": 2.8076, "step": 14163 }, { "epoch": 0.5992046704458922, "grad_norm": 0.16316631436347961, "learning_rate": 0.001, "loss": 3.3434, "step": 14164 }, { "epoch": 0.5992469752094086, "grad_norm": 0.18021291494369507, "learning_rate": 0.001, "loss": 2.0387, "step": 14165 }, { "epoch": 0.5992892799729249, "grad_norm": 0.24870742857456207, "learning_rate": 0.001, "loss": 2.6437, "step": 14166 }, { "epoch": 0.5993315847364413, "grad_norm": 0.17932534217834473, "learning_rate": 0.001, "loss": 2.412, "step": 14167 }, { "epoch": 0.5993738894999577, "grad_norm": 0.22996637225151062, "learning_rate": 0.001, "loss": 3.117, "step": 14168 }, { "epoch": 0.599416194263474, "grad_norm": 13.875000953674316, "learning_rate": 0.001, "loss": 2.2331, "step": 14169 }, { "epoch": 0.5994584990269904, "grad_norm": 0.8741011619567871, "learning_rate": 0.001, "loss": 2.2017, "step": 14170 }, { "epoch": 0.5995008037905069, "grad_norm": 0.3972923159599304, "learning_rate": 0.001, "loss": 2.4348, "step": 14171 }, { "epoch": 0.5995431085540232, "grad_norm": 0.18050576746463776, "learning_rate": 0.001, "loss": 2.2719, "step": 14172 }, { "epoch": 0.5995854133175396, "grad_norm": 0.6231819987297058, "learning_rate": 0.001, "loss": 1.9272, "step": 14173 }, { "epoch": 0.599627718081056, "grad_norm": 0.2706356942653656, "learning_rate": 0.001, "loss": 1.8771, "step": 14174 }, { "epoch": 0.5996700228445723, "grad_norm": 0.2066756784915924, "learning_rate": 0.001, "loss": 1.9065, "step": 14175 }, { "epoch": 0.5997123276080887, "grad_norm": 0.2722444236278534, "learning_rate": 0.001, "loss": 2.5702, "step": 14176 }, { "epoch": 0.5997546323716051, "grad_norm": 0.18113920092582703, "learning_rate": 0.001, "loss": 1.8397, "step": 14177 }, { "epoch": 0.5997969371351214, "grad_norm": 0.2111581563949585, "learning_rate": 0.001, "loss": 2.0402, "step": 14178 }, { "epoch": 0.5998392418986378, "grad_norm": 0.1754538118839264, "learning_rate": 0.001, "loss": 1.4719, "step": 14179 }, { "epoch": 0.5998815466621542, "grad_norm": 0.15258760750293732, "learning_rate": 0.001, "loss": 2.0344, "step": 14180 }, { "epoch": 0.5999238514256705, "grad_norm": 0.18332518637180328, "learning_rate": 0.001, "loss": 2.2107, "step": 14181 }, { "epoch": 0.5999661561891869, "grad_norm": 0.23446984589099884, "learning_rate": 0.001, "loss": 2.8876, "step": 14182 }, { "epoch": 0.6000084609527033, "grad_norm": 0.20823650062084198, "learning_rate": 0.001, "loss": 2.5374, "step": 14183 }, { "epoch": 0.6000507657162196, "grad_norm": 0.16053082048892975, "learning_rate": 0.001, "loss": 2.1142, "step": 14184 }, { "epoch": 0.600093070479736, "grad_norm": 0.2412220984697342, "learning_rate": 0.001, "loss": 3.0947, "step": 14185 }, { "epoch": 0.6001353752432524, "grad_norm": 0.17646367847919464, "learning_rate": 0.001, "loss": 2.4277, "step": 14186 }, { "epoch": 0.6001776800067687, "grad_norm": 0.16096143424510956, "learning_rate": 0.001, "loss": 2.2449, "step": 14187 }, { "epoch": 0.6002199847702852, "grad_norm": 0.2696020007133484, "learning_rate": 0.001, "loss": 1.9888, "step": 14188 }, { "epoch": 0.6002622895338015, "grad_norm": 0.14469857513904572, "learning_rate": 0.001, "loss": 2.14, "step": 14189 }, { "epoch": 0.6003045942973179, "grad_norm": 0.17882224917411804, "learning_rate": 0.001, "loss": 2.0161, "step": 14190 }, { "epoch": 0.6003468990608343, "grad_norm": 0.1592990756034851, "learning_rate": 0.001, "loss": 2.067, "step": 14191 }, { "epoch": 0.6003892038243506, "grad_norm": 0.8799763321876526, "learning_rate": 0.001, "loss": 2.3578, "step": 14192 }, { "epoch": 0.600431508587867, "grad_norm": 0.1751909852027893, "learning_rate": 0.001, "loss": 1.8269, "step": 14193 }, { "epoch": 0.6004738133513834, "grad_norm": 0.22344686090946198, "learning_rate": 0.001, "loss": 2.8203, "step": 14194 }, { "epoch": 0.6005161181148997, "grad_norm": 0.19454553723335266, "learning_rate": 0.001, "loss": 2.5996, "step": 14195 }, { "epoch": 0.6005584228784161, "grad_norm": 0.16790246963500977, "learning_rate": 0.001, "loss": 2.8472, "step": 14196 }, { "epoch": 0.6006007276419325, "grad_norm": 0.16107873618602753, "learning_rate": 0.001, "loss": 1.7427, "step": 14197 }, { "epoch": 0.6006430324054488, "grad_norm": 22.60145378112793, "learning_rate": 0.001, "loss": 1.6772, "step": 14198 }, { "epoch": 0.6006853371689652, "grad_norm": 0.16331073641777039, "learning_rate": 0.001, "loss": 2.0363, "step": 14199 }, { "epoch": 0.6007276419324816, "grad_norm": 1.7857904434204102, "learning_rate": 0.001, "loss": 1.7835, "step": 14200 }, { "epoch": 0.6007699466959979, "grad_norm": 0.1780909299850464, "learning_rate": 0.001, "loss": 2.9103, "step": 14201 }, { "epoch": 0.6008122514595143, "grad_norm": 0.3058990240097046, "learning_rate": 0.001, "loss": 2.3492, "step": 14202 }, { "epoch": 0.6008545562230307, "grad_norm": 0.1861615926027298, "learning_rate": 0.001, "loss": 2.2742, "step": 14203 }, { "epoch": 0.600896860986547, "grad_norm": 0.21202732622623444, "learning_rate": 0.001, "loss": 1.6728, "step": 14204 }, { "epoch": 0.6009391657500635, "grad_norm": 0.17166270315647125, "learning_rate": 0.001, "loss": 2.293, "step": 14205 }, { "epoch": 0.6009814705135799, "grad_norm": 0.1747630089521408, "learning_rate": 0.001, "loss": 3.0309, "step": 14206 }, { "epoch": 0.6010237752770962, "grad_norm": 0.1900450885295868, "learning_rate": 0.001, "loss": 1.6363, "step": 14207 }, { "epoch": 0.6010660800406126, "grad_norm": 6.871549129486084, "learning_rate": 0.001, "loss": 1.5597, "step": 14208 }, { "epoch": 0.601108384804129, "grad_norm": 0.20221640169620514, "learning_rate": 0.001, "loss": 1.6391, "step": 14209 }, { "epoch": 0.6011506895676453, "grad_norm": 0.5754346251487732, "learning_rate": 0.001, "loss": 4.2668, "step": 14210 }, { "epoch": 0.6011929943311617, "grad_norm": 0.2078094184398651, "learning_rate": 0.001, "loss": 1.8344, "step": 14211 }, { "epoch": 0.6012352990946781, "grad_norm": 0.19465067982673645, "learning_rate": 0.001, "loss": 2.707, "step": 14212 }, { "epoch": 0.6012776038581944, "grad_norm": 0.1583552062511444, "learning_rate": 0.001, "loss": 1.791, "step": 14213 }, { "epoch": 0.6013199086217108, "grad_norm": 0.6577909588813782, "learning_rate": 0.001, "loss": 1.8294, "step": 14214 }, { "epoch": 0.6013622133852272, "grad_norm": 0.16228260099887848, "learning_rate": 0.001, "loss": 2.0963, "step": 14215 }, { "epoch": 0.6014045181487435, "grad_norm": 0.21815434098243713, "learning_rate": 0.001, "loss": 1.9625, "step": 14216 }, { "epoch": 0.6014468229122599, "grad_norm": 0.168915793299675, "learning_rate": 0.001, "loss": 1.7163, "step": 14217 }, { "epoch": 0.6014891276757763, "grad_norm": 0.21465511620044708, "learning_rate": 0.001, "loss": 2.9348, "step": 14218 }, { "epoch": 0.6015314324392926, "grad_norm": 1.094494104385376, "learning_rate": 0.001, "loss": 2.2373, "step": 14219 }, { "epoch": 0.601573737202809, "grad_norm": 0.21323548257350922, "learning_rate": 0.001, "loss": 2.9424, "step": 14220 }, { "epoch": 0.6016160419663255, "grad_norm": 4.479442596435547, "learning_rate": 0.001, "loss": 1.9656, "step": 14221 }, { "epoch": 0.6016583467298418, "grad_norm": 0.13365942239761353, "learning_rate": 0.001, "loss": 1.264, "step": 14222 }, { "epoch": 0.6017006514933582, "grad_norm": 1.7394766807556152, "learning_rate": 0.001, "loss": 2.1951, "step": 14223 }, { "epoch": 0.6017429562568746, "grad_norm": 3.335594415664673, "learning_rate": 0.001, "loss": 2.4577, "step": 14224 }, { "epoch": 0.6017852610203909, "grad_norm": 0.574901282787323, "learning_rate": 0.001, "loss": 2.5492, "step": 14225 }, { "epoch": 0.6018275657839073, "grad_norm": 0.16462647914886475, "learning_rate": 0.001, "loss": 2.7077, "step": 14226 }, { "epoch": 0.6018698705474237, "grad_norm": 0.36625564098358154, "learning_rate": 0.001, "loss": 2.6481, "step": 14227 }, { "epoch": 0.60191217531094, "grad_norm": 0.33818289637565613, "learning_rate": 0.001, "loss": 2.2215, "step": 14228 }, { "epoch": 0.6019544800744564, "grad_norm": 0.18577156960964203, "learning_rate": 0.001, "loss": 2.0114, "step": 14229 }, { "epoch": 0.6019967848379728, "grad_norm": 0.14699004590511322, "learning_rate": 0.001, "loss": 1.6804, "step": 14230 }, { "epoch": 0.6020390896014891, "grad_norm": 0.17045558989048004, "learning_rate": 0.001, "loss": 2.3435, "step": 14231 }, { "epoch": 0.6020813943650055, "grad_norm": 0.1529216319322586, "learning_rate": 0.001, "loss": 2.1237, "step": 14232 }, { "epoch": 0.6021236991285218, "grad_norm": 0.1529701054096222, "learning_rate": 0.001, "loss": 3.5144, "step": 14233 }, { "epoch": 0.6021660038920382, "grad_norm": 0.18276235461235046, "learning_rate": 0.001, "loss": 2.1834, "step": 14234 }, { "epoch": 0.6022083086555546, "grad_norm": 0.2533540725708008, "learning_rate": 0.001, "loss": 3.0911, "step": 14235 }, { "epoch": 0.6022506134190709, "grad_norm": 0.1999509334564209, "learning_rate": 0.001, "loss": 1.7524, "step": 14236 }, { "epoch": 0.6022929181825873, "grad_norm": 0.1922556310892105, "learning_rate": 0.001, "loss": 2.1238, "step": 14237 }, { "epoch": 0.6023352229461038, "grad_norm": 1.1301320791244507, "learning_rate": 0.001, "loss": 1.7954, "step": 14238 }, { "epoch": 0.60237752770962, "grad_norm": 0.14961791038513184, "learning_rate": 0.001, "loss": 1.6322, "step": 14239 }, { "epoch": 0.6024198324731365, "grad_norm": 0.5575346350669861, "learning_rate": 0.001, "loss": 2.2569, "step": 14240 }, { "epoch": 0.6024621372366529, "grad_norm": 0.19425736367702484, "learning_rate": 0.001, "loss": 1.8248, "step": 14241 }, { "epoch": 0.6025044420001692, "grad_norm": 0.1504928469657898, "learning_rate": 0.001, "loss": 2.2506, "step": 14242 }, { "epoch": 0.6025467467636856, "grad_norm": 1.183660626411438, "learning_rate": 0.001, "loss": 1.9673, "step": 14243 }, { "epoch": 0.602589051527202, "grad_norm": 0.3906610608100891, "learning_rate": 0.001, "loss": 2.8927, "step": 14244 }, { "epoch": 0.6026313562907183, "grad_norm": 0.20628994703292847, "learning_rate": 0.001, "loss": 2.1268, "step": 14245 }, { "epoch": 0.6026736610542347, "grad_norm": 4.261792182922363, "learning_rate": 0.001, "loss": 4.2053, "step": 14246 }, { "epoch": 0.6027159658177511, "grad_norm": 0.15113836526870728, "learning_rate": 0.001, "loss": 1.5448, "step": 14247 }, { "epoch": 0.6027582705812674, "grad_norm": 0.20235134661197662, "learning_rate": 0.001, "loss": 2.3315, "step": 14248 }, { "epoch": 0.6028005753447838, "grad_norm": 0.15247638523578644, "learning_rate": 0.001, "loss": 2.15, "step": 14249 }, { "epoch": 0.6028428801083002, "grad_norm": 0.18794965744018555, "learning_rate": 0.001, "loss": 2.3342, "step": 14250 }, { "epoch": 0.6028851848718165, "grad_norm": 4.161924839019775, "learning_rate": 0.001, "loss": 1.9379, "step": 14251 }, { "epoch": 0.6029274896353329, "grad_norm": 0.1974867433309555, "learning_rate": 0.001, "loss": 2.1114, "step": 14252 }, { "epoch": 0.6029697943988493, "grad_norm": 0.15145453810691833, "learning_rate": 0.001, "loss": 2.2004, "step": 14253 }, { "epoch": 0.6030120991623656, "grad_norm": 0.1826041340827942, "learning_rate": 0.001, "loss": 1.8654, "step": 14254 }, { "epoch": 0.603054403925882, "grad_norm": 0.33202704787254333, "learning_rate": 0.001, "loss": 2.8525, "step": 14255 }, { "epoch": 0.6030967086893985, "grad_norm": 0.17979615926742554, "learning_rate": 0.001, "loss": 2.1171, "step": 14256 }, { "epoch": 0.6031390134529148, "grad_norm": 0.33477500081062317, "learning_rate": 0.001, "loss": 2.4972, "step": 14257 }, { "epoch": 0.6031813182164312, "grad_norm": 0.1628679484128952, "learning_rate": 0.001, "loss": 2.3023, "step": 14258 }, { "epoch": 0.6032236229799476, "grad_norm": 0.15380176901817322, "learning_rate": 0.001, "loss": 1.6094, "step": 14259 }, { "epoch": 0.6032659277434639, "grad_norm": 0.15169599652290344, "learning_rate": 0.001, "loss": 1.3153, "step": 14260 }, { "epoch": 0.6033082325069803, "grad_norm": 1.1661324501037598, "learning_rate": 0.001, "loss": 1.8356, "step": 14261 }, { "epoch": 0.6033505372704967, "grad_norm": 0.14348934590816498, "learning_rate": 0.001, "loss": 1.4043, "step": 14262 }, { "epoch": 0.603392842034013, "grad_norm": 0.35176733136177063, "learning_rate": 0.001, "loss": 2.327, "step": 14263 }, { "epoch": 0.6034351467975294, "grad_norm": 2.9565463066101074, "learning_rate": 0.001, "loss": 2.2605, "step": 14264 }, { "epoch": 0.6034774515610458, "grad_norm": 0.20699867606163025, "learning_rate": 0.001, "loss": 2.5949, "step": 14265 }, { "epoch": 0.6035197563245621, "grad_norm": 0.6101794242858887, "learning_rate": 0.001, "loss": 2.4005, "step": 14266 }, { "epoch": 0.6035620610880785, "grad_norm": 0.2099284827709198, "learning_rate": 0.001, "loss": 1.7885, "step": 14267 }, { "epoch": 0.6036043658515949, "grad_norm": 0.20473699271678925, "learning_rate": 0.001, "loss": 2.0055, "step": 14268 }, { "epoch": 0.6036466706151112, "grad_norm": 0.2228740006685257, "learning_rate": 0.001, "loss": 2.4903, "step": 14269 }, { "epoch": 0.6036889753786276, "grad_norm": 0.6952749490737915, "learning_rate": 0.001, "loss": 2.8113, "step": 14270 }, { "epoch": 0.6037312801421441, "grad_norm": 0.16827528178691864, "learning_rate": 0.001, "loss": 1.7197, "step": 14271 }, { "epoch": 0.6037735849056604, "grad_norm": 0.23891712725162506, "learning_rate": 0.001, "loss": 2.2606, "step": 14272 }, { "epoch": 0.6038158896691768, "grad_norm": 0.220239520072937, "learning_rate": 0.001, "loss": 1.4048, "step": 14273 }, { "epoch": 0.6038581944326932, "grad_norm": 0.16221195459365845, "learning_rate": 0.001, "loss": 1.7971, "step": 14274 }, { "epoch": 0.6039004991962095, "grad_norm": 0.31431055068969727, "learning_rate": 0.001, "loss": 3.3171, "step": 14275 }, { "epoch": 0.6039428039597259, "grad_norm": 1.185067057609558, "learning_rate": 0.001, "loss": 2.7166, "step": 14276 }, { "epoch": 0.6039851087232423, "grad_norm": 0.15773341059684753, "learning_rate": 0.001, "loss": 2.0959, "step": 14277 }, { "epoch": 0.6040274134867586, "grad_norm": 0.16446535289287567, "learning_rate": 0.001, "loss": 1.6047, "step": 14278 }, { "epoch": 0.604069718250275, "grad_norm": 0.20500445365905762, "learning_rate": 0.001, "loss": 2.4833, "step": 14279 }, { "epoch": 0.6041120230137913, "grad_norm": 0.1654626429080963, "learning_rate": 0.001, "loss": 1.7549, "step": 14280 }, { "epoch": 0.6041543277773077, "grad_norm": 0.16922923922538757, "learning_rate": 0.001, "loss": 2.0515, "step": 14281 }, { "epoch": 0.6041966325408241, "grad_norm": 0.17011629045009613, "learning_rate": 0.001, "loss": 2.5793, "step": 14282 }, { "epoch": 0.6042389373043404, "grad_norm": 0.25761881470680237, "learning_rate": 0.001, "loss": 1.9069, "step": 14283 }, { "epoch": 0.6042812420678568, "grad_norm": 0.18269698321819305, "learning_rate": 0.001, "loss": 3.0221, "step": 14284 }, { "epoch": 0.6043235468313732, "grad_norm": 85.4771728515625, "learning_rate": 0.001, "loss": 2.3729, "step": 14285 }, { "epoch": 0.6043658515948895, "grad_norm": 0.7515902519226074, "learning_rate": 0.001, "loss": 2.3315, "step": 14286 }, { "epoch": 0.604408156358406, "grad_norm": 0.2449134737253189, "learning_rate": 0.001, "loss": 1.72, "step": 14287 }, { "epoch": 0.6044504611219224, "grad_norm": 0.15557745099067688, "learning_rate": 0.001, "loss": 1.9485, "step": 14288 }, { "epoch": 0.6044927658854387, "grad_norm": 0.18973484635353088, "learning_rate": 0.001, "loss": 2.1576, "step": 14289 }, { "epoch": 0.6045350706489551, "grad_norm": 0.2871471643447876, "learning_rate": 0.001, "loss": 1.7434, "step": 14290 }, { "epoch": 0.6045773754124715, "grad_norm": 0.4421120882034302, "learning_rate": 0.001, "loss": 3.4448, "step": 14291 }, { "epoch": 0.6046196801759878, "grad_norm": 0.15245676040649414, "learning_rate": 0.001, "loss": 1.6375, "step": 14292 }, { "epoch": 0.6046619849395042, "grad_norm": 0.22273124754428864, "learning_rate": 0.001, "loss": 3.4129, "step": 14293 }, { "epoch": 0.6047042897030206, "grad_norm": 0.1912602186203003, "learning_rate": 0.001, "loss": 2.368, "step": 14294 }, { "epoch": 0.6047465944665369, "grad_norm": 0.18390634655952454, "learning_rate": 0.001, "loss": 2.33, "step": 14295 }, { "epoch": 0.6047888992300533, "grad_norm": 0.2985355854034424, "learning_rate": 0.001, "loss": 2.6485, "step": 14296 }, { "epoch": 0.6048312039935697, "grad_norm": 0.1954064518213272, "learning_rate": 0.001, "loss": 2.0261, "step": 14297 }, { "epoch": 0.604873508757086, "grad_norm": 0.16802330315113068, "learning_rate": 0.001, "loss": 2.1737, "step": 14298 }, { "epoch": 0.6049158135206024, "grad_norm": 0.1490180343389511, "learning_rate": 0.001, "loss": 1.6823, "step": 14299 }, { "epoch": 0.6049581182841188, "grad_norm": 0.1882055103778839, "learning_rate": 0.001, "loss": 2.7867, "step": 14300 }, { "epoch": 0.6050004230476351, "grad_norm": 0.18217292428016663, "learning_rate": 0.001, "loss": 2.2182, "step": 14301 }, { "epoch": 0.6050427278111515, "grad_norm": 0.16061586141586304, "learning_rate": 0.001, "loss": 1.2033, "step": 14302 }, { "epoch": 0.605085032574668, "grad_norm": 0.17680132389068604, "learning_rate": 0.001, "loss": 2.3674, "step": 14303 }, { "epoch": 0.6051273373381842, "grad_norm": 0.21177402138710022, "learning_rate": 0.001, "loss": 2.0609, "step": 14304 }, { "epoch": 0.6051696421017007, "grad_norm": 0.8895161151885986, "learning_rate": 0.001, "loss": 2.1776, "step": 14305 }, { "epoch": 0.6052119468652171, "grad_norm": 0.18167245388031006, "learning_rate": 0.001, "loss": 1.8625, "step": 14306 }, { "epoch": 0.6052542516287334, "grad_norm": 7.46832799911499, "learning_rate": 0.001, "loss": 1.9056, "step": 14307 }, { "epoch": 0.6052965563922498, "grad_norm": 0.17370212078094482, "learning_rate": 0.001, "loss": 2.3242, "step": 14308 }, { "epoch": 0.6053388611557662, "grad_norm": 0.1907428652048111, "learning_rate": 0.001, "loss": 2.38, "step": 14309 }, { "epoch": 0.6053811659192825, "grad_norm": 0.5028619766235352, "learning_rate": 0.001, "loss": 2.3635, "step": 14310 }, { "epoch": 0.6054234706827989, "grad_norm": 0.15247510373592377, "learning_rate": 0.001, "loss": 1.7588, "step": 14311 }, { "epoch": 0.6054657754463153, "grad_norm": 0.2253142148256302, "learning_rate": 0.001, "loss": 2.7246, "step": 14312 }, { "epoch": 0.6055080802098316, "grad_norm": 0.2002718299627304, "learning_rate": 0.001, "loss": 2.9367, "step": 14313 }, { "epoch": 0.605550384973348, "grad_norm": 1.097982406616211, "learning_rate": 0.001, "loss": 1.6434, "step": 14314 }, { "epoch": 0.6055926897368644, "grad_norm": 0.20762449502944946, "learning_rate": 0.001, "loss": 2.281, "step": 14315 }, { "epoch": 0.6056349945003807, "grad_norm": 0.8287076950073242, "learning_rate": 0.001, "loss": 2.1989, "step": 14316 }, { "epoch": 0.6056772992638971, "grad_norm": 0.19646909832954407, "learning_rate": 0.001, "loss": 3.1902, "step": 14317 }, { "epoch": 0.6057196040274135, "grad_norm": 0.21229322254657745, "learning_rate": 0.001, "loss": 2.3791, "step": 14318 }, { "epoch": 0.6057619087909298, "grad_norm": 0.216338649392128, "learning_rate": 0.001, "loss": 2.6643, "step": 14319 }, { "epoch": 0.6058042135544462, "grad_norm": 0.177630215883255, "learning_rate": 0.001, "loss": 3.0275, "step": 14320 }, { "epoch": 0.6058465183179627, "grad_norm": 0.173061802983284, "learning_rate": 0.001, "loss": 1.5222, "step": 14321 }, { "epoch": 0.605888823081479, "grad_norm": 0.2353350967168808, "learning_rate": 0.001, "loss": 2.1331, "step": 14322 }, { "epoch": 0.6059311278449954, "grad_norm": 0.1542205810546875, "learning_rate": 0.001, "loss": 1.6273, "step": 14323 }, { "epoch": 0.6059734326085117, "grad_norm": 0.14759258925914764, "learning_rate": 0.001, "loss": 2.9241, "step": 14324 }, { "epoch": 0.6060157373720281, "grad_norm": 1.2428350448608398, "learning_rate": 0.001, "loss": 1.851, "step": 14325 }, { "epoch": 0.6060580421355445, "grad_norm": 0.3622968792915344, "learning_rate": 0.001, "loss": 2.0143, "step": 14326 }, { "epoch": 0.6061003468990608, "grad_norm": 0.1762288361787796, "learning_rate": 0.001, "loss": 2.4346, "step": 14327 }, { "epoch": 0.6061426516625772, "grad_norm": 0.24503590166568756, "learning_rate": 0.001, "loss": 1.8936, "step": 14328 }, { "epoch": 0.6061849564260936, "grad_norm": 0.16428449749946594, "learning_rate": 0.001, "loss": 1.829, "step": 14329 }, { "epoch": 0.6062272611896099, "grad_norm": 0.16530753672122955, "learning_rate": 0.001, "loss": 2.2527, "step": 14330 }, { "epoch": 0.6062695659531263, "grad_norm": 0.20797306299209595, "learning_rate": 0.001, "loss": 2.2176, "step": 14331 }, { "epoch": 0.6063118707166427, "grad_norm": 0.27849939465522766, "learning_rate": 0.001, "loss": 2.2585, "step": 14332 }, { "epoch": 0.606354175480159, "grad_norm": 1.6591880321502686, "learning_rate": 0.001, "loss": 3.7181, "step": 14333 }, { "epoch": 0.6063964802436754, "grad_norm": 0.15750765800476074, "learning_rate": 0.001, "loss": 2.2277, "step": 14334 }, { "epoch": 0.6064387850071918, "grad_norm": 0.5017919540405273, "learning_rate": 0.001, "loss": 2.991, "step": 14335 }, { "epoch": 0.6064810897707081, "grad_norm": 0.37937721610069275, "learning_rate": 0.001, "loss": 2.2904, "step": 14336 }, { "epoch": 0.6065233945342245, "grad_norm": 1.156523585319519, "learning_rate": 0.001, "loss": 2.0803, "step": 14337 }, { "epoch": 0.606565699297741, "grad_norm": 0.26391497254371643, "learning_rate": 0.001, "loss": 2.2633, "step": 14338 }, { "epoch": 0.6066080040612573, "grad_norm": 0.17540274560451508, "learning_rate": 0.001, "loss": 2.4428, "step": 14339 }, { "epoch": 0.6066503088247737, "grad_norm": 0.1678505390882492, "learning_rate": 0.001, "loss": 2.1558, "step": 14340 }, { "epoch": 0.6066926135882901, "grad_norm": 0.1621529459953308, "learning_rate": 0.001, "loss": 2.3497, "step": 14341 }, { "epoch": 0.6067349183518064, "grad_norm": 0.2335866242647171, "learning_rate": 0.001, "loss": 2.0897, "step": 14342 }, { "epoch": 0.6067772231153228, "grad_norm": 0.2002691626548767, "learning_rate": 0.001, "loss": 2.613, "step": 14343 }, { "epoch": 0.6068195278788392, "grad_norm": 13.141894340515137, "learning_rate": 0.001, "loss": 1.9083, "step": 14344 }, { "epoch": 0.6068618326423555, "grad_norm": 0.19172821938991547, "learning_rate": 0.001, "loss": 1.8666, "step": 14345 }, { "epoch": 0.6069041374058719, "grad_norm": 0.2590673267841339, "learning_rate": 0.001, "loss": 1.9539, "step": 14346 }, { "epoch": 0.6069464421693883, "grad_norm": 0.17630375921726227, "learning_rate": 0.001, "loss": 2.6305, "step": 14347 }, { "epoch": 0.6069887469329046, "grad_norm": 0.14956524968147278, "learning_rate": 0.001, "loss": 2.8753, "step": 14348 }, { "epoch": 0.607031051696421, "grad_norm": 0.1801750808954239, "learning_rate": 0.001, "loss": 1.9418, "step": 14349 }, { "epoch": 0.6070733564599374, "grad_norm": 0.1958351731300354, "learning_rate": 0.001, "loss": 3.2227, "step": 14350 }, { "epoch": 0.6071156612234537, "grad_norm": 0.24762704968452454, "learning_rate": 0.001, "loss": 2.6021, "step": 14351 }, { "epoch": 0.6071579659869701, "grad_norm": 0.19234824180603027, "learning_rate": 0.001, "loss": 3.2872, "step": 14352 }, { "epoch": 0.6072002707504865, "grad_norm": 0.1839028298854828, "learning_rate": 0.001, "loss": 2.8299, "step": 14353 }, { "epoch": 0.6072425755140028, "grad_norm": 0.18792131543159485, "learning_rate": 0.001, "loss": 1.9494, "step": 14354 }, { "epoch": 0.6072848802775193, "grad_norm": 0.14730499684810638, "learning_rate": 0.001, "loss": 1.4713, "step": 14355 }, { "epoch": 0.6073271850410357, "grad_norm": 0.5098087787628174, "learning_rate": 0.001, "loss": 1.9097, "step": 14356 }, { "epoch": 0.607369489804552, "grad_norm": 0.15701937675476074, "learning_rate": 0.001, "loss": 2.2055, "step": 14357 }, { "epoch": 0.6074117945680684, "grad_norm": 0.16516199707984924, "learning_rate": 0.001, "loss": 2.4988, "step": 14358 }, { "epoch": 0.6074540993315848, "grad_norm": 0.2014375776052475, "learning_rate": 0.001, "loss": 2.3594, "step": 14359 }, { "epoch": 0.6074964040951011, "grad_norm": 0.17397524416446686, "learning_rate": 0.001, "loss": 2.3691, "step": 14360 }, { "epoch": 0.6075387088586175, "grad_norm": 8.400336265563965, "learning_rate": 0.001, "loss": 3.4756, "step": 14361 }, { "epoch": 0.6075810136221339, "grad_norm": 0.18245413899421692, "learning_rate": 0.001, "loss": 1.7794, "step": 14362 }, { "epoch": 0.6076233183856502, "grad_norm": 0.19193901121616364, "learning_rate": 0.001, "loss": 1.7134, "step": 14363 }, { "epoch": 0.6076656231491666, "grad_norm": 0.19857852160930634, "learning_rate": 0.001, "loss": 2.7933, "step": 14364 }, { "epoch": 0.607707927912683, "grad_norm": 0.25610992312431335, "learning_rate": 0.001, "loss": 1.4993, "step": 14365 }, { "epoch": 0.6077502326761993, "grad_norm": 0.268586128950119, "learning_rate": 0.001, "loss": 3.1649, "step": 14366 }, { "epoch": 0.6077925374397157, "grad_norm": 0.30467700958251953, "learning_rate": 0.001, "loss": 2.0349, "step": 14367 }, { "epoch": 0.607834842203232, "grad_norm": 0.38377171754837036, "learning_rate": 0.001, "loss": 2.3225, "step": 14368 }, { "epoch": 0.6078771469667484, "grad_norm": 0.16598057746887207, "learning_rate": 0.001, "loss": 2.5853, "step": 14369 }, { "epoch": 0.6079194517302648, "grad_norm": 0.2181134968996048, "learning_rate": 0.001, "loss": 3.0605, "step": 14370 }, { "epoch": 0.6079617564937811, "grad_norm": 0.182630717754364, "learning_rate": 0.001, "loss": 1.6975, "step": 14371 }, { "epoch": 0.6080040612572976, "grad_norm": 0.7057227492332458, "learning_rate": 0.001, "loss": 2.5886, "step": 14372 }, { "epoch": 0.608046366020814, "grad_norm": 0.1978188157081604, "learning_rate": 0.001, "loss": 2.124, "step": 14373 }, { "epoch": 0.6080886707843303, "grad_norm": 0.2088436782360077, "learning_rate": 0.001, "loss": 3.0534, "step": 14374 }, { "epoch": 0.6081309755478467, "grad_norm": 0.15031234920024872, "learning_rate": 0.001, "loss": 2.396, "step": 14375 }, { "epoch": 0.6081732803113631, "grad_norm": 0.2140101194381714, "learning_rate": 0.001, "loss": 2.1995, "step": 14376 }, { "epoch": 0.6082155850748794, "grad_norm": 0.15244650840759277, "learning_rate": 0.001, "loss": 2.3434, "step": 14377 }, { "epoch": 0.6082578898383958, "grad_norm": 0.19588300585746765, "learning_rate": 0.001, "loss": 1.9308, "step": 14378 }, { "epoch": 0.6083001946019122, "grad_norm": 0.1862742006778717, "learning_rate": 0.001, "loss": 2.6186, "step": 14379 }, { "epoch": 0.6083424993654285, "grad_norm": 0.16094960272312164, "learning_rate": 0.001, "loss": 1.7264, "step": 14380 }, { "epoch": 0.6083848041289449, "grad_norm": 0.17328019440174103, "learning_rate": 0.001, "loss": 1.566, "step": 14381 }, { "epoch": 0.6084271088924613, "grad_norm": 0.15607021749019623, "learning_rate": 0.001, "loss": 2.6352, "step": 14382 }, { "epoch": 0.6084694136559776, "grad_norm": 0.15530943870544434, "learning_rate": 0.001, "loss": 2.0045, "step": 14383 }, { "epoch": 0.608511718419494, "grad_norm": 0.16822437942028046, "learning_rate": 0.001, "loss": 3.1795, "step": 14384 }, { "epoch": 0.6085540231830104, "grad_norm": 0.15865309536457062, "learning_rate": 0.001, "loss": 2.3704, "step": 14385 }, { "epoch": 0.6085963279465267, "grad_norm": 0.30900880694389343, "learning_rate": 0.001, "loss": 1.8696, "step": 14386 }, { "epoch": 0.6086386327100431, "grad_norm": 0.1799941211938858, "learning_rate": 0.001, "loss": 2.3696, "step": 14387 }, { "epoch": 0.6086809374735596, "grad_norm": 0.20973291993141174, "learning_rate": 0.001, "loss": 2.8554, "step": 14388 }, { "epoch": 0.6087232422370759, "grad_norm": 0.15292273461818695, "learning_rate": 0.001, "loss": 1.5335, "step": 14389 }, { "epoch": 0.6087655470005923, "grad_norm": 0.1537332534790039, "learning_rate": 0.001, "loss": 2.7656, "step": 14390 }, { "epoch": 0.6088078517641087, "grad_norm": 0.1706329882144928, "learning_rate": 0.001, "loss": 2.221, "step": 14391 }, { "epoch": 0.608850156527625, "grad_norm": 0.15366674959659576, "learning_rate": 0.001, "loss": 1.4947, "step": 14392 }, { "epoch": 0.6088924612911414, "grad_norm": 0.16146250069141388, "learning_rate": 0.001, "loss": 2.2607, "step": 14393 }, { "epoch": 0.6089347660546578, "grad_norm": 0.1696520894765854, "learning_rate": 0.001, "loss": 2.8822, "step": 14394 }, { "epoch": 0.6089770708181741, "grad_norm": 0.7845591306686401, "learning_rate": 0.001, "loss": 2.0901, "step": 14395 }, { "epoch": 0.6090193755816905, "grad_norm": 0.20241577923297882, "learning_rate": 0.001, "loss": 3.0825, "step": 14396 }, { "epoch": 0.6090616803452069, "grad_norm": 0.16126592457294464, "learning_rate": 0.001, "loss": 1.6057, "step": 14397 }, { "epoch": 0.6091039851087232, "grad_norm": 0.18037404119968414, "learning_rate": 0.001, "loss": 2.864, "step": 14398 }, { "epoch": 0.6091462898722396, "grad_norm": 0.1628550887107849, "learning_rate": 0.001, "loss": 3.1012, "step": 14399 }, { "epoch": 0.609188594635756, "grad_norm": 0.20835568010807037, "learning_rate": 0.001, "loss": 2.0582, "step": 14400 }, { "epoch": 0.6092308993992723, "grad_norm": 0.18450294435024261, "learning_rate": 0.001, "loss": 2.5508, "step": 14401 }, { "epoch": 0.6092732041627887, "grad_norm": 0.33053863048553467, "learning_rate": 0.001, "loss": 1.758, "step": 14402 }, { "epoch": 0.6093155089263051, "grad_norm": 1.6624572277069092, "learning_rate": 0.001, "loss": 2.8896, "step": 14403 }, { "epoch": 0.6093578136898214, "grad_norm": 0.14562225341796875, "learning_rate": 0.001, "loss": 1.592, "step": 14404 }, { "epoch": 0.6094001184533379, "grad_norm": 0.45143476128578186, "learning_rate": 0.001, "loss": 1.8523, "step": 14405 }, { "epoch": 0.6094424232168543, "grad_norm": 0.47654953598976135, "learning_rate": 0.001, "loss": 2.7751, "step": 14406 }, { "epoch": 0.6094847279803706, "grad_norm": 8.705418586730957, "learning_rate": 0.001, "loss": 1.8628, "step": 14407 }, { "epoch": 0.609527032743887, "grad_norm": 0.17469455301761627, "learning_rate": 0.001, "loss": 2.4936, "step": 14408 }, { "epoch": 0.6095693375074034, "grad_norm": 0.20742036402225494, "learning_rate": 0.001, "loss": 2.1476, "step": 14409 }, { "epoch": 0.6096116422709197, "grad_norm": 0.19691161811351776, "learning_rate": 0.001, "loss": 1.8967, "step": 14410 }, { "epoch": 0.6096539470344361, "grad_norm": 0.16114525496959686, "learning_rate": 0.001, "loss": 1.6115, "step": 14411 }, { "epoch": 0.6096962517979525, "grad_norm": 1.256961703300476, "learning_rate": 0.001, "loss": 2.0185, "step": 14412 }, { "epoch": 0.6097385565614688, "grad_norm": 0.16019225120544434, "learning_rate": 0.001, "loss": 1.7158, "step": 14413 }, { "epoch": 0.6097808613249852, "grad_norm": 0.23191338777542114, "learning_rate": 0.001, "loss": 2.2885, "step": 14414 }, { "epoch": 0.6098231660885015, "grad_norm": 0.17342033982276917, "learning_rate": 0.001, "loss": 1.9171, "step": 14415 }, { "epoch": 0.6098654708520179, "grad_norm": 0.14773350954055786, "learning_rate": 0.001, "loss": 2.0407, "step": 14416 }, { "epoch": 0.6099077756155343, "grad_norm": 0.23442687094211578, "learning_rate": 0.001, "loss": 2.2754, "step": 14417 }, { "epoch": 0.6099500803790506, "grad_norm": 0.8946742415428162, "learning_rate": 0.001, "loss": 2.0776, "step": 14418 }, { "epoch": 0.609992385142567, "grad_norm": 0.22962956130504608, "learning_rate": 0.001, "loss": 2.4009, "step": 14419 }, { "epoch": 0.6100346899060834, "grad_norm": 0.15502995252609253, "learning_rate": 0.001, "loss": 1.824, "step": 14420 }, { "epoch": 0.6100769946695997, "grad_norm": 0.15531884133815765, "learning_rate": 0.001, "loss": 1.7114, "step": 14421 }, { "epoch": 0.6101192994331162, "grad_norm": 0.18697033822536469, "learning_rate": 0.001, "loss": 1.8181, "step": 14422 }, { "epoch": 0.6101616041966326, "grad_norm": 0.14635103940963745, "learning_rate": 0.001, "loss": 2.2397, "step": 14423 }, { "epoch": 0.6102039089601489, "grad_norm": 0.15549708902835846, "learning_rate": 0.001, "loss": 2.2291, "step": 14424 }, { "epoch": 0.6102462137236653, "grad_norm": 0.15069544315338135, "learning_rate": 0.001, "loss": 2.1368, "step": 14425 }, { "epoch": 0.6102885184871817, "grad_norm": 0.15826652944087982, "learning_rate": 0.001, "loss": 2.4466, "step": 14426 }, { "epoch": 0.610330823250698, "grad_norm": 0.18682676553726196, "learning_rate": 0.001, "loss": 1.9024, "step": 14427 }, { "epoch": 0.6103731280142144, "grad_norm": 0.40561962127685547, "learning_rate": 0.001, "loss": 1.8962, "step": 14428 }, { "epoch": 0.6104154327777308, "grad_norm": 0.20110554993152618, "learning_rate": 0.001, "loss": 2.3748, "step": 14429 }, { "epoch": 0.6104577375412471, "grad_norm": 0.15364859998226166, "learning_rate": 0.001, "loss": 2.1927, "step": 14430 }, { "epoch": 0.6105000423047635, "grad_norm": 0.17697322368621826, "learning_rate": 0.001, "loss": 1.8882, "step": 14431 }, { "epoch": 0.6105423470682799, "grad_norm": 0.7132118344306946, "learning_rate": 0.001, "loss": 3.1219, "step": 14432 }, { "epoch": 0.6105846518317962, "grad_norm": 0.19553466141223907, "learning_rate": 0.001, "loss": 2.4858, "step": 14433 }, { "epoch": 0.6106269565953126, "grad_norm": 0.15867097675800323, "learning_rate": 0.001, "loss": 2.2074, "step": 14434 }, { "epoch": 0.610669261358829, "grad_norm": 0.8470575213432312, "learning_rate": 0.001, "loss": 3.0073, "step": 14435 }, { "epoch": 0.6107115661223453, "grad_norm": 0.16488543152809143, "learning_rate": 0.001, "loss": 1.6394, "step": 14436 }, { "epoch": 0.6107538708858617, "grad_norm": 0.2117576003074646, "learning_rate": 0.001, "loss": 2.5382, "step": 14437 }, { "epoch": 0.6107961756493782, "grad_norm": 0.2346462607383728, "learning_rate": 0.001, "loss": 1.9488, "step": 14438 }, { "epoch": 0.6108384804128945, "grad_norm": 1.9085814952850342, "learning_rate": 0.001, "loss": 2.3393, "step": 14439 }, { "epoch": 0.6108807851764109, "grad_norm": 0.15879637002944946, "learning_rate": 0.001, "loss": 2.3031, "step": 14440 }, { "epoch": 0.6109230899399273, "grad_norm": 0.13453783094882965, "learning_rate": 0.001, "loss": 1.7157, "step": 14441 }, { "epoch": 0.6109653947034436, "grad_norm": 0.3421229422092438, "learning_rate": 0.001, "loss": 2.4225, "step": 14442 }, { "epoch": 0.61100769946696, "grad_norm": 0.175466388463974, "learning_rate": 0.001, "loss": 2.3742, "step": 14443 }, { "epoch": 0.6110500042304764, "grad_norm": 5.125629901885986, "learning_rate": 0.001, "loss": 2.022, "step": 14444 }, { "epoch": 0.6110923089939927, "grad_norm": 0.15601593255996704, "learning_rate": 0.001, "loss": 3.9727, "step": 14445 }, { "epoch": 0.6111346137575091, "grad_norm": 0.16740910708904266, "learning_rate": 0.001, "loss": 2.8391, "step": 14446 }, { "epoch": 0.6111769185210255, "grad_norm": 0.18118330836296082, "learning_rate": 0.001, "loss": 2.154, "step": 14447 }, { "epoch": 0.6112192232845418, "grad_norm": 0.17362359166145325, "learning_rate": 0.001, "loss": 2.4111, "step": 14448 }, { "epoch": 0.6112615280480582, "grad_norm": 0.20023968815803528, "learning_rate": 0.001, "loss": 2.0913, "step": 14449 }, { "epoch": 0.6113038328115746, "grad_norm": 0.954468846321106, "learning_rate": 0.001, "loss": 2.5481, "step": 14450 }, { "epoch": 0.6113461375750909, "grad_norm": 0.2559969127178192, "learning_rate": 0.001, "loss": 2.5936, "step": 14451 }, { "epoch": 0.6113884423386073, "grad_norm": 0.1946243792772293, "learning_rate": 0.001, "loss": 2.4807, "step": 14452 }, { "epoch": 0.6114307471021238, "grad_norm": 0.30189049243927, "learning_rate": 0.001, "loss": 1.8112, "step": 14453 }, { "epoch": 0.61147305186564, "grad_norm": 0.20965829491615295, "learning_rate": 0.001, "loss": 2.1163, "step": 14454 }, { "epoch": 0.6115153566291565, "grad_norm": 0.2877630889415741, "learning_rate": 0.001, "loss": 1.9946, "step": 14455 }, { "epoch": 0.6115576613926729, "grad_norm": 0.23159538209438324, "learning_rate": 0.001, "loss": 2.35, "step": 14456 }, { "epoch": 0.6115999661561892, "grad_norm": 1.8251463174819946, "learning_rate": 0.001, "loss": 2.014, "step": 14457 }, { "epoch": 0.6116422709197056, "grad_norm": 0.2677466869354248, "learning_rate": 0.001, "loss": 2.1856, "step": 14458 }, { "epoch": 0.6116845756832219, "grad_norm": 0.291852742433548, "learning_rate": 0.001, "loss": 3.0096, "step": 14459 }, { "epoch": 0.6117268804467383, "grad_norm": 0.22112785279750824, "learning_rate": 0.001, "loss": 1.8697, "step": 14460 }, { "epoch": 0.6117691852102547, "grad_norm": 0.1902538239955902, "learning_rate": 0.001, "loss": 3.42, "step": 14461 }, { "epoch": 0.611811489973771, "grad_norm": 0.17315338551998138, "learning_rate": 0.001, "loss": 2.4311, "step": 14462 }, { "epoch": 0.6118537947372874, "grad_norm": 0.17455525696277618, "learning_rate": 0.001, "loss": 2.2537, "step": 14463 }, { "epoch": 0.6118960995008038, "grad_norm": 0.22085832059383392, "learning_rate": 0.001, "loss": 3.21, "step": 14464 }, { "epoch": 0.6119384042643201, "grad_norm": 0.16527900099754333, "learning_rate": 0.001, "loss": 1.7905, "step": 14465 }, { "epoch": 0.6119807090278365, "grad_norm": 0.18405689299106598, "learning_rate": 0.001, "loss": 2.2348, "step": 14466 }, { "epoch": 0.6120230137913529, "grad_norm": 0.19731445610523224, "learning_rate": 0.001, "loss": 2.0635, "step": 14467 }, { "epoch": 0.6120653185548692, "grad_norm": 0.20120853185653687, "learning_rate": 0.001, "loss": 3.1794, "step": 14468 }, { "epoch": 0.6121076233183856, "grad_norm": 0.18305185437202454, "learning_rate": 0.001, "loss": 1.4547, "step": 14469 }, { "epoch": 0.612149928081902, "grad_norm": 0.18967366218566895, "learning_rate": 0.001, "loss": 1.6065, "step": 14470 }, { "epoch": 0.6121922328454183, "grad_norm": 0.19936278462409973, "learning_rate": 0.001, "loss": 2.2255, "step": 14471 }, { "epoch": 0.6122345376089348, "grad_norm": 0.17981190979480743, "learning_rate": 0.001, "loss": 1.6158, "step": 14472 }, { "epoch": 0.6122768423724512, "grad_norm": 0.24982218444347382, "learning_rate": 0.001, "loss": 2.276, "step": 14473 }, { "epoch": 0.6123191471359675, "grad_norm": 0.2008954882621765, "learning_rate": 0.001, "loss": 1.9837, "step": 14474 }, { "epoch": 0.6123614518994839, "grad_norm": 0.1441406011581421, "learning_rate": 0.001, "loss": 2.0582, "step": 14475 }, { "epoch": 0.6124037566630003, "grad_norm": 0.6654687523841858, "learning_rate": 0.001, "loss": 1.4793, "step": 14476 }, { "epoch": 0.6124460614265166, "grad_norm": 0.17162881791591644, "learning_rate": 0.001, "loss": 2.0025, "step": 14477 }, { "epoch": 0.612488366190033, "grad_norm": 0.1826455295085907, "learning_rate": 0.001, "loss": 1.9079, "step": 14478 }, { "epoch": 0.6125306709535494, "grad_norm": 2.3016321659088135, "learning_rate": 0.001, "loss": 3.0657, "step": 14479 }, { "epoch": 0.6125729757170657, "grad_norm": 0.24080024659633636, "learning_rate": 0.001, "loss": 2.9867, "step": 14480 }, { "epoch": 0.6126152804805821, "grad_norm": 0.2750950753688812, "learning_rate": 0.001, "loss": 1.9209, "step": 14481 }, { "epoch": 0.6126575852440985, "grad_norm": 0.22558815777301788, "learning_rate": 0.001, "loss": 2.6685, "step": 14482 }, { "epoch": 0.6126998900076148, "grad_norm": 0.1902596652507782, "learning_rate": 0.001, "loss": 2.2843, "step": 14483 }, { "epoch": 0.6127421947711312, "grad_norm": 0.18809862434864044, "learning_rate": 0.001, "loss": 2.212, "step": 14484 }, { "epoch": 0.6127844995346476, "grad_norm": 0.1640164852142334, "learning_rate": 0.001, "loss": 2.0967, "step": 14485 }, { "epoch": 0.6128268042981639, "grad_norm": 0.1704949587583542, "learning_rate": 0.001, "loss": 2.6479, "step": 14486 }, { "epoch": 0.6128691090616804, "grad_norm": 2.7232117652893066, "learning_rate": 0.001, "loss": 1.8047, "step": 14487 }, { "epoch": 0.6129114138251968, "grad_norm": 0.16258461773395538, "learning_rate": 0.001, "loss": 2.556, "step": 14488 }, { "epoch": 0.6129537185887131, "grad_norm": 0.22606851160526276, "learning_rate": 0.001, "loss": 1.8643, "step": 14489 }, { "epoch": 0.6129960233522295, "grad_norm": 0.39778846502304077, "learning_rate": 0.001, "loss": 2.8814, "step": 14490 }, { "epoch": 0.6130383281157459, "grad_norm": 0.1924993395805359, "learning_rate": 0.001, "loss": 2.1945, "step": 14491 }, { "epoch": 0.6130806328792622, "grad_norm": 0.1684507429599762, "learning_rate": 0.001, "loss": 2.0614, "step": 14492 }, { "epoch": 0.6131229376427786, "grad_norm": 0.14290301501750946, "learning_rate": 0.001, "loss": 1.9804, "step": 14493 }, { "epoch": 0.613165242406295, "grad_norm": 0.4323841333389282, "learning_rate": 0.001, "loss": 1.4766, "step": 14494 }, { "epoch": 0.6132075471698113, "grad_norm": 0.15192975103855133, "learning_rate": 0.001, "loss": 1.5979, "step": 14495 }, { "epoch": 0.6132498519333277, "grad_norm": 0.23368826508522034, "learning_rate": 0.001, "loss": 3.5542, "step": 14496 }, { "epoch": 0.6132921566968441, "grad_norm": 0.2155388742685318, "learning_rate": 0.001, "loss": 2.2724, "step": 14497 }, { "epoch": 0.6133344614603604, "grad_norm": 0.15829609334468842, "learning_rate": 0.001, "loss": 3.0361, "step": 14498 }, { "epoch": 0.6133767662238768, "grad_norm": 0.16065742075443268, "learning_rate": 0.001, "loss": 1.3002, "step": 14499 }, { "epoch": 0.6134190709873932, "grad_norm": 1.1077728271484375, "learning_rate": 0.001, "loss": 2.6874, "step": 14500 }, { "epoch": 0.6134613757509095, "grad_norm": 0.27375224232673645, "learning_rate": 0.001, "loss": 2.4451, "step": 14501 }, { "epoch": 0.6135036805144259, "grad_norm": 0.14725899696350098, "learning_rate": 0.001, "loss": 2.2295, "step": 14502 }, { "epoch": 0.6135459852779424, "grad_norm": 0.15817123651504517, "learning_rate": 0.001, "loss": 2.0776, "step": 14503 }, { "epoch": 0.6135882900414587, "grad_norm": 0.16688252985477448, "learning_rate": 0.001, "loss": 1.6458, "step": 14504 }, { "epoch": 0.6136305948049751, "grad_norm": 0.1742338240146637, "learning_rate": 0.001, "loss": 2.1804, "step": 14505 }, { "epoch": 0.6136728995684914, "grad_norm": 0.17663906514644623, "learning_rate": 0.001, "loss": 2.1758, "step": 14506 }, { "epoch": 0.6137152043320078, "grad_norm": 0.19644849002361298, "learning_rate": 0.001, "loss": 2.0111, "step": 14507 }, { "epoch": 0.6137575090955242, "grad_norm": 0.2182653397321701, "learning_rate": 0.001, "loss": 2.7332, "step": 14508 }, { "epoch": 0.6137998138590405, "grad_norm": 1.093945026397705, "learning_rate": 0.001, "loss": 2.3192, "step": 14509 }, { "epoch": 0.6138421186225569, "grad_norm": 0.20383012294769287, "learning_rate": 0.001, "loss": 2.4847, "step": 14510 }, { "epoch": 0.6138844233860733, "grad_norm": 0.1353861391544342, "learning_rate": 0.001, "loss": 1.9635, "step": 14511 }, { "epoch": 0.6139267281495896, "grad_norm": 0.2178875207901001, "learning_rate": 0.001, "loss": 1.7267, "step": 14512 }, { "epoch": 0.613969032913106, "grad_norm": 0.4006967544555664, "learning_rate": 0.001, "loss": 1.8074, "step": 14513 }, { "epoch": 0.6140113376766224, "grad_norm": 0.16235609352588654, "learning_rate": 0.001, "loss": 3.2147, "step": 14514 }, { "epoch": 0.6140536424401387, "grad_norm": 0.21571922302246094, "learning_rate": 0.001, "loss": 2.0414, "step": 14515 }, { "epoch": 0.6140959472036551, "grad_norm": 2.157762289047241, "learning_rate": 0.001, "loss": 2.4857, "step": 14516 }, { "epoch": 0.6141382519671715, "grad_norm": 0.20253095030784607, "learning_rate": 0.001, "loss": 1.9104, "step": 14517 }, { "epoch": 0.6141805567306878, "grad_norm": 0.14087221026420593, "learning_rate": 0.001, "loss": 1.518, "step": 14518 }, { "epoch": 0.6142228614942042, "grad_norm": 0.2999531924724579, "learning_rate": 0.001, "loss": 2.6567, "step": 14519 }, { "epoch": 0.6142651662577207, "grad_norm": 3.447448253631592, "learning_rate": 0.001, "loss": 1.865, "step": 14520 }, { "epoch": 0.614307471021237, "grad_norm": 0.217742919921875, "learning_rate": 0.001, "loss": 2.1669, "step": 14521 }, { "epoch": 0.6143497757847534, "grad_norm": 0.22795531153678894, "learning_rate": 0.001, "loss": 2.0143, "step": 14522 }, { "epoch": 0.6143920805482698, "grad_norm": 0.15495827794075012, "learning_rate": 0.001, "loss": 1.879, "step": 14523 }, { "epoch": 0.6144343853117861, "grad_norm": 0.1813746690750122, "learning_rate": 0.001, "loss": 3.0125, "step": 14524 }, { "epoch": 0.6144766900753025, "grad_norm": 0.33635345101356506, "learning_rate": 0.001, "loss": 2.0765, "step": 14525 }, { "epoch": 0.6145189948388189, "grad_norm": 0.2094181776046753, "learning_rate": 0.001, "loss": 2.7443, "step": 14526 }, { "epoch": 0.6145612996023352, "grad_norm": 0.18319903314113617, "learning_rate": 0.001, "loss": 1.7433, "step": 14527 }, { "epoch": 0.6146036043658516, "grad_norm": 0.25899675488471985, "learning_rate": 0.001, "loss": 1.8407, "step": 14528 }, { "epoch": 0.614645909129368, "grad_norm": 0.17044375836849213, "learning_rate": 0.001, "loss": 3.0131, "step": 14529 }, { "epoch": 0.6146882138928843, "grad_norm": 0.19204241037368774, "learning_rate": 0.001, "loss": 1.6593, "step": 14530 }, { "epoch": 0.6147305186564007, "grad_norm": 0.15827490389347076, "learning_rate": 0.001, "loss": 1.7696, "step": 14531 }, { "epoch": 0.6147728234199171, "grad_norm": 0.16154128313064575, "learning_rate": 0.001, "loss": 2.025, "step": 14532 }, { "epoch": 0.6148151281834334, "grad_norm": 0.18466542661190033, "learning_rate": 0.001, "loss": 1.822, "step": 14533 }, { "epoch": 0.6148574329469498, "grad_norm": 0.15772341191768646, "learning_rate": 0.001, "loss": 2.458, "step": 14534 }, { "epoch": 0.6148997377104662, "grad_norm": 0.2001129388809204, "learning_rate": 0.001, "loss": 2.7717, "step": 14535 }, { "epoch": 0.6149420424739825, "grad_norm": 0.49999427795410156, "learning_rate": 0.001, "loss": 1.7162, "step": 14536 }, { "epoch": 0.614984347237499, "grad_norm": 0.18692375719547272, "learning_rate": 0.001, "loss": 2.1668, "step": 14537 }, { "epoch": 0.6150266520010154, "grad_norm": 0.43777942657470703, "learning_rate": 0.001, "loss": 1.8468, "step": 14538 }, { "epoch": 0.6150689567645317, "grad_norm": 0.18012431263923645, "learning_rate": 0.001, "loss": 2.0141, "step": 14539 }, { "epoch": 0.6151112615280481, "grad_norm": 0.17725875973701477, "learning_rate": 0.001, "loss": 1.9882, "step": 14540 }, { "epoch": 0.6151535662915645, "grad_norm": 0.19504068791866302, "learning_rate": 0.001, "loss": 2.7564, "step": 14541 }, { "epoch": 0.6151958710550808, "grad_norm": 0.18181535601615906, "learning_rate": 0.001, "loss": 1.6593, "step": 14542 }, { "epoch": 0.6152381758185972, "grad_norm": 0.1605805605649948, "learning_rate": 0.001, "loss": 2.4811, "step": 14543 }, { "epoch": 0.6152804805821136, "grad_norm": 0.1837165355682373, "learning_rate": 0.001, "loss": 2.5541, "step": 14544 }, { "epoch": 0.6153227853456299, "grad_norm": 0.1661083698272705, "learning_rate": 0.001, "loss": 2.745, "step": 14545 }, { "epoch": 0.6153650901091463, "grad_norm": 0.1631242036819458, "learning_rate": 0.001, "loss": 3.1111, "step": 14546 }, { "epoch": 0.6154073948726627, "grad_norm": 0.1594124138355255, "learning_rate": 0.001, "loss": 2.1363, "step": 14547 }, { "epoch": 0.615449699636179, "grad_norm": 0.1624990850687027, "learning_rate": 0.001, "loss": 1.9491, "step": 14548 }, { "epoch": 0.6154920043996954, "grad_norm": 0.15371422469615936, "learning_rate": 0.001, "loss": 1.8865, "step": 14549 }, { "epoch": 0.6155343091632117, "grad_norm": 0.1542615443468094, "learning_rate": 0.001, "loss": 1.8565, "step": 14550 }, { "epoch": 0.6155766139267281, "grad_norm": 0.18403269350528717, "learning_rate": 0.001, "loss": 1.5119, "step": 14551 }, { "epoch": 0.6156189186902445, "grad_norm": 0.19757772982120514, "learning_rate": 0.001, "loss": 2.071, "step": 14552 }, { "epoch": 0.6156612234537608, "grad_norm": 0.21193090081214905, "learning_rate": 0.001, "loss": 1.5183, "step": 14553 }, { "epoch": 0.6157035282172773, "grad_norm": 0.15698926150798798, "learning_rate": 0.001, "loss": 1.9242, "step": 14554 }, { "epoch": 0.6157458329807937, "grad_norm": 0.18835414946079254, "learning_rate": 0.001, "loss": 2.5247, "step": 14555 }, { "epoch": 0.61578813774431, "grad_norm": 0.1583268940448761, "learning_rate": 0.001, "loss": 1.9729, "step": 14556 }, { "epoch": 0.6158304425078264, "grad_norm": 0.16368068754673004, "learning_rate": 0.001, "loss": 2.4374, "step": 14557 }, { "epoch": 0.6158727472713428, "grad_norm": 0.1612701714038849, "learning_rate": 0.001, "loss": 1.6765, "step": 14558 }, { "epoch": 0.6159150520348591, "grad_norm": 0.14190715551376343, "learning_rate": 0.001, "loss": 1.9572, "step": 14559 }, { "epoch": 0.6159573567983755, "grad_norm": 0.15973453223705292, "learning_rate": 0.001, "loss": 2.3115, "step": 14560 }, { "epoch": 0.6159996615618919, "grad_norm": 0.15088126063346863, "learning_rate": 0.001, "loss": 2.3368, "step": 14561 }, { "epoch": 0.6160419663254082, "grad_norm": 0.1798139065504074, "learning_rate": 0.001, "loss": 2.6461, "step": 14562 }, { "epoch": 0.6160842710889246, "grad_norm": 2.4017934799194336, "learning_rate": 0.001, "loss": 2.928, "step": 14563 }, { "epoch": 0.616126575852441, "grad_norm": 0.14553676545619965, "learning_rate": 0.001, "loss": 1.5844, "step": 14564 }, { "epoch": 0.6161688806159573, "grad_norm": 0.18981827795505524, "learning_rate": 0.001, "loss": 1.7026, "step": 14565 }, { "epoch": 0.6162111853794737, "grad_norm": 0.1583603173494339, "learning_rate": 0.001, "loss": 1.93, "step": 14566 }, { "epoch": 0.6162534901429901, "grad_norm": 0.19451354444026947, "learning_rate": 0.001, "loss": 2.2567, "step": 14567 }, { "epoch": 0.6162957949065064, "grad_norm": 0.18572446703910828, "learning_rate": 0.001, "loss": 3.772, "step": 14568 }, { "epoch": 0.6163380996700228, "grad_norm": 0.16187942028045654, "learning_rate": 0.001, "loss": 2.2992, "step": 14569 }, { "epoch": 0.6163804044335393, "grad_norm": 0.1688256412744522, "learning_rate": 0.001, "loss": 1.6916, "step": 14570 }, { "epoch": 0.6164227091970556, "grad_norm": 0.16254852712154388, "learning_rate": 0.001, "loss": 1.4337, "step": 14571 }, { "epoch": 0.616465013960572, "grad_norm": 0.14653462171554565, "learning_rate": 0.001, "loss": 2.2641, "step": 14572 }, { "epoch": 0.6165073187240884, "grad_norm": 0.18988440930843353, "learning_rate": 0.001, "loss": 2.2461, "step": 14573 }, { "epoch": 0.6165496234876047, "grad_norm": 0.19180841743946075, "learning_rate": 0.001, "loss": 2.1642, "step": 14574 }, { "epoch": 0.6165919282511211, "grad_norm": 0.17922373116016388, "learning_rate": 0.001, "loss": 2.0952, "step": 14575 }, { "epoch": 0.6166342330146375, "grad_norm": 0.17525149881839752, "learning_rate": 0.001, "loss": 2.2221, "step": 14576 }, { "epoch": 0.6166765377781538, "grad_norm": 0.18129895627498627, "learning_rate": 0.001, "loss": 2.5061, "step": 14577 }, { "epoch": 0.6167188425416702, "grad_norm": 0.3474824130535126, "learning_rate": 0.001, "loss": 2.2037, "step": 14578 }, { "epoch": 0.6167611473051866, "grad_norm": 0.2225697934627533, "learning_rate": 0.001, "loss": 2.9076, "step": 14579 }, { "epoch": 0.6168034520687029, "grad_norm": 0.15336523950099945, "learning_rate": 0.001, "loss": 2.0626, "step": 14580 }, { "epoch": 0.6168457568322193, "grad_norm": 0.22657287120819092, "learning_rate": 0.001, "loss": 1.8477, "step": 14581 }, { "epoch": 0.6168880615957357, "grad_norm": 0.19697429239749908, "learning_rate": 0.001, "loss": 2.6378, "step": 14582 }, { "epoch": 0.616930366359252, "grad_norm": 0.783888041973114, "learning_rate": 0.001, "loss": 2.097, "step": 14583 }, { "epoch": 0.6169726711227684, "grad_norm": 0.2075197994709015, "learning_rate": 0.001, "loss": 2.4211, "step": 14584 }, { "epoch": 0.6170149758862848, "grad_norm": 0.1552828848361969, "learning_rate": 0.001, "loss": 2.6256, "step": 14585 }, { "epoch": 0.6170572806498011, "grad_norm": 0.1605806201696396, "learning_rate": 0.001, "loss": 1.842, "step": 14586 }, { "epoch": 0.6170995854133176, "grad_norm": 0.18926933407783508, "learning_rate": 0.001, "loss": 2.7009, "step": 14587 }, { "epoch": 0.617141890176834, "grad_norm": 0.17639969289302826, "learning_rate": 0.001, "loss": 2.0845, "step": 14588 }, { "epoch": 0.6171841949403503, "grad_norm": 0.16132144629955292, "learning_rate": 0.001, "loss": 2.5827, "step": 14589 }, { "epoch": 0.6172264997038667, "grad_norm": 0.1768246591091156, "learning_rate": 0.001, "loss": 2.6096, "step": 14590 }, { "epoch": 0.6172688044673831, "grad_norm": 0.1509474366903305, "learning_rate": 0.001, "loss": 3.6571, "step": 14591 }, { "epoch": 0.6173111092308994, "grad_norm": 0.17288728058338165, "learning_rate": 0.001, "loss": 2.6354, "step": 14592 }, { "epoch": 0.6173534139944158, "grad_norm": 0.1312096118927002, "learning_rate": 0.001, "loss": 2.0085, "step": 14593 }, { "epoch": 0.6173957187579321, "grad_norm": 0.2396790236234665, "learning_rate": 0.001, "loss": 2.3952, "step": 14594 }, { "epoch": 0.6174380235214485, "grad_norm": 0.1728651076555252, "learning_rate": 0.001, "loss": 1.4488, "step": 14595 }, { "epoch": 0.6174803282849649, "grad_norm": 0.5452121496200562, "learning_rate": 0.001, "loss": 1.9965, "step": 14596 }, { "epoch": 0.6175226330484812, "grad_norm": 0.8020671010017395, "learning_rate": 0.001, "loss": 1.8593, "step": 14597 }, { "epoch": 0.6175649378119976, "grad_norm": 0.15915119647979736, "learning_rate": 0.001, "loss": 2.0414, "step": 14598 }, { "epoch": 0.617607242575514, "grad_norm": 0.13803361356258392, "learning_rate": 0.001, "loss": 2.0599, "step": 14599 }, { "epoch": 0.6176495473390303, "grad_norm": 0.22241367399692535, "learning_rate": 0.001, "loss": 2.8024, "step": 14600 }, { "epoch": 0.6176918521025467, "grad_norm": 1.1794174909591675, "learning_rate": 0.001, "loss": 2.3638, "step": 14601 }, { "epoch": 0.6177341568660631, "grad_norm": 0.20139962434768677, "learning_rate": 0.001, "loss": 2.7807, "step": 14602 }, { "epoch": 0.6177764616295794, "grad_norm": 0.19287584722042084, "learning_rate": 0.001, "loss": 2.6512, "step": 14603 }, { "epoch": 0.6178187663930959, "grad_norm": 0.18711356818675995, "learning_rate": 0.001, "loss": 2.3137, "step": 14604 }, { "epoch": 0.6178610711566123, "grad_norm": 0.18138305842876434, "learning_rate": 0.001, "loss": 2.7517, "step": 14605 }, { "epoch": 0.6179033759201286, "grad_norm": 0.1796899139881134, "learning_rate": 0.001, "loss": 2.9958, "step": 14606 }, { "epoch": 0.617945680683645, "grad_norm": 0.20048393309116364, "learning_rate": 0.001, "loss": 3.1626, "step": 14607 }, { "epoch": 0.6179879854471614, "grad_norm": 0.1451716423034668, "learning_rate": 0.001, "loss": 2.6609, "step": 14608 }, { "epoch": 0.6180302902106777, "grad_norm": 0.4020373225212097, "learning_rate": 0.001, "loss": 2.3252, "step": 14609 }, { "epoch": 0.6180725949741941, "grad_norm": 0.2272024154663086, "learning_rate": 0.001, "loss": 2.3946, "step": 14610 }, { "epoch": 0.6181148997377105, "grad_norm": 0.24336324632167816, "learning_rate": 0.001, "loss": 2.0187, "step": 14611 }, { "epoch": 0.6181572045012268, "grad_norm": 3.6941826343536377, "learning_rate": 0.001, "loss": 1.9722, "step": 14612 }, { "epoch": 0.6181995092647432, "grad_norm": 0.1938718855381012, "learning_rate": 0.001, "loss": 2.0278, "step": 14613 }, { "epoch": 0.6182418140282596, "grad_norm": 0.6641799211502075, "learning_rate": 0.001, "loss": 2.0153, "step": 14614 }, { "epoch": 0.6182841187917759, "grad_norm": 0.4109531342983246, "learning_rate": 0.001, "loss": 1.9137, "step": 14615 }, { "epoch": 0.6183264235552923, "grad_norm": 0.14402462542057037, "learning_rate": 0.001, "loss": 2.4357, "step": 14616 }, { "epoch": 0.6183687283188087, "grad_norm": 0.14747104048728943, "learning_rate": 0.001, "loss": 2.7109, "step": 14617 }, { "epoch": 0.618411033082325, "grad_norm": 0.14934441447257996, "learning_rate": 0.001, "loss": 1.6499, "step": 14618 }, { "epoch": 0.6184533378458414, "grad_norm": 0.3803541362285614, "learning_rate": 0.001, "loss": 2.4184, "step": 14619 }, { "epoch": 0.6184956426093579, "grad_norm": 0.18346597254276276, "learning_rate": 0.001, "loss": 2.0202, "step": 14620 }, { "epoch": 0.6185379473728742, "grad_norm": 2.4871697425842285, "learning_rate": 0.001, "loss": 1.9789, "step": 14621 }, { "epoch": 0.6185802521363906, "grad_norm": 0.15989036858081818, "learning_rate": 0.001, "loss": 1.9835, "step": 14622 }, { "epoch": 0.618622556899907, "grad_norm": 0.17353324592113495, "learning_rate": 0.001, "loss": 1.9959, "step": 14623 }, { "epoch": 0.6186648616634233, "grad_norm": 0.18269836902618408, "learning_rate": 0.001, "loss": 2.9858, "step": 14624 }, { "epoch": 0.6187071664269397, "grad_norm": 0.19032901525497437, "learning_rate": 0.001, "loss": 3.0415, "step": 14625 }, { "epoch": 0.6187494711904561, "grad_norm": 0.17497460544109344, "learning_rate": 0.001, "loss": 2.0095, "step": 14626 }, { "epoch": 0.6187917759539724, "grad_norm": 0.14147375524044037, "learning_rate": 0.001, "loss": 1.6253, "step": 14627 }, { "epoch": 0.6188340807174888, "grad_norm": 0.1689339280128479, "learning_rate": 0.001, "loss": 2.3544, "step": 14628 }, { "epoch": 0.6188763854810052, "grad_norm": 0.14351142942905426, "learning_rate": 0.001, "loss": 2.6217, "step": 14629 }, { "epoch": 0.6189186902445215, "grad_norm": 0.30054372549057007, "learning_rate": 0.001, "loss": 2.5242, "step": 14630 }, { "epoch": 0.6189609950080379, "grad_norm": 0.1856541931629181, "learning_rate": 0.001, "loss": 2.0127, "step": 14631 }, { "epoch": 0.6190032997715543, "grad_norm": 0.2573411762714386, "learning_rate": 0.001, "loss": 2.2372, "step": 14632 }, { "epoch": 0.6190456045350706, "grad_norm": 0.1990809440612793, "learning_rate": 0.001, "loss": 2.2482, "step": 14633 }, { "epoch": 0.619087909298587, "grad_norm": 0.1465701162815094, "learning_rate": 0.001, "loss": 1.8241, "step": 14634 }, { "epoch": 0.6191302140621034, "grad_norm": 0.1555158942937851, "learning_rate": 0.001, "loss": 2.1623, "step": 14635 }, { "epoch": 0.6191725188256197, "grad_norm": 0.14374671876430511, "learning_rate": 0.001, "loss": 2.4611, "step": 14636 }, { "epoch": 0.6192148235891362, "grad_norm": 0.19940204918384552, "learning_rate": 0.001, "loss": 2.1044, "step": 14637 }, { "epoch": 0.6192571283526526, "grad_norm": 0.14661625027656555, "learning_rate": 0.001, "loss": 1.6882, "step": 14638 }, { "epoch": 0.6192994331161689, "grad_norm": 0.17937296628952026, "learning_rate": 0.001, "loss": 2.2717, "step": 14639 }, { "epoch": 0.6193417378796853, "grad_norm": 0.15153121948242188, "learning_rate": 0.001, "loss": 1.7106, "step": 14640 }, { "epoch": 0.6193840426432016, "grad_norm": 0.1418657898902893, "learning_rate": 0.001, "loss": 1.6178, "step": 14641 }, { "epoch": 0.619426347406718, "grad_norm": 0.19973745942115784, "learning_rate": 0.001, "loss": 1.786, "step": 14642 }, { "epoch": 0.6194686521702344, "grad_norm": 0.16791045665740967, "learning_rate": 0.001, "loss": 1.7425, "step": 14643 }, { "epoch": 0.6195109569337507, "grad_norm": 1.9201226234436035, "learning_rate": 0.001, "loss": 3.1445, "step": 14644 }, { "epoch": 0.6195532616972671, "grad_norm": 0.1937858760356903, "learning_rate": 0.001, "loss": 3.0628, "step": 14645 }, { "epoch": 0.6195955664607835, "grad_norm": 0.8908295631408691, "learning_rate": 0.001, "loss": 1.4817, "step": 14646 }, { "epoch": 0.6196378712242998, "grad_norm": 0.18158625066280365, "learning_rate": 0.001, "loss": 2.1148, "step": 14647 }, { "epoch": 0.6196801759878162, "grad_norm": 0.18421104550361633, "learning_rate": 0.001, "loss": 1.7709, "step": 14648 }, { "epoch": 0.6197224807513326, "grad_norm": 0.19580382108688354, "learning_rate": 0.001, "loss": 2.2813, "step": 14649 }, { "epoch": 0.6197647855148489, "grad_norm": 0.23519310355186462, "learning_rate": 0.001, "loss": 1.6237, "step": 14650 }, { "epoch": 0.6198070902783653, "grad_norm": 0.27454647421836853, "learning_rate": 0.001, "loss": 2.3794, "step": 14651 }, { "epoch": 0.6198493950418817, "grad_norm": 0.23078219592571259, "learning_rate": 0.001, "loss": 2.2003, "step": 14652 }, { "epoch": 0.619891699805398, "grad_norm": 2.1767003536224365, "learning_rate": 0.001, "loss": 2.1137, "step": 14653 }, { "epoch": 0.6199340045689145, "grad_norm": 0.7693164944648743, "learning_rate": 0.001, "loss": 1.6826, "step": 14654 }, { "epoch": 0.6199763093324309, "grad_norm": 0.18725281953811646, "learning_rate": 0.001, "loss": 1.6722, "step": 14655 }, { "epoch": 0.6200186140959472, "grad_norm": 0.17474079132080078, "learning_rate": 0.001, "loss": 1.6512, "step": 14656 }, { "epoch": 0.6200609188594636, "grad_norm": 0.2864467203617096, "learning_rate": 0.001, "loss": 1.657, "step": 14657 }, { "epoch": 0.62010322362298, "grad_norm": 0.16446319222450256, "learning_rate": 0.001, "loss": 2.22, "step": 14658 }, { "epoch": 0.6201455283864963, "grad_norm": 0.21445626020431519, "learning_rate": 0.001, "loss": 2.0906, "step": 14659 }, { "epoch": 0.6201878331500127, "grad_norm": 0.18971355259418488, "learning_rate": 0.001, "loss": 1.6979, "step": 14660 }, { "epoch": 0.6202301379135291, "grad_norm": 0.1920858919620514, "learning_rate": 0.001, "loss": 2.9302, "step": 14661 }, { "epoch": 0.6202724426770454, "grad_norm": 2.1202192306518555, "learning_rate": 0.001, "loss": 2.9636, "step": 14662 }, { "epoch": 0.6203147474405618, "grad_norm": 0.1833968460559845, "learning_rate": 0.001, "loss": 2.9163, "step": 14663 }, { "epoch": 0.6203570522040782, "grad_norm": 1.7556793689727783, "learning_rate": 0.001, "loss": 2.1679, "step": 14664 }, { "epoch": 0.6203993569675945, "grad_norm": 0.1881345808506012, "learning_rate": 0.001, "loss": 1.4928, "step": 14665 }, { "epoch": 0.6204416617311109, "grad_norm": 0.23631784319877625, "learning_rate": 0.001, "loss": 2.231, "step": 14666 }, { "epoch": 0.6204839664946273, "grad_norm": 0.23086075484752655, "learning_rate": 0.001, "loss": 2.1282, "step": 14667 }, { "epoch": 0.6205262712581436, "grad_norm": 0.23201145231723785, "learning_rate": 0.001, "loss": 2.5473, "step": 14668 }, { "epoch": 0.62056857602166, "grad_norm": 0.20039266347885132, "learning_rate": 0.001, "loss": 2.1767, "step": 14669 }, { "epoch": 0.6206108807851765, "grad_norm": 0.2201448231935501, "learning_rate": 0.001, "loss": 2.3714, "step": 14670 }, { "epoch": 0.6206531855486928, "grad_norm": 0.25646641850471497, "learning_rate": 0.001, "loss": 2.2821, "step": 14671 }, { "epoch": 0.6206954903122092, "grad_norm": 0.2379496693611145, "learning_rate": 0.001, "loss": 2.428, "step": 14672 }, { "epoch": 0.6207377950757256, "grad_norm": 0.6765031218528748, "learning_rate": 0.001, "loss": 2.5439, "step": 14673 }, { "epoch": 0.6207800998392419, "grad_norm": 0.23694269359111786, "learning_rate": 0.001, "loss": 3.0224, "step": 14674 }, { "epoch": 0.6208224046027583, "grad_norm": 0.22863154113292694, "learning_rate": 0.001, "loss": 2.0422, "step": 14675 }, { "epoch": 0.6208647093662747, "grad_norm": 0.2052396535873413, "learning_rate": 0.001, "loss": 2.1184, "step": 14676 }, { "epoch": 0.620907014129791, "grad_norm": 0.42548587918281555, "learning_rate": 0.001, "loss": 2.3153, "step": 14677 }, { "epoch": 0.6209493188933074, "grad_norm": 0.20352311432361603, "learning_rate": 0.001, "loss": 2.7797, "step": 14678 }, { "epoch": 0.6209916236568238, "grad_norm": 1.041845440864563, "learning_rate": 0.001, "loss": 1.669, "step": 14679 }, { "epoch": 0.6210339284203401, "grad_norm": 0.19072070717811584, "learning_rate": 0.001, "loss": 2.6243, "step": 14680 }, { "epoch": 0.6210762331838565, "grad_norm": 0.17468595504760742, "learning_rate": 0.001, "loss": 2.0063, "step": 14681 }, { "epoch": 0.6211185379473729, "grad_norm": 0.18021397292613983, "learning_rate": 0.001, "loss": 1.8637, "step": 14682 }, { "epoch": 0.6211608427108892, "grad_norm": 0.1784118413925171, "learning_rate": 0.001, "loss": 2.5515, "step": 14683 }, { "epoch": 0.6212031474744056, "grad_norm": 0.17213745415210724, "learning_rate": 0.001, "loss": 1.9208, "step": 14684 }, { "epoch": 0.6212454522379219, "grad_norm": 0.16187311708927155, "learning_rate": 0.001, "loss": 2.6264, "step": 14685 }, { "epoch": 0.6212877570014383, "grad_norm": 0.1690584123134613, "learning_rate": 0.001, "loss": 2.0176, "step": 14686 }, { "epoch": 0.6213300617649548, "grad_norm": 0.22212959825992584, "learning_rate": 0.001, "loss": 2.0443, "step": 14687 }, { "epoch": 0.621372366528471, "grad_norm": 0.19889722764492035, "learning_rate": 0.001, "loss": 2.108, "step": 14688 }, { "epoch": 0.6214146712919875, "grad_norm": 0.20588736236095428, "learning_rate": 0.001, "loss": 2.604, "step": 14689 }, { "epoch": 0.6214569760555039, "grad_norm": 0.15710745751857758, "learning_rate": 0.001, "loss": 1.642, "step": 14690 }, { "epoch": 0.6214992808190202, "grad_norm": 1.2365772724151611, "learning_rate": 0.001, "loss": 1.5983, "step": 14691 }, { "epoch": 0.6215415855825366, "grad_norm": 0.1592063158750534, "learning_rate": 0.001, "loss": 1.7246, "step": 14692 }, { "epoch": 0.621583890346053, "grad_norm": 0.2230633795261383, "learning_rate": 0.001, "loss": 2.5722, "step": 14693 }, { "epoch": 0.6216261951095693, "grad_norm": 0.17436636984348297, "learning_rate": 0.001, "loss": 1.6434, "step": 14694 }, { "epoch": 0.6216684998730857, "grad_norm": 0.1729450523853302, "learning_rate": 0.001, "loss": 2.0217, "step": 14695 }, { "epoch": 0.6217108046366021, "grad_norm": 0.16045428812503815, "learning_rate": 0.001, "loss": 1.4028, "step": 14696 }, { "epoch": 0.6217531094001184, "grad_norm": 0.1945531815290451, "learning_rate": 0.001, "loss": 2.3836, "step": 14697 }, { "epoch": 0.6217954141636348, "grad_norm": 3.1854636669158936, "learning_rate": 0.001, "loss": 3.0657, "step": 14698 }, { "epoch": 0.6218377189271512, "grad_norm": 0.15348710119724274, "learning_rate": 0.001, "loss": 2.0376, "step": 14699 }, { "epoch": 0.6218800236906675, "grad_norm": 0.16753935813903809, "learning_rate": 0.001, "loss": 1.6844, "step": 14700 }, { "epoch": 0.6219223284541839, "grad_norm": 0.1476292461156845, "learning_rate": 0.001, "loss": 1.878, "step": 14701 }, { "epoch": 0.6219646332177003, "grad_norm": 0.21797721087932587, "learning_rate": 0.001, "loss": 1.692, "step": 14702 }, { "epoch": 0.6220069379812166, "grad_norm": 0.176117941737175, "learning_rate": 0.001, "loss": 2.2716, "step": 14703 }, { "epoch": 0.622049242744733, "grad_norm": 4.9479660987854, "learning_rate": 0.001, "loss": 1.8148, "step": 14704 }, { "epoch": 0.6220915475082495, "grad_norm": 0.16278578341007233, "learning_rate": 0.001, "loss": 1.9686, "step": 14705 }, { "epoch": 0.6221338522717658, "grad_norm": 0.19134145975112915, "learning_rate": 0.001, "loss": 2.3014, "step": 14706 }, { "epoch": 0.6221761570352822, "grad_norm": 0.1470852494239807, "learning_rate": 0.001, "loss": 2.4502, "step": 14707 }, { "epoch": 0.6222184617987986, "grad_norm": 0.3955981731414795, "learning_rate": 0.001, "loss": 2.0912, "step": 14708 }, { "epoch": 0.6222607665623149, "grad_norm": 0.2723364233970642, "learning_rate": 0.001, "loss": 1.8479, "step": 14709 }, { "epoch": 0.6223030713258313, "grad_norm": 0.15754322707653046, "learning_rate": 0.001, "loss": 2.4443, "step": 14710 }, { "epoch": 0.6223453760893477, "grad_norm": 0.21072664856910706, "learning_rate": 0.001, "loss": 1.9223, "step": 14711 }, { "epoch": 0.622387680852864, "grad_norm": 0.17302967607975006, "learning_rate": 0.001, "loss": 2.3223, "step": 14712 }, { "epoch": 0.6224299856163804, "grad_norm": 0.17941580712795258, "learning_rate": 0.001, "loss": 1.7731, "step": 14713 }, { "epoch": 0.6224722903798968, "grad_norm": 0.325706422328949, "learning_rate": 0.001, "loss": 2.6128, "step": 14714 }, { "epoch": 0.6225145951434131, "grad_norm": 1.440018653869629, "learning_rate": 0.001, "loss": 1.9563, "step": 14715 }, { "epoch": 0.6225568999069295, "grad_norm": 0.1713675558567047, "learning_rate": 0.001, "loss": 2.3617, "step": 14716 }, { "epoch": 0.6225992046704459, "grad_norm": 0.19308139383792877, "learning_rate": 0.001, "loss": 1.6729, "step": 14717 }, { "epoch": 0.6226415094339622, "grad_norm": 0.17297999560832977, "learning_rate": 0.001, "loss": 3.6168, "step": 14718 }, { "epoch": 0.6226838141974786, "grad_norm": 0.27014705538749695, "learning_rate": 0.001, "loss": 3.2252, "step": 14719 }, { "epoch": 0.6227261189609951, "grad_norm": 0.15865100920200348, "learning_rate": 0.001, "loss": 1.4966, "step": 14720 }, { "epoch": 0.6227684237245114, "grad_norm": 0.14343230426311493, "learning_rate": 0.001, "loss": 1.711, "step": 14721 }, { "epoch": 0.6228107284880278, "grad_norm": 0.1730983704328537, "learning_rate": 0.001, "loss": 2.159, "step": 14722 }, { "epoch": 0.6228530332515442, "grad_norm": 0.20309610664844513, "learning_rate": 0.001, "loss": 2.1347, "step": 14723 }, { "epoch": 0.6228953380150605, "grad_norm": 0.9017519950866699, "learning_rate": 0.001, "loss": 1.8289, "step": 14724 }, { "epoch": 0.6229376427785769, "grad_norm": 0.17153169214725494, "learning_rate": 0.001, "loss": 2.527, "step": 14725 }, { "epoch": 0.6229799475420933, "grad_norm": 0.1731494665145874, "learning_rate": 0.001, "loss": 1.3847, "step": 14726 }, { "epoch": 0.6230222523056096, "grad_norm": 0.20264741778373718, "learning_rate": 0.001, "loss": 2.6843, "step": 14727 }, { "epoch": 0.623064557069126, "grad_norm": 0.1755369007587433, "learning_rate": 0.001, "loss": 1.8534, "step": 14728 }, { "epoch": 0.6231068618326423, "grad_norm": 0.1698158234357834, "learning_rate": 0.001, "loss": 2.4937, "step": 14729 }, { "epoch": 0.6231491665961587, "grad_norm": 0.3321411907672882, "learning_rate": 0.001, "loss": 1.902, "step": 14730 }, { "epoch": 0.6231914713596751, "grad_norm": 0.1981087177991867, "learning_rate": 0.001, "loss": 2.086, "step": 14731 }, { "epoch": 0.6232337761231914, "grad_norm": 0.14606229960918427, "learning_rate": 0.001, "loss": 1.7585, "step": 14732 }, { "epoch": 0.6232760808867078, "grad_norm": 0.15275610983371735, "learning_rate": 0.001, "loss": 2.0629, "step": 14733 }, { "epoch": 0.6233183856502242, "grad_norm": 0.3921429514884949, "learning_rate": 0.001, "loss": 2.0281, "step": 14734 }, { "epoch": 0.6233606904137405, "grad_norm": 0.15665315091609955, "learning_rate": 0.001, "loss": 1.9623, "step": 14735 }, { "epoch": 0.623402995177257, "grad_norm": 0.1723744422197342, "learning_rate": 0.001, "loss": 2.2255, "step": 14736 }, { "epoch": 0.6234452999407734, "grad_norm": 0.16472692787647247, "learning_rate": 0.001, "loss": 2.3605, "step": 14737 }, { "epoch": 0.6234876047042897, "grad_norm": 0.22719836235046387, "learning_rate": 0.001, "loss": 2.9966, "step": 14738 }, { "epoch": 0.6235299094678061, "grad_norm": 0.5726194381713867, "learning_rate": 0.001, "loss": 2.0207, "step": 14739 }, { "epoch": 0.6235722142313225, "grad_norm": 0.16665321588516235, "learning_rate": 0.001, "loss": 2.1726, "step": 14740 }, { "epoch": 0.6236145189948388, "grad_norm": 0.15230652689933777, "learning_rate": 0.001, "loss": 2.2088, "step": 14741 }, { "epoch": 0.6236568237583552, "grad_norm": 0.17324359714984894, "learning_rate": 0.001, "loss": 2.0516, "step": 14742 }, { "epoch": 0.6236991285218716, "grad_norm": 0.24778017401695251, "learning_rate": 0.001, "loss": 1.798, "step": 14743 }, { "epoch": 0.6237414332853879, "grad_norm": 0.18029212951660156, "learning_rate": 0.001, "loss": 1.5368, "step": 14744 }, { "epoch": 0.6237837380489043, "grad_norm": 0.17106720805168152, "learning_rate": 0.001, "loss": 2.5262, "step": 14745 }, { "epoch": 0.6238260428124207, "grad_norm": 0.14900583028793335, "learning_rate": 0.001, "loss": 1.2427, "step": 14746 }, { "epoch": 0.623868347575937, "grad_norm": 0.2502407133579254, "learning_rate": 0.001, "loss": 3.2134, "step": 14747 }, { "epoch": 0.6239106523394534, "grad_norm": 0.21255750954151154, "learning_rate": 0.001, "loss": 2.0656, "step": 14748 }, { "epoch": 0.6239529571029698, "grad_norm": 0.15253281593322754, "learning_rate": 0.001, "loss": 3.1515, "step": 14749 }, { "epoch": 0.6239952618664861, "grad_norm": 0.15575817227363586, "learning_rate": 0.001, "loss": 2.1155, "step": 14750 }, { "epoch": 0.6240375666300025, "grad_norm": 0.26133993268013, "learning_rate": 0.001, "loss": 2.7806, "step": 14751 }, { "epoch": 0.624079871393519, "grad_norm": 0.15048684179782867, "learning_rate": 0.001, "loss": 1.8556, "step": 14752 }, { "epoch": 0.6241221761570352, "grad_norm": 0.2583177387714386, "learning_rate": 0.001, "loss": 2.298, "step": 14753 }, { "epoch": 0.6241644809205517, "grad_norm": 0.15635612607002258, "learning_rate": 0.001, "loss": 1.8436, "step": 14754 }, { "epoch": 0.6242067856840681, "grad_norm": 0.3417166769504547, "learning_rate": 0.001, "loss": 2.3692, "step": 14755 }, { "epoch": 0.6242490904475844, "grad_norm": 0.29285457730293274, "learning_rate": 0.001, "loss": 2.2755, "step": 14756 }, { "epoch": 0.6242913952111008, "grad_norm": 0.2324390560388565, "learning_rate": 0.001, "loss": 2.2066, "step": 14757 }, { "epoch": 0.6243336999746172, "grad_norm": 0.15262608230113983, "learning_rate": 0.001, "loss": 1.5537, "step": 14758 }, { "epoch": 0.6243760047381335, "grad_norm": 0.1738661378622055, "learning_rate": 0.001, "loss": 2.2254, "step": 14759 }, { "epoch": 0.6244183095016499, "grad_norm": 0.17284923791885376, "learning_rate": 0.001, "loss": 2.619, "step": 14760 }, { "epoch": 0.6244606142651663, "grad_norm": 0.16344395279884338, "learning_rate": 0.001, "loss": 1.8052, "step": 14761 }, { "epoch": 0.6245029190286826, "grad_norm": 1.0341507196426392, "learning_rate": 0.001, "loss": 2.4716, "step": 14762 }, { "epoch": 0.624545223792199, "grad_norm": 0.17319472134113312, "learning_rate": 0.001, "loss": 1.7911, "step": 14763 }, { "epoch": 0.6245875285557154, "grad_norm": 0.1723223626613617, "learning_rate": 0.001, "loss": 2.2801, "step": 14764 }, { "epoch": 0.6246298333192317, "grad_norm": 0.16963432729244232, "learning_rate": 0.001, "loss": 1.8165, "step": 14765 }, { "epoch": 0.6246721380827481, "grad_norm": 0.1635103076696396, "learning_rate": 0.001, "loss": 2.3229, "step": 14766 }, { "epoch": 0.6247144428462645, "grad_norm": 0.14711347222328186, "learning_rate": 0.001, "loss": 2.0542, "step": 14767 }, { "epoch": 0.6247567476097808, "grad_norm": 0.15598388016223907, "learning_rate": 0.001, "loss": 3.1843, "step": 14768 }, { "epoch": 0.6247990523732972, "grad_norm": 0.15948522090911865, "learning_rate": 0.001, "loss": 3.8822, "step": 14769 }, { "epoch": 0.6248413571368137, "grad_norm": 0.1707606315612793, "learning_rate": 0.001, "loss": 1.5173, "step": 14770 }, { "epoch": 0.62488366190033, "grad_norm": 0.20455005764961243, "learning_rate": 0.001, "loss": 2.8538, "step": 14771 }, { "epoch": 0.6249259666638464, "grad_norm": 0.18177711963653564, "learning_rate": 0.001, "loss": 1.576, "step": 14772 }, { "epoch": 0.6249682714273628, "grad_norm": 0.6637758016586304, "learning_rate": 0.001, "loss": 3.9036, "step": 14773 }, { "epoch": 0.6250105761908791, "grad_norm": 0.14505170285701752, "learning_rate": 0.001, "loss": 1.3907, "step": 14774 }, { "epoch": 0.6250528809543955, "grad_norm": 0.1670999825000763, "learning_rate": 0.001, "loss": 2.2654, "step": 14775 }, { "epoch": 0.6250951857179118, "grad_norm": 0.15915502607822418, "learning_rate": 0.001, "loss": 1.5619, "step": 14776 }, { "epoch": 0.6251374904814282, "grad_norm": 0.24664057791233063, "learning_rate": 0.001, "loss": 2.0549, "step": 14777 }, { "epoch": 0.6251797952449446, "grad_norm": 0.19590286910533905, "learning_rate": 0.001, "loss": 1.9566, "step": 14778 }, { "epoch": 0.6252221000084609, "grad_norm": 0.1494143158197403, "learning_rate": 0.001, "loss": 2.6541, "step": 14779 }, { "epoch": 0.6252644047719773, "grad_norm": 0.1772175133228302, "learning_rate": 0.001, "loss": 2.077, "step": 14780 }, { "epoch": 0.6253067095354937, "grad_norm": 0.16731050610542297, "learning_rate": 0.001, "loss": 1.8391, "step": 14781 }, { "epoch": 0.62534901429901, "grad_norm": 0.18583473563194275, "learning_rate": 0.001, "loss": 2.561, "step": 14782 }, { "epoch": 0.6253913190625264, "grad_norm": 0.15559391677379608, "learning_rate": 0.001, "loss": 1.7544, "step": 14783 }, { "epoch": 0.6254336238260428, "grad_norm": 0.16811911761760712, "learning_rate": 0.001, "loss": 2.3595, "step": 14784 }, { "epoch": 0.6254759285895591, "grad_norm": 0.18134218454360962, "learning_rate": 0.001, "loss": 1.9538, "step": 14785 }, { "epoch": 0.6255182333530755, "grad_norm": 0.8683392405509949, "learning_rate": 0.001, "loss": 2.0525, "step": 14786 }, { "epoch": 0.625560538116592, "grad_norm": 0.17296668887138367, "learning_rate": 0.001, "loss": 2.1946, "step": 14787 }, { "epoch": 0.6256028428801083, "grad_norm": 0.24003466963768005, "learning_rate": 0.001, "loss": 2.5681, "step": 14788 }, { "epoch": 0.6256451476436247, "grad_norm": 0.1762605756521225, "learning_rate": 0.001, "loss": 2.2416, "step": 14789 }, { "epoch": 0.6256874524071411, "grad_norm": 0.262921541929245, "learning_rate": 0.001, "loss": 3.2333, "step": 14790 }, { "epoch": 0.6257297571706574, "grad_norm": 0.2003992646932602, "learning_rate": 0.001, "loss": 2.1344, "step": 14791 }, { "epoch": 0.6257720619341738, "grad_norm": 0.1967916488647461, "learning_rate": 0.001, "loss": 1.5399, "step": 14792 }, { "epoch": 0.6258143666976902, "grad_norm": 0.18930691480636597, "learning_rate": 0.001, "loss": 1.3929, "step": 14793 }, { "epoch": 0.6258566714612065, "grad_norm": 0.6461501121520996, "learning_rate": 0.001, "loss": 1.8684, "step": 14794 }, { "epoch": 0.6258989762247229, "grad_norm": 0.16277913749217987, "learning_rate": 0.001, "loss": 2.1545, "step": 14795 }, { "epoch": 0.6259412809882393, "grad_norm": 0.13681641221046448, "learning_rate": 0.001, "loss": 1.4387, "step": 14796 }, { "epoch": 0.6259835857517556, "grad_norm": 0.49204564094543457, "learning_rate": 0.001, "loss": 1.716, "step": 14797 }, { "epoch": 0.626025890515272, "grad_norm": 0.2248888909816742, "learning_rate": 0.001, "loss": 2.0344, "step": 14798 }, { "epoch": 0.6260681952787884, "grad_norm": 0.1486063003540039, "learning_rate": 0.001, "loss": 1.7528, "step": 14799 }, { "epoch": 0.6261105000423047, "grad_norm": 0.16557568311691284, "learning_rate": 0.001, "loss": 1.7937, "step": 14800 }, { "epoch": 0.6261528048058211, "grad_norm": 0.2470521777868271, "learning_rate": 0.001, "loss": 1.88, "step": 14801 }, { "epoch": 0.6261951095693375, "grad_norm": 1.031522274017334, "learning_rate": 0.001, "loss": 2.0462, "step": 14802 }, { "epoch": 0.6262374143328538, "grad_norm": 0.39598196744918823, "learning_rate": 0.001, "loss": 1.4901, "step": 14803 }, { "epoch": 0.6262797190963703, "grad_norm": 0.1830819547176361, "learning_rate": 0.001, "loss": 1.7788, "step": 14804 }, { "epoch": 0.6263220238598867, "grad_norm": 0.18388625979423523, "learning_rate": 0.001, "loss": 2.308, "step": 14805 }, { "epoch": 0.626364328623403, "grad_norm": 1.8769688606262207, "learning_rate": 0.001, "loss": 2.2326, "step": 14806 }, { "epoch": 0.6264066333869194, "grad_norm": 0.1682252585887909, "learning_rate": 0.001, "loss": 1.7469, "step": 14807 }, { "epoch": 0.6264489381504358, "grad_norm": 0.1951892077922821, "learning_rate": 0.001, "loss": 3.3156, "step": 14808 }, { "epoch": 0.6264912429139521, "grad_norm": 0.20327278971672058, "learning_rate": 0.001, "loss": 2.7878, "step": 14809 }, { "epoch": 0.6265335476774685, "grad_norm": 7.379302024841309, "learning_rate": 0.001, "loss": 2.2043, "step": 14810 }, { "epoch": 0.6265758524409849, "grad_norm": 0.19853001832962036, "learning_rate": 0.001, "loss": 2.6433, "step": 14811 }, { "epoch": 0.6266181572045012, "grad_norm": 0.4619123637676239, "learning_rate": 0.001, "loss": 1.9174, "step": 14812 }, { "epoch": 0.6266604619680176, "grad_norm": 0.167628213763237, "learning_rate": 0.001, "loss": 1.3258, "step": 14813 }, { "epoch": 0.626702766731534, "grad_norm": 1.1932356357574463, "learning_rate": 0.001, "loss": 2.1188, "step": 14814 }, { "epoch": 0.6267450714950503, "grad_norm": 0.21053379774093628, "learning_rate": 0.001, "loss": 3.2038, "step": 14815 }, { "epoch": 0.6267873762585667, "grad_norm": 1.0147972106933594, "learning_rate": 0.001, "loss": 3.0992, "step": 14816 }, { "epoch": 0.6268296810220831, "grad_norm": 0.2153492271900177, "learning_rate": 0.001, "loss": 2.7228, "step": 14817 }, { "epoch": 0.6268719857855994, "grad_norm": 0.4768437147140503, "learning_rate": 0.001, "loss": 1.9515, "step": 14818 }, { "epoch": 0.6269142905491158, "grad_norm": 0.6105790138244629, "learning_rate": 0.001, "loss": 2.5721, "step": 14819 }, { "epoch": 0.6269565953126321, "grad_norm": 0.17983876168727875, "learning_rate": 0.001, "loss": 1.896, "step": 14820 }, { "epoch": 0.6269989000761486, "grad_norm": 0.23579160869121552, "learning_rate": 0.001, "loss": 1.8257, "step": 14821 }, { "epoch": 0.627041204839665, "grad_norm": 0.14642800390720367, "learning_rate": 0.001, "loss": 1.7842, "step": 14822 }, { "epoch": 0.6270835096031813, "grad_norm": 0.23658250272274017, "learning_rate": 0.001, "loss": 2.5395, "step": 14823 }, { "epoch": 0.6271258143666977, "grad_norm": 0.18231017887592316, "learning_rate": 0.001, "loss": 2.6851, "step": 14824 }, { "epoch": 0.6271681191302141, "grad_norm": 0.22886481881141663, "learning_rate": 0.001, "loss": 2.5188, "step": 14825 }, { "epoch": 0.6272104238937304, "grad_norm": 0.32207468152046204, "learning_rate": 0.001, "loss": 2.9632, "step": 14826 }, { "epoch": 0.6272527286572468, "grad_norm": 0.2477169632911682, "learning_rate": 0.001, "loss": 2.8172, "step": 14827 }, { "epoch": 0.6272950334207632, "grad_norm": 0.2553689479827881, "learning_rate": 0.001, "loss": 2.035, "step": 14828 }, { "epoch": 0.6273373381842795, "grad_norm": 0.16400131583213806, "learning_rate": 0.001, "loss": 2.1881, "step": 14829 }, { "epoch": 0.6273796429477959, "grad_norm": 0.47019270062446594, "learning_rate": 0.001, "loss": 2.8494, "step": 14830 }, { "epoch": 0.6274219477113123, "grad_norm": 0.3059365153312683, "learning_rate": 0.001, "loss": 2.2702, "step": 14831 }, { "epoch": 0.6274642524748286, "grad_norm": 0.21573302149772644, "learning_rate": 0.001, "loss": 2.0846, "step": 14832 }, { "epoch": 0.627506557238345, "grad_norm": 0.168625608086586, "learning_rate": 0.001, "loss": 1.7721, "step": 14833 }, { "epoch": 0.6275488620018614, "grad_norm": 0.15803949534893036, "learning_rate": 0.001, "loss": 1.42, "step": 14834 }, { "epoch": 0.6275911667653777, "grad_norm": 0.22716844081878662, "learning_rate": 0.001, "loss": 1.655, "step": 14835 }, { "epoch": 0.6276334715288941, "grad_norm": 0.2481343150138855, "learning_rate": 0.001, "loss": 2.1238, "step": 14836 }, { "epoch": 0.6276757762924106, "grad_norm": 0.21885570883750916, "learning_rate": 0.001, "loss": 2.5259, "step": 14837 }, { "epoch": 0.6277180810559269, "grad_norm": 0.22164210677146912, "learning_rate": 0.001, "loss": 2.5285, "step": 14838 }, { "epoch": 0.6277603858194433, "grad_norm": 0.17463375627994537, "learning_rate": 0.001, "loss": 2.1484, "step": 14839 }, { "epoch": 0.6278026905829597, "grad_norm": 0.38111189007759094, "learning_rate": 0.001, "loss": 3.1991, "step": 14840 }, { "epoch": 0.627844995346476, "grad_norm": 0.2066563367843628, "learning_rate": 0.001, "loss": 1.804, "step": 14841 }, { "epoch": 0.6278873001099924, "grad_norm": 0.23109540343284607, "learning_rate": 0.001, "loss": 1.7063, "step": 14842 }, { "epoch": 0.6279296048735088, "grad_norm": 0.1922856718301773, "learning_rate": 0.001, "loss": 1.6542, "step": 14843 }, { "epoch": 0.6279719096370251, "grad_norm": 0.1745460331439972, "learning_rate": 0.001, "loss": 2.2544, "step": 14844 }, { "epoch": 0.6280142144005415, "grad_norm": 0.16005033254623413, "learning_rate": 0.001, "loss": 1.6839, "step": 14845 }, { "epoch": 0.6280565191640579, "grad_norm": 0.14515602588653564, "learning_rate": 0.001, "loss": 2.7744, "step": 14846 }, { "epoch": 0.6280988239275742, "grad_norm": 0.1645001471042633, "learning_rate": 0.001, "loss": 1.6844, "step": 14847 }, { "epoch": 0.6281411286910906, "grad_norm": 0.19100764393806458, "learning_rate": 0.001, "loss": 2.6849, "step": 14848 }, { "epoch": 0.628183433454607, "grad_norm": 0.1547725945711136, "learning_rate": 0.001, "loss": 1.6892, "step": 14849 }, { "epoch": 0.6282257382181233, "grad_norm": 0.15877199172973633, "learning_rate": 0.001, "loss": 2.3236, "step": 14850 }, { "epoch": 0.6282680429816397, "grad_norm": 0.858564555644989, "learning_rate": 0.001, "loss": 2.6084, "step": 14851 }, { "epoch": 0.6283103477451562, "grad_norm": 0.15130288898944855, "learning_rate": 0.001, "loss": 2.6856, "step": 14852 }, { "epoch": 0.6283526525086724, "grad_norm": 0.18292021751403809, "learning_rate": 0.001, "loss": 1.7743, "step": 14853 }, { "epoch": 0.6283949572721889, "grad_norm": 0.17332005500793457, "learning_rate": 0.001, "loss": 2.0246, "step": 14854 }, { "epoch": 0.6284372620357053, "grad_norm": 0.2719305455684662, "learning_rate": 0.001, "loss": 2.222, "step": 14855 }, { "epoch": 0.6284795667992216, "grad_norm": 0.17727939784526825, "learning_rate": 0.001, "loss": 2.3099, "step": 14856 }, { "epoch": 0.628521871562738, "grad_norm": 0.21192631125450134, "learning_rate": 0.001, "loss": 2.0069, "step": 14857 }, { "epoch": 0.6285641763262544, "grad_norm": 0.16656994819641113, "learning_rate": 0.001, "loss": 2.1416, "step": 14858 }, { "epoch": 0.6286064810897707, "grad_norm": 0.14432191848754883, "learning_rate": 0.001, "loss": 1.9373, "step": 14859 }, { "epoch": 0.6286487858532871, "grad_norm": 0.16526567935943604, "learning_rate": 0.001, "loss": 2.9011, "step": 14860 }, { "epoch": 0.6286910906168035, "grad_norm": 0.35414940118789673, "learning_rate": 0.001, "loss": 1.6422, "step": 14861 }, { "epoch": 0.6287333953803198, "grad_norm": 1.0360305309295654, "learning_rate": 0.001, "loss": 1.7833, "step": 14862 }, { "epoch": 0.6287757001438362, "grad_norm": 0.18930722773075104, "learning_rate": 0.001, "loss": 1.7108, "step": 14863 }, { "epoch": 0.6288180049073525, "grad_norm": 0.23029504716396332, "learning_rate": 0.001, "loss": 2.3319, "step": 14864 }, { "epoch": 0.6288603096708689, "grad_norm": 0.1663587987422943, "learning_rate": 0.001, "loss": 2.4982, "step": 14865 }, { "epoch": 0.6289026144343853, "grad_norm": 0.14894288778305054, "learning_rate": 0.001, "loss": 2.4744, "step": 14866 }, { "epoch": 0.6289449191979016, "grad_norm": 0.1686108112335205, "learning_rate": 0.001, "loss": 2.1875, "step": 14867 }, { "epoch": 0.628987223961418, "grad_norm": 0.28346070647239685, "learning_rate": 0.001, "loss": 1.7888, "step": 14868 }, { "epoch": 0.6290295287249345, "grad_norm": 0.2675085961818695, "learning_rate": 0.001, "loss": 2.2643, "step": 14869 }, { "epoch": 0.6290718334884507, "grad_norm": 0.5835021734237671, "learning_rate": 0.001, "loss": 2.4189, "step": 14870 }, { "epoch": 0.6291141382519672, "grad_norm": 0.1684013456106186, "learning_rate": 0.001, "loss": 2.6277, "step": 14871 }, { "epoch": 0.6291564430154836, "grad_norm": 0.26715943217277527, "learning_rate": 0.001, "loss": 2.1086, "step": 14872 }, { "epoch": 0.6291987477789999, "grad_norm": 0.1863265186548233, "learning_rate": 0.001, "loss": 2.702, "step": 14873 }, { "epoch": 0.6292410525425163, "grad_norm": 0.17459061741828918, "learning_rate": 0.001, "loss": 1.8087, "step": 14874 }, { "epoch": 0.6292833573060327, "grad_norm": 0.19673362374305725, "learning_rate": 0.001, "loss": 3.9467, "step": 14875 }, { "epoch": 0.629325662069549, "grad_norm": 0.19001983106136322, "learning_rate": 0.001, "loss": 1.4823, "step": 14876 }, { "epoch": 0.6293679668330654, "grad_norm": 0.24293777346611023, "learning_rate": 0.001, "loss": 3.0979, "step": 14877 }, { "epoch": 0.6294102715965818, "grad_norm": 0.2093208283185959, "learning_rate": 0.001, "loss": 2.5133, "step": 14878 }, { "epoch": 0.6294525763600981, "grad_norm": 0.1860685646533966, "learning_rate": 0.001, "loss": 1.69, "step": 14879 }, { "epoch": 0.6294948811236145, "grad_norm": 2.451101303100586, "learning_rate": 0.001, "loss": 1.933, "step": 14880 }, { "epoch": 0.6295371858871309, "grad_norm": 0.16000764071941376, "learning_rate": 0.001, "loss": 1.6052, "step": 14881 }, { "epoch": 0.6295794906506472, "grad_norm": 0.17317312955856323, "learning_rate": 0.001, "loss": 1.7292, "step": 14882 }, { "epoch": 0.6296217954141636, "grad_norm": 0.15874236822128296, "learning_rate": 0.001, "loss": 2.214, "step": 14883 }, { "epoch": 0.62966410017768, "grad_norm": 0.16798686981201172, "learning_rate": 0.001, "loss": 1.7421, "step": 14884 }, { "epoch": 0.6297064049411963, "grad_norm": 8.583555221557617, "learning_rate": 0.001, "loss": 2.7575, "step": 14885 }, { "epoch": 0.6297487097047128, "grad_norm": 0.20271947979927063, "learning_rate": 0.001, "loss": 2.0891, "step": 14886 }, { "epoch": 0.6297910144682292, "grad_norm": 0.19663158059120178, "learning_rate": 0.001, "loss": 2.0016, "step": 14887 }, { "epoch": 0.6298333192317455, "grad_norm": 0.15687520802021027, "learning_rate": 0.001, "loss": 1.4679, "step": 14888 }, { "epoch": 0.6298756239952619, "grad_norm": 0.6638527512550354, "learning_rate": 0.001, "loss": 2.6785, "step": 14889 }, { "epoch": 0.6299179287587783, "grad_norm": 0.1655256301164627, "learning_rate": 0.001, "loss": 1.9314, "step": 14890 }, { "epoch": 0.6299602335222946, "grad_norm": 0.21192145347595215, "learning_rate": 0.001, "loss": 2.146, "step": 14891 }, { "epoch": 0.630002538285811, "grad_norm": 0.1955963522195816, "learning_rate": 0.001, "loss": 1.6481, "step": 14892 }, { "epoch": 0.6300448430493274, "grad_norm": 30.6153621673584, "learning_rate": 0.001, "loss": 3.6619, "step": 14893 }, { "epoch": 0.6300871478128437, "grad_norm": 0.2149381786584854, "learning_rate": 0.001, "loss": 3.3041, "step": 14894 }, { "epoch": 0.6301294525763601, "grad_norm": 0.19510112702846527, "learning_rate": 0.001, "loss": 1.7846, "step": 14895 }, { "epoch": 0.6301717573398765, "grad_norm": 1.3764687776565552, "learning_rate": 0.001, "loss": 2.012, "step": 14896 }, { "epoch": 0.6302140621033928, "grad_norm": 0.15694308280944824, "learning_rate": 0.001, "loss": 1.8974, "step": 14897 }, { "epoch": 0.6302563668669092, "grad_norm": 0.2188582718372345, "learning_rate": 0.001, "loss": 3.2371, "step": 14898 }, { "epoch": 0.6302986716304256, "grad_norm": 0.1506149172782898, "learning_rate": 0.001, "loss": 1.5915, "step": 14899 }, { "epoch": 0.6303409763939419, "grad_norm": 0.3987337648868561, "learning_rate": 0.001, "loss": 2.1704, "step": 14900 }, { "epoch": 0.6303832811574583, "grad_norm": 0.19197428226470947, "learning_rate": 0.001, "loss": 2.7042, "step": 14901 }, { "epoch": 0.6304255859209748, "grad_norm": 0.16541145741939545, "learning_rate": 0.001, "loss": 1.7064, "step": 14902 }, { "epoch": 0.630467890684491, "grad_norm": 0.3559930622577667, "learning_rate": 0.001, "loss": 1.8449, "step": 14903 }, { "epoch": 0.6305101954480075, "grad_norm": 0.1497167944908142, "learning_rate": 0.001, "loss": 2.1508, "step": 14904 }, { "epoch": 0.6305525002115239, "grad_norm": 3.9234423637390137, "learning_rate": 0.001, "loss": 2.4774, "step": 14905 }, { "epoch": 0.6305948049750402, "grad_norm": 1.5417382717132568, "learning_rate": 0.001, "loss": 2.181, "step": 14906 }, { "epoch": 0.6306371097385566, "grad_norm": 0.2059686779975891, "learning_rate": 0.001, "loss": 1.5838, "step": 14907 }, { "epoch": 0.630679414502073, "grad_norm": 0.17090870440006256, "learning_rate": 0.001, "loss": 2.7388, "step": 14908 }, { "epoch": 0.6307217192655893, "grad_norm": 0.1849461793899536, "learning_rate": 0.001, "loss": 1.9216, "step": 14909 }, { "epoch": 0.6307640240291057, "grad_norm": 0.22465984523296356, "learning_rate": 0.001, "loss": 2.0343, "step": 14910 }, { "epoch": 0.630806328792622, "grad_norm": 0.6882874369621277, "learning_rate": 0.001, "loss": 2.4926, "step": 14911 }, { "epoch": 0.6308486335561384, "grad_norm": 0.43035462498664856, "learning_rate": 0.001, "loss": 1.9783, "step": 14912 }, { "epoch": 0.6308909383196548, "grad_norm": 0.35573610663414, "learning_rate": 0.001, "loss": 2.3259, "step": 14913 }, { "epoch": 0.6309332430831711, "grad_norm": 0.18853552639484406, "learning_rate": 0.001, "loss": 2.3126, "step": 14914 }, { "epoch": 0.6309755478466875, "grad_norm": 0.17522744834423065, "learning_rate": 0.001, "loss": 2.5944, "step": 14915 }, { "epoch": 0.6310178526102039, "grad_norm": 0.5187592506408691, "learning_rate": 0.001, "loss": 2.4785, "step": 14916 }, { "epoch": 0.6310601573737202, "grad_norm": 0.184511199593544, "learning_rate": 0.001, "loss": 1.9519, "step": 14917 }, { "epoch": 0.6311024621372366, "grad_norm": 2.1163647174835205, "learning_rate": 0.001, "loss": 2.9251, "step": 14918 }, { "epoch": 0.631144766900753, "grad_norm": 0.17871823906898499, "learning_rate": 0.001, "loss": 2.1804, "step": 14919 }, { "epoch": 0.6311870716642694, "grad_norm": 0.49142828583717346, "learning_rate": 0.001, "loss": 2.1875, "step": 14920 }, { "epoch": 0.6312293764277858, "grad_norm": 0.294746458530426, "learning_rate": 0.001, "loss": 2.2112, "step": 14921 }, { "epoch": 0.6312716811913022, "grad_norm": 0.23569050431251526, "learning_rate": 0.001, "loss": 2.9051, "step": 14922 }, { "epoch": 0.6313139859548185, "grad_norm": 0.20902414619922638, "learning_rate": 0.001, "loss": 1.7777, "step": 14923 }, { "epoch": 0.6313562907183349, "grad_norm": 0.30001044273376465, "learning_rate": 0.001, "loss": 2.2292, "step": 14924 }, { "epoch": 0.6313985954818513, "grad_norm": 0.20105008780956268, "learning_rate": 0.001, "loss": 2.2452, "step": 14925 }, { "epoch": 0.6314409002453676, "grad_norm": 0.4256247282028198, "learning_rate": 0.001, "loss": 2.3232, "step": 14926 }, { "epoch": 0.631483205008884, "grad_norm": 0.1731926053762436, "learning_rate": 0.001, "loss": 1.7504, "step": 14927 }, { "epoch": 0.6315255097724004, "grad_norm": 0.16395363211631775, "learning_rate": 0.001, "loss": 1.5929, "step": 14928 }, { "epoch": 0.6315678145359167, "grad_norm": 0.395588755607605, "learning_rate": 0.001, "loss": 2.7332, "step": 14929 }, { "epoch": 0.6316101192994331, "grad_norm": 0.1726456880569458, "learning_rate": 0.001, "loss": 1.8693, "step": 14930 }, { "epoch": 0.6316524240629495, "grad_norm": 0.1603616327047348, "learning_rate": 0.001, "loss": 2.5375, "step": 14931 }, { "epoch": 0.6316947288264658, "grad_norm": 0.21942928433418274, "learning_rate": 0.001, "loss": 2.6368, "step": 14932 }, { "epoch": 0.6317370335899822, "grad_norm": 0.20922864973545074, "learning_rate": 0.001, "loss": 2.3417, "step": 14933 }, { "epoch": 0.6317793383534986, "grad_norm": 0.142195925116539, "learning_rate": 0.001, "loss": 2.0823, "step": 14934 }, { "epoch": 0.6318216431170149, "grad_norm": 0.4264514744281769, "learning_rate": 0.001, "loss": 2.3226, "step": 14935 }, { "epoch": 0.6318639478805314, "grad_norm": 0.17566858232021332, "learning_rate": 0.001, "loss": 1.4364, "step": 14936 }, { "epoch": 0.6319062526440478, "grad_norm": 0.5592827200889587, "learning_rate": 0.001, "loss": 2.4408, "step": 14937 }, { "epoch": 0.6319485574075641, "grad_norm": 0.4113415479660034, "learning_rate": 0.001, "loss": 2.6319, "step": 14938 }, { "epoch": 0.6319908621710805, "grad_norm": 0.1990566849708557, "learning_rate": 0.001, "loss": 2.4111, "step": 14939 }, { "epoch": 0.6320331669345969, "grad_norm": 0.7872762084007263, "learning_rate": 0.001, "loss": 2.0307, "step": 14940 }, { "epoch": 0.6320754716981132, "grad_norm": 0.18138566613197327, "learning_rate": 0.001, "loss": 2.3526, "step": 14941 }, { "epoch": 0.6321177764616296, "grad_norm": 0.1856798529624939, "learning_rate": 0.001, "loss": 1.9008, "step": 14942 }, { "epoch": 0.632160081225146, "grad_norm": 0.1658208668231964, "learning_rate": 0.001, "loss": 1.8302, "step": 14943 }, { "epoch": 0.6322023859886623, "grad_norm": 0.48950570821762085, "learning_rate": 0.001, "loss": 1.6563, "step": 14944 }, { "epoch": 0.6322446907521787, "grad_norm": 1.1566777229309082, "learning_rate": 0.001, "loss": 2.7926, "step": 14945 }, { "epoch": 0.6322869955156951, "grad_norm": 0.20745162665843964, "learning_rate": 0.001, "loss": 2.2428, "step": 14946 }, { "epoch": 0.6323293002792114, "grad_norm": 0.2187420278787613, "learning_rate": 0.001, "loss": 2.3801, "step": 14947 }, { "epoch": 0.6323716050427278, "grad_norm": 101.63371276855469, "learning_rate": 0.001, "loss": 2.0414, "step": 14948 }, { "epoch": 0.6324139098062442, "grad_norm": 0.1789313405752182, "learning_rate": 0.001, "loss": 1.9108, "step": 14949 }, { "epoch": 0.6324562145697605, "grad_norm": 0.2938269078731537, "learning_rate": 0.001, "loss": 1.9316, "step": 14950 }, { "epoch": 0.6324985193332769, "grad_norm": 0.32403501868247986, "learning_rate": 0.001, "loss": 1.9873, "step": 14951 }, { "epoch": 0.6325408240967934, "grad_norm": 0.1873176246881485, "learning_rate": 0.001, "loss": 1.8201, "step": 14952 }, { "epoch": 0.6325831288603097, "grad_norm": 0.19360357522964478, "learning_rate": 0.001, "loss": 2.7862, "step": 14953 }, { "epoch": 0.6326254336238261, "grad_norm": 0.17896825075149536, "learning_rate": 0.001, "loss": 2.0564, "step": 14954 }, { "epoch": 0.6326677383873424, "grad_norm": 1.2869054079055786, "learning_rate": 0.001, "loss": 1.5205, "step": 14955 }, { "epoch": 0.6327100431508588, "grad_norm": 0.18864065408706665, "learning_rate": 0.001, "loss": 3.0444, "step": 14956 }, { "epoch": 0.6327523479143752, "grad_norm": 0.880074143409729, "learning_rate": 0.001, "loss": 1.9113, "step": 14957 }, { "epoch": 0.6327946526778915, "grad_norm": 0.1550513505935669, "learning_rate": 0.001, "loss": 2.146, "step": 14958 }, { "epoch": 0.6328369574414079, "grad_norm": 0.20279888808727264, "learning_rate": 0.001, "loss": 2.5939, "step": 14959 }, { "epoch": 0.6328792622049243, "grad_norm": 0.4904308617115021, "learning_rate": 0.001, "loss": 2.1548, "step": 14960 }, { "epoch": 0.6329215669684406, "grad_norm": 0.15668341517448425, "learning_rate": 0.001, "loss": 3.1503, "step": 14961 }, { "epoch": 0.632963871731957, "grad_norm": 1.097795009613037, "learning_rate": 0.001, "loss": 1.9487, "step": 14962 }, { "epoch": 0.6330061764954734, "grad_norm": 0.19955521821975708, "learning_rate": 0.001, "loss": 1.4885, "step": 14963 }, { "epoch": 0.6330484812589897, "grad_norm": 0.15636754035949707, "learning_rate": 0.001, "loss": 2.508, "step": 14964 }, { "epoch": 0.6330907860225061, "grad_norm": 2.758397340774536, "learning_rate": 0.001, "loss": 1.6241, "step": 14965 }, { "epoch": 0.6331330907860225, "grad_norm": 2.932732582092285, "learning_rate": 0.001, "loss": 1.9931, "step": 14966 }, { "epoch": 0.6331753955495388, "grad_norm": 0.21314847469329834, "learning_rate": 0.001, "loss": 2.2011, "step": 14967 }, { "epoch": 0.6332177003130552, "grad_norm": 0.23298072814941406, "learning_rate": 0.001, "loss": 3.0595, "step": 14968 }, { "epoch": 0.6332600050765717, "grad_norm": 0.246332049369812, "learning_rate": 0.001, "loss": 1.9456, "step": 14969 }, { "epoch": 0.633302309840088, "grad_norm": 0.18252138793468475, "learning_rate": 0.001, "loss": 3.1042, "step": 14970 }, { "epoch": 0.6333446146036044, "grad_norm": 0.20896947383880615, "learning_rate": 0.001, "loss": 2.6753, "step": 14971 }, { "epoch": 0.6333869193671208, "grad_norm": 25.508777618408203, "learning_rate": 0.001, "loss": 3.5983, "step": 14972 }, { "epoch": 0.6334292241306371, "grad_norm": 0.8212241530418396, "learning_rate": 0.001, "loss": 2.4581, "step": 14973 }, { "epoch": 0.6334715288941535, "grad_norm": 0.36662164330482483, "learning_rate": 0.001, "loss": 1.8713, "step": 14974 }, { "epoch": 0.6335138336576699, "grad_norm": 0.14960072934627533, "learning_rate": 0.001, "loss": 2.7025, "step": 14975 }, { "epoch": 0.6335561384211862, "grad_norm": 0.17821753025054932, "learning_rate": 0.001, "loss": 2.1402, "step": 14976 }, { "epoch": 0.6335984431847026, "grad_norm": 0.17542670667171478, "learning_rate": 0.001, "loss": 1.9822, "step": 14977 }, { "epoch": 0.633640747948219, "grad_norm": 0.25928542017936707, "learning_rate": 0.001, "loss": 3.1297, "step": 14978 }, { "epoch": 0.6336830527117353, "grad_norm": 0.1568516343832016, "learning_rate": 0.001, "loss": 1.6175, "step": 14979 }, { "epoch": 0.6337253574752517, "grad_norm": 0.18374250829219818, "learning_rate": 0.001, "loss": 3.5063, "step": 14980 }, { "epoch": 0.6337676622387681, "grad_norm": 0.23148831725120544, "learning_rate": 0.001, "loss": 2.3223, "step": 14981 }, { "epoch": 0.6338099670022844, "grad_norm": 0.23832428455352783, "learning_rate": 0.001, "loss": 2.036, "step": 14982 }, { "epoch": 0.6338522717658008, "grad_norm": 0.19005969166755676, "learning_rate": 0.001, "loss": 3.0384, "step": 14983 }, { "epoch": 0.6338945765293172, "grad_norm": 2.5607242584228516, "learning_rate": 0.001, "loss": 2.0137, "step": 14984 }, { "epoch": 0.6339368812928335, "grad_norm": 0.4837716817855835, "learning_rate": 0.001, "loss": 3.2616, "step": 14985 }, { "epoch": 0.63397918605635, "grad_norm": 0.5023155212402344, "learning_rate": 0.001, "loss": 2.6116, "step": 14986 }, { "epoch": 0.6340214908198664, "grad_norm": 0.21957671642303467, "learning_rate": 0.001, "loss": 2.8777, "step": 14987 }, { "epoch": 0.6340637955833827, "grad_norm": 0.15454959869384766, "learning_rate": 0.001, "loss": 2.8105, "step": 14988 }, { "epoch": 0.6341061003468991, "grad_norm": 7.386536121368408, "learning_rate": 0.001, "loss": 1.749, "step": 14989 }, { "epoch": 0.6341484051104155, "grad_norm": 0.22474679350852966, "learning_rate": 0.001, "loss": 2.1073, "step": 14990 }, { "epoch": 0.6341907098739318, "grad_norm": 0.22885270416736603, "learning_rate": 0.001, "loss": 2.722, "step": 14991 }, { "epoch": 0.6342330146374482, "grad_norm": 0.1935967653989792, "learning_rate": 0.001, "loss": 1.5022, "step": 14992 }, { "epoch": 0.6342753194009646, "grad_norm": 1.1925420761108398, "learning_rate": 0.001, "loss": 3.1638, "step": 14993 }, { "epoch": 0.6343176241644809, "grad_norm": 0.16157661378383636, "learning_rate": 0.001, "loss": 1.8381, "step": 14994 }, { "epoch": 0.6343599289279973, "grad_norm": 0.2140708863735199, "learning_rate": 0.001, "loss": 2.2911, "step": 14995 }, { "epoch": 0.6344022336915137, "grad_norm": 0.15380524098873138, "learning_rate": 0.001, "loss": 2.2116, "step": 14996 }, { "epoch": 0.63444453845503, "grad_norm": 0.19003191590309143, "learning_rate": 0.001, "loss": 2.0538, "step": 14997 }, { "epoch": 0.6344868432185464, "grad_norm": 0.17649468779563904, "learning_rate": 0.001, "loss": 1.9866, "step": 14998 }, { "epoch": 0.6345291479820628, "grad_norm": 1.8466432094573975, "learning_rate": 0.001, "loss": 2.4553, "step": 14999 }, { "epoch": 0.6345714527455791, "grad_norm": 0.1778542697429657, "learning_rate": 0.001, "loss": 2.5347, "step": 15000 }, { "epoch": 0.6346137575090955, "grad_norm": 0.19804713129997253, "learning_rate": 0.001, "loss": 1.5805, "step": 15001 }, { "epoch": 0.6346560622726118, "grad_norm": 0.14694872498512268, "learning_rate": 0.001, "loss": 2.0717, "step": 15002 }, { "epoch": 0.6346983670361283, "grad_norm": 1.054595708847046, "learning_rate": 0.001, "loss": 3.0874, "step": 15003 }, { "epoch": 0.6347406717996447, "grad_norm": 0.1948278397321701, "learning_rate": 0.001, "loss": 1.9202, "step": 15004 }, { "epoch": 0.634782976563161, "grad_norm": 0.23619548976421356, "learning_rate": 0.001, "loss": 2.0307, "step": 15005 }, { "epoch": 0.6348252813266774, "grad_norm": 0.21441768109798431, "learning_rate": 0.001, "loss": 2.4843, "step": 15006 }, { "epoch": 0.6348675860901938, "grad_norm": 0.19178879261016846, "learning_rate": 0.001, "loss": 2.0039, "step": 15007 }, { "epoch": 0.6349098908537101, "grad_norm": 0.19128453731536865, "learning_rate": 0.001, "loss": 1.8224, "step": 15008 }, { "epoch": 0.6349521956172265, "grad_norm": 0.30151814222335815, "learning_rate": 0.001, "loss": 2.6074, "step": 15009 }, { "epoch": 0.6349945003807429, "grad_norm": 0.1635221242904663, "learning_rate": 0.001, "loss": 1.5166, "step": 15010 }, { "epoch": 0.6350368051442592, "grad_norm": 0.15775498747825623, "learning_rate": 0.001, "loss": 1.501, "step": 15011 }, { "epoch": 0.6350791099077756, "grad_norm": 0.19926244020462036, "learning_rate": 0.001, "loss": 2.0229, "step": 15012 }, { "epoch": 0.635121414671292, "grad_norm": 0.2127702683210373, "learning_rate": 0.001, "loss": 1.9368, "step": 15013 }, { "epoch": 0.6351637194348083, "grad_norm": 0.19976593554019928, "learning_rate": 0.001, "loss": 2.2498, "step": 15014 }, { "epoch": 0.6352060241983247, "grad_norm": 0.15875552594661713, "learning_rate": 0.001, "loss": 2.2435, "step": 15015 }, { "epoch": 0.6352483289618411, "grad_norm": 0.20924918353557587, "learning_rate": 0.001, "loss": 1.889, "step": 15016 }, { "epoch": 0.6352906337253574, "grad_norm": 0.16609390079975128, "learning_rate": 0.001, "loss": 2.3809, "step": 15017 }, { "epoch": 0.6353329384888738, "grad_norm": 0.17234043776988983, "learning_rate": 0.001, "loss": 1.9048, "step": 15018 }, { "epoch": 0.6353752432523903, "grad_norm": 0.18904206156730652, "learning_rate": 0.001, "loss": 2.9954, "step": 15019 }, { "epoch": 0.6354175480159066, "grad_norm": 0.2591887414455414, "learning_rate": 0.001, "loss": 1.4825, "step": 15020 }, { "epoch": 0.635459852779423, "grad_norm": 0.21162384748458862, "learning_rate": 0.001, "loss": 2.1042, "step": 15021 }, { "epoch": 0.6355021575429394, "grad_norm": 0.1585322916507721, "learning_rate": 0.001, "loss": 2.2755, "step": 15022 }, { "epoch": 0.6355444623064557, "grad_norm": 0.3131169080734253, "learning_rate": 0.001, "loss": 2.6273, "step": 15023 }, { "epoch": 0.6355867670699721, "grad_norm": 0.1742488294839859, "learning_rate": 0.001, "loss": 2.3599, "step": 15024 }, { "epoch": 0.6356290718334885, "grad_norm": 0.37338918447494507, "learning_rate": 0.001, "loss": 2.752, "step": 15025 }, { "epoch": 0.6356713765970048, "grad_norm": 0.20515532791614532, "learning_rate": 0.001, "loss": 2.4337, "step": 15026 }, { "epoch": 0.6357136813605212, "grad_norm": 0.5772908926010132, "learning_rate": 0.001, "loss": 2.1753, "step": 15027 }, { "epoch": 0.6357559861240376, "grad_norm": 1.2035185098648071, "learning_rate": 0.001, "loss": 2.298, "step": 15028 }, { "epoch": 0.6357982908875539, "grad_norm": 0.3183322250843048, "learning_rate": 0.001, "loss": 2.7047, "step": 15029 }, { "epoch": 0.6358405956510703, "grad_norm": 0.15647853910923004, "learning_rate": 0.001, "loss": 1.6478, "step": 15030 }, { "epoch": 0.6358829004145867, "grad_norm": 0.16988950967788696, "learning_rate": 0.001, "loss": 1.7595, "step": 15031 }, { "epoch": 0.635925205178103, "grad_norm": 1.6382477283477783, "learning_rate": 0.001, "loss": 1.7299, "step": 15032 }, { "epoch": 0.6359675099416194, "grad_norm": 0.3181857466697693, "learning_rate": 0.001, "loss": 1.609, "step": 15033 }, { "epoch": 0.6360098147051358, "grad_norm": 0.20994387567043304, "learning_rate": 0.001, "loss": 2.6348, "step": 15034 }, { "epoch": 0.6360521194686521, "grad_norm": 0.17180518805980682, "learning_rate": 0.001, "loss": 1.4551, "step": 15035 }, { "epoch": 0.6360944242321686, "grad_norm": 0.15223318338394165, "learning_rate": 0.001, "loss": 2.0057, "step": 15036 }, { "epoch": 0.636136728995685, "grad_norm": 0.14394080638885498, "learning_rate": 0.001, "loss": 1.8883, "step": 15037 }, { "epoch": 0.6361790337592013, "grad_norm": 0.8304606080055237, "learning_rate": 0.001, "loss": 2.4011, "step": 15038 }, { "epoch": 0.6362213385227177, "grad_norm": 0.1558523178100586, "learning_rate": 0.001, "loss": 2.1005, "step": 15039 }, { "epoch": 0.6362636432862341, "grad_norm": 0.19884271919727325, "learning_rate": 0.001, "loss": 3.0935, "step": 15040 }, { "epoch": 0.6363059480497504, "grad_norm": 0.19426396489143372, "learning_rate": 0.001, "loss": 2.1119, "step": 15041 }, { "epoch": 0.6363482528132668, "grad_norm": 0.1493634283542633, "learning_rate": 0.001, "loss": 2.2015, "step": 15042 }, { "epoch": 0.6363905575767832, "grad_norm": 0.1937769651412964, "learning_rate": 0.001, "loss": 2.2157, "step": 15043 }, { "epoch": 0.6364328623402995, "grad_norm": 0.1691095232963562, "learning_rate": 0.001, "loss": 1.7524, "step": 15044 }, { "epoch": 0.6364751671038159, "grad_norm": 0.2379046529531479, "learning_rate": 0.001, "loss": 1.9738, "step": 15045 }, { "epoch": 0.6365174718673322, "grad_norm": 0.4896133244037628, "learning_rate": 0.001, "loss": 1.2294, "step": 15046 }, { "epoch": 0.6365597766308486, "grad_norm": 0.18665045499801636, "learning_rate": 0.001, "loss": 2.2706, "step": 15047 }, { "epoch": 0.636602081394365, "grad_norm": 0.1756897121667862, "learning_rate": 0.001, "loss": 2.0685, "step": 15048 }, { "epoch": 0.6366443861578813, "grad_norm": 0.46474409103393555, "learning_rate": 0.001, "loss": 1.7136, "step": 15049 }, { "epoch": 0.6366866909213977, "grad_norm": 0.7416086196899414, "learning_rate": 0.001, "loss": 2.623, "step": 15050 }, { "epoch": 0.6367289956849141, "grad_norm": 0.21350111067295074, "learning_rate": 0.001, "loss": 1.6577, "step": 15051 }, { "epoch": 0.6367713004484304, "grad_norm": 0.6124548316001892, "learning_rate": 0.001, "loss": 3.5035, "step": 15052 }, { "epoch": 0.6368136052119469, "grad_norm": 0.19088469445705414, "learning_rate": 0.001, "loss": 2.3181, "step": 15053 }, { "epoch": 0.6368559099754633, "grad_norm": 0.14974898099899292, "learning_rate": 0.001, "loss": 2.0188, "step": 15054 }, { "epoch": 0.6368982147389796, "grad_norm": 0.17937543988227844, "learning_rate": 0.001, "loss": 2.6969, "step": 15055 }, { "epoch": 0.636940519502496, "grad_norm": 0.22011379897594452, "learning_rate": 0.001, "loss": 2.7894, "step": 15056 }, { "epoch": 0.6369828242660124, "grad_norm": 0.18520912528038025, "learning_rate": 0.001, "loss": 1.7704, "step": 15057 }, { "epoch": 0.6370251290295287, "grad_norm": 0.1443547159433365, "learning_rate": 0.001, "loss": 2.0866, "step": 15058 }, { "epoch": 0.6370674337930451, "grad_norm": 0.1495741754770279, "learning_rate": 0.001, "loss": 1.5056, "step": 15059 }, { "epoch": 0.6371097385565615, "grad_norm": 9.01001262664795, "learning_rate": 0.001, "loss": 1.8438, "step": 15060 }, { "epoch": 0.6371520433200778, "grad_norm": 0.46700048446655273, "learning_rate": 0.001, "loss": 2.1663, "step": 15061 }, { "epoch": 0.6371943480835942, "grad_norm": 0.20175836980342865, "learning_rate": 0.001, "loss": 2.9477, "step": 15062 }, { "epoch": 0.6372366528471106, "grad_norm": 0.162751242518425, "learning_rate": 0.001, "loss": 1.6253, "step": 15063 }, { "epoch": 0.6372789576106269, "grad_norm": 0.1852269321680069, "learning_rate": 0.001, "loss": 1.4472, "step": 15064 }, { "epoch": 0.6373212623741433, "grad_norm": 0.17682723701000214, "learning_rate": 0.001, "loss": 1.6896, "step": 15065 }, { "epoch": 0.6373635671376597, "grad_norm": 0.8060948848724365, "learning_rate": 0.001, "loss": 1.6884, "step": 15066 }, { "epoch": 0.637405871901176, "grad_norm": 0.30199164152145386, "learning_rate": 0.001, "loss": 2.0973, "step": 15067 }, { "epoch": 0.6374481766646924, "grad_norm": 0.20303098857402802, "learning_rate": 0.001, "loss": 2.1595, "step": 15068 }, { "epoch": 0.6374904814282089, "grad_norm": 0.20386897027492523, "learning_rate": 0.001, "loss": 1.6897, "step": 15069 }, { "epoch": 0.6375327861917252, "grad_norm": 0.233832448720932, "learning_rate": 0.001, "loss": 2.1659, "step": 15070 }, { "epoch": 0.6375750909552416, "grad_norm": 2.2971856594085693, "learning_rate": 0.001, "loss": 4.1125, "step": 15071 }, { "epoch": 0.637617395718758, "grad_norm": 0.19406281411647797, "learning_rate": 0.001, "loss": 2.3436, "step": 15072 }, { "epoch": 0.6376597004822743, "grad_norm": 0.16026239097118378, "learning_rate": 0.001, "loss": 2.5325, "step": 15073 }, { "epoch": 0.6377020052457907, "grad_norm": 0.21731014549732208, "learning_rate": 0.001, "loss": 2.6568, "step": 15074 }, { "epoch": 0.6377443100093071, "grad_norm": 0.1755847930908203, "learning_rate": 0.001, "loss": 2.48, "step": 15075 }, { "epoch": 0.6377866147728234, "grad_norm": 0.16319885849952698, "learning_rate": 0.001, "loss": 2.152, "step": 15076 }, { "epoch": 0.6378289195363398, "grad_norm": 0.1832021176815033, "learning_rate": 0.001, "loss": 2.4498, "step": 15077 }, { "epoch": 0.6378712242998562, "grad_norm": 0.17077642679214478, "learning_rate": 0.001, "loss": 2.2088, "step": 15078 }, { "epoch": 0.6379135290633725, "grad_norm": 0.2068411260843277, "learning_rate": 0.001, "loss": 2.3452, "step": 15079 }, { "epoch": 0.6379558338268889, "grad_norm": 0.1923973709344864, "learning_rate": 0.001, "loss": 2.124, "step": 15080 }, { "epoch": 0.6379981385904053, "grad_norm": 7.424524784088135, "learning_rate": 0.001, "loss": 2.9359, "step": 15081 }, { "epoch": 0.6380404433539216, "grad_norm": 0.2013271301984787, "learning_rate": 0.001, "loss": 1.4738, "step": 15082 }, { "epoch": 0.638082748117438, "grad_norm": 0.7266159653663635, "learning_rate": 0.001, "loss": 2.4914, "step": 15083 }, { "epoch": 0.6381250528809544, "grad_norm": 0.7086105346679688, "learning_rate": 0.001, "loss": 2.7849, "step": 15084 }, { "epoch": 0.6381673576444707, "grad_norm": 1.527816891670227, "learning_rate": 0.001, "loss": 3.3425, "step": 15085 }, { "epoch": 0.6382096624079872, "grad_norm": 0.30964910984039307, "learning_rate": 0.001, "loss": 2.8131, "step": 15086 }, { "epoch": 0.6382519671715036, "grad_norm": 0.18733543157577515, "learning_rate": 0.001, "loss": 2.89, "step": 15087 }, { "epoch": 0.6382942719350199, "grad_norm": 0.32117486000061035, "learning_rate": 0.001, "loss": 3.6968, "step": 15088 }, { "epoch": 0.6383365766985363, "grad_norm": 0.1650446653366089, "learning_rate": 0.001, "loss": 1.6528, "step": 15089 }, { "epoch": 0.6383788814620526, "grad_norm": 1.424420714378357, "learning_rate": 0.001, "loss": 2.9049, "step": 15090 }, { "epoch": 0.638421186225569, "grad_norm": 0.18598754703998566, "learning_rate": 0.001, "loss": 2.5301, "step": 15091 }, { "epoch": 0.6384634909890854, "grad_norm": 0.24407409131526947, "learning_rate": 0.001, "loss": 1.614, "step": 15092 }, { "epoch": 0.6385057957526017, "grad_norm": 0.18757480382919312, "learning_rate": 0.001, "loss": 2.3472, "step": 15093 }, { "epoch": 0.6385481005161181, "grad_norm": 0.17683260142803192, "learning_rate": 0.001, "loss": 2.2612, "step": 15094 }, { "epoch": 0.6385904052796345, "grad_norm": 0.2939551770687103, "learning_rate": 0.001, "loss": 1.9736, "step": 15095 }, { "epoch": 0.6386327100431508, "grad_norm": 0.1915324181318283, "learning_rate": 0.001, "loss": 1.6365, "step": 15096 }, { "epoch": 0.6386750148066672, "grad_norm": 0.21610991656780243, "learning_rate": 0.001, "loss": 2.0241, "step": 15097 }, { "epoch": 0.6387173195701836, "grad_norm": 0.17032171785831451, "learning_rate": 0.001, "loss": 2.7106, "step": 15098 }, { "epoch": 0.6387596243336999, "grad_norm": 0.33380383253097534, "learning_rate": 0.001, "loss": 1.8032, "step": 15099 }, { "epoch": 0.6388019290972163, "grad_norm": 0.15979735553264618, "learning_rate": 0.001, "loss": 2.0711, "step": 15100 }, { "epoch": 0.6388442338607327, "grad_norm": 0.21214862167835236, "learning_rate": 0.001, "loss": 2.4475, "step": 15101 }, { "epoch": 0.638886538624249, "grad_norm": 0.19921685755252838, "learning_rate": 0.001, "loss": 2.1466, "step": 15102 }, { "epoch": 0.6389288433877655, "grad_norm": 0.18050815165042877, "learning_rate": 0.001, "loss": 2.3784, "step": 15103 }, { "epoch": 0.6389711481512819, "grad_norm": 1.592774748802185, "learning_rate": 0.001, "loss": 2.568, "step": 15104 }, { "epoch": 0.6390134529147982, "grad_norm": 0.18110394477844238, "learning_rate": 0.001, "loss": 2.3632, "step": 15105 }, { "epoch": 0.6390557576783146, "grad_norm": 0.13899345695972443, "learning_rate": 0.001, "loss": 2.49, "step": 15106 }, { "epoch": 0.639098062441831, "grad_norm": 0.16161954402923584, "learning_rate": 0.001, "loss": 1.9518, "step": 15107 }, { "epoch": 0.6391403672053473, "grad_norm": 0.2258518636226654, "learning_rate": 0.001, "loss": 1.8021, "step": 15108 }, { "epoch": 0.6391826719688637, "grad_norm": 0.19789251685142517, "learning_rate": 0.001, "loss": 2.7393, "step": 15109 }, { "epoch": 0.6392249767323801, "grad_norm": 0.37085291743278503, "learning_rate": 0.001, "loss": 1.7739, "step": 15110 }, { "epoch": 0.6392672814958964, "grad_norm": 1.3115873336791992, "learning_rate": 0.001, "loss": 2.4295, "step": 15111 }, { "epoch": 0.6393095862594128, "grad_norm": 0.44819048047065735, "learning_rate": 0.001, "loss": 2.1524, "step": 15112 }, { "epoch": 0.6393518910229292, "grad_norm": 0.1929224282503128, "learning_rate": 0.001, "loss": 1.8331, "step": 15113 }, { "epoch": 0.6393941957864455, "grad_norm": 0.17352215945720673, "learning_rate": 0.001, "loss": 1.9057, "step": 15114 }, { "epoch": 0.6394365005499619, "grad_norm": 0.32852211594581604, "learning_rate": 0.001, "loss": 1.5222, "step": 15115 }, { "epoch": 0.6394788053134783, "grad_norm": 0.17034880816936493, "learning_rate": 0.001, "loss": 2.1697, "step": 15116 }, { "epoch": 0.6395211100769946, "grad_norm": 0.17381560802459717, "learning_rate": 0.001, "loss": 1.616, "step": 15117 }, { "epoch": 0.639563414840511, "grad_norm": 1.7698090076446533, "learning_rate": 0.001, "loss": 2.1559, "step": 15118 }, { "epoch": 0.6396057196040275, "grad_norm": 0.2830827534198761, "learning_rate": 0.001, "loss": 2.1308, "step": 15119 }, { "epoch": 0.6396480243675438, "grad_norm": 0.25885289907455444, "learning_rate": 0.001, "loss": 1.9705, "step": 15120 }, { "epoch": 0.6396903291310602, "grad_norm": 0.2354385405778885, "learning_rate": 0.001, "loss": 2.0764, "step": 15121 }, { "epoch": 0.6397326338945766, "grad_norm": 0.2808350920677185, "learning_rate": 0.001, "loss": 2.703, "step": 15122 }, { "epoch": 0.6397749386580929, "grad_norm": 0.1770741045475006, "learning_rate": 0.001, "loss": 2.2367, "step": 15123 }, { "epoch": 0.6398172434216093, "grad_norm": 0.1734161674976349, "learning_rate": 0.001, "loss": 1.4621, "step": 15124 }, { "epoch": 0.6398595481851257, "grad_norm": 0.2023870348930359, "learning_rate": 0.001, "loss": 2.4376, "step": 15125 }, { "epoch": 0.639901852948642, "grad_norm": 0.7190189361572266, "learning_rate": 0.001, "loss": 3.3754, "step": 15126 }, { "epoch": 0.6399441577121584, "grad_norm": 0.18912023305892944, "learning_rate": 0.001, "loss": 3.4364, "step": 15127 }, { "epoch": 0.6399864624756748, "grad_norm": 0.16115638613700867, "learning_rate": 0.001, "loss": 1.8208, "step": 15128 }, { "epoch": 0.6400287672391911, "grad_norm": 0.17985692620277405, "learning_rate": 0.001, "loss": 1.921, "step": 15129 }, { "epoch": 0.6400710720027075, "grad_norm": 1.403349757194519, "learning_rate": 0.001, "loss": 3.553, "step": 15130 }, { "epoch": 0.6401133767662239, "grad_norm": 0.14458277821540833, "learning_rate": 0.001, "loss": 1.8948, "step": 15131 }, { "epoch": 0.6401556815297402, "grad_norm": 0.21987377107143402, "learning_rate": 0.001, "loss": 2.8338, "step": 15132 }, { "epoch": 0.6401979862932566, "grad_norm": 0.3733513355255127, "learning_rate": 0.001, "loss": 2.4485, "step": 15133 }, { "epoch": 0.640240291056773, "grad_norm": 21.617033004760742, "learning_rate": 0.001, "loss": 2.7377, "step": 15134 }, { "epoch": 0.6402825958202893, "grad_norm": 0.4703228771686554, "learning_rate": 0.001, "loss": 2.7947, "step": 15135 }, { "epoch": 0.6403249005838058, "grad_norm": 1.1009413003921509, "learning_rate": 0.001, "loss": 2.0846, "step": 15136 }, { "epoch": 0.640367205347322, "grad_norm": 0.16309663653373718, "learning_rate": 0.001, "loss": 2.8663, "step": 15137 }, { "epoch": 0.6404095101108385, "grad_norm": 0.18451957404613495, "learning_rate": 0.001, "loss": 2.7764, "step": 15138 }, { "epoch": 0.6404518148743549, "grad_norm": 0.20747211575508118, "learning_rate": 0.001, "loss": 1.8173, "step": 15139 }, { "epoch": 0.6404941196378712, "grad_norm": 0.2003680169582367, "learning_rate": 0.001, "loss": 2.5292, "step": 15140 }, { "epoch": 0.6405364244013876, "grad_norm": 0.14385853707790375, "learning_rate": 0.001, "loss": 3.4984, "step": 15141 }, { "epoch": 0.640578729164904, "grad_norm": 0.20833978056907654, "learning_rate": 0.001, "loss": 2.2487, "step": 15142 }, { "epoch": 0.6406210339284203, "grad_norm": 1.4632526636123657, "learning_rate": 0.001, "loss": 2.327, "step": 15143 }, { "epoch": 0.6406633386919367, "grad_norm": 0.1615937054157257, "learning_rate": 0.001, "loss": 2.6719, "step": 15144 }, { "epoch": 0.6407056434554531, "grad_norm": 0.3300820291042328, "learning_rate": 0.001, "loss": 1.7582, "step": 15145 }, { "epoch": 0.6407479482189694, "grad_norm": 0.40735575556755066, "learning_rate": 0.001, "loss": 1.6912, "step": 15146 }, { "epoch": 0.6407902529824858, "grad_norm": 0.16738514602184296, "learning_rate": 0.001, "loss": 1.6134, "step": 15147 }, { "epoch": 0.6408325577460022, "grad_norm": 0.17721709609031677, "learning_rate": 0.001, "loss": 2.4597, "step": 15148 }, { "epoch": 0.6408748625095185, "grad_norm": 0.2825102210044861, "learning_rate": 0.001, "loss": 1.8421, "step": 15149 }, { "epoch": 0.6409171672730349, "grad_norm": 0.19013796746730804, "learning_rate": 0.001, "loss": 2.0838, "step": 15150 }, { "epoch": 0.6409594720365513, "grad_norm": 0.22980915009975433, "learning_rate": 0.001, "loss": 3.4241, "step": 15151 }, { "epoch": 0.6410017768000676, "grad_norm": 0.4537425935268402, "learning_rate": 0.001, "loss": 2.7787, "step": 15152 }, { "epoch": 0.6410440815635841, "grad_norm": 0.44562122225761414, "learning_rate": 0.001, "loss": 1.986, "step": 15153 }, { "epoch": 0.6410863863271005, "grad_norm": 0.1742187738418579, "learning_rate": 0.001, "loss": 1.8076, "step": 15154 }, { "epoch": 0.6411286910906168, "grad_norm": 0.41731923818588257, "learning_rate": 0.001, "loss": 2.7682, "step": 15155 }, { "epoch": 0.6411709958541332, "grad_norm": 0.17305545508861542, "learning_rate": 0.001, "loss": 2.2091, "step": 15156 }, { "epoch": 0.6412133006176496, "grad_norm": 3.8810274600982666, "learning_rate": 0.001, "loss": 2.5479, "step": 15157 }, { "epoch": 0.6412556053811659, "grad_norm": 0.22583703696727753, "learning_rate": 0.001, "loss": 1.9204, "step": 15158 }, { "epoch": 0.6412979101446823, "grad_norm": 0.14476729929447174, "learning_rate": 0.001, "loss": 3.0101, "step": 15159 }, { "epoch": 0.6413402149081987, "grad_norm": 0.19955182075500488, "learning_rate": 0.001, "loss": 2.3063, "step": 15160 }, { "epoch": 0.641382519671715, "grad_norm": 0.15358324348926544, "learning_rate": 0.001, "loss": 1.4909, "step": 15161 }, { "epoch": 0.6414248244352314, "grad_norm": 0.2164243459701538, "learning_rate": 0.001, "loss": 1.9155, "step": 15162 }, { "epoch": 0.6414671291987478, "grad_norm": 0.15397514402866364, "learning_rate": 0.001, "loss": 1.6991, "step": 15163 }, { "epoch": 0.6415094339622641, "grad_norm": 0.14911989867687225, "learning_rate": 0.001, "loss": 1.9703, "step": 15164 }, { "epoch": 0.6415517387257805, "grad_norm": 0.1464262753725052, "learning_rate": 0.001, "loss": 1.8296, "step": 15165 }, { "epoch": 0.6415940434892969, "grad_norm": 0.9781848192214966, "learning_rate": 0.001, "loss": 4.401, "step": 15166 }, { "epoch": 0.6416363482528132, "grad_norm": 0.18489103019237518, "learning_rate": 0.001, "loss": 1.6242, "step": 15167 }, { "epoch": 0.6416786530163296, "grad_norm": 0.16017593443393707, "learning_rate": 0.001, "loss": 1.9852, "step": 15168 }, { "epoch": 0.6417209577798461, "grad_norm": 1.0685312747955322, "learning_rate": 0.001, "loss": 2.4017, "step": 15169 }, { "epoch": 0.6417632625433624, "grad_norm": 0.17249107360839844, "learning_rate": 0.001, "loss": 1.9322, "step": 15170 }, { "epoch": 0.6418055673068788, "grad_norm": 0.2893502712249756, "learning_rate": 0.001, "loss": 1.7509, "step": 15171 }, { "epoch": 0.6418478720703952, "grad_norm": 1.8474113941192627, "learning_rate": 0.001, "loss": 1.5001, "step": 15172 }, { "epoch": 0.6418901768339115, "grad_norm": 0.21686485409736633, "learning_rate": 0.001, "loss": 2.2628, "step": 15173 }, { "epoch": 0.6419324815974279, "grad_norm": 0.1666136085987091, "learning_rate": 0.001, "loss": 3.1013, "step": 15174 }, { "epoch": 0.6419747863609443, "grad_norm": 0.20355500280857086, "learning_rate": 0.001, "loss": 1.8449, "step": 15175 }, { "epoch": 0.6420170911244606, "grad_norm": 0.16820862889289856, "learning_rate": 0.001, "loss": 2.2891, "step": 15176 }, { "epoch": 0.642059395887977, "grad_norm": 0.16452017426490784, "learning_rate": 0.001, "loss": 1.8697, "step": 15177 }, { "epoch": 0.6421017006514934, "grad_norm": 0.17088449001312256, "learning_rate": 0.001, "loss": 1.9137, "step": 15178 }, { "epoch": 0.6421440054150097, "grad_norm": 0.4427139163017273, "learning_rate": 0.001, "loss": 1.8976, "step": 15179 }, { "epoch": 0.6421863101785261, "grad_norm": 0.16302001476287842, "learning_rate": 0.001, "loss": 2.2036, "step": 15180 }, { "epoch": 0.6422286149420424, "grad_norm": 0.22736626863479614, "learning_rate": 0.001, "loss": 1.8923, "step": 15181 }, { "epoch": 0.6422709197055588, "grad_norm": 0.20898133516311646, "learning_rate": 0.001, "loss": 2.1798, "step": 15182 }, { "epoch": 0.6423132244690752, "grad_norm": 1.8661376237869263, "learning_rate": 0.001, "loss": 2.4555, "step": 15183 }, { "epoch": 0.6423555292325915, "grad_norm": 0.22298663854599, "learning_rate": 0.001, "loss": 2.2072, "step": 15184 }, { "epoch": 0.642397833996108, "grad_norm": 3.3507096767425537, "learning_rate": 0.001, "loss": 2.7466, "step": 15185 }, { "epoch": 0.6424401387596244, "grad_norm": 0.3017289638519287, "learning_rate": 0.001, "loss": 2.6671, "step": 15186 }, { "epoch": 0.6424824435231407, "grad_norm": 0.24658021330833435, "learning_rate": 0.001, "loss": 1.7703, "step": 15187 }, { "epoch": 0.6425247482866571, "grad_norm": 2.2778401374816895, "learning_rate": 0.001, "loss": 3.8874, "step": 15188 }, { "epoch": 0.6425670530501735, "grad_norm": 0.1641712188720703, "learning_rate": 0.001, "loss": 2.0445, "step": 15189 }, { "epoch": 0.6426093578136898, "grad_norm": 0.26722750067710876, "learning_rate": 0.001, "loss": 2.8895, "step": 15190 }, { "epoch": 0.6426516625772062, "grad_norm": 0.1975189447402954, "learning_rate": 0.001, "loss": 2.0646, "step": 15191 }, { "epoch": 0.6426939673407226, "grad_norm": 0.22183305025100708, "learning_rate": 0.001, "loss": 1.8173, "step": 15192 }, { "epoch": 0.6427362721042389, "grad_norm": 0.17388564348220825, "learning_rate": 0.001, "loss": 1.6142, "step": 15193 }, { "epoch": 0.6427785768677553, "grad_norm": 0.169900044798851, "learning_rate": 0.001, "loss": 2.7619, "step": 15194 }, { "epoch": 0.6428208816312717, "grad_norm": 0.2132178544998169, "learning_rate": 0.001, "loss": 2.2104, "step": 15195 }, { "epoch": 0.642863186394788, "grad_norm": 0.2020595222711563, "learning_rate": 0.001, "loss": 1.8507, "step": 15196 }, { "epoch": 0.6429054911583044, "grad_norm": 0.18588212132453918, "learning_rate": 0.001, "loss": 2.8819, "step": 15197 }, { "epoch": 0.6429477959218208, "grad_norm": 0.17926856875419617, "learning_rate": 0.001, "loss": 2.0165, "step": 15198 }, { "epoch": 0.6429901006853371, "grad_norm": 0.1749180406332016, "learning_rate": 0.001, "loss": 2.548, "step": 15199 }, { "epoch": 0.6430324054488535, "grad_norm": 0.1662074625492096, "learning_rate": 0.001, "loss": 2.1117, "step": 15200 }, { "epoch": 0.64307471021237, "grad_norm": 0.5484973788261414, "learning_rate": 0.001, "loss": 1.8429, "step": 15201 }, { "epoch": 0.6431170149758862, "grad_norm": 0.16047289967536926, "learning_rate": 0.001, "loss": 2.5943, "step": 15202 }, { "epoch": 0.6431593197394027, "grad_norm": 0.17750583589076996, "learning_rate": 0.001, "loss": 1.8823, "step": 15203 }, { "epoch": 0.6432016245029191, "grad_norm": 0.17922556400299072, "learning_rate": 0.001, "loss": 1.844, "step": 15204 }, { "epoch": 0.6432439292664354, "grad_norm": 0.2863519489765167, "learning_rate": 0.001, "loss": 2.2631, "step": 15205 }, { "epoch": 0.6432862340299518, "grad_norm": 0.19518601894378662, "learning_rate": 0.001, "loss": 1.5934, "step": 15206 }, { "epoch": 0.6433285387934682, "grad_norm": 0.15284669399261475, "learning_rate": 0.001, "loss": 1.7176, "step": 15207 }, { "epoch": 0.6433708435569845, "grad_norm": 0.16001272201538086, "learning_rate": 0.001, "loss": 2.4634, "step": 15208 }, { "epoch": 0.6434131483205009, "grad_norm": 1.5020637512207031, "learning_rate": 0.001, "loss": 2.363, "step": 15209 }, { "epoch": 0.6434554530840173, "grad_norm": 0.19093799591064453, "learning_rate": 0.001, "loss": 2.8734, "step": 15210 }, { "epoch": 0.6434977578475336, "grad_norm": 0.1673070341348648, "learning_rate": 0.001, "loss": 2.2707, "step": 15211 }, { "epoch": 0.64354006261105, "grad_norm": 0.1651526242494583, "learning_rate": 0.001, "loss": 2.457, "step": 15212 }, { "epoch": 0.6435823673745664, "grad_norm": 0.34426966309547424, "learning_rate": 0.001, "loss": 2.6741, "step": 15213 }, { "epoch": 0.6436246721380827, "grad_norm": 0.15150640904903412, "learning_rate": 0.001, "loss": 2.3646, "step": 15214 }, { "epoch": 0.6436669769015991, "grad_norm": 0.1601693034172058, "learning_rate": 0.001, "loss": 1.4741, "step": 15215 }, { "epoch": 0.6437092816651155, "grad_norm": 0.19948755204677582, "learning_rate": 0.001, "loss": 1.6806, "step": 15216 }, { "epoch": 0.6437515864286318, "grad_norm": 0.17080558836460114, "learning_rate": 0.001, "loss": 2.2009, "step": 15217 }, { "epoch": 0.6437938911921482, "grad_norm": 0.1389591544866562, "learning_rate": 0.001, "loss": 1.921, "step": 15218 }, { "epoch": 0.6438361959556647, "grad_norm": 1.358182668685913, "learning_rate": 0.001, "loss": 2.0884, "step": 15219 }, { "epoch": 0.643878500719181, "grad_norm": 0.3052426278591156, "learning_rate": 0.001, "loss": 3.247, "step": 15220 }, { "epoch": 0.6439208054826974, "grad_norm": 0.16330936551094055, "learning_rate": 0.001, "loss": 1.6572, "step": 15221 }, { "epoch": 0.6439631102462138, "grad_norm": 0.21469064056873322, "learning_rate": 0.001, "loss": 1.9606, "step": 15222 }, { "epoch": 0.6440054150097301, "grad_norm": 0.17421077191829681, "learning_rate": 0.001, "loss": 2.3584, "step": 15223 }, { "epoch": 0.6440477197732465, "grad_norm": 0.18720628321170807, "learning_rate": 0.001, "loss": 2.3146, "step": 15224 }, { "epoch": 0.6440900245367628, "grad_norm": 1.2276394367218018, "learning_rate": 0.001, "loss": 2.6331, "step": 15225 }, { "epoch": 0.6441323293002792, "grad_norm": 0.17419876158237457, "learning_rate": 0.001, "loss": 2.1331, "step": 15226 }, { "epoch": 0.6441746340637956, "grad_norm": 0.1927102506160736, "learning_rate": 0.001, "loss": 2.2267, "step": 15227 }, { "epoch": 0.6442169388273119, "grad_norm": 0.19751717150211334, "learning_rate": 0.001, "loss": 2.5559, "step": 15228 }, { "epoch": 0.6442592435908283, "grad_norm": 0.14785194396972656, "learning_rate": 0.001, "loss": 3.0032, "step": 15229 }, { "epoch": 0.6443015483543447, "grad_norm": 0.3033665716648102, "learning_rate": 0.001, "loss": 2.1101, "step": 15230 }, { "epoch": 0.644343853117861, "grad_norm": 0.1637023538351059, "learning_rate": 0.001, "loss": 2.0958, "step": 15231 }, { "epoch": 0.6443861578813774, "grad_norm": 0.24798746407032013, "learning_rate": 0.001, "loss": 2.0162, "step": 15232 }, { "epoch": 0.6444284626448938, "grad_norm": 0.16221873462200165, "learning_rate": 0.001, "loss": 1.8261, "step": 15233 }, { "epoch": 0.6444707674084101, "grad_norm": 0.1729075014591217, "learning_rate": 0.001, "loss": 2.3098, "step": 15234 }, { "epoch": 0.6445130721719265, "grad_norm": 0.1991363763809204, "learning_rate": 0.001, "loss": 1.3549, "step": 15235 }, { "epoch": 0.644555376935443, "grad_norm": 1.0189127922058105, "learning_rate": 0.001, "loss": 2.1717, "step": 15236 }, { "epoch": 0.6445976816989593, "grad_norm": 0.16026759147644043, "learning_rate": 0.001, "loss": 2.4036, "step": 15237 }, { "epoch": 0.6446399864624757, "grad_norm": 0.31504741311073303, "learning_rate": 0.001, "loss": 1.429, "step": 15238 }, { "epoch": 0.6446822912259921, "grad_norm": 0.2236756980419159, "learning_rate": 0.001, "loss": 2.204, "step": 15239 }, { "epoch": 0.6447245959895084, "grad_norm": 0.18025624752044678, "learning_rate": 0.001, "loss": 2.653, "step": 15240 }, { "epoch": 0.6447669007530248, "grad_norm": 0.1674138605594635, "learning_rate": 0.001, "loss": 3.3606, "step": 15241 }, { "epoch": 0.6448092055165412, "grad_norm": 0.1862388402223587, "learning_rate": 0.001, "loss": 3.4166, "step": 15242 }, { "epoch": 0.6448515102800575, "grad_norm": 0.17802289128303528, "learning_rate": 0.001, "loss": 3.0027, "step": 15243 }, { "epoch": 0.6448938150435739, "grad_norm": 0.1625264286994934, "learning_rate": 0.001, "loss": 2.5584, "step": 15244 }, { "epoch": 0.6449361198070903, "grad_norm": 0.293082058429718, "learning_rate": 0.001, "loss": 2.7967, "step": 15245 }, { "epoch": 0.6449784245706066, "grad_norm": 0.20152679085731506, "learning_rate": 0.001, "loss": 1.4473, "step": 15246 }, { "epoch": 0.645020729334123, "grad_norm": 0.5746887922286987, "learning_rate": 0.001, "loss": 1.424, "step": 15247 }, { "epoch": 0.6450630340976394, "grad_norm": 0.19594338536262512, "learning_rate": 0.001, "loss": 2.3114, "step": 15248 }, { "epoch": 0.6451053388611557, "grad_norm": 0.218429297208786, "learning_rate": 0.001, "loss": 1.768, "step": 15249 }, { "epoch": 0.6451476436246721, "grad_norm": 0.2682070732116699, "learning_rate": 0.001, "loss": 1.842, "step": 15250 }, { "epoch": 0.6451899483881885, "grad_norm": 0.34150052070617676, "learning_rate": 0.001, "loss": 1.7695, "step": 15251 }, { "epoch": 0.6452322531517048, "grad_norm": 0.19166529178619385, "learning_rate": 0.001, "loss": 2.596, "step": 15252 }, { "epoch": 0.6452745579152213, "grad_norm": 1.010944128036499, "learning_rate": 0.001, "loss": 2.4779, "step": 15253 }, { "epoch": 0.6453168626787377, "grad_norm": 0.20341774821281433, "learning_rate": 0.001, "loss": 2.7125, "step": 15254 }, { "epoch": 0.645359167442254, "grad_norm": 0.28562796115875244, "learning_rate": 0.001, "loss": 2.493, "step": 15255 }, { "epoch": 0.6454014722057704, "grad_norm": 0.19176359474658966, "learning_rate": 0.001, "loss": 2.4811, "step": 15256 }, { "epoch": 0.6454437769692868, "grad_norm": 0.17995816469192505, "learning_rate": 0.001, "loss": 1.7175, "step": 15257 }, { "epoch": 0.6454860817328031, "grad_norm": 0.2325253188610077, "learning_rate": 0.001, "loss": 2.5527, "step": 15258 }, { "epoch": 0.6455283864963195, "grad_norm": 0.7789939641952515, "learning_rate": 0.001, "loss": 1.7572, "step": 15259 }, { "epoch": 0.6455706912598359, "grad_norm": 0.1507723033428192, "learning_rate": 0.001, "loss": 2.1685, "step": 15260 }, { "epoch": 0.6456129960233522, "grad_norm": 0.15807674825191498, "learning_rate": 0.001, "loss": 2.5854, "step": 15261 }, { "epoch": 0.6456553007868686, "grad_norm": 0.1875530183315277, "learning_rate": 0.001, "loss": 2.4665, "step": 15262 }, { "epoch": 0.645697605550385, "grad_norm": 0.20089443027973175, "learning_rate": 0.001, "loss": 1.8378, "step": 15263 }, { "epoch": 0.6457399103139013, "grad_norm": 0.40566131472587585, "learning_rate": 0.001, "loss": 1.5999, "step": 15264 }, { "epoch": 0.6457822150774177, "grad_norm": 2.285637378692627, "learning_rate": 0.001, "loss": 2.1813, "step": 15265 }, { "epoch": 0.6458245198409341, "grad_norm": 0.1539418250322342, "learning_rate": 0.001, "loss": 1.8228, "step": 15266 }, { "epoch": 0.6458668246044504, "grad_norm": 5.015846252441406, "learning_rate": 0.001, "loss": 2.1239, "step": 15267 }, { "epoch": 0.6459091293679668, "grad_norm": 0.2014991044998169, "learning_rate": 0.001, "loss": 2.0453, "step": 15268 }, { "epoch": 0.6459514341314833, "grad_norm": 0.4300079345703125, "learning_rate": 0.001, "loss": 2.0821, "step": 15269 }, { "epoch": 0.6459937388949996, "grad_norm": 0.20501923561096191, "learning_rate": 0.001, "loss": 2.1233, "step": 15270 }, { "epoch": 0.646036043658516, "grad_norm": 0.17998738586902618, "learning_rate": 0.001, "loss": 1.9638, "step": 15271 }, { "epoch": 0.6460783484220323, "grad_norm": 0.17478275299072266, "learning_rate": 0.001, "loss": 1.6565, "step": 15272 }, { "epoch": 0.6461206531855487, "grad_norm": 0.17657312750816345, "learning_rate": 0.001, "loss": 1.9822, "step": 15273 }, { "epoch": 0.6461629579490651, "grad_norm": 2.826846122741699, "learning_rate": 0.001, "loss": 1.6034, "step": 15274 }, { "epoch": 0.6462052627125814, "grad_norm": 0.9250707626342773, "learning_rate": 0.001, "loss": 3.1013, "step": 15275 }, { "epoch": 0.6462475674760978, "grad_norm": 0.1742154359817505, "learning_rate": 0.001, "loss": 2.5725, "step": 15276 }, { "epoch": 0.6462898722396142, "grad_norm": 1.7787930965423584, "learning_rate": 0.001, "loss": 2.0831, "step": 15277 }, { "epoch": 0.6463321770031305, "grad_norm": 0.17975670099258423, "learning_rate": 0.001, "loss": 1.9179, "step": 15278 }, { "epoch": 0.6463744817666469, "grad_norm": 0.1871751993894577, "learning_rate": 0.001, "loss": 1.7723, "step": 15279 }, { "epoch": 0.6464167865301633, "grad_norm": 0.19217629730701447, "learning_rate": 0.001, "loss": 2.0674, "step": 15280 }, { "epoch": 0.6464590912936796, "grad_norm": 0.2209371030330658, "learning_rate": 0.001, "loss": 1.9014, "step": 15281 }, { "epoch": 0.646501396057196, "grad_norm": 0.1662231981754303, "learning_rate": 0.001, "loss": 2.1925, "step": 15282 }, { "epoch": 0.6465437008207124, "grad_norm": 0.40641865134239197, "learning_rate": 0.001, "loss": 2.4045, "step": 15283 }, { "epoch": 0.6465860055842287, "grad_norm": 0.14980608224868774, "learning_rate": 0.001, "loss": 1.4153, "step": 15284 }, { "epoch": 0.6466283103477451, "grad_norm": 0.18884730339050293, "learning_rate": 0.001, "loss": 1.945, "step": 15285 }, { "epoch": 0.6466706151112616, "grad_norm": 0.1964019387960434, "learning_rate": 0.001, "loss": 2.997, "step": 15286 }, { "epoch": 0.6467129198747779, "grad_norm": 0.1846921443939209, "learning_rate": 0.001, "loss": 2.0583, "step": 15287 }, { "epoch": 0.6467552246382943, "grad_norm": 0.3572556674480438, "learning_rate": 0.001, "loss": 3.6665, "step": 15288 }, { "epoch": 0.6467975294018107, "grad_norm": 0.1696724146604538, "learning_rate": 0.001, "loss": 1.6555, "step": 15289 }, { "epoch": 0.646839834165327, "grad_norm": 0.18197889626026154, "learning_rate": 0.001, "loss": 2.0833, "step": 15290 }, { "epoch": 0.6468821389288434, "grad_norm": 0.7675547003746033, "learning_rate": 0.001, "loss": 1.9319, "step": 15291 }, { "epoch": 0.6469244436923598, "grad_norm": 0.16503793001174927, "learning_rate": 0.001, "loss": 2.4541, "step": 15292 }, { "epoch": 0.6469667484558761, "grad_norm": 0.5148576498031616, "learning_rate": 0.001, "loss": 1.8749, "step": 15293 }, { "epoch": 0.6470090532193925, "grad_norm": 0.1498442143201828, "learning_rate": 0.001, "loss": 2.6986, "step": 15294 }, { "epoch": 0.6470513579829089, "grad_norm": 0.22199515998363495, "learning_rate": 0.001, "loss": 2.5812, "step": 15295 }, { "epoch": 0.6470936627464252, "grad_norm": 0.15802200138568878, "learning_rate": 0.001, "loss": 1.5685, "step": 15296 }, { "epoch": 0.6471359675099416, "grad_norm": 0.15270139276981354, "learning_rate": 0.001, "loss": 2.1229, "step": 15297 }, { "epoch": 0.647178272273458, "grad_norm": 0.15476052463054657, "learning_rate": 0.001, "loss": 2.2869, "step": 15298 }, { "epoch": 0.6472205770369743, "grad_norm": 0.23796048760414124, "learning_rate": 0.001, "loss": 1.6435, "step": 15299 }, { "epoch": 0.6472628818004907, "grad_norm": 0.2162591516971588, "learning_rate": 0.001, "loss": 2.5024, "step": 15300 }, { "epoch": 0.6473051865640072, "grad_norm": 0.5299810171127319, "learning_rate": 0.001, "loss": 2.4246, "step": 15301 }, { "epoch": 0.6473474913275234, "grad_norm": 1.423614740371704, "learning_rate": 0.001, "loss": 2.4182, "step": 15302 }, { "epoch": 0.6473897960910399, "grad_norm": 0.15965206921100616, "learning_rate": 0.001, "loss": 1.6585, "step": 15303 }, { "epoch": 0.6474321008545563, "grad_norm": 0.2295125126838684, "learning_rate": 0.001, "loss": 1.5463, "step": 15304 }, { "epoch": 0.6474744056180726, "grad_norm": 0.16311728954315186, "learning_rate": 0.001, "loss": 2.6348, "step": 15305 }, { "epoch": 0.647516710381589, "grad_norm": 0.16523398458957672, "learning_rate": 0.001, "loss": 2.221, "step": 15306 }, { "epoch": 0.6475590151451054, "grad_norm": 0.16306725144386292, "learning_rate": 0.001, "loss": 1.871, "step": 15307 }, { "epoch": 0.6476013199086217, "grad_norm": 0.24580958485603333, "learning_rate": 0.001, "loss": 3.2734, "step": 15308 }, { "epoch": 0.6476436246721381, "grad_norm": 0.3764786124229431, "learning_rate": 0.001, "loss": 2.7352, "step": 15309 }, { "epoch": 0.6476859294356545, "grad_norm": 0.41898685693740845, "learning_rate": 0.001, "loss": 2.2815, "step": 15310 }, { "epoch": 0.6477282341991708, "grad_norm": 1.0306098461151123, "learning_rate": 0.001, "loss": 2.1651, "step": 15311 }, { "epoch": 0.6477705389626872, "grad_norm": 0.20003588497638702, "learning_rate": 0.001, "loss": 1.8374, "step": 15312 }, { "epoch": 0.6478128437262036, "grad_norm": 0.18757633864879608, "learning_rate": 0.001, "loss": 1.9034, "step": 15313 }, { "epoch": 0.6478551484897199, "grad_norm": 0.2218223512172699, "learning_rate": 0.001, "loss": 1.9381, "step": 15314 }, { "epoch": 0.6478974532532363, "grad_norm": 0.17475609481334686, "learning_rate": 0.001, "loss": 1.7699, "step": 15315 }, { "epoch": 0.6479397580167526, "grad_norm": 0.23167279362678528, "learning_rate": 0.001, "loss": 1.9099, "step": 15316 }, { "epoch": 0.647982062780269, "grad_norm": 0.1843143105506897, "learning_rate": 0.001, "loss": 1.8182, "step": 15317 }, { "epoch": 0.6480243675437855, "grad_norm": 0.20558197796344757, "learning_rate": 0.001, "loss": 2.5398, "step": 15318 }, { "epoch": 0.6480666723073017, "grad_norm": 0.17838044464588165, "learning_rate": 0.001, "loss": 1.7194, "step": 15319 }, { "epoch": 0.6481089770708182, "grad_norm": 0.21093347668647766, "learning_rate": 0.001, "loss": 3.3587, "step": 15320 }, { "epoch": 0.6481512818343346, "grad_norm": 0.24948959052562714, "learning_rate": 0.001, "loss": 2.1242, "step": 15321 }, { "epoch": 0.6481935865978509, "grad_norm": 0.17409680783748627, "learning_rate": 0.001, "loss": 1.7667, "step": 15322 }, { "epoch": 0.6482358913613673, "grad_norm": 0.1577576845884323, "learning_rate": 0.001, "loss": 1.9457, "step": 15323 }, { "epoch": 0.6482781961248837, "grad_norm": 0.6335095167160034, "learning_rate": 0.001, "loss": 2.0039, "step": 15324 }, { "epoch": 0.6483205008884, "grad_norm": 0.23187367618083954, "learning_rate": 0.001, "loss": 1.9439, "step": 15325 }, { "epoch": 0.6483628056519164, "grad_norm": 0.17001605033874512, "learning_rate": 0.001, "loss": 2.1188, "step": 15326 }, { "epoch": 0.6484051104154328, "grad_norm": 0.1839197725057602, "learning_rate": 0.001, "loss": 3.4747, "step": 15327 }, { "epoch": 0.6484474151789491, "grad_norm": 0.18075038492679596, "learning_rate": 0.001, "loss": 1.8859, "step": 15328 }, { "epoch": 0.6484897199424655, "grad_norm": 0.42471522092819214, "learning_rate": 0.001, "loss": 2.1715, "step": 15329 }, { "epoch": 0.6485320247059819, "grad_norm": 0.3928261995315552, "learning_rate": 0.001, "loss": 3.119, "step": 15330 }, { "epoch": 0.6485743294694982, "grad_norm": 0.15341652929782867, "learning_rate": 0.001, "loss": 2.5067, "step": 15331 }, { "epoch": 0.6486166342330146, "grad_norm": 0.16816523671150208, "learning_rate": 0.001, "loss": 2.3909, "step": 15332 }, { "epoch": 0.648658938996531, "grad_norm": 0.24296429753303528, "learning_rate": 0.001, "loss": 2.4028, "step": 15333 }, { "epoch": 0.6487012437600473, "grad_norm": 0.3298254907131195, "learning_rate": 0.001, "loss": 2.0308, "step": 15334 }, { "epoch": 0.6487435485235638, "grad_norm": 0.9870914816856384, "learning_rate": 0.001, "loss": 2.1879, "step": 15335 }, { "epoch": 0.6487858532870802, "grad_norm": 0.1429598480463028, "learning_rate": 0.001, "loss": 1.3078, "step": 15336 }, { "epoch": 0.6488281580505965, "grad_norm": 2.075826644897461, "learning_rate": 0.001, "loss": 1.9437, "step": 15337 }, { "epoch": 0.6488704628141129, "grad_norm": 0.18827824294567108, "learning_rate": 0.001, "loss": 1.5201, "step": 15338 }, { "epoch": 0.6489127675776293, "grad_norm": 0.16094815731048584, "learning_rate": 0.001, "loss": 2.2602, "step": 15339 }, { "epoch": 0.6489550723411456, "grad_norm": 0.18486766517162323, "learning_rate": 0.001, "loss": 2.3534, "step": 15340 }, { "epoch": 0.648997377104662, "grad_norm": 0.22534742951393127, "learning_rate": 0.001, "loss": 2.5483, "step": 15341 }, { "epoch": 0.6490396818681784, "grad_norm": 0.17582590878009796, "learning_rate": 0.001, "loss": 1.4281, "step": 15342 }, { "epoch": 0.6490819866316947, "grad_norm": 0.13245414197444916, "learning_rate": 0.001, "loss": 1.9149, "step": 15343 }, { "epoch": 0.6491242913952111, "grad_norm": 0.4428796172142029, "learning_rate": 0.001, "loss": 2.2785, "step": 15344 }, { "epoch": 0.6491665961587275, "grad_norm": 0.289799302816391, "learning_rate": 0.001, "loss": 1.9939, "step": 15345 }, { "epoch": 0.6492089009222438, "grad_norm": 0.16207891702651978, "learning_rate": 0.001, "loss": 2.3712, "step": 15346 }, { "epoch": 0.6492512056857602, "grad_norm": 0.20188239216804504, "learning_rate": 0.001, "loss": 2.4096, "step": 15347 }, { "epoch": 0.6492935104492766, "grad_norm": 0.21010519564151764, "learning_rate": 0.001, "loss": 2.6139, "step": 15348 }, { "epoch": 0.6493358152127929, "grad_norm": 0.3073784112930298, "learning_rate": 0.001, "loss": 2.1791, "step": 15349 }, { "epoch": 0.6493781199763093, "grad_norm": 0.3525810241699219, "learning_rate": 0.001, "loss": 2.2354, "step": 15350 }, { "epoch": 0.6494204247398258, "grad_norm": 0.18765120208263397, "learning_rate": 0.001, "loss": 2.6249, "step": 15351 }, { "epoch": 0.649462729503342, "grad_norm": 0.21588948369026184, "learning_rate": 0.001, "loss": 1.4647, "step": 15352 }, { "epoch": 0.6495050342668585, "grad_norm": 0.17089973390102386, "learning_rate": 0.001, "loss": 1.6018, "step": 15353 }, { "epoch": 0.6495473390303749, "grad_norm": 1.505226492881775, "learning_rate": 0.001, "loss": 2.5792, "step": 15354 }, { "epoch": 0.6495896437938912, "grad_norm": 0.18574289977550507, "learning_rate": 0.001, "loss": 2.5838, "step": 15355 }, { "epoch": 0.6496319485574076, "grad_norm": 0.9261936545372009, "learning_rate": 0.001, "loss": 3.0768, "step": 15356 }, { "epoch": 0.649674253320924, "grad_norm": 0.1780634969472885, "learning_rate": 0.001, "loss": 3.5627, "step": 15357 }, { "epoch": 0.6497165580844403, "grad_norm": 0.5185316205024719, "learning_rate": 0.001, "loss": 2.1678, "step": 15358 }, { "epoch": 0.6497588628479567, "grad_norm": 0.3285927176475525, "learning_rate": 0.001, "loss": 2.4195, "step": 15359 }, { "epoch": 0.6498011676114731, "grad_norm": 0.2265651822090149, "learning_rate": 0.001, "loss": 2.5156, "step": 15360 }, { "epoch": 0.6498434723749894, "grad_norm": 0.14902041852474213, "learning_rate": 0.001, "loss": 2.5237, "step": 15361 }, { "epoch": 0.6498857771385058, "grad_norm": 0.20233316719532013, "learning_rate": 0.001, "loss": 2.4619, "step": 15362 }, { "epoch": 0.6499280819020221, "grad_norm": 0.16044741868972778, "learning_rate": 0.001, "loss": 1.9778, "step": 15363 }, { "epoch": 0.6499703866655385, "grad_norm": 0.22437353432178497, "learning_rate": 0.001, "loss": 2.704, "step": 15364 }, { "epoch": 0.6500126914290549, "grad_norm": 0.1866353303194046, "learning_rate": 0.001, "loss": 1.7799, "step": 15365 }, { "epoch": 0.6500549961925712, "grad_norm": 0.25852930545806885, "learning_rate": 0.001, "loss": 1.3778, "step": 15366 }, { "epoch": 0.6500973009560876, "grad_norm": 0.23204196989536285, "learning_rate": 0.001, "loss": 2.7044, "step": 15367 }, { "epoch": 0.650139605719604, "grad_norm": 0.19387875497341156, "learning_rate": 0.001, "loss": 3.1026, "step": 15368 }, { "epoch": 0.6501819104831204, "grad_norm": 0.15012617409229279, "learning_rate": 0.001, "loss": 2.8182, "step": 15369 }, { "epoch": 0.6502242152466368, "grad_norm": 0.1906086802482605, "learning_rate": 0.001, "loss": 2.2236, "step": 15370 }, { "epoch": 0.6502665200101532, "grad_norm": 0.16298934817314148, "learning_rate": 0.001, "loss": 1.6815, "step": 15371 }, { "epoch": 0.6503088247736695, "grad_norm": 6.248525142669678, "learning_rate": 0.001, "loss": 1.8015, "step": 15372 }, { "epoch": 0.6503511295371859, "grad_norm": 0.29866117238998413, "learning_rate": 0.001, "loss": 2.1441, "step": 15373 }, { "epoch": 0.6503934343007023, "grad_norm": 0.5905458331108093, "learning_rate": 0.001, "loss": 1.629, "step": 15374 }, { "epoch": 0.6504357390642186, "grad_norm": 0.19776056706905365, "learning_rate": 0.001, "loss": 1.8649, "step": 15375 }, { "epoch": 0.650478043827735, "grad_norm": 0.19329819083213806, "learning_rate": 0.001, "loss": 2.4082, "step": 15376 }, { "epoch": 0.6505203485912514, "grad_norm": 0.2139400839805603, "learning_rate": 0.001, "loss": 3.2294, "step": 15377 }, { "epoch": 0.6505626533547677, "grad_norm": 0.1699124574661255, "learning_rate": 0.001, "loss": 2.2416, "step": 15378 }, { "epoch": 0.6506049581182841, "grad_norm": 0.17111091315746307, "learning_rate": 0.001, "loss": 1.564, "step": 15379 }, { "epoch": 0.6506472628818005, "grad_norm": 0.1551768034696579, "learning_rate": 0.001, "loss": 2.4745, "step": 15380 }, { "epoch": 0.6506895676453168, "grad_norm": 0.3416825830936432, "learning_rate": 0.001, "loss": 2.7327, "step": 15381 }, { "epoch": 0.6507318724088332, "grad_norm": 0.1836758702993393, "learning_rate": 0.001, "loss": 2.3768, "step": 15382 }, { "epoch": 0.6507741771723496, "grad_norm": 0.5745386481285095, "learning_rate": 0.001, "loss": 1.7781, "step": 15383 }, { "epoch": 0.6508164819358659, "grad_norm": 0.21170569956302643, "learning_rate": 0.001, "loss": 2.6607, "step": 15384 }, { "epoch": 0.6508587866993824, "grad_norm": 0.1727539747953415, "learning_rate": 0.001, "loss": 2.1116, "step": 15385 }, { "epoch": 0.6509010914628988, "grad_norm": 1.874002456665039, "learning_rate": 0.001, "loss": 2.9078, "step": 15386 }, { "epoch": 0.6509433962264151, "grad_norm": 0.21820873022079468, "learning_rate": 0.001, "loss": 1.8, "step": 15387 }, { "epoch": 0.6509857009899315, "grad_norm": 6.197278022766113, "learning_rate": 0.001, "loss": 1.6494, "step": 15388 }, { "epoch": 0.6510280057534479, "grad_norm": 0.16244280338287354, "learning_rate": 0.001, "loss": 1.9974, "step": 15389 }, { "epoch": 0.6510703105169642, "grad_norm": 0.2022523730993271, "learning_rate": 0.001, "loss": 2.4231, "step": 15390 }, { "epoch": 0.6511126152804806, "grad_norm": 0.19882678985595703, "learning_rate": 0.001, "loss": 2.0508, "step": 15391 }, { "epoch": 0.651154920043997, "grad_norm": 0.5075106024742126, "learning_rate": 0.001, "loss": 1.6093, "step": 15392 }, { "epoch": 0.6511972248075133, "grad_norm": 0.17667998373508453, "learning_rate": 0.001, "loss": 1.8422, "step": 15393 }, { "epoch": 0.6512395295710297, "grad_norm": 0.28973835706710815, "learning_rate": 0.001, "loss": 3.3682, "step": 15394 }, { "epoch": 0.6512818343345461, "grad_norm": 36.70282745361328, "learning_rate": 0.001, "loss": 2.7338, "step": 15395 }, { "epoch": 0.6513241390980624, "grad_norm": 0.16966314613819122, "learning_rate": 0.001, "loss": 2.356, "step": 15396 }, { "epoch": 0.6513664438615788, "grad_norm": 0.18758277595043182, "learning_rate": 0.001, "loss": 1.7575, "step": 15397 }, { "epoch": 0.6514087486250952, "grad_norm": 0.16251905262470245, "learning_rate": 0.001, "loss": 1.4579, "step": 15398 }, { "epoch": 0.6514510533886115, "grad_norm": 0.24462532997131348, "learning_rate": 0.001, "loss": 2.0842, "step": 15399 }, { "epoch": 0.6514933581521279, "grad_norm": 0.29812654852867126, "learning_rate": 0.001, "loss": 2.6172, "step": 15400 }, { "epoch": 0.6515356629156444, "grad_norm": 1.4498318433761597, "learning_rate": 0.001, "loss": 1.6447, "step": 15401 }, { "epoch": 0.6515779676791607, "grad_norm": 0.19964168965816498, "learning_rate": 0.001, "loss": 1.4949, "step": 15402 }, { "epoch": 0.6516202724426771, "grad_norm": 0.17301076650619507, "learning_rate": 0.001, "loss": 3.407, "step": 15403 }, { "epoch": 0.6516625772061935, "grad_norm": 0.18589536845684052, "learning_rate": 0.001, "loss": 2.1407, "step": 15404 }, { "epoch": 0.6517048819697098, "grad_norm": 0.18076921999454498, "learning_rate": 0.001, "loss": 2.0286, "step": 15405 }, { "epoch": 0.6517471867332262, "grad_norm": 0.15782049298286438, "learning_rate": 0.001, "loss": 2.6584, "step": 15406 }, { "epoch": 0.6517894914967425, "grad_norm": 1.653499960899353, "learning_rate": 0.001, "loss": 2.1578, "step": 15407 }, { "epoch": 0.6518317962602589, "grad_norm": 0.20613858103752136, "learning_rate": 0.001, "loss": 1.7229, "step": 15408 }, { "epoch": 0.6518741010237753, "grad_norm": 0.16704171895980835, "learning_rate": 0.001, "loss": 2.4252, "step": 15409 }, { "epoch": 0.6519164057872916, "grad_norm": 0.3319103717803955, "learning_rate": 0.001, "loss": 2.2387, "step": 15410 }, { "epoch": 0.651958710550808, "grad_norm": 0.1666289120912552, "learning_rate": 0.001, "loss": 1.8471, "step": 15411 }, { "epoch": 0.6520010153143244, "grad_norm": 0.3717435896396637, "learning_rate": 0.001, "loss": 1.9818, "step": 15412 }, { "epoch": 0.6520433200778407, "grad_norm": 0.21722985804080963, "learning_rate": 0.001, "loss": 1.9053, "step": 15413 }, { "epoch": 0.6520856248413571, "grad_norm": 0.21333225071430206, "learning_rate": 0.001, "loss": 2.19, "step": 15414 }, { "epoch": 0.6521279296048735, "grad_norm": 0.22371824085712433, "learning_rate": 0.001, "loss": 2.5068, "step": 15415 }, { "epoch": 0.6521702343683898, "grad_norm": 0.1780281811952591, "learning_rate": 0.001, "loss": 1.9781, "step": 15416 }, { "epoch": 0.6522125391319062, "grad_norm": 0.1755794882774353, "learning_rate": 0.001, "loss": 2.8328, "step": 15417 }, { "epoch": 0.6522548438954227, "grad_norm": 1.0463812351226807, "learning_rate": 0.001, "loss": 2.8041, "step": 15418 }, { "epoch": 0.652297148658939, "grad_norm": 0.16094234585762024, "learning_rate": 0.001, "loss": 1.7606, "step": 15419 }, { "epoch": 0.6523394534224554, "grad_norm": 0.14978618919849396, "learning_rate": 0.001, "loss": 2.2326, "step": 15420 }, { "epoch": 0.6523817581859718, "grad_norm": 0.19463518261909485, "learning_rate": 0.001, "loss": 2.1057, "step": 15421 }, { "epoch": 0.6524240629494881, "grad_norm": 0.6183575391769409, "learning_rate": 0.001, "loss": 1.9251, "step": 15422 }, { "epoch": 0.6524663677130045, "grad_norm": 0.15496648848056793, "learning_rate": 0.001, "loss": 1.7538, "step": 15423 }, { "epoch": 0.6525086724765209, "grad_norm": 0.17894995212554932, "learning_rate": 0.001, "loss": 1.6924, "step": 15424 }, { "epoch": 0.6525509772400372, "grad_norm": 0.16409684717655182, "learning_rate": 0.001, "loss": 1.995, "step": 15425 }, { "epoch": 0.6525932820035536, "grad_norm": 0.15463796257972717, "learning_rate": 0.001, "loss": 1.8611, "step": 15426 }, { "epoch": 0.65263558676707, "grad_norm": 0.14601118862628937, "learning_rate": 0.001, "loss": 2.8148, "step": 15427 }, { "epoch": 0.6526778915305863, "grad_norm": 0.19336079061031342, "learning_rate": 0.001, "loss": 2.299, "step": 15428 }, { "epoch": 0.6527201962941027, "grad_norm": 0.15365414321422577, "learning_rate": 0.001, "loss": 1.8484, "step": 15429 }, { "epoch": 0.6527625010576191, "grad_norm": 0.17741386592388153, "learning_rate": 0.001, "loss": 1.9603, "step": 15430 }, { "epoch": 0.6528048058211354, "grad_norm": 0.1832387000322342, "learning_rate": 0.001, "loss": 2.7062, "step": 15431 }, { "epoch": 0.6528471105846518, "grad_norm": 0.16831880807876587, "learning_rate": 0.001, "loss": 1.9521, "step": 15432 }, { "epoch": 0.6528894153481682, "grad_norm": 0.2662883698940277, "learning_rate": 0.001, "loss": 1.614, "step": 15433 }, { "epoch": 0.6529317201116845, "grad_norm": 0.15198548138141632, "learning_rate": 0.001, "loss": 1.7864, "step": 15434 }, { "epoch": 0.652974024875201, "grad_norm": 0.1819853037595749, "learning_rate": 0.001, "loss": 1.8496, "step": 15435 }, { "epoch": 0.6530163296387174, "grad_norm": 0.17152895033359528, "learning_rate": 0.001, "loss": 2.5133, "step": 15436 }, { "epoch": 0.6530586344022337, "grad_norm": 0.16041690111160278, "learning_rate": 0.001, "loss": 1.5841, "step": 15437 }, { "epoch": 0.6531009391657501, "grad_norm": 0.1594046652317047, "learning_rate": 0.001, "loss": 1.9821, "step": 15438 }, { "epoch": 0.6531432439292665, "grad_norm": 0.45842236280441284, "learning_rate": 0.001, "loss": 2.6672, "step": 15439 }, { "epoch": 0.6531855486927828, "grad_norm": 0.6107159852981567, "learning_rate": 0.001, "loss": 2.3053, "step": 15440 }, { "epoch": 0.6532278534562992, "grad_norm": 0.19147630035877228, "learning_rate": 0.001, "loss": 1.6185, "step": 15441 }, { "epoch": 0.6532701582198156, "grad_norm": 0.1753721982240677, "learning_rate": 0.001, "loss": 2.134, "step": 15442 }, { "epoch": 0.6533124629833319, "grad_norm": 0.18597379326820374, "learning_rate": 0.001, "loss": 2.6756, "step": 15443 }, { "epoch": 0.6533547677468483, "grad_norm": 0.17725729942321777, "learning_rate": 0.001, "loss": 2.1048, "step": 15444 }, { "epoch": 0.6533970725103647, "grad_norm": 0.13884805142879486, "learning_rate": 0.001, "loss": 1.2568, "step": 15445 }, { "epoch": 0.653439377273881, "grad_norm": 0.9566793441772461, "learning_rate": 0.001, "loss": 1.6837, "step": 15446 }, { "epoch": 0.6534816820373974, "grad_norm": 0.2636922597885132, "learning_rate": 0.001, "loss": 2.301, "step": 15447 }, { "epoch": 0.6535239868009138, "grad_norm": 0.20910915732383728, "learning_rate": 0.001, "loss": 2.7653, "step": 15448 }, { "epoch": 0.6535662915644301, "grad_norm": 0.38089674711227417, "learning_rate": 0.001, "loss": 1.8104, "step": 15449 }, { "epoch": 0.6536085963279465, "grad_norm": 0.18689171969890594, "learning_rate": 0.001, "loss": 1.6432, "step": 15450 }, { "epoch": 0.6536509010914628, "grad_norm": 0.19268843531608582, "learning_rate": 0.001, "loss": 1.7838, "step": 15451 }, { "epoch": 0.6536932058549793, "grad_norm": 0.17988325655460358, "learning_rate": 0.001, "loss": 2.2731, "step": 15452 }, { "epoch": 0.6537355106184957, "grad_norm": 0.17474591732025146, "learning_rate": 0.001, "loss": 1.865, "step": 15453 }, { "epoch": 0.653777815382012, "grad_norm": 0.16267631947994232, "learning_rate": 0.001, "loss": 1.614, "step": 15454 }, { "epoch": 0.6538201201455284, "grad_norm": 0.27630341053009033, "learning_rate": 0.001, "loss": 1.9749, "step": 15455 }, { "epoch": 0.6538624249090448, "grad_norm": 0.16530779004096985, "learning_rate": 0.001, "loss": 1.5857, "step": 15456 }, { "epoch": 0.6539047296725611, "grad_norm": 0.7646900415420532, "learning_rate": 0.001, "loss": 2.128, "step": 15457 }, { "epoch": 0.6539470344360775, "grad_norm": 0.21980629861354828, "learning_rate": 0.001, "loss": 2.7452, "step": 15458 }, { "epoch": 0.6539893391995939, "grad_norm": 0.18467316031455994, "learning_rate": 0.001, "loss": 3.4153, "step": 15459 }, { "epoch": 0.6540316439631102, "grad_norm": 0.1603761464357376, "learning_rate": 0.001, "loss": 2.3351, "step": 15460 }, { "epoch": 0.6540739487266266, "grad_norm": 0.19741065800189972, "learning_rate": 0.001, "loss": 1.9429, "step": 15461 }, { "epoch": 0.654116253490143, "grad_norm": 0.2010018527507782, "learning_rate": 0.001, "loss": 2.3873, "step": 15462 }, { "epoch": 0.6541585582536593, "grad_norm": 0.1678660362958908, "learning_rate": 0.001, "loss": 1.8596, "step": 15463 }, { "epoch": 0.6542008630171757, "grad_norm": 0.15577636659145355, "learning_rate": 0.001, "loss": 2.438, "step": 15464 }, { "epoch": 0.6542431677806921, "grad_norm": 0.16869132220745087, "learning_rate": 0.001, "loss": 2.0241, "step": 15465 }, { "epoch": 0.6542854725442084, "grad_norm": 0.16793528199195862, "learning_rate": 0.001, "loss": 2.3212, "step": 15466 }, { "epoch": 0.6543277773077248, "grad_norm": 0.14412140846252441, "learning_rate": 0.001, "loss": 2.1674, "step": 15467 }, { "epoch": 0.6543700820712413, "grad_norm": 0.17492231726646423, "learning_rate": 0.001, "loss": 1.7155, "step": 15468 }, { "epoch": 0.6544123868347576, "grad_norm": 0.16392190754413605, "learning_rate": 0.001, "loss": 1.5997, "step": 15469 }, { "epoch": 0.654454691598274, "grad_norm": 0.15477705001831055, "learning_rate": 0.001, "loss": 2.3755, "step": 15470 }, { "epoch": 0.6544969963617904, "grad_norm": 9.079736709594727, "learning_rate": 0.001, "loss": 2.5364, "step": 15471 }, { "epoch": 0.6545393011253067, "grad_norm": 0.2828308641910553, "learning_rate": 0.001, "loss": 1.9828, "step": 15472 }, { "epoch": 0.6545816058888231, "grad_norm": 0.2948327958583832, "learning_rate": 0.001, "loss": 2.5522, "step": 15473 }, { "epoch": 0.6546239106523395, "grad_norm": 0.2276574969291687, "learning_rate": 0.001, "loss": 2.3777, "step": 15474 }, { "epoch": 0.6546662154158558, "grad_norm": 0.23238156735897064, "learning_rate": 0.001, "loss": 1.8858, "step": 15475 }, { "epoch": 0.6547085201793722, "grad_norm": 0.2526158094406128, "learning_rate": 0.001, "loss": 2.4122, "step": 15476 }, { "epoch": 0.6547508249428886, "grad_norm": 0.1535889059305191, "learning_rate": 0.001, "loss": 1.8641, "step": 15477 }, { "epoch": 0.6547931297064049, "grad_norm": 0.9799718856811523, "learning_rate": 0.001, "loss": 2.1668, "step": 15478 }, { "epoch": 0.6548354344699213, "grad_norm": 0.18154305219650269, "learning_rate": 0.001, "loss": 2.3197, "step": 15479 }, { "epoch": 0.6548777392334377, "grad_norm": 0.1667226254940033, "learning_rate": 0.001, "loss": 2.3554, "step": 15480 }, { "epoch": 0.654920043996954, "grad_norm": 1.3010785579681396, "learning_rate": 0.001, "loss": 2.7386, "step": 15481 }, { "epoch": 0.6549623487604704, "grad_norm": 0.19829553365707397, "learning_rate": 0.001, "loss": 1.6301, "step": 15482 }, { "epoch": 0.6550046535239868, "grad_norm": 0.2689056098461151, "learning_rate": 0.001, "loss": 3.029, "step": 15483 }, { "epoch": 0.6550469582875031, "grad_norm": 0.33899521827697754, "learning_rate": 0.001, "loss": 2.2413, "step": 15484 }, { "epoch": 0.6550892630510196, "grad_norm": 0.26710861921310425, "learning_rate": 0.001, "loss": 2.9207, "step": 15485 }, { "epoch": 0.655131567814536, "grad_norm": 0.22953356802463531, "learning_rate": 0.001, "loss": 2.1988, "step": 15486 }, { "epoch": 0.6551738725780523, "grad_norm": 0.2150760293006897, "learning_rate": 0.001, "loss": 2.098, "step": 15487 }, { "epoch": 0.6552161773415687, "grad_norm": 0.20908361673355103, "learning_rate": 0.001, "loss": 2.7129, "step": 15488 }, { "epoch": 0.6552584821050851, "grad_norm": 0.20225393772125244, "learning_rate": 0.001, "loss": 2.7402, "step": 15489 }, { "epoch": 0.6553007868686014, "grad_norm": 0.2164647877216339, "learning_rate": 0.001, "loss": 2.245, "step": 15490 }, { "epoch": 0.6553430916321178, "grad_norm": 0.17477239668369293, "learning_rate": 0.001, "loss": 2.915, "step": 15491 }, { "epoch": 0.6553853963956342, "grad_norm": 1.133888602256775, "learning_rate": 0.001, "loss": 1.5993, "step": 15492 }, { "epoch": 0.6554277011591505, "grad_norm": 1.6383650302886963, "learning_rate": 0.001, "loss": 1.6001, "step": 15493 }, { "epoch": 0.6554700059226669, "grad_norm": 0.419475257396698, "learning_rate": 0.001, "loss": 1.8772, "step": 15494 }, { "epoch": 0.6555123106861833, "grad_norm": 0.18061023950576782, "learning_rate": 0.001, "loss": 2.0269, "step": 15495 }, { "epoch": 0.6555546154496996, "grad_norm": 0.3554385304450989, "learning_rate": 0.001, "loss": 2.9187, "step": 15496 }, { "epoch": 0.655596920213216, "grad_norm": 0.39084017276763916, "learning_rate": 0.001, "loss": 3.1199, "step": 15497 }, { "epoch": 0.6556392249767323, "grad_norm": 0.168495312333107, "learning_rate": 0.001, "loss": 3.786, "step": 15498 }, { "epoch": 0.6556815297402487, "grad_norm": 0.6152557134628296, "learning_rate": 0.001, "loss": 2.2492, "step": 15499 }, { "epoch": 0.6557238345037651, "grad_norm": 0.19804157316684723, "learning_rate": 0.001, "loss": 2.449, "step": 15500 }, { "epoch": 0.6557661392672814, "grad_norm": 0.1926928460597992, "learning_rate": 0.001, "loss": 2.3249, "step": 15501 }, { "epoch": 0.6558084440307979, "grad_norm": 0.1641806960105896, "learning_rate": 0.001, "loss": 1.742, "step": 15502 }, { "epoch": 0.6558507487943143, "grad_norm": 0.1997319757938385, "learning_rate": 0.001, "loss": 1.7896, "step": 15503 }, { "epoch": 0.6558930535578306, "grad_norm": 0.2186889350414276, "learning_rate": 0.001, "loss": 2.4833, "step": 15504 }, { "epoch": 0.655935358321347, "grad_norm": 0.16924524307250977, "learning_rate": 0.001, "loss": 2.279, "step": 15505 }, { "epoch": 0.6559776630848634, "grad_norm": 0.17367133498191833, "learning_rate": 0.001, "loss": 2.3269, "step": 15506 }, { "epoch": 0.6560199678483797, "grad_norm": 0.1684117615222931, "learning_rate": 0.001, "loss": 2.4659, "step": 15507 }, { "epoch": 0.6560622726118961, "grad_norm": 8.400760650634766, "learning_rate": 0.001, "loss": 2.1449, "step": 15508 }, { "epoch": 0.6561045773754125, "grad_norm": 0.17332401871681213, "learning_rate": 0.001, "loss": 1.8829, "step": 15509 }, { "epoch": 0.6561468821389288, "grad_norm": 0.4135810136795044, "learning_rate": 0.001, "loss": 2.5169, "step": 15510 }, { "epoch": 0.6561891869024452, "grad_norm": 0.15025272965431213, "learning_rate": 0.001, "loss": 1.9536, "step": 15511 }, { "epoch": 0.6562314916659616, "grad_norm": 0.21531356871128082, "learning_rate": 0.001, "loss": 2.1685, "step": 15512 }, { "epoch": 0.6562737964294779, "grad_norm": 0.20930558443069458, "learning_rate": 0.001, "loss": 2.3867, "step": 15513 }, { "epoch": 0.6563161011929943, "grad_norm": 0.1821804642677307, "learning_rate": 0.001, "loss": 1.4883, "step": 15514 }, { "epoch": 0.6563584059565107, "grad_norm": 0.2047373205423355, "learning_rate": 0.001, "loss": 3.2604, "step": 15515 }, { "epoch": 0.656400710720027, "grad_norm": 0.1793907731771469, "learning_rate": 0.001, "loss": 2.5151, "step": 15516 }, { "epoch": 0.6564430154835434, "grad_norm": 0.20762686431407928, "learning_rate": 0.001, "loss": 3.577, "step": 15517 }, { "epoch": 0.6564853202470599, "grad_norm": 0.20441284775733948, "learning_rate": 0.001, "loss": 2.101, "step": 15518 }, { "epoch": 0.6565276250105762, "grad_norm": 0.3753068447113037, "learning_rate": 0.001, "loss": 3.6612, "step": 15519 }, { "epoch": 0.6565699297740926, "grad_norm": 0.1667603701353073, "learning_rate": 0.001, "loss": 1.8838, "step": 15520 }, { "epoch": 0.656612234537609, "grad_norm": 16.620149612426758, "learning_rate": 0.001, "loss": 2.5339, "step": 15521 }, { "epoch": 0.6566545393011253, "grad_norm": 0.17226529121398926, "learning_rate": 0.001, "loss": 2.9333, "step": 15522 }, { "epoch": 0.6566968440646417, "grad_norm": 0.1851300597190857, "learning_rate": 0.001, "loss": 2.8224, "step": 15523 }, { "epoch": 0.6567391488281581, "grad_norm": 0.1961529552936554, "learning_rate": 0.001, "loss": 2.389, "step": 15524 }, { "epoch": 0.6567814535916744, "grad_norm": 0.2944627106189728, "learning_rate": 0.001, "loss": 2.1613, "step": 15525 }, { "epoch": 0.6568237583551908, "grad_norm": 0.4502488374710083, "learning_rate": 0.001, "loss": 1.678, "step": 15526 }, { "epoch": 0.6568660631187072, "grad_norm": 0.19314773380756378, "learning_rate": 0.001, "loss": 2.8654, "step": 15527 }, { "epoch": 0.6569083678822235, "grad_norm": 0.19277732074260712, "learning_rate": 0.001, "loss": 3.3764, "step": 15528 }, { "epoch": 0.6569506726457399, "grad_norm": 0.2084587961435318, "learning_rate": 0.001, "loss": 3.0331, "step": 15529 }, { "epoch": 0.6569929774092563, "grad_norm": 0.24447475373744965, "learning_rate": 0.001, "loss": 2.4765, "step": 15530 }, { "epoch": 0.6570352821727726, "grad_norm": 0.4201889932155609, "learning_rate": 0.001, "loss": 1.8756, "step": 15531 }, { "epoch": 0.657077586936289, "grad_norm": 0.3729053735733032, "learning_rate": 0.001, "loss": 1.9558, "step": 15532 }, { "epoch": 0.6571198916998054, "grad_norm": 0.21242676675319672, "learning_rate": 0.001, "loss": 2.0069, "step": 15533 }, { "epoch": 0.6571621964633217, "grad_norm": 0.1712314486503601, "learning_rate": 0.001, "loss": 1.3812, "step": 15534 }, { "epoch": 0.6572045012268382, "grad_norm": 0.1673876792192459, "learning_rate": 0.001, "loss": 1.1909, "step": 15535 }, { "epoch": 0.6572468059903546, "grad_norm": 0.15203256905078888, "learning_rate": 0.001, "loss": 1.6762, "step": 15536 }, { "epoch": 0.6572891107538709, "grad_norm": 0.15472784638404846, "learning_rate": 0.001, "loss": 1.5563, "step": 15537 }, { "epoch": 0.6573314155173873, "grad_norm": 0.22867830097675323, "learning_rate": 0.001, "loss": 2.4964, "step": 15538 }, { "epoch": 0.6573737202809037, "grad_norm": 1.3349720239639282, "learning_rate": 0.001, "loss": 1.7851, "step": 15539 }, { "epoch": 0.65741602504442, "grad_norm": 3.2347517013549805, "learning_rate": 0.001, "loss": 1.8873, "step": 15540 }, { "epoch": 0.6574583298079364, "grad_norm": 1.0401722192764282, "learning_rate": 0.001, "loss": 1.982, "step": 15541 }, { "epoch": 0.6575006345714527, "grad_norm": 0.4150663912296295, "learning_rate": 0.001, "loss": 2.4127, "step": 15542 }, { "epoch": 0.6575429393349691, "grad_norm": 0.28380629420280457, "learning_rate": 0.001, "loss": 1.7512, "step": 15543 }, { "epoch": 0.6575852440984855, "grad_norm": 0.1765417903661728, "learning_rate": 0.001, "loss": 2.7307, "step": 15544 }, { "epoch": 0.6576275488620018, "grad_norm": 0.22841008007526398, "learning_rate": 0.001, "loss": 2.1964, "step": 15545 }, { "epoch": 0.6576698536255182, "grad_norm": 0.3355230391025543, "learning_rate": 0.001, "loss": 2.1679, "step": 15546 }, { "epoch": 0.6577121583890346, "grad_norm": 0.16296561062335968, "learning_rate": 0.001, "loss": 1.8227, "step": 15547 }, { "epoch": 0.6577544631525509, "grad_norm": 0.2643844485282898, "learning_rate": 0.001, "loss": 2.0391, "step": 15548 }, { "epoch": 0.6577967679160673, "grad_norm": 0.19168947637081146, "learning_rate": 0.001, "loss": 1.9581, "step": 15549 }, { "epoch": 0.6578390726795837, "grad_norm": 0.17555919289588928, "learning_rate": 0.001, "loss": 1.7128, "step": 15550 }, { "epoch": 0.6578813774431, "grad_norm": 0.21022935211658478, "learning_rate": 0.001, "loss": 2.0966, "step": 15551 }, { "epoch": 0.6579236822066165, "grad_norm": 0.17026807367801666, "learning_rate": 0.001, "loss": 1.7935, "step": 15552 }, { "epoch": 0.6579659869701329, "grad_norm": 9.642647743225098, "learning_rate": 0.001, "loss": 1.9757, "step": 15553 }, { "epoch": 0.6580082917336492, "grad_norm": 0.1723700612783432, "learning_rate": 0.001, "loss": 2.0512, "step": 15554 }, { "epoch": 0.6580505964971656, "grad_norm": 0.16185259819030762, "learning_rate": 0.001, "loss": 2.1963, "step": 15555 }, { "epoch": 0.658092901260682, "grad_norm": 0.17926618456840515, "learning_rate": 0.001, "loss": 2.3246, "step": 15556 }, { "epoch": 0.6581352060241983, "grad_norm": 0.31012213230133057, "learning_rate": 0.001, "loss": 2.4251, "step": 15557 }, { "epoch": 0.6581775107877147, "grad_norm": 0.2613414525985718, "learning_rate": 0.001, "loss": 3.0419, "step": 15558 }, { "epoch": 0.6582198155512311, "grad_norm": 0.17412632703781128, "learning_rate": 0.001, "loss": 2.2753, "step": 15559 }, { "epoch": 0.6582621203147474, "grad_norm": 0.40548646450042725, "learning_rate": 0.001, "loss": 1.9838, "step": 15560 }, { "epoch": 0.6583044250782638, "grad_norm": 0.16524159908294678, "learning_rate": 0.001, "loss": 2.8922, "step": 15561 }, { "epoch": 0.6583467298417802, "grad_norm": 0.17522113025188446, "learning_rate": 0.001, "loss": 1.704, "step": 15562 }, { "epoch": 0.6583890346052965, "grad_norm": 0.1847570687532425, "learning_rate": 0.001, "loss": 2.1191, "step": 15563 }, { "epoch": 0.6584313393688129, "grad_norm": 0.16982965171337128, "learning_rate": 0.001, "loss": 1.4231, "step": 15564 }, { "epoch": 0.6584736441323293, "grad_norm": 0.16816236078739166, "learning_rate": 0.001, "loss": 2.9246, "step": 15565 }, { "epoch": 0.6585159488958456, "grad_norm": 0.18339279294013977, "learning_rate": 0.001, "loss": 2.7366, "step": 15566 }, { "epoch": 0.658558253659362, "grad_norm": 0.15726692974567413, "learning_rate": 0.001, "loss": 1.7157, "step": 15567 }, { "epoch": 0.6586005584228785, "grad_norm": 0.1590546816587448, "learning_rate": 0.001, "loss": 1.957, "step": 15568 }, { "epoch": 0.6586428631863948, "grad_norm": 0.28861668705940247, "learning_rate": 0.001, "loss": 2.2768, "step": 15569 }, { "epoch": 0.6586851679499112, "grad_norm": 0.15033060312271118, "learning_rate": 0.001, "loss": 1.8739, "step": 15570 }, { "epoch": 0.6587274727134276, "grad_norm": 0.17307975888252258, "learning_rate": 0.001, "loss": 1.7251, "step": 15571 }, { "epoch": 0.6587697774769439, "grad_norm": 0.4346740245819092, "learning_rate": 0.001, "loss": 3.3548, "step": 15572 }, { "epoch": 0.6588120822404603, "grad_norm": 0.24258974194526672, "learning_rate": 0.001, "loss": 2.4327, "step": 15573 }, { "epoch": 0.6588543870039767, "grad_norm": 0.1866195797920227, "learning_rate": 0.001, "loss": 2.1185, "step": 15574 }, { "epoch": 0.658896691767493, "grad_norm": 0.15456928312778473, "learning_rate": 0.001, "loss": 1.487, "step": 15575 }, { "epoch": 0.6589389965310094, "grad_norm": 0.18687579035758972, "learning_rate": 0.001, "loss": 2.329, "step": 15576 }, { "epoch": 0.6589813012945258, "grad_norm": 0.16042332351207733, "learning_rate": 0.001, "loss": 2.1489, "step": 15577 }, { "epoch": 0.6590236060580421, "grad_norm": 0.15789800882339478, "learning_rate": 0.001, "loss": 2.7708, "step": 15578 }, { "epoch": 0.6590659108215585, "grad_norm": 0.4976028501987457, "learning_rate": 0.001, "loss": 1.66, "step": 15579 }, { "epoch": 0.6591082155850749, "grad_norm": 0.18985185027122498, "learning_rate": 0.001, "loss": 1.7931, "step": 15580 }, { "epoch": 0.6591505203485912, "grad_norm": 0.18553532660007477, "learning_rate": 0.001, "loss": 2.2777, "step": 15581 }, { "epoch": 0.6591928251121076, "grad_norm": 0.15861479938030243, "learning_rate": 0.001, "loss": 1.9693, "step": 15582 }, { "epoch": 0.659235129875624, "grad_norm": 0.34126266837120056, "learning_rate": 0.001, "loss": 1.8181, "step": 15583 }, { "epoch": 0.6592774346391403, "grad_norm": 3.384877920150757, "learning_rate": 0.001, "loss": 1.9545, "step": 15584 }, { "epoch": 0.6593197394026568, "grad_norm": 0.18254320323467255, "learning_rate": 0.001, "loss": 2.2612, "step": 15585 }, { "epoch": 0.6593620441661731, "grad_norm": 0.17780011892318726, "learning_rate": 0.001, "loss": 2.3693, "step": 15586 }, { "epoch": 0.6594043489296895, "grad_norm": 0.2957765460014343, "learning_rate": 0.001, "loss": 2.8228, "step": 15587 }, { "epoch": 0.6594466536932059, "grad_norm": 0.20004132390022278, "learning_rate": 0.001, "loss": 2.2395, "step": 15588 }, { "epoch": 0.6594889584567222, "grad_norm": 0.16427592933177948, "learning_rate": 0.001, "loss": 1.9505, "step": 15589 }, { "epoch": 0.6595312632202386, "grad_norm": 0.3645588755607605, "learning_rate": 0.001, "loss": 2.4585, "step": 15590 }, { "epoch": 0.659573567983755, "grad_norm": 0.1937217265367508, "learning_rate": 0.001, "loss": 1.9487, "step": 15591 }, { "epoch": 0.6596158727472713, "grad_norm": 0.5016273260116577, "learning_rate": 0.001, "loss": 3.591, "step": 15592 }, { "epoch": 0.6596581775107877, "grad_norm": 0.15664246678352356, "learning_rate": 0.001, "loss": 1.5117, "step": 15593 }, { "epoch": 0.6597004822743041, "grad_norm": 1.7184746265411377, "learning_rate": 0.001, "loss": 2.2226, "step": 15594 }, { "epoch": 0.6597427870378204, "grad_norm": 1.1707103252410889, "learning_rate": 0.001, "loss": 2.213, "step": 15595 }, { "epoch": 0.6597850918013368, "grad_norm": 0.1881413757801056, "learning_rate": 0.001, "loss": 2.1988, "step": 15596 }, { "epoch": 0.6598273965648532, "grad_norm": 0.1818539947271347, "learning_rate": 0.001, "loss": 2.2715, "step": 15597 }, { "epoch": 0.6598697013283695, "grad_norm": 0.5400058627128601, "learning_rate": 0.001, "loss": 1.6366, "step": 15598 }, { "epoch": 0.6599120060918859, "grad_norm": 0.3063301742076874, "learning_rate": 0.001, "loss": 2.4906, "step": 15599 }, { "epoch": 0.6599543108554023, "grad_norm": 0.2518875300884247, "learning_rate": 0.001, "loss": 2.2794, "step": 15600 }, { "epoch": 0.6599966156189186, "grad_norm": 0.17568255960941315, "learning_rate": 0.001, "loss": 2.1365, "step": 15601 }, { "epoch": 0.6600389203824351, "grad_norm": 0.17697425186634064, "learning_rate": 0.001, "loss": 2.8336, "step": 15602 }, { "epoch": 0.6600812251459515, "grad_norm": 1.3957675695419312, "learning_rate": 0.001, "loss": 2.2867, "step": 15603 }, { "epoch": 0.6601235299094678, "grad_norm": 0.15436089038848877, "learning_rate": 0.001, "loss": 1.9459, "step": 15604 }, { "epoch": 0.6601658346729842, "grad_norm": 0.16911160945892334, "learning_rate": 0.001, "loss": 2.0371, "step": 15605 }, { "epoch": 0.6602081394365006, "grad_norm": 0.2687240242958069, "learning_rate": 0.001, "loss": 2.4646, "step": 15606 }, { "epoch": 0.6602504442000169, "grad_norm": 0.2176024168729782, "learning_rate": 0.001, "loss": 2.0944, "step": 15607 }, { "epoch": 0.6602927489635333, "grad_norm": 0.4650316536426544, "learning_rate": 0.001, "loss": 1.8368, "step": 15608 }, { "epoch": 0.6603350537270497, "grad_norm": 0.21563652157783508, "learning_rate": 0.001, "loss": 3.3112, "step": 15609 }, { "epoch": 0.660377358490566, "grad_norm": 0.14292295277118683, "learning_rate": 0.001, "loss": 2.6444, "step": 15610 }, { "epoch": 0.6604196632540824, "grad_norm": 0.9657493233680725, "learning_rate": 0.001, "loss": 2.7724, "step": 15611 }, { "epoch": 0.6604619680175988, "grad_norm": 0.31469717621803284, "learning_rate": 0.001, "loss": 2.1013, "step": 15612 }, { "epoch": 0.6605042727811151, "grad_norm": 0.16225768625736237, "learning_rate": 0.001, "loss": 1.816, "step": 15613 }, { "epoch": 0.6605465775446315, "grad_norm": 0.19633138179779053, "learning_rate": 0.001, "loss": 1.9044, "step": 15614 }, { "epoch": 0.6605888823081479, "grad_norm": 0.18959277868270874, "learning_rate": 0.001, "loss": 2.2034, "step": 15615 }, { "epoch": 0.6606311870716642, "grad_norm": 0.25994089245796204, "learning_rate": 0.001, "loss": 2.6884, "step": 15616 }, { "epoch": 0.6606734918351806, "grad_norm": 0.17219536006450653, "learning_rate": 0.001, "loss": 2.4676, "step": 15617 }, { "epoch": 0.6607157965986971, "grad_norm": 0.15563371777534485, "learning_rate": 0.001, "loss": 2.8265, "step": 15618 }, { "epoch": 0.6607581013622134, "grad_norm": 0.1527656465768814, "learning_rate": 0.001, "loss": 1.3507, "step": 15619 }, { "epoch": 0.6608004061257298, "grad_norm": 0.2252880334854126, "learning_rate": 0.001, "loss": 2.4293, "step": 15620 }, { "epoch": 0.6608427108892462, "grad_norm": 0.20225971937179565, "learning_rate": 0.001, "loss": 2.1757, "step": 15621 }, { "epoch": 0.6608850156527625, "grad_norm": 0.4528272747993469, "learning_rate": 0.001, "loss": 1.8446, "step": 15622 }, { "epoch": 0.6609273204162789, "grad_norm": 0.16811969876289368, "learning_rate": 0.001, "loss": 1.8456, "step": 15623 }, { "epoch": 0.6609696251797953, "grad_norm": 0.16160327196121216, "learning_rate": 0.001, "loss": 2.14, "step": 15624 }, { "epoch": 0.6610119299433116, "grad_norm": 0.17660216987133026, "learning_rate": 0.001, "loss": 3.2061, "step": 15625 }, { "epoch": 0.661054234706828, "grad_norm": 0.20479421317577362, "learning_rate": 0.001, "loss": 4.2633, "step": 15626 }, { "epoch": 0.6610965394703444, "grad_norm": 0.21080619096755981, "learning_rate": 0.001, "loss": 2.2565, "step": 15627 }, { "epoch": 0.6611388442338607, "grad_norm": 0.180558443069458, "learning_rate": 0.001, "loss": 1.8742, "step": 15628 }, { "epoch": 0.6611811489973771, "grad_norm": 0.21047769486904144, "learning_rate": 0.001, "loss": 2.6093, "step": 15629 }, { "epoch": 0.6612234537608935, "grad_norm": 0.1717175841331482, "learning_rate": 0.001, "loss": 1.7606, "step": 15630 }, { "epoch": 0.6612657585244098, "grad_norm": 0.19754162430763245, "learning_rate": 0.001, "loss": 2.4099, "step": 15631 }, { "epoch": 0.6613080632879262, "grad_norm": 0.24597451090812683, "learning_rate": 0.001, "loss": 2.0901, "step": 15632 }, { "epoch": 0.6613503680514425, "grad_norm": 0.1566450297832489, "learning_rate": 0.001, "loss": 1.5422, "step": 15633 }, { "epoch": 0.661392672814959, "grad_norm": 0.16817563772201538, "learning_rate": 0.001, "loss": 1.6473, "step": 15634 }, { "epoch": 0.6614349775784754, "grad_norm": 0.16162802278995514, "learning_rate": 0.001, "loss": 2.6791, "step": 15635 }, { "epoch": 0.6614772823419917, "grad_norm": 0.15925811231136322, "learning_rate": 0.001, "loss": 2.2663, "step": 15636 }, { "epoch": 0.6615195871055081, "grad_norm": 0.17941713333129883, "learning_rate": 0.001, "loss": 2.1478, "step": 15637 }, { "epoch": 0.6615618918690245, "grad_norm": 0.14808796346187592, "learning_rate": 0.001, "loss": 2.3699, "step": 15638 }, { "epoch": 0.6616041966325408, "grad_norm": 0.17554134130477905, "learning_rate": 0.001, "loss": 1.5387, "step": 15639 }, { "epoch": 0.6616465013960572, "grad_norm": 0.17657719552516937, "learning_rate": 0.001, "loss": 1.8514, "step": 15640 }, { "epoch": 0.6616888061595736, "grad_norm": 0.9961352348327637, "learning_rate": 0.001, "loss": 2.5427, "step": 15641 }, { "epoch": 0.6617311109230899, "grad_norm": 0.16566766798496246, "learning_rate": 0.001, "loss": 3.038, "step": 15642 }, { "epoch": 0.6617734156866063, "grad_norm": 0.3651495575904846, "learning_rate": 0.001, "loss": 2.575, "step": 15643 }, { "epoch": 0.6618157204501227, "grad_norm": 27.26308250427246, "learning_rate": 0.001, "loss": 1.8942, "step": 15644 }, { "epoch": 0.661858025213639, "grad_norm": 0.15556779503822327, "learning_rate": 0.001, "loss": 3.0992, "step": 15645 }, { "epoch": 0.6619003299771554, "grad_norm": 0.2336292266845703, "learning_rate": 0.001, "loss": 2.0776, "step": 15646 }, { "epoch": 0.6619426347406718, "grad_norm": 0.191901296377182, "learning_rate": 0.001, "loss": 2.344, "step": 15647 }, { "epoch": 0.6619849395041881, "grad_norm": 0.1957559734582901, "learning_rate": 0.001, "loss": 2.3465, "step": 15648 }, { "epoch": 0.6620272442677045, "grad_norm": 0.19413861632347107, "learning_rate": 0.001, "loss": 2.6151, "step": 15649 }, { "epoch": 0.662069549031221, "grad_norm": 0.2169937938451767, "learning_rate": 0.001, "loss": 2.3186, "step": 15650 }, { "epoch": 0.6621118537947372, "grad_norm": 0.16846176981925964, "learning_rate": 0.001, "loss": 1.4052, "step": 15651 }, { "epoch": 0.6621541585582537, "grad_norm": 0.23452334105968475, "learning_rate": 0.001, "loss": 2.1938, "step": 15652 }, { "epoch": 0.6621964633217701, "grad_norm": 0.19128349423408508, "learning_rate": 0.001, "loss": 2.3334, "step": 15653 }, { "epoch": 0.6622387680852864, "grad_norm": 0.3340997099876404, "learning_rate": 0.001, "loss": 2.0692, "step": 15654 }, { "epoch": 0.6622810728488028, "grad_norm": 0.2314499020576477, "learning_rate": 0.001, "loss": 1.8396, "step": 15655 }, { "epoch": 0.6623233776123192, "grad_norm": 0.19297367334365845, "learning_rate": 0.001, "loss": 2.2581, "step": 15656 }, { "epoch": 0.6623656823758355, "grad_norm": 0.15719622373580933, "learning_rate": 0.001, "loss": 1.741, "step": 15657 }, { "epoch": 0.6624079871393519, "grad_norm": 0.1912499964237213, "learning_rate": 0.001, "loss": 2.2499, "step": 15658 }, { "epoch": 0.6624502919028683, "grad_norm": 0.21334213018417358, "learning_rate": 0.001, "loss": 2.5132, "step": 15659 }, { "epoch": 0.6624925966663846, "grad_norm": 0.215172678232193, "learning_rate": 0.001, "loss": 1.9443, "step": 15660 }, { "epoch": 0.662534901429901, "grad_norm": 0.18598368763923645, "learning_rate": 0.001, "loss": 2.266, "step": 15661 }, { "epoch": 0.6625772061934174, "grad_norm": 0.1715102642774582, "learning_rate": 0.001, "loss": 2.4652, "step": 15662 }, { "epoch": 0.6626195109569337, "grad_norm": 0.19849710166454315, "learning_rate": 0.001, "loss": 1.9286, "step": 15663 }, { "epoch": 0.6626618157204501, "grad_norm": 0.24974191188812256, "learning_rate": 0.001, "loss": 2.3705, "step": 15664 }, { "epoch": 0.6627041204839665, "grad_norm": 0.1930912882089615, "learning_rate": 0.001, "loss": 1.6271, "step": 15665 }, { "epoch": 0.6627464252474828, "grad_norm": 0.18645654618740082, "learning_rate": 0.001, "loss": 1.5376, "step": 15666 }, { "epoch": 0.6627887300109992, "grad_norm": 0.14447803795337677, "learning_rate": 0.001, "loss": 1.5098, "step": 15667 }, { "epoch": 0.6628310347745157, "grad_norm": 0.1653042733669281, "learning_rate": 0.001, "loss": 1.52, "step": 15668 }, { "epoch": 0.662873339538032, "grad_norm": 0.16530010104179382, "learning_rate": 0.001, "loss": 1.9507, "step": 15669 }, { "epoch": 0.6629156443015484, "grad_norm": 0.18780213594436646, "learning_rate": 0.001, "loss": 1.5614, "step": 15670 }, { "epoch": 0.6629579490650648, "grad_norm": 3.833364725112915, "learning_rate": 0.001, "loss": 1.9021, "step": 15671 }, { "epoch": 0.6630002538285811, "grad_norm": 0.16255183517932892, "learning_rate": 0.001, "loss": 2.8819, "step": 15672 }, { "epoch": 0.6630425585920975, "grad_norm": 0.19622357189655304, "learning_rate": 0.001, "loss": 3.1144, "step": 15673 }, { "epoch": 0.6630848633556139, "grad_norm": 0.22275784611701965, "learning_rate": 0.001, "loss": 2.0371, "step": 15674 }, { "epoch": 0.6631271681191302, "grad_norm": 0.17636406421661377, "learning_rate": 0.001, "loss": 2.1601, "step": 15675 }, { "epoch": 0.6631694728826466, "grad_norm": 0.23526060581207275, "learning_rate": 0.001, "loss": 2.5409, "step": 15676 }, { "epoch": 0.6632117776461629, "grad_norm": 0.17581601440906525, "learning_rate": 0.001, "loss": 1.7807, "step": 15677 }, { "epoch": 0.6632540824096793, "grad_norm": 0.14658299088478088, "learning_rate": 0.001, "loss": 2.1031, "step": 15678 }, { "epoch": 0.6632963871731957, "grad_norm": 0.19805100560188293, "learning_rate": 0.001, "loss": 1.9881, "step": 15679 }, { "epoch": 0.663338691936712, "grad_norm": 0.1707378327846527, "learning_rate": 0.001, "loss": 2.3714, "step": 15680 }, { "epoch": 0.6633809967002284, "grad_norm": 0.3700905740261078, "learning_rate": 0.001, "loss": 1.8238, "step": 15681 }, { "epoch": 0.6634233014637448, "grad_norm": 0.2583630084991455, "learning_rate": 0.001, "loss": 3.4336, "step": 15682 }, { "epoch": 0.6634656062272611, "grad_norm": 0.1871895045042038, "learning_rate": 0.001, "loss": 2.8345, "step": 15683 }, { "epoch": 0.6635079109907775, "grad_norm": 0.18821239471435547, "learning_rate": 0.001, "loss": 2.0015, "step": 15684 }, { "epoch": 0.663550215754294, "grad_norm": 0.1794331818819046, "learning_rate": 0.001, "loss": 2.6855, "step": 15685 }, { "epoch": 0.6635925205178103, "grad_norm": 0.13422973453998566, "learning_rate": 0.001, "loss": 2.8227, "step": 15686 }, { "epoch": 0.6636348252813267, "grad_norm": 0.16726963222026825, "learning_rate": 0.001, "loss": 2.4534, "step": 15687 }, { "epoch": 0.6636771300448431, "grad_norm": 0.19704453647136688, "learning_rate": 0.001, "loss": 3.0143, "step": 15688 }, { "epoch": 0.6637194348083594, "grad_norm": 0.27630937099456787, "learning_rate": 0.001, "loss": 1.6488, "step": 15689 }, { "epoch": 0.6637617395718758, "grad_norm": 0.16017358005046844, "learning_rate": 0.001, "loss": 2.288, "step": 15690 }, { "epoch": 0.6638040443353922, "grad_norm": 0.4271370768547058, "learning_rate": 0.001, "loss": 2.3904, "step": 15691 }, { "epoch": 0.6638463490989085, "grad_norm": 0.17746147513389587, "learning_rate": 0.001, "loss": 2.5629, "step": 15692 }, { "epoch": 0.6638886538624249, "grad_norm": 0.22230751812458038, "learning_rate": 0.001, "loss": 3.1131, "step": 15693 }, { "epoch": 0.6639309586259413, "grad_norm": 0.1868673861026764, "learning_rate": 0.001, "loss": 2.424, "step": 15694 }, { "epoch": 0.6639732633894576, "grad_norm": 0.1976270079612732, "learning_rate": 0.001, "loss": 2.4694, "step": 15695 }, { "epoch": 0.664015568152974, "grad_norm": 0.18282292783260345, "learning_rate": 0.001, "loss": 2.1648, "step": 15696 }, { "epoch": 0.6640578729164904, "grad_norm": 0.1945333033800125, "learning_rate": 0.001, "loss": 2.1718, "step": 15697 }, { "epoch": 0.6641001776800067, "grad_norm": 0.19967901706695557, "learning_rate": 0.001, "loss": 1.6653, "step": 15698 }, { "epoch": 0.6641424824435231, "grad_norm": 0.35160374641418457, "learning_rate": 0.001, "loss": 2.9863, "step": 15699 }, { "epoch": 0.6641847872070396, "grad_norm": 0.18338724970817566, "learning_rate": 0.001, "loss": 2.0818, "step": 15700 }, { "epoch": 0.6642270919705558, "grad_norm": 0.26480796933174133, "learning_rate": 0.001, "loss": 1.8102, "step": 15701 }, { "epoch": 0.6642693967340723, "grad_norm": 0.1641346663236618, "learning_rate": 0.001, "loss": 1.8631, "step": 15702 }, { "epoch": 0.6643117014975887, "grad_norm": 0.19785989820957184, "learning_rate": 0.001, "loss": 2.0509, "step": 15703 }, { "epoch": 0.664354006261105, "grad_norm": 0.1693398356437683, "learning_rate": 0.001, "loss": 2.2726, "step": 15704 }, { "epoch": 0.6643963110246214, "grad_norm": 0.20726856589317322, "learning_rate": 0.001, "loss": 2.8105, "step": 15705 }, { "epoch": 0.6644386157881378, "grad_norm": 0.251668781042099, "learning_rate": 0.001, "loss": 2.1022, "step": 15706 }, { "epoch": 0.6644809205516541, "grad_norm": 0.1539115458726883, "learning_rate": 0.001, "loss": 2.8067, "step": 15707 }, { "epoch": 0.6645232253151705, "grad_norm": 0.36175769567489624, "learning_rate": 0.001, "loss": 1.5274, "step": 15708 }, { "epoch": 0.6645655300786869, "grad_norm": 0.17199809849262238, "learning_rate": 0.001, "loss": 2.5353, "step": 15709 }, { "epoch": 0.6646078348422032, "grad_norm": 2.3134865760803223, "learning_rate": 0.001, "loss": 1.6276, "step": 15710 }, { "epoch": 0.6646501396057196, "grad_norm": 0.8231282234191895, "learning_rate": 0.001, "loss": 2.0584, "step": 15711 }, { "epoch": 0.664692444369236, "grad_norm": 0.5767163038253784, "learning_rate": 0.001, "loss": 1.7341, "step": 15712 }, { "epoch": 0.6647347491327523, "grad_norm": 0.1625528633594513, "learning_rate": 0.001, "loss": 2.172, "step": 15713 }, { "epoch": 0.6647770538962687, "grad_norm": 0.8674761056900024, "learning_rate": 0.001, "loss": 2.3215, "step": 15714 }, { "epoch": 0.6648193586597851, "grad_norm": 0.9063986539840698, "learning_rate": 0.001, "loss": 1.7439, "step": 15715 }, { "epoch": 0.6648616634233014, "grad_norm": 0.17824086546897888, "learning_rate": 0.001, "loss": 2.0572, "step": 15716 }, { "epoch": 0.6649039681868179, "grad_norm": 0.2059624344110489, "learning_rate": 0.001, "loss": 2.6742, "step": 15717 }, { "epoch": 0.6649462729503343, "grad_norm": 0.6646909117698669, "learning_rate": 0.001, "loss": 2.3202, "step": 15718 }, { "epoch": 0.6649885777138506, "grad_norm": 0.1927110105752945, "learning_rate": 0.001, "loss": 1.8751, "step": 15719 }, { "epoch": 0.665030882477367, "grad_norm": 0.20116344094276428, "learning_rate": 0.001, "loss": 1.9791, "step": 15720 }, { "epoch": 0.6650731872408834, "grad_norm": 0.44798147678375244, "learning_rate": 0.001, "loss": 2.8342, "step": 15721 }, { "epoch": 0.6651154920043997, "grad_norm": 0.239822655916214, "learning_rate": 0.001, "loss": 1.8323, "step": 15722 }, { "epoch": 0.6651577967679161, "grad_norm": 0.20988096296787262, "learning_rate": 0.001, "loss": 2.2684, "step": 15723 }, { "epoch": 0.6652001015314324, "grad_norm": 0.22264404594898224, "learning_rate": 0.001, "loss": 2.8356, "step": 15724 }, { "epoch": 0.6652424062949488, "grad_norm": 0.4639243185520172, "learning_rate": 0.001, "loss": 1.9119, "step": 15725 }, { "epoch": 0.6652847110584652, "grad_norm": 0.8543448448181152, "learning_rate": 0.001, "loss": 1.6931, "step": 15726 }, { "epoch": 0.6653270158219815, "grad_norm": 1.1423962116241455, "learning_rate": 0.001, "loss": 1.5227, "step": 15727 }, { "epoch": 0.6653693205854979, "grad_norm": 0.1901700645685196, "learning_rate": 0.001, "loss": 2.7098, "step": 15728 }, { "epoch": 0.6654116253490143, "grad_norm": 0.8082202076911926, "learning_rate": 0.001, "loss": 2.3667, "step": 15729 }, { "epoch": 0.6654539301125306, "grad_norm": 0.3281959891319275, "learning_rate": 0.001, "loss": 1.9087, "step": 15730 }, { "epoch": 0.665496234876047, "grad_norm": 0.3187538683414459, "learning_rate": 0.001, "loss": 1.9291, "step": 15731 }, { "epoch": 0.6655385396395634, "grad_norm": 2.2117702960968018, "learning_rate": 0.001, "loss": 2.724, "step": 15732 }, { "epoch": 0.6655808444030797, "grad_norm": 35.29819107055664, "learning_rate": 0.001, "loss": 2.2991, "step": 15733 }, { "epoch": 0.6656231491665962, "grad_norm": 0.18186047673225403, "learning_rate": 0.001, "loss": 1.3769, "step": 15734 }, { "epoch": 0.6656654539301126, "grad_norm": 0.24423609673976898, "learning_rate": 0.001, "loss": 2.32, "step": 15735 }, { "epoch": 0.6657077586936289, "grad_norm": 0.319638729095459, "learning_rate": 0.001, "loss": 2.4374, "step": 15736 }, { "epoch": 0.6657500634571453, "grad_norm": 0.1802143007516861, "learning_rate": 0.001, "loss": 2.2174, "step": 15737 }, { "epoch": 0.6657923682206617, "grad_norm": 0.17544938623905182, "learning_rate": 0.001, "loss": 1.7473, "step": 15738 }, { "epoch": 0.665834672984178, "grad_norm": 0.9758937954902649, "learning_rate": 0.001, "loss": 2.2743, "step": 15739 }, { "epoch": 0.6658769777476944, "grad_norm": 0.1803831309080124, "learning_rate": 0.001, "loss": 3.2636, "step": 15740 }, { "epoch": 0.6659192825112108, "grad_norm": 0.2357131391763687, "learning_rate": 0.001, "loss": 2.4164, "step": 15741 }, { "epoch": 0.6659615872747271, "grad_norm": 0.16148456931114197, "learning_rate": 0.001, "loss": 1.8385, "step": 15742 }, { "epoch": 0.6660038920382435, "grad_norm": 0.17655906081199646, "learning_rate": 0.001, "loss": 1.7078, "step": 15743 }, { "epoch": 0.6660461968017599, "grad_norm": 0.18633557856082916, "learning_rate": 0.001, "loss": 1.5433, "step": 15744 }, { "epoch": 0.6660885015652762, "grad_norm": 0.16458381712436676, "learning_rate": 0.001, "loss": 1.9442, "step": 15745 }, { "epoch": 0.6661308063287926, "grad_norm": 0.1529700607061386, "learning_rate": 0.001, "loss": 2.7367, "step": 15746 }, { "epoch": 0.666173111092309, "grad_norm": 0.16840951144695282, "learning_rate": 0.001, "loss": 2.7347, "step": 15747 }, { "epoch": 0.6662154158558253, "grad_norm": 0.15616460144519806, "learning_rate": 0.001, "loss": 1.6326, "step": 15748 }, { "epoch": 0.6662577206193417, "grad_norm": 45.16459274291992, "learning_rate": 0.001, "loss": 2.0402, "step": 15749 }, { "epoch": 0.6663000253828582, "grad_norm": 0.35248544812202454, "learning_rate": 0.001, "loss": 2.0827, "step": 15750 }, { "epoch": 0.6663423301463745, "grad_norm": 0.14932596683502197, "learning_rate": 0.001, "loss": 2.1283, "step": 15751 }, { "epoch": 0.6663846349098909, "grad_norm": 0.22044512629508972, "learning_rate": 0.001, "loss": 2.6434, "step": 15752 }, { "epoch": 0.6664269396734073, "grad_norm": 0.21503077447414398, "learning_rate": 0.001, "loss": 2.6892, "step": 15753 }, { "epoch": 0.6664692444369236, "grad_norm": 3.1654529571533203, "learning_rate": 0.001, "loss": 1.6119, "step": 15754 }, { "epoch": 0.66651154920044, "grad_norm": 0.19109536707401276, "learning_rate": 0.001, "loss": 1.7015, "step": 15755 }, { "epoch": 0.6665538539639564, "grad_norm": 0.217402383685112, "learning_rate": 0.001, "loss": 2.0906, "step": 15756 }, { "epoch": 0.6665961587274727, "grad_norm": 0.20222723484039307, "learning_rate": 0.001, "loss": 1.8401, "step": 15757 }, { "epoch": 0.6666384634909891, "grad_norm": 0.24966546893119812, "learning_rate": 0.001, "loss": 1.984, "step": 15758 }, { "epoch": 0.6666807682545055, "grad_norm": 0.25226879119873047, "learning_rate": 0.001, "loss": 2.9811, "step": 15759 }, { "epoch": 0.6667230730180218, "grad_norm": 0.37204667925834656, "learning_rate": 0.001, "loss": 2.5787, "step": 15760 }, { "epoch": 0.6667653777815382, "grad_norm": 1.5811610221862793, "learning_rate": 0.001, "loss": 1.7835, "step": 15761 }, { "epoch": 0.6668076825450546, "grad_norm": 0.41911280155181885, "learning_rate": 0.001, "loss": 2.1604, "step": 15762 }, { "epoch": 0.6668499873085709, "grad_norm": 0.19730260968208313, "learning_rate": 0.001, "loss": 3.0043, "step": 15763 }, { "epoch": 0.6668922920720873, "grad_norm": 0.23359958827495575, "learning_rate": 0.001, "loss": 3.387, "step": 15764 }, { "epoch": 0.6669345968356037, "grad_norm": 4.667820930480957, "learning_rate": 0.001, "loss": 2.4877, "step": 15765 }, { "epoch": 0.66697690159912, "grad_norm": 0.25286656618118286, "learning_rate": 0.001, "loss": 1.9937, "step": 15766 }, { "epoch": 0.6670192063626365, "grad_norm": 0.23963415622711182, "learning_rate": 0.001, "loss": 1.69, "step": 15767 }, { "epoch": 0.6670615111261528, "grad_norm": 0.26238617300987244, "learning_rate": 0.001, "loss": 2.352, "step": 15768 }, { "epoch": 0.6671038158896692, "grad_norm": 83.53499603271484, "learning_rate": 0.001, "loss": 2.5639, "step": 15769 }, { "epoch": 0.6671461206531856, "grad_norm": 0.325317919254303, "learning_rate": 0.001, "loss": 2.2086, "step": 15770 }, { "epoch": 0.6671884254167019, "grad_norm": 0.20828962326049805, "learning_rate": 0.001, "loss": 2.473, "step": 15771 }, { "epoch": 0.6672307301802183, "grad_norm": 0.21652193367481232, "learning_rate": 0.001, "loss": 2.2587, "step": 15772 }, { "epoch": 0.6672730349437347, "grad_norm": 0.44318485260009766, "learning_rate": 0.001, "loss": 2.0383, "step": 15773 }, { "epoch": 0.667315339707251, "grad_norm": 0.5616191625595093, "learning_rate": 0.001, "loss": 1.6704, "step": 15774 }, { "epoch": 0.6673576444707674, "grad_norm": 0.2393038272857666, "learning_rate": 0.001, "loss": 2.2136, "step": 15775 }, { "epoch": 0.6673999492342838, "grad_norm": 0.2519043982028961, "learning_rate": 0.001, "loss": 2.8899, "step": 15776 }, { "epoch": 0.6674422539978001, "grad_norm": 0.2502747178077698, "learning_rate": 0.001, "loss": 2.4507, "step": 15777 }, { "epoch": 0.6674845587613165, "grad_norm": 0.20891942083835602, "learning_rate": 0.001, "loss": 1.7339, "step": 15778 }, { "epoch": 0.6675268635248329, "grad_norm": 1.0747361183166504, "learning_rate": 0.001, "loss": 2.0077, "step": 15779 }, { "epoch": 0.6675691682883492, "grad_norm": 0.23666757345199585, "learning_rate": 0.001, "loss": 2.8961, "step": 15780 }, { "epoch": 0.6676114730518656, "grad_norm": 0.3330824077129364, "learning_rate": 0.001, "loss": 2.9923, "step": 15781 }, { "epoch": 0.667653777815382, "grad_norm": 0.33238059282302856, "learning_rate": 0.001, "loss": 1.7868, "step": 15782 }, { "epoch": 0.6676960825788983, "grad_norm": 0.17682193219661713, "learning_rate": 0.001, "loss": 2.7748, "step": 15783 }, { "epoch": 0.6677383873424148, "grad_norm": 0.2014869600534439, "learning_rate": 0.001, "loss": 1.9485, "step": 15784 }, { "epoch": 0.6677806921059312, "grad_norm": 0.17806148529052734, "learning_rate": 0.001, "loss": 1.7066, "step": 15785 }, { "epoch": 0.6678229968694475, "grad_norm": 6.159350395202637, "learning_rate": 0.001, "loss": 2.0137, "step": 15786 }, { "epoch": 0.6678653016329639, "grad_norm": 0.381747841835022, "learning_rate": 0.001, "loss": 2.2376, "step": 15787 }, { "epoch": 0.6679076063964803, "grad_norm": 0.2559763789176941, "learning_rate": 0.001, "loss": 2.3698, "step": 15788 }, { "epoch": 0.6679499111599966, "grad_norm": 0.24722157418727875, "learning_rate": 0.001, "loss": 2.6856, "step": 15789 }, { "epoch": 0.667992215923513, "grad_norm": 3.812957286834717, "learning_rate": 0.001, "loss": 1.976, "step": 15790 }, { "epoch": 0.6680345206870294, "grad_norm": 0.3154589533805847, "learning_rate": 0.001, "loss": 4.0476, "step": 15791 }, { "epoch": 0.6680768254505457, "grad_norm": 0.20563848316669464, "learning_rate": 0.001, "loss": 2.0702, "step": 15792 }, { "epoch": 0.6681191302140621, "grad_norm": 0.3167200982570648, "learning_rate": 0.001, "loss": 2.8908, "step": 15793 }, { "epoch": 0.6681614349775785, "grad_norm": 0.2859385907649994, "learning_rate": 0.001, "loss": 2.3652, "step": 15794 }, { "epoch": 0.6682037397410948, "grad_norm": 4.2778730392456055, "learning_rate": 0.001, "loss": 2.6051, "step": 15795 }, { "epoch": 0.6682460445046112, "grad_norm": 0.23722970485687256, "learning_rate": 0.001, "loss": 1.4931, "step": 15796 }, { "epoch": 0.6682883492681276, "grad_norm": 0.19192059338092804, "learning_rate": 0.001, "loss": 2.4992, "step": 15797 }, { "epoch": 0.6683306540316439, "grad_norm": 0.25284337997436523, "learning_rate": 0.001, "loss": 1.957, "step": 15798 }, { "epoch": 0.6683729587951603, "grad_norm": 0.23747120797634125, "learning_rate": 0.001, "loss": 1.6552, "step": 15799 }, { "epoch": 0.6684152635586768, "grad_norm": 0.21687759459018707, "learning_rate": 0.001, "loss": 1.8681, "step": 15800 }, { "epoch": 0.668457568322193, "grad_norm": 0.2691959738731384, "learning_rate": 0.001, "loss": 2.2289, "step": 15801 }, { "epoch": 0.6684998730857095, "grad_norm": 0.2524837553501129, "learning_rate": 0.001, "loss": 1.8516, "step": 15802 }, { "epoch": 0.6685421778492259, "grad_norm": 0.2057633101940155, "learning_rate": 0.001, "loss": 2.509, "step": 15803 }, { "epoch": 0.6685844826127422, "grad_norm": 1.199033498764038, "learning_rate": 0.001, "loss": 1.5052, "step": 15804 }, { "epoch": 0.6686267873762586, "grad_norm": 0.3630319833755493, "learning_rate": 0.001, "loss": 2.5884, "step": 15805 }, { "epoch": 0.668669092139775, "grad_norm": 0.4470106065273285, "learning_rate": 0.001, "loss": 2.7533, "step": 15806 }, { "epoch": 0.6687113969032913, "grad_norm": 0.18470320105552673, "learning_rate": 0.001, "loss": 2.5, "step": 15807 }, { "epoch": 0.6687537016668077, "grad_norm": 3.678845167160034, "learning_rate": 0.001, "loss": 2.6681, "step": 15808 }, { "epoch": 0.6687960064303241, "grad_norm": 0.19354915618896484, "learning_rate": 0.001, "loss": 2.4777, "step": 15809 }, { "epoch": 0.6688383111938404, "grad_norm": 0.32464343309402466, "learning_rate": 0.001, "loss": 2.7447, "step": 15810 }, { "epoch": 0.6688806159573568, "grad_norm": 0.1859036386013031, "learning_rate": 0.001, "loss": 1.5693, "step": 15811 }, { "epoch": 0.6689229207208731, "grad_norm": 0.19540037214756012, "learning_rate": 0.001, "loss": 2.8704, "step": 15812 }, { "epoch": 0.6689652254843895, "grad_norm": 0.24034158885478973, "learning_rate": 0.001, "loss": 1.6442, "step": 15813 }, { "epoch": 0.6690075302479059, "grad_norm": 0.20161794126033783, "learning_rate": 0.001, "loss": 1.7371, "step": 15814 }, { "epoch": 0.6690498350114222, "grad_norm": 0.27223238348960876, "learning_rate": 0.001, "loss": 2.8628, "step": 15815 }, { "epoch": 0.6690921397749386, "grad_norm": 0.18867754936218262, "learning_rate": 0.001, "loss": 1.8677, "step": 15816 }, { "epoch": 0.669134444538455, "grad_norm": 0.18999359011650085, "learning_rate": 0.001, "loss": 2.0956, "step": 15817 }, { "epoch": 0.6691767493019714, "grad_norm": 0.14994925260543823, "learning_rate": 0.001, "loss": 2.6274, "step": 15818 }, { "epoch": 0.6692190540654878, "grad_norm": 0.16162274777889252, "learning_rate": 0.001, "loss": 2.5421, "step": 15819 }, { "epoch": 0.6692613588290042, "grad_norm": 1.4299275875091553, "learning_rate": 0.001, "loss": 2.334, "step": 15820 }, { "epoch": 0.6693036635925205, "grad_norm": 0.9547110199928284, "learning_rate": 0.001, "loss": 2.2495, "step": 15821 }, { "epoch": 0.6693459683560369, "grad_norm": 0.19420866668224335, "learning_rate": 0.001, "loss": 2.792, "step": 15822 }, { "epoch": 0.6693882731195533, "grad_norm": 0.15037833154201508, "learning_rate": 0.001, "loss": 2.696, "step": 15823 }, { "epoch": 0.6694305778830696, "grad_norm": 0.1415679156780243, "learning_rate": 0.001, "loss": 3.4118, "step": 15824 }, { "epoch": 0.669472882646586, "grad_norm": 0.15855517983436584, "learning_rate": 0.001, "loss": 1.7473, "step": 15825 }, { "epoch": 0.6695151874101024, "grad_norm": 0.47525739669799805, "learning_rate": 0.001, "loss": 2.8112, "step": 15826 }, { "epoch": 0.6695574921736187, "grad_norm": 0.35138139128685, "learning_rate": 0.001, "loss": 2.0132, "step": 15827 }, { "epoch": 0.6695997969371351, "grad_norm": 0.18116721510887146, "learning_rate": 0.001, "loss": 2.488, "step": 15828 }, { "epoch": 0.6696421017006515, "grad_norm": 0.18540117144584656, "learning_rate": 0.001, "loss": 3.8645, "step": 15829 }, { "epoch": 0.6696844064641678, "grad_norm": 0.19489508867263794, "learning_rate": 0.001, "loss": 2.7296, "step": 15830 }, { "epoch": 0.6697267112276842, "grad_norm": 0.19228166341781616, "learning_rate": 0.001, "loss": 1.9359, "step": 15831 }, { "epoch": 0.6697690159912006, "grad_norm": 0.15116505324840546, "learning_rate": 0.001, "loss": 1.8258, "step": 15832 }, { "epoch": 0.6698113207547169, "grad_norm": 0.6345039010047913, "learning_rate": 0.001, "loss": 2.1165, "step": 15833 }, { "epoch": 0.6698536255182334, "grad_norm": 0.15162324905395508, "learning_rate": 0.001, "loss": 2.3055, "step": 15834 }, { "epoch": 0.6698959302817498, "grad_norm": 1.6587047576904297, "learning_rate": 0.001, "loss": 1.78, "step": 15835 }, { "epoch": 0.6699382350452661, "grad_norm": 0.1674169898033142, "learning_rate": 0.001, "loss": 1.9546, "step": 15836 }, { "epoch": 0.6699805398087825, "grad_norm": 0.1696109175682068, "learning_rate": 0.001, "loss": 2.0511, "step": 15837 }, { "epoch": 0.6700228445722989, "grad_norm": 0.16364048421382904, "learning_rate": 0.001, "loss": 1.7434, "step": 15838 }, { "epoch": 0.6700651493358152, "grad_norm": 0.18208681046962738, "learning_rate": 0.001, "loss": 2.9249, "step": 15839 }, { "epoch": 0.6701074540993316, "grad_norm": 0.870050847530365, "learning_rate": 0.001, "loss": 1.8984, "step": 15840 }, { "epoch": 0.670149758862848, "grad_norm": 0.20092959702014923, "learning_rate": 0.001, "loss": 2.044, "step": 15841 }, { "epoch": 0.6701920636263643, "grad_norm": 0.24720391631126404, "learning_rate": 0.001, "loss": 2.1473, "step": 15842 }, { "epoch": 0.6702343683898807, "grad_norm": 0.18767613172531128, "learning_rate": 0.001, "loss": 2.8646, "step": 15843 }, { "epoch": 0.6702766731533971, "grad_norm": 0.15457838773727417, "learning_rate": 0.001, "loss": 1.7142, "step": 15844 }, { "epoch": 0.6703189779169134, "grad_norm": 0.1792488992214203, "learning_rate": 0.001, "loss": 2.8613, "step": 15845 }, { "epoch": 0.6703612826804298, "grad_norm": 0.9255082607269287, "learning_rate": 0.001, "loss": 2.8254, "step": 15846 }, { "epoch": 0.6704035874439462, "grad_norm": 3.0631420612335205, "learning_rate": 0.001, "loss": 2.5177, "step": 15847 }, { "epoch": 0.6704458922074625, "grad_norm": 0.1708671599626541, "learning_rate": 0.001, "loss": 2.045, "step": 15848 }, { "epoch": 0.670488196970979, "grad_norm": 0.1887563019990921, "learning_rate": 0.001, "loss": 1.8023, "step": 15849 }, { "epoch": 0.6705305017344954, "grad_norm": 0.23792339861392975, "learning_rate": 0.001, "loss": 2.0385, "step": 15850 }, { "epoch": 0.6705728064980117, "grad_norm": 0.1688668131828308, "learning_rate": 0.001, "loss": 2.0975, "step": 15851 }, { "epoch": 0.6706151112615281, "grad_norm": 0.21332918107509613, "learning_rate": 0.001, "loss": 2.3229, "step": 15852 }, { "epoch": 0.6706574160250445, "grad_norm": 0.1556411236524582, "learning_rate": 0.001, "loss": 2.6122, "step": 15853 }, { "epoch": 0.6706997207885608, "grad_norm": 0.17764410376548767, "learning_rate": 0.001, "loss": 1.339, "step": 15854 }, { "epoch": 0.6707420255520772, "grad_norm": 0.31873539090156555, "learning_rate": 0.001, "loss": 2.1945, "step": 15855 }, { "epoch": 0.6707843303155936, "grad_norm": 1.299968957901001, "learning_rate": 0.001, "loss": 1.8488, "step": 15856 }, { "epoch": 0.6708266350791099, "grad_norm": 0.19831599295139313, "learning_rate": 0.001, "loss": 2.9631, "step": 15857 }, { "epoch": 0.6708689398426263, "grad_norm": 0.4041413962841034, "learning_rate": 0.001, "loss": 1.7992, "step": 15858 }, { "epoch": 0.6709112446061426, "grad_norm": 0.19786041975021362, "learning_rate": 0.001, "loss": 2.2841, "step": 15859 }, { "epoch": 0.670953549369659, "grad_norm": 0.17182038724422455, "learning_rate": 0.001, "loss": 2.1733, "step": 15860 }, { "epoch": 0.6709958541331754, "grad_norm": 0.21848192811012268, "learning_rate": 0.001, "loss": 2.1951, "step": 15861 }, { "epoch": 0.6710381588966917, "grad_norm": 0.2034897655248642, "learning_rate": 0.001, "loss": 2.1136, "step": 15862 }, { "epoch": 0.6710804636602081, "grad_norm": 0.19234777987003326, "learning_rate": 0.001, "loss": 2.0125, "step": 15863 }, { "epoch": 0.6711227684237245, "grad_norm": 0.2422971874475479, "learning_rate": 0.001, "loss": 3.2963, "step": 15864 }, { "epoch": 0.6711650731872408, "grad_norm": 0.17505241930484772, "learning_rate": 0.001, "loss": 1.7463, "step": 15865 }, { "epoch": 0.6712073779507572, "grad_norm": 0.1780707985162735, "learning_rate": 0.001, "loss": 1.6777, "step": 15866 }, { "epoch": 0.6712496827142737, "grad_norm": 0.1790010929107666, "learning_rate": 0.001, "loss": 2.4067, "step": 15867 }, { "epoch": 0.67129198747779, "grad_norm": 0.19889087975025177, "learning_rate": 0.001, "loss": 2.5161, "step": 15868 }, { "epoch": 0.6713342922413064, "grad_norm": 0.9066613912582397, "learning_rate": 0.001, "loss": 2.8811, "step": 15869 }, { "epoch": 0.6713765970048228, "grad_norm": 0.15774136781692505, "learning_rate": 0.001, "loss": 2.4626, "step": 15870 }, { "epoch": 0.6714189017683391, "grad_norm": 0.14970077574253082, "learning_rate": 0.001, "loss": 1.6917, "step": 15871 }, { "epoch": 0.6714612065318555, "grad_norm": 0.1569453924894333, "learning_rate": 0.001, "loss": 2.3071, "step": 15872 }, { "epoch": 0.6715035112953719, "grad_norm": 0.4611540138721466, "learning_rate": 0.001, "loss": 2.905, "step": 15873 }, { "epoch": 0.6715458160588882, "grad_norm": 0.15557625889778137, "learning_rate": 0.001, "loss": 2.3258, "step": 15874 }, { "epoch": 0.6715881208224046, "grad_norm": 0.19848431646823883, "learning_rate": 0.001, "loss": 2.6509, "step": 15875 }, { "epoch": 0.671630425585921, "grad_norm": 0.16773267090320587, "learning_rate": 0.001, "loss": 2.1152, "step": 15876 }, { "epoch": 0.6716727303494373, "grad_norm": 0.23722898960113525, "learning_rate": 0.001, "loss": 2.3771, "step": 15877 }, { "epoch": 0.6717150351129537, "grad_norm": 1.1143704652786255, "learning_rate": 0.001, "loss": 2.607, "step": 15878 }, { "epoch": 0.6717573398764701, "grad_norm": 0.20059651136398315, "learning_rate": 0.001, "loss": 2.2388, "step": 15879 }, { "epoch": 0.6717996446399864, "grad_norm": 0.19346950948238373, "learning_rate": 0.001, "loss": 1.8036, "step": 15880 }, { "epoch": 0.6718419494035028, "grad_norm": 0.1549372375011444, "learning_rate": 0.001, "loss": 1.6958, "step": 15881 }, { "epoch": 0.6718842541670192, "grad_norm": 0.23537324368953705, "learning_rate": 0.001, "loss": 1.8379, "step": 15882 }, { "epoch": 0.6719265589305355, "grad_norm": 0.2048342525959015, "learning_rate": 0.001, "loss": 2.3291, "step": 15883 }, { "epoch": 0.671968863694052, "grad_norm": 0.15993832051753998, "learning_rate": 0.001, "loss": 2.3463, "step": 15884 }, { "epoch": 0.6720111684575684, "grad_norm": 0.1927027702331543, "learning_rate": 0.001, "loss": 2.0073, "step": 15885 }, { "epoch": 0.6720534732210847, "grad_norm": 0.2728440463542938, "learning_rate": 0.001, "loss": 2.1145, "step": 15886 }, { "epoch": 0.6720957779846011, "grad_norm": 0.21112090349197388, "learning_rate": 0.001, "loss": 2.4727, "step": 15887 }, { "epoch": 0.6721380827481175, "grad_norm": 0.19208763539791107, "learning_rate": 0.001, "loss": 1.6904, "step": 15888 }, { "epoch": 0.6721803875116338, "grad_norm": 0.18781235814094543, "learning_rate": 0.001, "loss": 2.5108, "step": 15889 }, { "epoch": 0.6722226922751502, "grad_norm": 0.20023007690906525, "learning_rate": 0.001, "loss": 1.6354, "step": 15890 }, { "epoch": 0.6722649970386666, "grad_norm": 0.18701715767383575, "learning_rate": 0.001, "loss": 2.3519, "step": 15891 }, { "epoch": 0.6723073018021829, "grad_norm": 2.3707351684570312, "learning_rate": 0.001, "loss": 1.7433, "step": 15892 }, { "epoch": 0.6723496065656993, "grad_norm": 1.582403540611267, "learning_rate": 0.001, "loss": 2.2697, "step": 15893 }, { "epoch": 0.6723919113292157, "grad_norm": 0.17928339540958405, "learning_rate": 0.001, "loss": 2.7044, "step": 15894 }, { "epoch": 0.672434216092732, "grad_norm": 0.1638590693473816, "learning_rate": 0.001, "loss": 2.5621, "step": 15895 }, { "epoch": 0.6724765208562484, "grad_norm": 0.25966379046440125, "learning_rate": 0.001, "loss": 2.4395, "step": 15896 }, { "epoch": 0.6725188256197648, "grad_norm": 0.17949466407299042, "learning_rate": 0.001, "loss": 1.7309, "step": 15897 }, { "epoch": 0.6725611303832811, "grad_norm": 0.182010680437088, "learning_rate": 0.001, "loss": 1.9328, "step": 15898 }, { "epoch": 0.6726034351467975, "grad_norm": 0.2181997448205948, "learning_rate": 0.001, "loss": 2.3349, "step": 15899 }, { "epoch": 0.672645739910314, "grad_norm": 0.28189408779144287, "learning_rate": 0.001, "loss": 2.0677, "step": 15900 }, { "epoch": 0.6726880446738303, "grad_norm": 0.22430168092250824, "learning_rate": 0.001, "loss": 1.6799, "step": 15901 }, { "epoch": 0.6727303494373467, "grad_norm": 0.6292093396186829, "learning_rate": 0.001, "loss": 1.8766, "step": 15902 }, { "epoch": 0.672772654200863, "grad_norm": 4.264910697937012, "learning_rate": 0.001, "loss": 2.1911, "step": 15903 }, { "epoch": 0.6728149589643794, "grad_norm": 0.2221497744321823, "learning_rate": 0.001, "loss": 2.1785, "step": 15904 }, { "epoch": 0.6728572637278958, "grad_norm": 0.19245116412639618, "learning_rate": 0.001, "loss": 2.712, "step": 15905 }, { "epoch": 0.6728995684914121, "grad_norm": 0.25045454502105713, "learning_rate": 0.001, "loss": 3.8263, "step": 15906 }, { "epoch": 0.6729418732549285, "grad_norm": 0.16638623178005219, "learning_rate": 0.001, "loss": 2.4182, "step": 15907 }, { "epoch": 0.6729841780184449, "grad_norm": 0.17453812062740326, "learning_rate": 0.001, "loss": 1.8538, "step": 15908 }, { "epoch": 0.6730264827819612, "grad_norm": 0.27706319093704224, "learning_rate": 0.001, "loss": 2.2582, "step": 15909 }, { "epoch": 0.6730687875454776, "grad_norm": 0.1942570060491562, "learning_rate": 0.001, "loss": 2.1083, "step": 15910 }, { "epoch": 0.673111092308994, "grad_norm": 0.18307733535766602, "learning_rate": 0.001, "loss": 2.1524, "step": 15911 }, { "epoch": 0.6731533970725103, "grad_norm": 0.1911049634218216, "learning_rate": 0.001, "loss": 2.4024, "step": 15912 }, { "epoch": 0.6731957018360267, "grad_norm": 0.15870538353919983, "learning_rate": 0.001, "loss": 2.1905, "step": 15913 }, { "epoch": 0.6732380065995431, "grad_norm": 0.14895422756671906, "learning_rate": 0.001, "loss": 1.8064, "step": 15914 }, { "epoch": 0.6732803113630594, "grad_norm": 0.16844263672828674, "learning_rate": 0.001, "loss": 2.3851, "step": 15915 }, { "epoch": 0.6733226161265758, "grad_norm": 0.15884919464588165, "learning_rate": 0.001, "loss": 2.3164, "step": 15916 }, { "epoch": 0.6733649208900923, "grad_norm": 0.20211821794509888, "learning_rate": 0.001, "loss": 2.3867, "step": 15917 }, { "epoch": 0.6734072256536086, "grad_norm": 0.3958881199359894, "learning_rate": 0.001, "loss": 1.911, "step": 15918 }, { "epoch": 0.673449530417125, "grad_norm": 0.4282142221927643, "learning_rate": 0.001, "loss": 2.8803, "step": 15919 }, { "epoch": 0.6734918351806414, "grad_norm": 0.1638120859861374, "learning_rate": 0.001, "loss": 1.665, "step": 15920 }, { "epoch": 0.6735341399441577, "grad_norm": 0.15340758860111237, "learning_rate": 0.001, "loss": 2.273, "step": 15921 }, { "epoch": 0.6735764447076741, "grad_norm": 0.17197485268115997, "learning_rate": 0.001, "loss": 2.0298, "step": 15922 }, { "epoch": 0.6736187494711905, "grad_norm": 0.1842762529850006, "learning_rate": 0.001, "loss": 2.1636, "step": 15923 }, { "epoch": 0.6736610542347068, "grad_norm": 0.1678512841463089, "learning_rate": 0.001, "loss": 2.6527, "step": 15924 }, { "epoch": 0.6737033589982232, "grad_norm": 0.1786356419324875, "learning_rate": 0.001, "loss": 2.7915, "step": 15925 }, { "epoch": 0.6737456637617396, "grad_norm": 0.14677925407886505, "learning_rate": 0.001, "loss": 1.7595, "step": 15926 }, { "epoch": 0.6737879685252559, "grad_norm": 0.18858976662158966, "learning_rate": 0.001, "loss": 1.9292, "step": 15927 }, { "epoch": 0.6738302732887723, "grad_norm": 0.18520209193229675, "learning_rate": 0.001, "loss": 2.1573, "step": 15928 }, { "epoch": 0.6738725780522887, "grad_norm": 0.9475305676460266, "learning_rate": 0.001, "loss": 2.5929, "step": 15929 }, { "epoch": 0.673914882815805, "grad_norm": 0.17860578000545502, "learning_rate": 0.001, "loss": 1.4486, "step": 15930 }, { "epoch": 0.6739571875793214, "grad_norm": 0.1508931964635849, "learning_rate": 0.001, "loss": 1.5058, "step": 15931 }, { "epoch": 0.6739994923428378, "grad_norm": 0.17374980449676514, "learning_rate": 0.001, "loss": 1.819, "step": 15932 }, { "epoch": 0.6740417971063541, "grad_norm": 0.1520918607711792, "learning_rate": 0.001, "loss": 1.8909, "step": 15933 }, { "epoch": 0.6740841018698706, "grad_norm": 1.1293140649795532, "learning_rate": 0.001, "loss": 3.0682, "step": 15934 }, { "epoch": 0.674126406633387, "grad_norm": 0.17505602538585663, "learning_rate": 0.001, "loss": 3.318, "step": 15935 }, { "epoch": 0.6741687113969033, "grad_norm": 0.3417392075061798, "learning_rate": 0.001, "loss": 2.3171, "step": 15936 }, { "epoch": 0.6742110161604197, "grad_norm": 0.39481237530708313, "learning_rate": 0.001, "loss": 1.7587, "step": 15937 }, { "epoch": 0.6742533209239361, "grad_norm": 0.37343841791152954, "learning_rate": 0.001, "loss": 2.229, "step": 15938 }, { "epoch": 0.6742956256874524, "grad_norm": 0.17360848188400269, "learning_rate": 0.001, "loss": 2.4422, "step": 15939 }, { "epoch": 0.6743379304509688, "grad_norm": 0.2295389622449875, "learning_rate": 0.001, "loss": 1.9226, "step": 15940 }, { "epoch": 0.6743802352144852, "grad_norm": 0.8693257570266724, "learning_rate": 0.001, "loss": 2.2007, "step": 15941 }, { "epoch": 0.6744225399780015, "grad_norm": 0.19263996183872223, "learning_rate": 0.001, "loss": 3.2355, "step": 15942 }, { "epoch": 0.6744648447415179, "grad_norm": 0.17120057344436646, "learning_rate": 0.001, "loss": 2.5869, "step": 15943 }, { "epoch": 0.6745071495050343, "grad_norm": 0.292049378156662, "learning_rate": 0.001, "loss": 1.8235, "step": 15944 }, { "epoch": 0.6745494542685506, "grad_norm": 0.17973144352436066, "learning_rate": 0.001, "loss": 2.0943, "step": 15945 }, { "epoch": 0.674591759032067, "grad_norm": 0.30642953515052795, "learning_rate": 0.001, "loss": 3.0754, "step": 15946 }, { "epoch": 0.6746340637955833, "grad_norm": 0.15908905863761902, "learning_rate": 0.001, "loss": 1.4202, "step": 15947 }, { "epoch": 0.6746763685590997, "grad_norm": 0.1885390281677246, "learning_rate": 0.001, "loss": 1.8611, "step": 15948 }, { "epoch": 0.6747186733226161, "grad_norm": 1.8227906227111816, "learning_rate": 0.001, "loss": 3.1409, "step": 15949 }, { "epoch": 0.6747609780861324, "grad_norm": 0.3331076204776764, "learning_rate": 0.001, "loss": 2.6025, "step": 15950 }, { "epoch": 0.6748032828496489, "grad_norm": 0.18833647668361664, "learning_rate": 0.001, "loss": 1.7091, "step": 15951 }, { "epoch": 0.6748455876131653, "grad_norm": 0.8229162693023682, "learning_rate": 0.001, "loss": 1.9747, "step": 15952 }, { "epoch": 0.6748878923766816, "grad_norm": 0.19028577208518982, "learning_rate": 0.001, "loss": 1.9955, "step": 15953 }, { "epoch": 0.674930197140198, "grad_norm": 0.19043925404548645, "learning_rate": 0.001, "loss": 1.8831, "step": 15954 }, { "epoch": 0.6749725019037144, "grad_norm": 0.1884336918592453, "learning_rate": 0.001, "loss": 2.4741, "step": 15955 }, { "epoch": 0.6750148066672307, "grad_norm": 0.2047235518693924, "learning_rate": 0.001, "loss": 2.729, "step": 15956 }, { "epoch": 0.6750571114307471, "grad_norm": 1.2655960321426392, "learning_rate": 0.001, "loss": 2.2603, "step": 15957 }, { "epoch": 0.6750994161942635, "grad_norm": 0.2844761312007904, "learning_rate": 0.001, "loss": 1.8044, "step": 15958 }, { "epoch": 0.6751417209577798, "grad_norm": 0.19485563039779663, "learning_rate": 0.001, "loss": 2.0, "step": 15959 }, { "epoch": 0.6751840257212962, "grad_norm": 0.2195066213607788, "learning_rate": 0.001, "loss": 2.6798, "step": 15960 }, { "epoch": 0.6752263304848126, "grad_norm": 0.18380756676197052, "learning_rate": 0.001, "loss": 1.7459, "step": 15961 }, { "epoch": 0.6752686352483289, "grad_norm": 0.22318629920482635, "learning_rate": 0.001, "loss": 2.6885, "step": 15962 }, { "epoch": 0.6753109400118453, "grad_norm": 0.940578818321228, "learning_rate": 0.001, "loss": 2.1456, "step": 15963 }, { "epoch": 0.6753532447753617, "grad_norm": 0.19312544167041779, "learning_rate": 0.001, "loss": 2.0494, "step": 15964 }, { "epoch": 0.675395549538878, "grad_norm": 0.24037931859493256, "learning_rate": 0.001, "loss": 2.2044, "step": 15965 }, { "epoch": 0.6754378543023944, "grad_norm": 0.478103369474411, "learning_rate": 0.001, "loss": 3.5143, "step": 15966 }, { "epoch": 0.6754801590659109, "grad_norm": 0.976945161819458, "learning_rate": 0.001, "loss": 2.7022, "step": 15967 }, { "epoch": 0.6755224638294272, "grad_norm": 0.1700313240289688, "learning_rate": 0.001, "loss": 2.0294, "step": 15968 }, { "epoch": 0.6755647685929436, "grad_norm": 0.36223527789115906, "learning_rate": 0.001, "loss": 2.3376, "step": 15969 }, { "epoch": 0.67560707335646, "grad_norm": 1.9298176765441895, "learning_rate": 0.001, "loss": 2.5461, "step": 15970 }, { "epoch": 0.6756493781199763, "grad_norm": 0.23120592534542084, "learning_rate": 0.001, "loss": 2.5119, "step": 15971 }, { "epoch": 0.6756916828834927, "grad_norm": 0.18629468977451324, "learning_rate": 0.001, "loss": 2.1999, "step": 15972 }, { "epoch": 0.6757339876470091, "grad_norm": 0.2043001651763916, "learning_rate": 0.001, "loss": 2.3867, "step": 15973 }, { "epoch": 0.6757762924105254, "grad_norm": 0.1776389479637146, "learning_rate": 0.001, "loss": 1.6796, "step": 15974 }, { "epoch": 0.6758185971740418, "grad_norm": 0.17264407873153687, "learning_rate": 0.001, "loss": 1.5977, "step": 15975 }, { "epoch": 0.6758609019375582, "grad_norm": 0.3489270806312561, "learning_rate": 0.001, "loss": 2.4665, "step": 15976 }, { "epoch": 0.6759032067010745, "grad_norm": 0.18797357380390167, "learning_rate": 0.001, "loss": 2.8661, "step": 15977 }, { "epoch": 0.6759455114645909, "grad_norm": 0.1839050054550171, "learning_rate": 0.001, "loss": 1.798, "step": 15978 }, { "epoch": 0.6759878162281073, "grad_norm": 0.2641671299934387, "learning_rate": 0.001, "loss": 2.0923, "step": 15979 }, { "epoch": 0.6760301209916236, "grad_norm": 0.1782594621181488, "learning_rate": 0.001, "loss": 2.7264, "step": 15980 }, { "epoch": 0.67607242575514, "grad_norm": 0.3223048150539398, "learning_rate": 0.001, "loss": 1.5769, "step": 15981 }, { "epoch": 0.6761147305186564, "grad_norm": 0.17407678067684174, "learning_rate": 0.001, "loss": 2.2179, "step": 15982 }, { "epoch": 0.6761570352821727, "grad_norm": 0.2168140560388565, "learning_rate": 0.001, "loss": 2.9042, "step": 15983 }, { "epoch": 0.6761993400456892, "grad_norm": 0.24506430327892303, "learning_rate": 0.001, "loss": 2.6773, "step": 15984 }, { "epoch": 0.6762416448092056, "grad_norm": 3.0023486614227295, "learning_rate": 0.001, "loss": 2.0182, "step": 15985 }, { "epoch": 0.6762839495727219, "grad_norm": 0.1910613626241684, "learning_rate": 0.001, "loss": 2.047, "step": 15986 }, { "epoch": 0.6763262543362383, "grad_norm": 0.17066790163516998, "learning_rate": 0.001, "loss": 2.0095, "step": 15987 }, { "epoch": 0.6763685590997547, "grad_norm": 0.21411702036857605, "learning_rate": 0.001, "loss": 1.883, "step": 15988 }, { "epoch": 0.676410863863271, "grad_norm": 0.16612938046455383, "learning_rate": 0.001, "loss": 1.7573, "step": 15989 }, { "epoch": 0.6764531686267874, "grad_norm": 0.2450494021177292, "learning_rate": 0.001, "loss": 2.951, "step": 15990 }, { "epoch": 0.6764954733903038, "grad_norm": 0.21236656606197357, "learning_rate": 0.001, "loss": 3.2924, "step": 15991 }, { "epoch": 0.6765377781538201, "grad_norm": 1.4454017877578735, "learning_rate": 0.001, "loss": 2.2104, "step": 15992 }, { "epoch": 0.6765800829173365, "grad_norm": 0.16844910383224487, "learning_rate": 0.001, "loss": 2.0065, "step": 15993 }, { "epoch": 0.6766223876808528, "grad_norm": 0.19149447977542877, "learning_rate": 0.001, "loss": 2.4233, "step": 15994 }, { "epoch": 0.6766646924443692, "grad_norm": 0.15004916489124298, "learning_rate": 0.001, "loss": 1.9877, "step": 15995 }, { "epoch": 0.6767069972078856, "grad_norm": 0.1823279857635498, "learning_rate": 0.001, "loss": 1.6102, "step": 15996 }, { "epoch": 0.6767493019714019, "grad_norm": 0.17334629595279694, "learning_rate": 0.001, "loss": 2.3336, "step": 15997 }, { "epoch": 0.6767916067349183, "grad_norm": 0.19492603838443756, "learning_rate": 0.001, "loss": 2.8301, "step": 15998 }, { "epoch": 0.6768339114984347, "grad_norm": 0.17607484757900238, "learning_rate": 0.001, "loss": 1.8637, "step": 15999 }, { "epoch": 0.676876216261951, "grad_norm": 0.15553830564022064, "learning_rate": 0.001, "loss": 2.4753, "step": 16000 }, { "epoch": 0.6769185210254675, "grad_norm": 0.30437377095222473, "learning_rate": 0.001, "loss": 2.1501, "step": 16001 }, { "epoch": 0.6769608257889839, "grad_norm": 0.17832662165164948, "learning_rate": 0.001, "loss": 1.9037, "step": 16002 }, { "epoch": 0.6770031305525002, "grad_norm": 0.25508981943130493, "learning_rate": 0.001, "loss": 1.3602, "step": 16003 }, { "epoch": 0.6770454353160166, "grad_norm": 0.17589248716831207, "learning_rate": 0.001, "loss": 3.1867, "step": 16004 }, { "epoch": 0.677087740079533, "grad_norm": 0.1560450941324234, "learning_rate": 0.001, "loss": 1.6277, "step": 16005 }, { "epoch": 0.6771300448430493, "grad_norm": 25.86013412475586, "learning_rate": 0.001, "loss": 2.1036, "step": 16006 }, { "epoch": 0.6771723496065657, "grad_norm": 0.153413325548172, "learning_rate": 0.001, "loss": 2.2372, "step": 16007 }, { "epoch": 0.6772146543700821, "grad_norm": 0.17835178971290588, "learning_rate": 0.001, "loss": 1.8833, "step": 16008 }, { "epoch": 0.6772569591335984, "grad_norm": 0.15974244475364685, "learning_rate": 0.001, "loss": 1.9219, "step": 16009 }, { "epoch": 0.6772992638971148, "grad_norm": 0.1814177930355072, "learning_rate": 0.001, "loss": 1.6924, "step": 16010 }, { "epoch": 0.6773415686606312, "grad_norm": 0.14862357079982758, "learning_rate": 0.001, "loss": 1.2958, "step": 16011 }, { "epoch": 0.6773838734241475, "grad_norm": 0.2236209362745285, "learning_rate": 0.001, "loss": 2.7953, "step": 16012 }, { "epoch": 0.6774261781876639, "grad_norm": 0.17278966307640076, "learning_rate": 0.001, "loss": 2.3449, "step": 16013 }, { "epoch": 0.6774684829511803, "grad_norm": 0.4638851284980774, "learning_rate": 0.001, "loss": 1.7526, "step": 16014 }, { "epoch": 0.6775107877146966, "grad_norm": 0.1768551915884018, "learning_rate": 0.001, "loss": 3.0475, "step": 16015 }, { "epoch": 0.677553092478213, "grad_norm": 0.19388507306575775, "learning_rate": 0.001, "loss": 2.2825, "step": 16016 }, { "epoch": 0.6775953972417295, "grad_norm": 0.20869360864162445, "learning_rate": 0.001, "loss": 2.2376, "step": 16017 }, { "epoch": 0.6776377020052458, "grad_norm": 0.16567271947860718, "learning_rate": 0.001, "loss": 1.9322, "step": 16018 }, { "epoch": 0.6776800067687622, "grad_norm": 0.15912829339504242, "learning_rate": 0.001, "loss": 1.5842, "step": 16019 }, { "epoch": 0.6777223115322786, "grad_norm": 0.14932376146316528, "learning_rate": 0.001, "loss": 1.6747, "step": 16020 }, { "epoch": 0.6777646162957949, "grad_norm": 1.8247519731521606, "learning_rate": 0.001, "loss": 2.3317, "step": 16021 }, { "epoch": 0.6778069210593113, "grad_norm": 0.22858451306819916, "learning_rate": 0.001, "loss": 2.5957, "step": 16022 }, { "epoch": 0.6778492258228277, "grad_norm": 0.1770337074995041, "learning_rate": 0.001, "loss": 1.436, "step": 16023 }, { "epoch": 0.677891530586344, "grad_norm": 0.1400739699602127, "learning_rate": 0.001, "loss": 1.9365, "step": 16024 }, { "epoch": 0.6779338353498604, "grad_norm": 0.1432129591703415, "learning_rate": 0.001, "loss": 1.5036, "step": 16025 }, { "epoch": 0.6779761401133768, "grad_norm": 0.16388866305351257, "learning_rate": 0.001, "loss": 1.8471, "step": 16026 }, { "epoch": 0.6780184448768931, "grad_norm": 2.311777353286743, "learning_rate": 0.001, "loss": 2.4238, "step": 16027 }, { "epoch": 0.6780607496404095, "grad_norm": 0.19178903102874756, "learning_rate": 0.001, "loss": 1.9744, "step": 16028 }, { "epoch": 0.6781030544039259, "grad_norm": 0.15423864126205444, "learning_rate": 0.001, "loss": 2.1231, "step": 16029 }, { "epoch": 0.6781453591674422, "grad_norm": 0.16429832577705383, "learning_rate": 0.001, "loss": 2.1746, "step": 16030 }, { "epoch": 0.6781876639309586, "grad_norm": 0.14937609434127808, "learning_rate": 0.001, "loss": 2.2265, "step": 16031 }, { "epoch": 0.678229968694475, "grad_norm": 0.17871306836605072, "learning_rate": 0.001, "loss": 2.605, "step": 16032 }, { "epoch": 0.6782722734579913, "grad_norm": 0.1368352472782135, "learning_rate": 0.001, "loss": 1.3931, "step": 16033 }, { "epoch": 0.6783145782215078, "grad_norm": 0.15225689113140106, "learning_rate": 0.001, "loss": 1.8712, "step": 16034 }, { "epoch": 0.6783568829850242, "grad_norm": 0.237832173705101, "learning_rate": 0.001, "loss": 2.7011, "step": 16035 }, { "epoch": 0.6783991877485405, "grad_norm": 0.2749160826206207, "learning_rate": 0.001, "loss": 2.0368, "step": 16036 }, { "epoch": 0.6784414925120569, "grad_norm": 0.1856054663658142, "learning_rate": 0.001, "loss": 1.5841, "step": 16037 }, { "epoch": 0.6784837972755732, "grad_norm": 0.35927614569664, "learning_rate": 0.001, "loss": 1.5595, "step": 16038 }, { "epoch": 0.6785261020390896, "grad_norm": 0.18782657384872437, "learning_rate": 0.001, "loss": 2.4929, "step": 16039 }, { "epoch": 0.678568406802606, "grad_norm": 0.1664906144142151, "learning_rate": 0.001, "loss": 2.8713, "step": 16040 }, { "epoch": 0.6786107115661223, "grad_norm": 0.17706331610679626, "learning_rate": 0.001, "loss": 2.2758, "step": 16041 }, { "epoch": 0.6786530163296387, "grad_norm": 1.698190450668335, "learning_rate": 0.001, "loss": 2.0762, "step": 16042 }, { "epoch": 0.6786953210931551, "grad_norm": 0.2534584105014801, "learning_rate": 0.001, "loss": 2.2728, "step": 16043 }, { "epoch": 0.6787376258566714, "grad_norm": 0.14307022094726562, "learning_rate": 0.001, "loss": 2.1301, "step": 16044 }, { "epoch": 0.6787799306201878, "grad_norm": 0.16313962638378143, "learning_rate": 0.001, "loss": 2.6391, "step": 16045 }, { "epoch": 0.6788222353837042, "grad_norm": 0.2026868611574173, "learning_rate": 0.001, "loss": 3.0308, "step": 16046 }, { "epoch": 0.6788645401472205, "grad_norm": 0.1914452612400055, "learning_rate": 0.001, "loss": 2.2605, "step": 16047 }, { "epoch": 0.6789068449107369, "grad_norm": 0.17033523321151733, "learning_rate": 0.001, "loss": 2.7466, "step": 16048 }, { "epoch": 0.6789491496742533, "grad_norm": 0.41400858759880066, "learning_rate": 0.001, "loss": 1.8912, "step": 16049 }, { "epoch": 0.6789914544377696, "grad_norm": 0.14586646854877472, "learning_rate": 0.001, "loss": 2.2457, "step": 16050 }, { "epoch": 0.6790337592012861, "grad_norm": 0.1477472484111786, "learning_rate": 0.001, "loss": 1.9528, "step": 16051 }, { "epoch": 0.6790760639648025, "grad_norm": 0.18150284886360168, "learning_rate": 0.001, "loss": 1.8803, "step": 16052 }, { "epoch": 0.6791183687283188, "grad_norm": 163.43626403808594, "learning_rate": 0.001, "loss": 2.7313, "step": 16053 }, { "epoch": 0.6791606734918352, "grad_norm": 0.17980308830738068, "learning_rate": 0.001, "loss": 2.8632, "step": 16054 }, { "epoch": 0.6792029782553516, "grad_norm": 1.3750078678131104, "learning_rate": 0.001, "loss": 2.8298, "step": 16055 }, { "epoch": 0.6792452830188679, "grad_norm": 0.23904463648796082, "learning_rate": 0.001, "loss": 2.7106, "step": 16056 }, { "epoch": 0.6792875877823843, "grad_norm": 0.18311509490013123, "learning_rate": 0.001, "loss": 1.9757, "step": 16057 }, { "epoch": 0.6793298925459007, "grad_norm": 0.15157288312911987, "learning_rate": 0.001, "loss": 2.3797, "step": 16058 }, { "epoch": 0.679372197309417, "grad_norm": 0.1471908688545227, "learning_rate": 0.001, "loss": 2.9482, "step": 16059 }, { "epoch": 0.6794145020729334, "grad_norm": 0.16476254165172577, "learning_rate": 0.001, "loss": 3.0529, "step": 16060 }, { "epoch": 0.6794568068364498, "grad_norm": 0.17069002985954285, "learning_rate": 0.001, "loss": 2.0707, "step": 16061 }, { "epoch": 0.6794991115999661, "grad_norm": 0.17665056884288788, "learning_rate": 0.001, "loss": 2.2161, "step": 16062 }, { "epoch": 0.6795414163634825, "grad_norm": 0.1902749389410019, "learning_rate": 0.001, "loss": 2.5135, "step": 16063 }, { "epoch": 0.6795837211269989, "grad_norm": 0.16366536915302277, "learning_rate": 0.001, "loss": 2.1562, "step": 16064 }, { "epoch": 0.6796260258905152, "grad_norm": 0.6264306306838989, "learning_rate": 0.001, "loss": 1.8162, "step": 16065 }, { "epoch": 0.6796683306540316, "grad_norm": 0.2236694097518921, "learning_rate": 0.001, "loss": 2.4581, "step": 16066 }, { "epoch": 0.6797106354175481, "grad_norm": 0.16119384765625, "learning_rate": 0.001, "loss": 2.1214, "step": 16067 }, { "epoch": 0.6797529401810644, "grad_norm": 0.16101056337356567, "learning_rate": 0.001, "loss": 1.6174, "step": 16068 }, { "epoch": 0.6797952449445808, "grad_norm": 0.28261250257492065, "learning_rate": 0.001, "loss": 2.6421, "step": 16069 }, { "epoch": 0.6798375497080972, "grad_norm": 1.5717463493347168, "learning_rate": 0.001, "loss": 2.7132, "step": 16070 }, { "epoch": 0.6798798544716135, "grad_norm": 0.15996554493904114, "learning_rate": 0.001, "loss": 2.364, "step": 16071 }, { "epoch": 0.6799221592351299, "grad_norm": 0.20333285629749298, "learning_rate": 0.001, "loss": 2.6281, "step": 16072 }, { "epoch": 0.6799644639986463, "grad_norm": 0.17524294555187225, "learning_rate": 0.001, "loss": 2.1776, "step": 16073 }, { "epoch": 0.6800067687621626, "grad_norm": 0.2101033478975296, "learning_rate": 0.001, "loss": 2.2662, "step": 16074 }, { "epoch": 0.680049073525679, "grad_norm": 0.1923307329416275, "learning_rate": 0.001, "loss": 1.9986, "step": 16075 }, { "epoch": 0.6800913782891954, "grad_norm": 0.14267486333847046, "learning_rate": 0.001, "loss": 1.4753, "step": 16076 }, { "epoch": 0.6801336830527117, "grad_norm": 0.14065372943878174, "learning_rate": 0.001, "loss": 1.4347, "step": 16077 }, { "epoch": 0.6801759878162281, "grad_norm": 0.20316016674041748, "learning_rate": 0.001, "loss": 1.7941, "step": 16078 }, { "epoch": 0.6802182925797445, "grad_norm": 0.24780157208442688, "learning_rate": 0.001, "loss": 3.0841, "step": 16079 }, { "epoch": 0.6802605973432608, "grad_norm": 0.19607360661029816, "learning_rate": 0.001, "loss": 3.1225, "step": 16080 }, { "epoch": 0.6803029021067772, "grad_norm": 0.16723619401454926, "learning_rate": 0.001, "loss": 1.9696, "step": 16081 }, { "epoch": 0.6803452068702935, "grad_norm": 10.566794395446777, "learning_rate": 0.001, "loss": 2.347, "step": 16082 }, { "epoch": 0.68038751163381, "grad_norm": 0.19241277873516083, "learning_rate": 0.001, "loss": 2.1758, "step": 16083 }, { "epoch": 0.6804298163973264, "grad_norm": 0.3141223192214966, "learning_rate": 0.001, "loss": 2.1376, "step": 16084 }, { "epoch": 0.6804721211608427, "grad_norm": 0.1563158631324768, "learning_rate": 0.001, "loss": 2.7554, "step": 16085 }, { "epoch": 0.6805144259243591, "grad_norm": 0.967668354511261, "learning_rate": 0.001, "loss": 2.3854, "step": 16086 }, { "epoch": 0.6805567306878755, "grad_norm": 0.1570819467306137, "learning_rate": 0.001, "loss": 1.7543, "step": 16087 }, { "epoch": 0.6805990354513918, "grad_norm": 0.18060454726219177, "learning_rate": 0.001, "loss": 1.3768, "step": 16088 }, { "epoch": 0.6806413402149082, "grad_norm": 0.1356450766324997, "learning_rate": 0.001, "loss": 1.2979, "step": 16089 }, { "epoch": 0.6806836449784246, "grad_norm": 0.20549282431602478, "learning_rate": 0.001, "loss": 2.1857, "step": 16090 }, { "epoch": 0.6807259497419409, "grad_norm": 0.17379418015480042, "learning_rate": 0.001, "loss": 1.7882, "step": 16091 }, { "epoch": 0.6807682545054573, "grad_norm": 0.17570160329341888, "learning_rate": 0.001, "loss": 2.2534, "step": 16092 }, { "epoch": 0.6808105592689737, "grad_norm": 0.17414408922195435, "learning_rate": 0.001, "loss": 2.5999, "step": 16093 }, { "epoch": 0.68085286403249, "grad_norm": 0.9091881513595581, "learning_rate": 0.001, "loss": 2.7196, "step": 16094 }, { "epoch": 0.6808951687960064, "grad_norm": 0.17393404245376587, "learning_rate": 0.001, "loss": 1.8711, "step": 16095 }, { "epoch": 0.6809374735595228, "grad_norm": 0.15561740100383759, "learning_rate": 0.001, "loss": 1.4363, "step": 16096 }, { "epoch": 0.6809797783230391, "grad_norm": 0.16957683861255646, "learning_rate": 0.001, "loss": 2.131, "step": 16097 }, { "epoch": 0.6810220830865555, "grad_norm": 0.2562768757343292, "learning_rate": 0.001, "loss": 3.0767, "step": 16098 }, { "epoch": 0.681064387850072, "grad_norm": 1.1886937618255615, "learning_rate": 0.001, "loss": 2.2489, "step": 16099 }, { "epoch": 0.6811066926135882, "grad_norm": 0.5651175379753113, "learning_rate": 0.001, "loss": 3.2804, "step": 16100 }, { "epoch": 0.6811489973771047, "grad_norm": 0.407637357711792, "learning_rate": 0.001, "loss": 2.6736, "step": 16101 }, { "epoch": 0.6811913021406211, "grad_norm": 0.19160562753677368, "learning_rate": 0.001, "loss": 2.064, "step": 16102 }, { "epoch": 0.6812336069041374, "grad_norm": 0.2181776911020279, "learning_rate": 0.001, "loss": 2.0987, "step": 16103 }, { "epoch": 0.6812759116676538, "grad_norm": 0.22167150676250458, "learning_rate": 0.001, "loss": 1.9636, "step": 16104 }, { "epoch": 0.6813182164311702, "grad_norm": 0.4942590892314911, "learning_rate": 0.001, "loss": 3.871, "step": 16105 }, { "epoch": 0.6813605211946865, "grad_norm": 4.270835876464844, "learning_rate": 0.001, "loss": 2.1507, "step": 16106 }, { "epoch": 0.6814028259582029, "grad_norm": 2.0456957817077637, "learning_rate": 0.001, "loss": 2.1891, "step": 16107 }, { "epoch": 0.6814451307217193, "grad_norm": 0.3435433506965637, "learning_rate": 0.001, "loss": 3.4219, "step": 16108 }, { "epoch": 0.6814874354852356, "grad_norm": 0.3711515963077545, "learning_rate": 0.001, "loss": 3.505, "step": 16109 }, { "epoch": 0.681529740248752, "grad_norm": 0.40264999866485596, "learning_rate": 0.001, "loss": 4.1824, "step": 16110 }, { "epoch": 0.6815720450122684, "grad_norm": 0.25708243250846863, "learning_rate": 0.001, "loss": 3.0473, "step": 16111 }, { "epoch": 0.6816143497757847, "grad_norm": 0.9480053782463074, "learning_rate": 0.001, "loss": 2.4107, "step": 16112 }, { "epoch": 0.6816566545393011, "grad_norm": 0.49835169315338135, "learning_rate": 0.001, "loss": 2.4002, "step": 16113 }, { "epoch": 0.6816989593028175, "grad_norm": 0.33678311109542847, "learning_rate": 0.001, "loss": 2.2966, "step": 16114 }, { "epoch": 0.6817412640663338, "grad_norm": 0.23809859156608582, "learning_rate": 0.001, "loss": 2.1127, "step": 16115 }, { "epoch": 0.6817835688298503, "grad_norm": 0.26749664545059204, "learning_rate": 0.001, "loss": 3.4136, "step": 16116 }, { "epoch": 0.6818258735933667, "grad_norm": 0.27437347173690796, "learning_rate": 0.001, "loss": 2.4324, "step": 16117 }, { "epoch": 0.681868178356883, "grad_norm": 0.26453253626823425, "learning_rate": 0.001, "loss": 2.0741, "step": 16118 }, { "epoch": 0.6819104831203994, "grad_norm": 0.18699724972248077, "learning_rate": 0.001, "loss": 2.6181, "step": 16119 }, { "epoch": 0.6819527878839158, "grad_norm": 0.23921284079551697, "learning_rate": 0.001, "loss": 1.9422, "step": 16120 }, { "epoch": 0.6819950926474321, "grad_norm": 0.27278849482536316, "learning_rate": 0.001, "loss": 2.635, "step": 16121 }, { "epoch": 0.6820373974109485, "grad_norm": 22.81956672668457, "learning_rate": 0.001, "loss": 2.203, "step": 16122 }, { "epoch": 0.6820797021744649, "grad_norm": 0.17843838036060333, "learning_rate": 0.001, "loss": 2.2639, "step": 16123 }, { "epoch": 0.6821220069379812, "grad_norm": 0.17329254746437073, "learning_rate": 0.001, "loss": 2.0456, "step": 16124 }, { "epoch": 0.6821643117014976, "grad_norm": 0.20157817006111145, "learning_rate": 0.001, "loss": 1.7671, "step": 16125 }, { "epoch": 0.682206616465014, "grad_norm": 0.3038026988506317, "learning_rate": 0.001, "loss": 2.6014, "step": 16126 }, { "epoch": 0.6822489212285303, "grad_norm": 0.25918006896972656, "learning_rate": 0.001, "loss": 3.0838, "step": 16127 }, { "epoch": 0.6822912259920467, "grad_norm": 0.17397001385688782, "learning_rate": 0.001, "loss": 2.6565, "step": 16128 }, { "epoch": 0.682333530755563, "grad_norm": 0.262084424495697, "learning_rate": 0.001, "loss": 2.5259, "step": 16129 }, { "epoch": 0.6823758355190794, "grad_norm": 0.21808075904846191, "learning_rate": 0.001, "loss": 2.4083, "step": 16130 }, { "epoch": 0.6824181402825958, "grad_norm": 0.17491497099399567, "learning_rate": 0.001, "loss": 2.25, "step": 16131 }, { "epoch": 0.6824604450461121, "grad_norm": 0.1757962703704834, "learning_rate": 0.001, "loss": 2.1881, "step": 16132 }, { "epoch": 0.6825027498096286, "grad_norm": 0.3888621926307678, "learning_rate": 0.001, "loss": 2.3828, "step": 16133 }, { "epoch": 0.682545054573145, "grad_norm": 0.15985238552093506, "learning_rate": 0.001, "loss": 2.4909, "step": 16134 }, { "epoch": 0.6825873593366613, "grad_norm": 0.21272926032543182, "learning_rate": 0.001, "loss": 2.7576, "step": 16135 }, { "epoch": 0.6826296641001777, "grad_norm": 0.2001175731420517, "learning_rate": 0.001, "loss": 2.2182, "step": 16136 }, { "epoch": 0.6826719688636941, "grad_norm": 0.18408158421516418, "learning_rate": 0.001, "loss": 1.8213, "step": 16137 }, { "epoch": 0.6827142736272104, "grad_norm": 0.1814546287059784, "learning_rate": 0.001, "loss": 2.6382, "step": 16138 }, { "epoch": 0.6827565783907268, "grad_norm": 0.17873550951480865, "learning_rate": 0.001, "loss": 3.3059, "step": 16139 }, { "epoch": 0.6827988831542432, "grad_norm": 0.20914815366268158, "learning_rate": 0.001, "loss": 2.5146, "step": 16140 }, { "epoch": 0.6828411879177595, "grad_norm": 9.856534004211426, "learning_rate": 0.001, "loss": 2.0903, "step": 16141 }, { "epoch": 0.6828834926812759, "grad_norm": 0.19409655034542084, "learning_rate": 0.001, "loss": 2.5403, "step": 16142 }, { "epoch": 0.6829257974447923, "grad_norm": 0.424080491065979, "learning_rate": 0.001, "loss": 3.0397, "step": 16143 }, { "epoch": 0.6829681022083086, "grad_norm": 1.3478124141693115, "learning_rate": 0.001, "loss": 2.0678, "step": 16144 }, { "epoch": 0.683010406971825, "grad_norm": 0.20509791374206543, "learning_rate": 0.001, "loss": 2.4558, "step": 16145 }, { "epoch": 0.6830527117353414, "grad_norm": 0.19733795523643494, "learning_rate": 0.001, "loss": 2.6852, "step": 16146 }, { "epoch": 0.6830950164988577, "grad_norm": 0.16524255275726318, "learning_rate": 0.001, "loss": 2.4211, "step": 16147 }, { "epoch": 0.6831373212623741, "grad_norm": 0.17051273584365845, "learning_rate": 0.001, "loss": 2.6465, "step": 16148 }, { "epoch": 0.6831796260258906, "grad_norm": 0.4586073160171509, "learning_rate": 0.001, "loss": 3.2835, "step": 16149 }, { "epoch": 0.6832219307894069, "grad_norm": 0.38520026206970215, "learning_rate": 0.001, "loss": 1.8567, "step": 16150 }, { "epoch": 0.6832642355529233, "grad_norm": 7.5121941566467285, "learning_rate": 0.001, "loss": 2.7541, "step": 16151 }, { "epoch": 0.6833065403164397, "grad_norm": 0.19074460864067078, "learning_rate": 0.001, "loss": 2.2912, "step": 16152 }, { "epoch": 0.683348845079956, "grad_norm": 0.28227201104164124, "learning_rate": 0.001, "loss": 1.4986, "step": 16153 }, { "epoch": 0.6833911498434724, "grad_norm": 0.1890229731798172, "learning_rate": 0.001, "loss": 2.4198, "step": 16154 }, { "epoch": 0.6834334546069888, "grad_norm": 0.3094002902507782, "learning_rate": 0.001, "loss": 1.7272, "step": 16155 }, { "epoch": 0.6834757593705051, "grad_norm": 0.4410035014152527, "learning_rate": 0.001, "loss": 1.7713, "step": 16156 }, { "epoch": 0.6835180641340215, "grad_norm": 0.20090627670288086, "learning_rate": 0.001, "loss": 2.3903, "step": 16157 }, { "epoch": 0.6835603688975379, "grad_norm": 0.1748632788658142, "learning_rate": 0.001, "loss": 2.1165, "step": 16158 }, { "epoch": 0.6836026736610542, "grad_norm": 0.18395616114139557, "learning_rate": 0.001, "loss": 1.9808, "step": 16159 }, { "epoch": 0.6836449784245706, "grad_norm": 0.18044279515743256, "learning_rate": 0.001, "loss": 2.2385, "step": 16160 }, { "epoch": 0.683687283188087, "grad_norm": 0.7966620922088623, "learning_rate": 0.001, "loss": 2.1261, "step": 16161 }, { "epoch": 0.6837295879516033, "grad_norm": 0.5675308108329773, "learning_rate": 0.001, "loss": 1.9956, "step": 16162 }, { "epoch": 0.6837718927151197, "grad_norm": 0.15210571885108948, "learning_rate": 0.001, "loss": 1.8715, "step": 16163 }, { "epoch": 0.6838141974786361, "grad_norm": 0.19697412848472595, "learning_rate": 0.001, "loss": 2.2049, "step": 16164 }, { "epoch": 0.6838565022421524, "grad_norm": 0.2632860243320465, "learning_rate": 0.001, "loss": 2.1967, "step": 16165 }, { "epoch": 0.6838988070056689, "grad_norm": 0.14289480447769165, "learning_rate": 0.001, "loss": 2.3733, "step": 16166 }, { "epoch": 0.6839411117691853, "grad_norm": 0.16946500539779663, "learning_rate": 0.001, "loss": 2.1708, "step": 16167 }, { "epoch": 0.6839834165327016, "grad_norm": 0.1758553683757782, "learning_rate": 0.001, "loss": 1.8589, "step": 16168 }, { "epoch": 0.684025721296218, "grad_norm": 0.1700044721364975, "learning_rate": 0.001, "loss": 1.6158, "step": 16169 }, { "epoch": 0.6840680260597344, "grad_norm": 0.15607595443725586, "learning_rate": 0.001, "loss": 1.6295, "step": 16170 }, { "epoch": 0.6841103308232507, "grad_norm": 0.173630490899086, "learning_rate": 0.001, "loss": 2.871, "step": 16171 }, { "epoch": 0.6841526355867671, "grad_norm": 0.3207765817642212, "learning_rate": 0.001, "loss": 2.2475, "step": 16172 }, { "epoch": 0.6841949403502834, "grad_norm": 0.1741725355386734, "learning_rate": 0.001, "loss": 2.2591, "step": 16173 }, { "epoch": 0.6842372451137998, "grad_norm": 0.17393991351127625, "learning_rate": 0.001, "loss": 2.1785, "step": 16174 }, { "epoch": 0.6842795498773162, "grad_norm": 0.15135467052459717, "learning_rate": 0.001, "loss": 2.0614, "step": 16175 }, { "epoch": 0.6843218546408325, "grad_norm": 0.24743154644966125, "learning_rate": 0.001, "loss": 2.5485, "step": 16176 }, { "epoch": 0.6843641594043489, "grad_norm": 0.28311067819595337, "learning_rate": 0.001, "loss": 2.6526, "step": 16177 }, { "epoch": 0.6844064641678653, "grad_norm": 0.14594843983650208, "learning_rate": 0.001, "loss": 2.3248, "step": 16178 }, { "epoch": 0.6844487689313816, "grad_norm": 2.94158935546875, "learning_rate": 0.001, "loss": 2.2385, "step": 16179 }, { "epoch": 0.684491073694898, "grad_norm": 0.19169901311397552, "learning_rate": 0.001, "loss": 2.3112, "step": 16180 }, { "epoch": 0.6845333784584144, "grad_norm": 0.6225913763046265, "learning_rate": 0.001, "loss": 2.5312, "step": 16181 }, { "epoch": 0.6845756832219307, "grad_norm": 0.1315428614616394, "learning_rate": 0.001, "loss": 2.1162, "step": 16182 }, { "epoch": 0.6846179879854472, "grad_norm": 0.1488700956106186, "learning_rate": 0.001, "loss": 1.6814, "step": 16183 }, { "epoch": 0.6846602927489636, "grad_norm": 0.1776464730501175, "learning_rate": 0.001, "loss": 2.0227, "step": 16184 }, { "epoch": 0.6847025975124799, "grad_norm": 0.17623931169509888, "learning_rate": 0.001, "loss": 2.0609, "step": 16185 }, { "epoch": 0.6847449022759963, "grad_norm": 0.2685874402523041, "learning_rate": 0.001, "loss": 2.3874, "step": 16186 }, { "epoch": 0.6847872070395127, "grad_norm": 0.20432405173778534, "learning_rate": 0.001, "loss": 1.6907, "step": 16187 }, { "epoch": 0.684829511803029, "grad_norm": 0.17755457758903503, "learning_rate": 0.001, "loss": 1.7708, "step": 16188 }, { "epoch": 0.6848718165665454, "grad_norm": 0.3379620611667633, "learning_rate": 0.001, "loss": 2.383, "step": 16189 }, { "epoch": 0.6849141213300618, "grad_norm": 0.31830352544784546, "learning_rate": 0.001, "loss": 1.7277, "step": 16190 }, { "epoch": 0.6849564260935781, "grad_norm": 0.18110892176628113, "learning_rate": 0.001, "loss": 3.5317, "step": 16191 }, { "epoch": 0.6849987308570945, "grad_norm": 0.5665266513824463, "learning_rate": 0.001, "loss": 2.4427, "step": 16192 }, { "epoch": 0.6850410356206109, "grad_norm": 0.28646814823150635, "learning_rate": 0.001, "loss": 3.2973, "step": 16193 }, { "epoch": 0.6850833403841272, "grad_norm": 0.19316011667251587, "learning_rate": 0.001, "loss": 2.7649, "step": 16194 }, { "epoch": 0.6851256451476436, "grad_norm": 0.21821334958076477, "learning_rate": 0.001, "loss": 2.3747, "step": 16195 }, { "epoch": 0.68516794991116, "grad_norm": 0.18347589671611786, "learning_rate": 0.001, "loss": 2.0008, "step": 16196 }, { "epoch": 0.6852102546746763, "grad_norm": 0.1854442059993744, "learning_rate": 0.001, "loss": 1.7193, "step": 16197 }, { "epoch": 0.6852525594381927, "grad_norm": 0.17909593880176544, "learning_rate": 0.001, "loss": 2.0372, "step": 16198 }, { "epoch": 0.6852948642017092, "grad_norm": 0.1535256803035736, "learning_rate": 0.001, "loss": 1.7345, "step": 16199 }, { "epoch": 0.6853371689652255, "grad_norm": 0.20372457802295685, "learning_rate": 0.001, "loss": 2.1256, "step": 16200 }, { "epoch": 0.6853794737287419, "grad_norm": 0.291826456785202, "learning_rate": 0.001, "loss": 2.5165, "step": 16201 }, { "epoch": 0.6854217784922583, "grad_norm": 0.18575716018676758, "learning_rate": 0.001, "loss": 1.7661, "step": 16202 }, { "epoch": 0.6854640832557746, "grad_norm": 0.15441353619098663, "learning_rate": 0.001, "loss": 1.672, "step": 16203 }, { "epoch": 0.685506388019291, "grad_norm": 1.2076762914657593, "learning_rate": 0.001, "loss": 1.9718, "step": 16204 }, { "epoch": 0.6855486927828074, "grad_norm": 0.17845335602760315, "learning_rate": 0.001, "loss": 2.2695, "step": 16205 }, { "epoch": 0.6855909975463237, "grad_norm": 0.1648271232843399, "learning_rate": 0.001, "loss": 2.9431, "step": 16206 }, { "epoch": 0.6856333023098401, "grad_norm": 0.264567494392395, "learning_rate": 0.001, "loss": 2.7659, "step": 16207 }, { "epoch": 0.6856756070733565, "grad_norm": 0.15774111449718475, "learning_rate": 0.001, "loss": 1.7371, "step": 16208 }, { "epoch": 0.6857179118368728, "grad_norm": 0.3049740195274353, "learning_rate": 0.001, "loss": 2.4582, "step": 16209 }, { "epoch": 0.6857602166003892, "grad_norm": 0.371410071849823, "learning_rate": 0.001, "loss": 3.2076, "step": 16210 }, { "epoch": 0.6858025213639056, "grad_norm": 0.17293484508991241, "learning_rate": 0.001, "loss": 1.4464, "step": 16211 }, { "epoch": 0.6858448261274219, "grad_norm": 0.3040180802345276, "learning_rate": 0.001, "loss": 2.9824, "step": 16212 }, { "epoch": 0.6858871308909383, "grad_norm": 0.22977972030639648, "learning_rate": 0.001, "loss": 3.5424, "step": 16213 }, { "epoch": 0.6859294356544547, "grad_norm": 0.14453327655792236, "learning_rate": 0.001, "loss": 1.4584, "step": 16214 }, { "epoch": 0.685971740417971, "grad_norm": 0.3432617485523224, "learning_rate": 0.001, "loss": 2.2774, "step": 16215 }, { "epoch": 0.6860140451814875, "grad_norm": 0.614466667175293, "learning_rate": 0.001, "loss": 2.9487, "step": 16216 }, { "epoch": 0.6860563499450039, "grad_norm": 0.5213249325752258, "learning_rate": 0.001, "loss": 2.6308, "step": 16217 }, { "epoch": 0.6860986547085202, "grad_norm": 0.13962678611278534, "learning_rate": 0.001, "loss": 2.3401, "step": 16218 }, { "epoch": 0.6861409594720366, "grad_norm": 0.13959261775016785, "learning_rate": 0.001, "loss": 2.066, "step": 16219 }, { "epoch": 0.6861832642355529, "grad_norm": 0.12609069049358368, "learning_rate": 0.001, "loss": 2.7186, "step": 16220 }, { "epoch": 0.6862255689990693, "grad_norm": 0.1317724585533142, "learning_rate": 0.001, "loss": 1.6887, "step": 16221 }, { "epoch": 0.6862678737625857, "grad_norm": 0.22671738266944885, "learning_rate": 0.001, "loss": 1.519, "step": 16222 }, { "epoch": 0.686310178526102, "grad_norm": 0.21472686529159546, "learning_rate": 0.001, "loss": 2.5224, "step": 16223 }, { "epoch": 0.6863524832896184, "grad_norm": 0.5818791389465332, "learning_rate": 0.001, "loss": 1.7538, "step": 16224 }, { "epoch": 0.6863947880531348, "grad_norm": 0.22420547902584076, "learning_rate": 0.001, "loss": 1.5812, "step": 16225 }, { "epoch": 0.6864370928166511, "grad_norm": 0.16626037657260895, "learning_rate": 0.001, "loss": 1.8388, "step": 16226 }, { "epoch": 0.6864793975801675, "grad_norm": 0.19007837772369385, "learning_rate": 0.001, "loss": 2.5345, "step": 16227 }, { "epoch": 0.6865217023436839, "grad_norm": 0.3126547932624817, "learning_rate": 0.001, "loss": 2.5063, "step": 16228 }, { "epoch": 0.6865640071072002, "grad_norm": 0.2687819004058838, "learning_rate": 0.001, "loss": 3.085, "step": 16229 }, { "epoch": 0.6866063118707166, "grad_norm": 0.9603768587112427, "learning_rate": 0.001, "loss": 1.8578, "step": 16230 }, { "epoch": 0.686648616634233, "grad_norm": 0.1472010314464569, "learning_rate": 0.001, "loss": 3.0854, "step": 16231 }, { "epoch": 0.6866909213977493, "grad_norm": 0.20346207916736603, "learning_rate": 0.001, "loss": 1.8353, "step": 16232 }, { "epoch": 0.6867332261612658, "grad_norm": 0.17270101606845856, "learning_rate": 0.001, "loss": 3.0351, "step": 16233 }, { "epoch": 0.6867755309247822, "grad_norm": 0.14880718290805817, "learning_rate": 0.001, "loss": 2.1701, "step": 16234 }, { "epoch": 0.6868178356882985, "grad_norm": 0.23077774047851562, "learning_rate": 0.001, "loss": 3.2428, "step": 16235 }, { "epoch": 0.6868601404518149, "grad_norm": 0.14143741130828857, "learning_rate": 0.001, "loss": 1.7177, "step": 16236 }, { "epoch": 0.6869024452153313, "grad_norm": 0.15532909333705902, "learning_rate": 0.001, "loss": 1.4819, "step": 16237 }, { "epoch": 0.6869447499788476, "grad_norm": 1.5468194484710693, "learning_rate": 0.001, "loss": 1.6917, "step": 16238 }, { "epoch": 0.686987054742364, "grad_norm": 0.16617412865161896, "learning_rate": 0.001, "loss": 1.4165, "step": 16239 }, { "epoch": 0.6870293595058804, "grad_norm": 0.25191357731819153, "learning_rate": 0.001, "loss": 2.1047, "step": 16240 }, { "epoch": 0.6870716642693967, "grad_norm": 0.15333382785320282, "learning_rate": 0.001, "loss": 1.978, "step": 16241 }, { "epoch": 0.6871139690329131, "grad_norm": 0.16414469480514526, "learning_rate": 0.001, "loss": 2.0745, "step": 16242 }, { "epoch": 0.6871562737964295, "grad_norm": 1.5129097700119019, "learning_rate": 0.001, "loss": 2.2844, "step": 16243 }, { "epoch": 0.6871985785599458, "grad_norm": 0.18176978826522827, "learning_rate": 0.001, "loss": 2.1622, "step": 16244 }, { "epoch": 0.6872408833234622, "grad_norm": 0.17760765552520752, "learning_rate": 0.001, "loss": 2.9898, "step": 16245 }, { "epoch": 0.6872831880869786, "grad_norm": 0.2571372985839844, "learning_rate": 0.001, "loss": 2.6795, "step": 16246 }, { "epoch": 0.6873254928504949, "grad_norm": 0.20494753122329712, "learning_rate": 0.001, "loss": 1.6626, "step": 16247 }, { "epoch": 0.6873677976140113, "grad_norm": 0.9767950773239136, "learning_rate": 0.001, "loss": 1.6601, "step": 16248 }, { "epoch": 0.6874101023775278, "grad_norm": 0.2790040075778961, "learning_rate": 0.001, "loss": 3.1638, "step": 16249 }, { "epoch": 0.687452407141044, "grad_norm": 0.2396850734949112, "learning_rate": 0.001, "loss": 1.8693, "step": 16250 }, { "epoch": 0.6874947119045605, "grad_norm": 1.0322060585021973, "learning_rate": 0.001, "loss": 2.7673, "step": 16251 }, { "epoch": 0.6875370166680769, "grad_norm": 0.15175770223140717, "learning_rate": 0.001, "loss": 3.0304, "step": 16252 }, { "epoch": 0.6875793214315932, "grad_norm": 0.18464677035808563, "learning_rate": 0.001, "loss": 2.1256, "step": 16253 }, { "epoch": 0.6876216261951096, "grad_norm": 0.16536930203437805, "learning_rate": 0.001, "loss": 2.0773, "step": 16254 }, { "epoch": 0.687663930958626, "grad_norm": 0.20809626579284668, "learning_rate": 0.001, "loss": 1.8316, "step": 16255 }, { "epoch": 0.6877062357221423, "grad_norm": 0.163310706615448, "learning_rate": 0.001, "loss": 2.1962, "step": 16256 }, { "epoch": 0.6877485404856587, "grad_norm": 0.39365145564079285, "learning_rate": 0.001, "loss": 2.7742, "step": 16257 }, { "epoch": 0.6877908452491751, "grad_norm": 0.5424996018409729, "learning_rate": 0.001, "loss": 3.4448, "step": 16258 }, { "epoch": 0.6878331500126914, "grad_norm": 0.1544448286294937, "learning_rate": 0.001, "loss": 1.9199, "step": 16259 }, { "epoch": 0.6878754547762078, "grad_norm": 10.53158950805664, "learning_rate": 0.001, "loss": 2.1641, "step": 16260 }, { "epoch": 0.6879177595397242, "grad_norm": 0.3506162166595459, "learning_rate": 0.001, "loss": 2.6466, "step": 16261 }, { "epoch": 0.6879600643032405, "grad_norm": 0.16496671736240387, "learning_rate": 0.001, "loss": 2.5371, "step": 16262 }, { "epoch": 0.6880023690667569, "grad_norm": 0.1795535832643509, "learning_rate": 0.001, "loss": 3.5854, "step": 16263 }, { "epoch": 0.6880446738302732, "grad_norm": 0.1523294448852539, "learning_rate": 0.001, "loss": 1.3369, "step": 16264 }, { "epoch": 0.6880869785937896, "grad_norm": 0.16334094107151031, "learning_rate": 0.001, "loss": 1.9804, "step": 16265 }, { "epoch": 0.688129283357306, "grad_norm": 0.36673104763031006, "learning_rate": 0.001, "loss": 2.0428, "step": 16266 }, { "epoch": 0.6881715881208224, "grad_norm": 0.23456034064292908, "learning_rate": 0.001, "loss": 1.778, "step": 16267 }, { "epoch": 0.6882138928843388, "grad_norm": 0.1658594310283661, "learning_rate": 0.001, "loss": 1.8266, "step": 16268 }, { "epoch": 0.6882561976478552, "grad_norm": 0.17385973036289215, "learning_rate": 0.001, "loss": 2.7395, "step": 16269 }, { "epoch": 0.6882985024113715, "grad_norm": 0.20876826345920563, "learning_rate": 0.001, "loss": 2.2943, "step": 16270 }, { "epoch": 0.6883408071748879, "grad_norm": 0.2948185205459595, "learning_rate": 0.001, "loss": 2.295, "step": 16271 }, { "epoch": 0.6883831119384043, "grad_norm": 0.1359289586544037, "learning_rate": 0.001, "loss": 2.0827, "step": 16272 }, { "epoch": 0.6884254167019206, "grad_norm": 0.1961379051208496, "learning_rate": 0.001, "loss": 2.3073, "step": 16273 }, { "epoch": 0.688467721465437, "grad_norm": 0.2041141390800476, "learning_rate": 0.001, "loss": 1.6394, "step": 16274 }, { "epoch": 0.6885100262289534, "grad_norm": 7.983102798461914, "learning_rate": 0.001, "loss": 1.5184, "step": 16275 }, { "epoch": 0.6885523309924697, "grad_norm": 0.1904926300048828, "learning_rate": 0.001, "loss": 2.1257, "step": 16276 }, { "epoch": 0.6885946357559861, "grad_norm": 0.16287410259246826, "learning_rate": 0.001, "loss": 2.0357, "step": 16277 }, { "epoch": 0.6886369405195025, "grad_norm": 0.17052966356277466, "learning_rate": 0.001, "loss": 2.5068, "step": 16278 }, { "epoch": 0.6886792452830188, "grad_norm": 0.35149747133255005, "learning_rate": 0.001, "loss": 2.5683, "step": 16279 }, { "epoch": 0.6887215500465352, "grad_norm": 1.4754235744476318, "learning_rate": 0.001, "loss": 2.9501, "step": 16280 }, { "epoch": 0.6887638548100516, "grad_norm": 0.1624482125043869, "learning_rate": 0.001, "loss": 1.9048, "step": 16281 }, { "epoch": 0.6888061595735679, "grad_norm": 1.466571569442749, "learning_rate": 0.001, "loss": 2.5641, "step": 16282 }, { "epoch": 0.6888484643370844, "grad_norm": 0.18369127810001373, "learning_rate": 0.001, "loss": 1.8939, "step": 16283 }, { "epoch": 0.6888907691006008, "grad_norm": 0.31858545541763306, "learning_rate": 0.001, "loss": 2.515, "step": 16284 }, { "epoch": 0.6889330738641171, "grad_norm": 1.7627861499786377, "learning_rate": 0.001, "loss": 2.8145, "step": 16285 }, { "epoch": 0.6889753786276335, "grad_norm": 0.20102620124816895, "learning_rate": 0.001, "loss": 2.8067, "step": 16286 }, { "epoch": 0.6890176833911499, "grad_norm": 0.20633406937122345, "learning_rate": 0.001, "loss": 2.5596, "step": 16287 }, { "epoch": 0.6890599881546662, "grad_norm": 0.2921423017978668, "learning_rate": 0.001, "loss": 2.6385, "step": 16288 }, { "epoch": 0.6891022929181826, "grad_norm": 1.0566281080245972, "learning_rate": 0.001, "loss": 3.4956, "step": 16289 }, { "epoch": 0.689144597681699, "grad_norm": 0.23099292814731598, "learning_rate": 0.001, "loss": 2.0018, "step": 16290 }, { "epoch": 0.6891869024452153, "grad_norm": 0.19271378219127655, "learning_rate": 0.001, "loss": 2.4489, "step": 16291 }, { "epoch": 0.6892292072087317, "grad_norm": 0.20972347259521484, "learning_rate": 0.001, "loss": 2.3633, "step": 16292 }, { "epoch": 0.6892715119722481, "grad_norm": 0.17728467285633087, "learning_rate": 0.001, "loss": 1.8417, "step": 16293 }, { "epoch": 0.6893138167357644, "grad_norm": 1.631922960281372, "learning_rate": 0.001, "loss": 1.903, "step": 16294 }, { "epoch": 0.6893561214992808, "grad_norm": 0.39803799986839294, "learning_rate": 0.001, "loss": 2.5417, "step": 16295 }, { "epoch": 0.6893984262627972, "grad_norm": 0.206565260887146, "learning_rate": 0.001, "loss": 2.3135, "step": 16296 }, { "epoch": 0.6894407310263135, "grad_norm": 0.18916979432106018, "learning_rate": 0.001, "loss": 1.8302, "step": 16297 }, { "epoch": 0.68948303578983, "grad_norm": 0.21091687679290771, "learning_rate": 0.001, "loss": 2.1947, "step": 16298 }, { "epoch": 0.6895253405533464, "grad_norm": 0.16629081964492798, "learning_rate": 0.001, "loss": 3.1085, "step": 16299 }, { "epoch": 0.6895676453168627, "grad_norm": 0.16846491396427155, "learning_rate": 0.001, "loss": 1.5303, "step": 16300 }, { "epoch": 0.6896099500803791, "grad_norm": 0.1889643669128418, "learning_rate": 0.001, "loss": 2.0213, "step": 16301 }, { "epoch": 0.6896522548438955, "grad_norm": 0.17257481813430786, "learning_rate": 0.001, "loss": 2.3829, "step": 16302 }, { "epoch": 0.6896945596074118, "grad_norm": 0.17580455541610718, "learning_rate": 0.001, "loss": 2.7781, "step": 16303 }, { "epoch": 0.6897368643709282, "grad_norm": 0.1580284833908081, "learning_rate": 0.001, "loss": 2.3906, "step": 16304 }, { "epoch": 0.6897791691344446, "grad_norm": 0.1577870398759842, "learning_rate": 0.001, "loss": 2.7536, "step": 16305 }, { "epoch": 0.6898214738979609, "grad_norm": 0.17416058480739594, "learning_rate": 0.001, "loss": 2.1967, "step": 16306 }, { "epoch": 0.6898637786614773, "grad_norm": 0.7185014486312866, "learning_rate": 0.001, "loss": 1.3881, "step": 16307 }, { "epoch": 0.6899060834249936, "grad_norm": 0.23386551439762115, "learning_rate": 0.001, "loss": 3.7348, "step": 16308 }, { "epoch": 0.68994838818851, "grad_norm": 0.1478804051876068, "learning_rate": 0.001, "loss": 1.8103, "step": 16309 }, { "epoch": 0.6899906929520264, "grad_norm": 0.14378052949905396, "learning_rate": 0.001, "loss": 1.8187, "step": 16310 }, { "epoch": 0.6900329977155427, "grad_norm": 0.19731678068637848, "learning_rate": 0.001, "loss": 2.5575, "step": 16311 }, { "epoch": 0.6900753024790591, "grad_norm": 0.6392672657966614, "learning_rate": 0.001, "loss": 2.2473, "step": 16312 }, { "epoch": 0.6901176072425755, "grad_norm": 0.5831864476203918, "learning_rate": 0.001, "loss": 1.6424, "step": 16313 }, { "epoch": 0.6901599120060918, "grad_norm": 0.573341965675354, "learning_rate": 0.001, "loss": 1.8507, "step": 16314 }, { "epoch": 0.6902022167696082, "grad_norm": 0.1696714460849762, "learning_rate": 0.001, "loss": 2.02, "step": 16315 }, { "epoch": 0.6902445215331247, "grad_norm": 0.8533129692077637, "learning_rate": 0.001, "loss": 2.3024, "step": 16316 }, { "epoch": 0.690286826296641, "grad_norm": 0.9942197203636169, "learning_rate": 0.001, "loss": 1.8328, "step": 16317 }, { "epoch": 0.6903291310601574, "grad_norm": 0.587260901927948, "learning_rate": 0.001, "loss": 2.375, "step": 16318 }, { "epoch": 0.6903714358236738, "grad_norm": 0.17391139268875122, "learning_rate": 0.001, "loss": 1.7228, "step": 16319 }, { "epoch": 0.6904137405871901, "grad_norm": 0.437977135181427, "learning_rate": 0.001, "loss": 2.7662, "step": 16320 }, { "epoch": 0.6904560453507065, "grad_norm": 0.2368604987859726, "learning_rate": 0.001, "loss": 2.2516, "step": 16321 }, { "epoch": 0.6904983501142229, "grad_norm": 0.1805589199066162, "learning_rate": 0.001, "loss": 2.1679, "step": 16322 }, { "epoch": 0.6905406548777392, "grad_norm": 0.6197409629821777, "learning_rate": 0.001, "loss": 1.9621, "step": 16323 }, { "epoch": 0.6905829596412556, "grad_norm": 0.16126328706741333, "learning_rate": 0.001, "loss": 2.1508, "step": 16324 }, { "epoch": 0.690625264404772, "grad_norm": 0.17025581002235413, "learning_rate": 0.001, "loss": 2.3355, "step": 16325 }, { "epoch": 0.6906675691682883, "grad_norm": 0.2806219756603241, "learning_rate": 0.001, "loss": 2.0408, "step": 16326 }, { "epoch": 0.6907098739318047, "grad_norm": 0.4944941997528076, "learning_rate": 0.001, "loss": 3.1678, "step": 16327 }, { "epoch": 0.6907521786953211, "grad_norm": 0.3849775195121765, "learning_rate": 0.001, "loss": 2.0565, "step": 16328 }, { "epoch": 0.6907944834588374, "grad_norm": 0.17728754878044128, "learning_rate": 0.001, "loss": 1.5861, "step": 16329 }, { "epoch": 0.6908367882223538, "grad_norm": 3.389814615249634, "learning_rate": 0.001, "loss": 2.1157, "step": 16330 }, { "epoch": 0.6908790929858702, "grad_norm": 0.16745001077651978, "learning_rate": 0.001, "loss": 1.7655, "step": 16331 }, { "epoch": 0.6909213977493865, "grad_norm": 0.2383284568786621, "learning_rate": 0.001, "loss": 2.2718, "step": 16332 }, { "epoch": 0.690963702512903, "grad_norm": 1.5297582149505615, "learning_rate": 0.001, "loss": 2.8112, "step": 16333 }, { "epoch": 0.6910060072764194, "grad_norm": 0.17354048788547516, "learning_rate": 0.001, "loss": 2.7349, "step": 16334 }, { "epoch": 0.6910483120399357, "grad_norm": 0.1723877489566803, "learning_rate": 0.001, "loss": 2.1994, "step": 16335 }, { "epoch": 0.6910906168034521, "grad_norm": 0.160502627491951, "learning_rate": 0.001, "loss": 2.5597, "step": 16336 }, { "epoch": 0.6911329215669685, "grad_norm": 0.3956283628940582, "learning_rate": 0.001, "loss": 1.5582, "step": 16337 }, { "epoch": 0.6911752263304848, "grad_norm": 0.27428823709487915, "learning_rate": 0.001, "loss": 2.2426, "step": 16338 }, { "epoch": 0.6912175310940012, "grad_norm": 0.6840664148330688, "learning_rate": 0.001, "loss": 2.731, "step": 16339 }, { "epoch": 0.6912598358575176, "grad_norm": 0.1819261908531189, "learning_rate": 0.001, "loss": 2.0739, "step": 16340 }, { "epoch": 0.6913021406210339, "grad_norm": 0.2000657320022583, "learning_rate": 0.001, "loss": 3.0519, "step": 16341 }, { "epoch": 0.6913444453845503, "grad_norm": 0.28833070397377014, "learning_rate": 0.001, "loss": 2.1887, "step": 16342 }, { "epoch": 0.6913867501480667, "grad_norm": 0.24844685196876526, "learning_rate": 0.001, "loss": 2.7631, "step": 16343 }, { "epoch": 0.691429054911583, "grad_norm": 0.16090954840183258, "learning_rate": 0.001, "loss": 2.0467, "step": 16344 }, { "epoch": 0.6914713596750994, "grad_norm": 0.19617071747779846, "learning_rate": 0.001, "loss": 1.4285, "step": 16345 }, { "epoch": 0.6915136644386158, "grad_norm": 0.16506654024124146, "learning_rate": 0.001, "loss": 1.3764, "step": 16346 }, { "epoch": 0.6915559692021321, "grad_norm": 0.175397127866745, "learning_rate": 0.001, "loss": 1.7242, "step": 16347 }, { "epoch": 0.6915982739656485, "grad_norm": 0.20223967730998993, "learning_rate": 0.001, "loss": 2.0692, "step": 16348 }, { "epoch": 0.691640578729165, "grad_norm": 0.17678618431091309, "learning_rate": 0.001, "loss": 2.9338, "step": 16349 }, { "epoch": 0.6916828834926813, "grad_norm": 0.2138862907886505, "learning_rate": 0.001, "loss": 2.1967, "step": 16350 }, { "epoch": 0.6917251882561977, "grad_norm": 0.15194635093212128, "learning_rate": 0.001, "loss": 2.0574, "step": 16351 }, { "epoch": 0.6917674930197141, "grad_norm": 0.2285081446170807, "learning_rate": 0.001, "loss": 3.0883, "step": 16352 }, { "epoch": 0.6918097977832304, "grad_norm": 0.17893058061599731, "learning_rate": 0.001, "loss": 1.9451, "step": 16353 }, { "epoch": 0.6918521025467468, "grad_norm": 0.1954171061515808, "learning_rate": 0.001, "loss": 1.7675, "step": 16354 }, { "epoch": 0.6918944073102631, "grad_norm": 0.1901036947965622, "learning_rate": 0.001, "loss": 2.7754, "step": 16355 }, { "epoch": 0.6919367120737795, "grad_norm": 0.2298295795917511, "learning_rate": 0.001, "loss": 1.8922, "step": 16356 }, { "epoch": 0.6919790168372959, "grad_norm": 0.166233092546463, "learning_rate": 0.001, "loss": 1.6469, "step": 16357 }, { "epoch": 0.6920213216008122, "grad_norm": 0.17486384510993958, "learning_rate": 0.001, "loss": 1.7152, "step": 16358 }, { "epoch": 0.6920636263643286, "grad_norm": 0.2248300462961197, "learning_rate": 0.001, "loss": 1.6036, "step": 16359 }, { "epoch": 0.692105931127845, "grad_norm": 0.36651915311813354, "learning_rate": 0.001, "loss": 3.0015, "step": 16360 }, { "epoch": 0.6921482358913613, "grad_norm": 0.2662813663482666, "learning_rate": 0.001, "loss": 2.7571, "step": 16361 }, { "epoch": 0.6921905406548777, "grad_norm": 0.540983259677887, "learning_rate": 0.001, "loss": 1.9258, "step": 16362 }, { "epoch": 0.6922328454183941, "grad_norm": 0.44526684284210205, "learning_rate": 0.001, "loss": 1.8432, "step": 16363 }, { "epoch": 0.6922751501819104, "grad_norm": 4.076467990875244, "learning_rate": 0.001, "loss": 4.882, "step": 16364 }, { "epoch": 0.6923174549454268, "grad_norm": 0.16594544053077698, "learning_rate": 0.001, "loss": 1.753, "step": 16365 }, { "epoch": 0.6923597597089433, "grad_norm": 0.15641087293624878, "learning_rate": 0.001, "loss": 2.2164, "step": 16366 }, { "epoch": 0.6924020644724596, "grad_norm": 0.14572037756443024, "learning_rate": 0.001, "loss": 3.1288, "step": 16367 }, { "epoch": 0.692444369235976, "grad_norm": 0.18323473632335663, "learning_rate": 0.001, "loss": 1.6634, "step": 16368 }, { "epoch": 0.6924866739994924, "grad_norm": 0.552975594997406, "learning_rate": 0.001, "loss": 2.5361, "step": 16369 }, { "epoch": 0.6925289787630087, "grad_norm": 0.743918240070343, "learning_rate": 0.001, "loss": 3.3047, "step": 16370 }, { "epoch": 0.6925712835265251, "grad_norm": 0.22077415883541107, "learning_rate": 0.001, "loss": 1.9185, "step": 16371 }, { "epoch": 0.6926135882900415, "grad_norm": 4.2804365158081055, "learning_rate": 0.001, "loss": 1.6356, "step": 16372 }, { "epoch": 0.6926558930535578, "grad_norm": 0.23495794832706451, "learning_rate": 0.001, "loss": 2.2379, "step": 16373 }, { "epoch": 0.6926981978170742, "grad_norm": 0.14762520790100098, "learning_rate": 0.001, "loss": 1.7328, "step": 16374 }, { "epoch": 0.6927405025805906, "grad_norm": 1.2095277309417725, "learning_rate": 0.001, "loss": 3.5081, "step": 16375 }, { "epoch": 0.6927828073441069, "grad_norm": 0.16055262088775635, "learning_rate": 0.001, "loss": 2.0528, "step": 16376 }, { "epoch": 0.6928251121076233, "grad_norm": 0.24955902993679047, "learning_rate": 0.001, "loss": 2.0347, "step": 16377 }, { "epoch": 0.6928674168711397, "grad_norm": 0.8334779143333435, "learning_rate": 0.001, "loss": 2.8753, "step": 16378 }, { "epoch": 0.692909721634656, "grad_norm": 0.18906332552433014, "learning_rate": 0.001, "loss": 1.6332, "step": 16379 }, { "epoch": 0.6929520263981724, "grad_norm": 0.1773161143064499, "learning_rate": 0.001, "loss": 2.7939, "step": 16380 }, { "epoch": 0.6929943311616888, "grad_norm": 0.18923841416835785, "learning_rate": 0.001, "loss": 2.376, "step": 16381 }, { "epoch": 0.6930366359252051, "grad_norm": 0.1875029057264328, "learning_rate": 0.001, "loss": 2.5787, "step": 16382 }, { "epoch": 0.6930789406887216, "grad_norm": 0.1562403440475464, "learning_rate": 0.001, "loss": 1.9452, "step": 16383 }, { "epoch": 0.693121245452238, "grad_norm": 0.19524121284484863, "learning_rate": 0.001, "loss": 3.0866, "step": 16384 }, { "epoch": 0.6931635502157543, "grad_norm": 0.1857270449399948, "learning_rate": 0.001, "loss": 1.9935, "step": 16385 }, { "epoch": 0.6932058549792707, "grad_norm": 0.3167372941970825, "learning_rate": 0.001, "loss": 2.4217, "step": 16386 }, { "epoch": 0.6932481597427871, "grad_norm": 0.24016937613487244, "learning_rate": 0.001, "loss": 2.8926, "step": 16387 }, { "epoch": 0.6932904645063034, "grad_norm": 0.15906888246536255, "learning_rate": 0.001, "loss": 3.2683, "step": 16388 }, { "epoch": 0.6933327692698198, "grad_norm": 0.2372499704360962, "learning_rate": 0.001, "loss": 2.9656, "step": 16389 }, { "epoch": 0.6933750740333362, "grad_norm": 18.70325469970703, "learning_rate": 0.001, "loss": 1.8463, "step": 16390 }, { "epoch": 0.6934173787968525, "grad_norm": 0.21969793736934662, "learning_rate": 0.001, "loss": 1.9638, "step": 16391 }, { "epoch": 0.6934596835603689, "grad_norm": 1.0692784786224365, "learning_rate": 0.001, "loss": 2.1758, "step": 16392 }, { "epoch": 0.6935019883238853, "grad_norm": 0.17494791746139526, "learning_rate": 0.001, "loss": 1.9434, "step": 16393 }, { "epoch": 0.6935442930874016, "grad_norm": 10.130684852600098, "learning_rate": 0.001, "loss": 2.0469, "step": 16394 }, { "epoch": 0.693586597850918, "grad_norm": 0.18543873727321625, "learning_rate": 0.001, "loss": 1.6253, "step": 16395 }, { "epoch": 0.6936289026144344, "grad_norm": 0.176463320851326, "learning_rate": 0.001, "loss": 2.5161, "step": 16396 }, { "epoch": 0.6936712073779507, "grad_norm": 0.21182700991630554, "learning_rate": 0.001, "loss": 1.9964, "step": 16397 }, { "epoch": 0.6937135121414671, "grad_norm": 0.17434942722320557, "learning_rate": 0.001, "loss": 1.608, "step": 16398 }, { "epoch": 0.6937558169049834, "grad_norm": 1.2661168575286865, "learning_rate": 0.001, "loss": 3.2148, "step": 16399 }, { "epoch": 0.6937981216684999, "grad_norm": 16.555898666381836, "learning_rate": 0.001, "loss": 2.0895, "step": 16400 }, { "epoch": 0.6938404264320163, "grad_norm": 0.1642748862504959, "learning_rate": 0.001, "loss": 1.9288, "step": 16401 }, { "epoch": 0.6938827311955326, "grad_norm": 0.20912915468215942, "learning_rate": 0.001, "loss": 3.3075, "step": 16402 }, { "epoch": 0.693925035959049, "grad_norm": 0.1803750842809677, "learning_rate": 0.001, "loss": 3.1573, "step": 16403 }, { "epoch": 0.6939673407225654, "grad_norm": 0.21394596993923187, "learning_rate": 0.001, "loss": 2.6982, "step": 16404 }, { "epoch": 0.6940096454860817, "grad_norm": 0.22637422382831573, "learning_rate": 0.001, "loss": 2.2632, "step": 16405 }, { "epoch": 0.6940519502495981, "grad_norm": 0.3572201728820801, "learning_rate": 0.001, "loss": 2.7936, "step": 16406 }, { "epoch": 0.6940942550131145, "grad_norm": 17.344480514526367, "learning_rate": 0.001, "loss": 1.5021, "step": 16407 }, { "epoch": 0.6941365597766308, "grad_norm": 0.19448307156562805, "learning_rate": 0.001, "loss": 1.9725, "step": 16408 }, { "epoch": 0.6941788645401472, "grad_norm": 0.18592120707035065, "learning_rate": 0.001, "loss": 2.0079, "step": 16409 }, { "epoch": 0.6942211693036636, "grad_norm": 0.17572908103466034, "learning_rate": 0.001, "loss": 1.9132, "step": 16410 }, { "epoch": 0.6942634740671799, "grad_norm": 0.31208327412605286, "learning_rate": 0.001, "loss": 1.6902, "step": 16411 }, { "epoch": 0.6943057788306963, "grad_norm": 0.2312842458486557, "learning_rate": 0.001, "loss": 2.3156, "step": 16412 }, { "epoch": 0.6943480835942127, "grad_norm": 0.25776514410972595, "learning_rate": 0.001, "loss": 1.3374, "step": 16413 }, { "epoch": 0.694390388357729, "grad_norm": 0.7767313718795776, "learning_rate": 0.001, "loss": 2.6159, "step": 16414 }, { "epoch": 0.6944326931212454, "grad_norm": 1.3222384452819824, "learning_rate": 0.001, "loss": 3.7811, "step": 16415 }, { "epoch": 0.6944749978847619, "grad_norm": 0.22960853576660156, "learning_rate": 0.001, "loss": 1.8167, "step": 16416 }, { "epoch": 0.6945173026482782, "grad_norm": 0.20826157927513123, "learning_rate": 0.001, "loss": 2.0062, "step": 16417 }, { "epoch": 0.6945596074117946, "grad_norm": 2.312390089035034, "learning_rate": 0.001, "loss": 3.3735, "step": 16418 }, { "epoch": 0.694601912175311, "grad_norm": 3.1580910682678223, "learning_rate": 0.001, "loss": 2.463, "step": 16419 }, { "epoch": 0.6946442169388273, "grad_norm": 0.25120091438293457, "learning_rate": 0.001, "loss": 2.9196, "step": 16420 }, { "epoch": 0.6946865217023437, "grad_norm": 0.15511690080165863, "learning_rate": 0.001, "loss": 1.6251, "step": 16421 }, { "epoch": 0.6947288264658601, "grad_norm": 0.18598857522010803, "learning_rate": 0.001, "loss": 2.9597, "step": 16422 }, { "epoch": 0.6947711312293764, "grad_norm": 0.16862650215625763, "learning_rate": 0.001, "loss": 2.2061, "step": 16423 }, { "epoch": 0.6948134359928928, "grad_norm": 0.18114817142486572, "learning_rate": 0.001, "loss": 2.222, "step": 16424 }, { "epoch": 0.6948557407564092, "grad_norm": 0.19267553091049194, "learning_rate": 0.001, "loss": 2.3873, "step": 16425 }, { "epoch": 0.6948980455199255, "grad_norm": 0.20283320546150208, "learning_rate": 0.001, "loss": 1.3301, "step": 16426 }, { "epoch": 0.6949403502834419, "grad_norm": 2.084455728530884, "learning_rate": 0.001, "loss": 1.6759, "step": 16427 }, { "epoch": 0.6949826550469583, "grad_norm": 2.8412082195281982, "learning_rate": 0.001, "loss": 3.4139, "step": 16428 }, { "epoch": 0.6950249598104746, "grad_norm": 2.059316396713257, "learning_rate": 0.001, "loss": 1.9375, "step": 16429 }, { "epoch": 0.695067264573991, "grad_norm": 0.3789178431034088, "learning_rate": 0.001, "loss": 2.9269, "step": 16430 }, { "epoch": 0.6951095693375074, "grad_norm": 0.31577736139297485, "learning_rate": 0.001, "loss": 2.9995, "step": 16431 }, { "epoch": 0.6951518741010237, "grad_norm": 2.173356533050537, "learning_rate": 0.001, "loss": 2.4088, "step": 16432 }, { "epoch": 0.6951941788645402, "grad_norm": 0.2261185199022293, "learning_rate": 0.001, "loss": 2.0083, "step": 16433 }, { "epoch": 0.6952364836280566, "grad_norm": 0.23543325066566467, "learning_rate": 0.001, "loss": 1.9962, "step": 16434 }, { "epoch": 0.6952787883915729, "grad_norm": 0.23661349713802338, "learning_rate": 0.001, "loss": 2.1091, "step": 16435 }, { "epoch": 0.6953210931550893, "grad_norm": 0.20180752873420715, "learning_rate": 0.001, "loss": 2.3918, "step": 16436 }, { "epoch": 0.6953633979186057, "grad_norm": 0.261025995016098, "learning_rate": 0.001, "loss": 2.3989, "step": 16437 }, { "epoch": 0.695405702682122, "grad_norm": 0.24305380880832672, "learning_rate": 0.001, "loss": 2.3177, "step": 16438 }, { "epoch": 0.6954480074456384, "grad_norm": 0.1875523179769516, "learning_rate": 0.001, "loss": 2.4939, "step": 16439 }, { "epoch": 0.6954903122091548, "grad_norm": 0.20536570250988007, "learning_rate": 0.001, "loss": 2.3507, "step": 16440 }, { "epoch": 0.6955326169726711, "grad_norm": 2.672262668609619, "learning_rate": 0.001, "loss": 2.8801, "step": 16441 }, { "epoch": 0.6955749217361875, "grad_norm": 0.16370277106761932, "learning_rate": 0.001, "loss": 2.0448, "step": 16442 }, { "epoch": 0.6956172264997038, "grad_norm": 0.3966878056526184, "learning_rate": 0.001, "loss": 1.4291, "step": 16443 }, { "epoch": 0.6956595312632202, "grad_norm": 6.427881717681885, "learning_rate": 0.001, "loss": 2.0735, "step": 16444 }, { "epoch": 0.6957018360267366, "grad_norm": 0.9864132404327393, "learning_rate": 0.001, "loss": 1.9811, "step": 16445 }, { "epoch": 0.6957441407902529, "grad_norm": 0.19832006096839905, "learning_rate": 0.001, "loss": 2.0114, "step": 16446 }, { "epoch": 0.6957864455537693, "grad_norm": 0.18039336800575256, "learning_rate": 0.001, "loss": 1.4557, "step": 16447 }, { "epoch": 0.6958287503172857, "grad_norm": 0.18589988350868225, "learning_rate": 0.001, "loss": 1.7354, "step": 16448 }, { "epoch": 0.695871055080802, "grad_norm": 0.21510185301303864, "learning_rate": 0.001, "loss": 2.3479, "step": 16449 }, { "epoch": 0.6959133598443185, "grad_norm": 0.19630225002765656, "learning_rate": 0.001, "loss": 2.2254, "step": 16450 }, { "epoch": 0.6959556646078349, "grad_norm": 0.18261785805225372, "learning_rate": 0.001, "loss": 2.2902, "step": 16451 }, { "epoch": 0.6959979693713512, "grad_norm": 0.20006346702575684, "learning_rate": 0.001, "loss": 2.0152, "step": 16452 }, { "epoch": 0.6960402741348676, "grad_norm": 0.18040160834789276, "learning_rate": 0.001, "loss": 1.8994, "step": 16453 }, { "epoch": 0.696082578898384, "grad_norm": 1.6500322818756104, "learning_rate": 0.001, "loss": 1.7275, "step": 16454 }, { "epoch": 0.6961248836619003, "grad_norm": 0.2118813842535019, "learning_rate": 0.001, "loss": 3.1393, "step": 16455 }, { "epoch": 0.6961671884254167, "grad_norm": 0.18187031149864197, "learning_rate": 0.001, "loss": 2.1885, "step": 16456 }, { "epoch": 0.6962094931889331, "grad_norm": 0.21792373061180115, "learning_rate": 0.001, "loss": 2.5011, "step": 16457 }, { "epoch": 0.6962517979524494, "grad_norm": 0.2024918496608734, "learning_rate": 0.001, "loss": 2.1317, "step": 16458 }, { "epoch": 0.6962941027159658, "grad_norm": 0.1720605045557022, "learning_rate": 0.001, "loss": 2.5661, "step": 16459 }, { "epoch": 0.6963364074794822, "grad_norm": 0.20015959441661835, "learning_rate": 0.001, "loss": 2.6472, "step": 16460 }, { "epoch": 0.6963787122429985, "grad_norm": 0.3034919500350952, "learning_rate": 0.001, "loss": 1.9821, "step": 16461 }, { "epoch": 0.6964210170065149, "grad_norm": 0.1722230315208435, "learning_rate": 0.001, "loss": 1.7016, "step": 16462 }, { "epoch": 0.6964633217700313, "grad_norm": 0.17962494492530823, "learning_rate": 0.001, "loss": 1.6498, "step": 16463 }, { "epoch": 0.6965056265335476, "grad_norm": 0.6122218370437622, "learning_rate": 0.001, "loss": 1.7189, "step": 16464 }, { "epoch": 0.696547931297064, "grad_norm": 0.17944207787513733, "learning_rate": 0.001, "loss": 1.5481, "step": 16465 }, { "epoch": 0.6965902360605805, "grad_norm": 0.16219043731689453, "learning_rate": 0.001, "loss": 2.1806, "step": 16466 }, { "epoch": 0.6966325408240968, "grad_norm": 0.2081691324710846, "learning_rate": 0.001, "loss": 3.2181, "step": 16467 }, { "epoch": 0.6966748455876132, "grad_norm": 0.16732138395309448, "learning_rate": 0.001, "loss": 3.0854, "step": 16468 }, { "epoch": 0.6967171503511296, "grad_norm": 0.1735231727361679, "learning_rate": 0.001, "loss": 2.2234, "step": 16469 }, { "epoch": 0.6967594551146459, "grad_norm": 0.7906176447868347, "learning_rate": 0.001, "loss": 2.4975, "step": 16470 }, { "epoch": 0.6968017598781623, "grad_norm": 0.20566585659980774, "learning_rate": 0.001, "loss": 3.4062, "step": 16471 }, { "epoch": 0.6968440646416787, "grad_norm": 0.1833547055721283, "learning_rate": 0.001, "loss": 2.034, "step": 16472 }, { "epoch": 0.696886369405195, "grad_norm": 0.18477390706539154, "learning_rate": 0.001, "loss": 2.5579, "step": 16473 }, { "epoch": 0.6969286741687114, "grad_norm": 0.19416743516921997, "learning_rate": 0.001, "loss": 2.2212, "step": 16474 }, { "epoch": 0.6969709789322278, "grad_norm": 0.2078043520450592, "learning_rate": 0.001, "loss": 2.0475, "step": 16475 }, { "epoch": 0.6970132836957441, "grad_norm": 0.1452716588973999, "learning_rate": 0.001, "loss": 1.6866, "step": 16476 }, { "epoch": 0.6970555884592605, "grad_norm": 0.1788160353899002, "learning_rate": 0.001, "loss": 2.1014, "step": 16477 }, { "epoch": 0.6970978932227769, "grad_norm": 0.1941988617181778, "learning_rate": 0.001, "loss": 2.2228, "step": 16478 }, { "epoch": 0.6971401979862932, "grad_norm": 0.5075099468231201, "learning_rate": 0.001, "loss": 2.046, "step": 16479 }, { "epoch": 0.6971825027498096, "grad_norm": 0.16728682816028595, "learning_rate": 0.001, "loss": 2.5115, "step": 16480 }, { "epoch": 0.697224807513326, "grad_norm": 0.17837141454219818, "learning_rate": 0.001, "loss": 1.8429, "step": 16481 }, { "epoch": 0.6972671122768423, "grad_norm": 0.14722979068756104, "learning_rate": 0.001, "loss": 1.6878, "step": 16482 }, { "epoch": 0.6973094170403588, "grad_norm": 0.17707088589668274, "learning_rate": 0.001, "loss": 2.0991, "step": 16483 }, { "epoch": 0.6973517218038752, "grad_norm": 0.20449602603912354, "learning_rate": 0.001, "loss": 1.4112, "step": 16484 }, { "epoch": 0.6973940265673915, "grad_norm": 0.18787558376789093, "learning_rate": 0.001, "loss": 1.9535, "step": 16485 }, { "epoch": 0.6974363313309079, "grad_norm": 3.8804171085357666, "learning_rate": 0.001, "loss": 2.9737, "step": 16486 }, { "epoch": 0.6974786360944243, "grad_norm": 0.18371431529521942, "learning_rate": 0.001, "loss": 2.5309, "step": 16487 }, { "epoch": 0.6975209408579406, "grad_norm": 0.154661625623703, "learning_rate": 0.001, "loss": 1.7516, "step": 16488 }, { "epoch": 0.697563245621457, "grad_norm": 0.15226295590400696, "learning_rate": 0.001, "loss": 2.5867, "step": 16489 }, { "epoch": 0.6976055503849733, "grad_norm": 9.875614166259766, "learning_rate": 0.001, "loss": 2.3198, "step": 16490 }, { "epoch": 0.6976478551484897, "grad_norm": 0.18575811386108398, "learning_rate": 0.001, "loss": 2.5731, "step": 16491 }, { "epoch": 0.6976901599120061, "grad_norm": 0.18501834571361542, "learning_rate": 0.001, "loss": 1.9908, "step": 16492 }, { "epoch": 0.6977324646755224, "grad_norm": 0.17492198944091797, "learning_rate": 0.001, "loss": 2.1035, "step": 16493 }, { "epoch": 0.6977747694390388, "grad_norm": 0.1972741186618805, "learning_rate": 0.001, "loss": 2.0895, "step": 16494 }, { "epoch": 0.6978170742025552, "grad_norm": 0.1590324491262436, "learning_rate": 0.001, "loss": 1.8644, "step": 16495 }, { "epoch": 0.6978593789660715, "grad_norm": 0.15750128030776978, "learning_rate": 0.001, "loss": 2.3938, "step": 16496 }, { "epoch": 0.6979016837295879, "grad_norm": 0.18353481590747833, "learning_rate": 0.001, "loss": 1.9701, "step": 16497 }, { "epoch": 0.6979439884931044, "grad_norm": 0.4982626438140869, "learning_rate": 0.001, "loss": 2.8423, "step": 16498 }, { "epoch": 0.6979862932566206, "grad_norm": 0.5572125911712646, "learning_rate": 0.001, "loss": 1.6008, "step": 16499 }, { "epoch": 0.6980285980201371, "grad_norm": 0.18385641276836395, "learning_rate": 0.001, "loss": 1.7157, "step": 16500 }, { "epoch": 0.6980709027836535, "grad_norm": 0.20316894352436066, "learning_rate": 0.001, "loss": 3.3949, "step": 16501 }, { "epoch": 0.6981132075471698, "grad_norm": 0.21854710578918457, "learning_rate": 0.001, "loss": 2.4985, "step": 16502 }, { "epoch": 0.6981555123106862, "grad_norm": 0.2654995620250702, "learning_rate": 0.001, "loss": 2.077, "step": 16503 }, { "epoch": 0.6981978170742026, "grad_norm": 0.15483231842517853, "learning_rate": 0.001, "loss": 2.5364, "step": 16504 }, { "epoch": 0.6982401218377189, "grad_norm": 0.1526549905538559, "learning_rate": 0.001, "loss": 2.8325, "step": 16505 }, { "epoch": 0.6982824266012353, "grad_norm": 0.18169742822647095, "learning_rate": 0.001, "loss": 2.6684, "step": 16506 }, { "epoch": 0.6983247313647517, "grad_norm": 0.19134920835494995, "learning_rate": 0.001, "loss": 2.8449, "step": 16507 }, { "epoch": 0.698367036128268, "grad_norm": 3.5741662979125977, "learning_rate": 0.001, "loss": 2.137, "step": 16508 }, { "epoch": 0.6984093408917844, "grad_norm": 0.42037051916122437, "learning_rate": 0.001, "loss": 1.8969, "step": 16509 }, { "epoch": 0.6984516456553008, "grad_norm": 0.1691959798336029, "learning_rate": 0.001, "loss": 1.5428, "step": 16510 }, { "epoch": 0.6984939504188171, "grad_norm": 0.19847933948040009, "learning_rate": 0.001, "loss": 1.9573, "step": 16511 }, { "epoch": 0.6985362551823335, "grad_norm": 0.8859964609146118, "learning_rate": 0.001, "loss": 2.9135, "step": 16512 }, { "epoch": 0.6985785599458499, "grad_norm": 0.17541298270225525, "learning_rate": 0.001, "loss": 2.6869, "step": 16513 }, { "epoch": 0.6986208647093662, "grad_norm": 0.1629028022289276, "learning_rate": 0.001, "loss": 2.2677, "step": 16514 }, { "epoch": 0.6986631694728827, "grad_norm": 0.39890217781066895, "learning_rate": 0.001, "loss": 1.998, "step": 16515 }, { "epoch": 0.6987054742363991, "grad_norm": 0.16391772031784058, "learning_rate": 0.001, "loss": 2.2344, "step": 16516 }, { "epoch": 0.6987477789999154, "grad_norm": 0.20144188404083252, "learning_rate": 0.001, "loss": 2.3193, "step": 16517 }, { "epoch": 0.6987900837634318, "grad_norm": 0.3551698923110962, "learning_rate": 0.001, "loss": 1.9678, "step": 16518 }, { "epoch": 0.6988323885269482, "grad_norm": 3.031266450881958, "learning_rate": 0.001, "loss": 2.1602, "step": 16519 }, { "epoch": 0.6988746932904645, "grad_norm": 0.21469104290008545, "learning_rate": 0.001, "loss": 2.2958, "step": 16520 }, { "epoch": 0.6989169980539809, "grad_norm": 0.9255651831626892, "learning_rate": 0.001, "loss": 1.9495, "step": 16521 }, { "epoch": 0.6989593028174973, "grad_norm": 0.1491638422012329, "learning_rate": 0.001, "loss": 1.8283, "step": 16522 }, { "epoch": 0.6990016075810136, "grad_norm": 0.22015687823295593, "learning_rate": 0.001, "loss": 2.5978, "step": 16523 }, { "epoch": 0.69904391234453, "grad_norm": 0.17289428412914276, "learning_rate": 0.001, "loss": 1.9345, "step": 16524 }, { "epoch": 0.6990862171080464, "grad_norm": 0.1912022829055786, "learning_rate": 0.001, "loss": 2.4924, "step": 16525 }, { "epoch": 0.6991285218715627, "grad_norm": 0.17688611149787903, "learning_rate": 0.001, "loss": 1.7264, "step": 16526 }, { "epoch": 0.6991708266350791, "grad_norm": 0.1654282510280609, "learning_rate": 0.001, "loss": 2.0543, "step": 16527 }, { "epoch": 0.6992131313985955, "grad_norm": 3.0969583988189697, "learning_rate": 0.001, "loss": 2.2927, "step": 16528 }, { "epoch": 0.6992554361621118, "grad_norm": 0.2961462438106537, "learning_rate": 0.001, "loss": 1.3273, "step": 16529 }, { "epoch": 0.6992977409256282, "grad_norm": 0.1830281913280487, "learning_rate": 0.001, "loss": 2.2964, "step": 16530 }, { "epoch": 0.6993400456891447, "grad_norm": 0.19656279683113098, "learning_rate": 0.001, "loss": 2.521, "step": 16531 }, { "epoch": 0.699382350452661, "grad_norm": 0.3053706884384155, "learning_rate": 0.001, "loss": 2.1476, "step": 16532 }, { "epoch": 0.6994246552161774, "grad_norm": 0.3848680257797241, "learning_rate": 0.001, "loss": 2.0692, "step": 16533 }, { "epoch": 0.6994669599796937, "grad_norm": 0.20205725729465485, "learning_rate": 0.001, "loss": 2.2957, "step": 16534 }, { "epoch": 0.6995092647432101, "grad_norm": 0.21945300698280334, "learning_rate": 0.001, "loss": 1.8217, "step": 16535 }, { "epoch": 0.6995515695067265, "grad_norm": 0.3286508321762085, "learning_rate": 0.001, "loss": 2.1553, "step": 16536 }, { "epoch": 0.6995938742702428, "grad_norm": 0.17390255630016327, "learning_rate": 0.001, "loss": 1.8902, "step": 16537 }, { "epoch": 0.6996361790337592, "grad_norm": 0.2187155783176422, "learning_rate": 0.001, "loss": 2.5251, "step": 16538 }, { "epoch": 0.6996784837972756, "grad_norm": 0.1886243224143982, "learning_rate": 0.001, "loss": 1.8132, "step": 16539 }, { "epoch": 0.6997207885607919, "grad_norm": 0.14639221131801605, "learning_rate": 0.001, "loss": 2.2848, "step": 16540 }, { "epoch": 0.6997630933243083, "grad_norm": 0.2016509771347046, "learning_rate": 0.001, "loss": 2.3703, "step": 16541 }, { "epoch": 0.6998053980878247, "grad_norm": 0.2064632922410965, "learning_rate": 0.001, "loss": 1.8111, "step": 16542 }, { "epoch": 0.699847702851341, "grad_norm": 0.304959237575531, "learning_rate": 0.001, "loss": 1.8825, "step": 16543 }, { "epoch": 0.6998900076148574, "grad_norm": 0.23426057398319244, "learning_rate": 0.001, "loss": 1.7511, "step": 16544 }, { "epoch": 0.6999323123783738, "grad_norm": 0.15036675333976746, "learning_rate": 0.001, "loss": 1.4636, "step": 16545 }, { "epoch": 0.6999746171418901, "grad_norm": 0.18853148818016052, "learning_rate": 0.001, "loss": 3.3999, "step": 16546 }, { "epoch": 0.7000169219054065, "grad_norm": 0.20281235873699188, "learning_rate": 0.001, "loss": 3.1192, "step": 16547 }, { "epoch": 0.700059226668923, "grad_norm": 0.1816108375787735, "learning_rate": 0.001, "loss": 2.7407, "step": 16548 }, { "epoch": 0.7001015314324393, "grad_norm": 0.28820428252220154, "learning_rate": 0.001, "loss": 2.1106, "step": 16549 }, { "epoch": 0.7001438361959557, "grad_norm": 0.1509302407503128, "learning_rate": 0.001, "loss": 1.7362, "step": 16550 }, { "epoch": 0.7001861409594721, "grad_norm": 0.14989890158176422, "learning_rate": 0.001, "loss": 1.9436, "step": 16551 }, { "epoch": 0.7002284457229884, "grad_norm": 0.17388369143009186, "learning_rate": 0.001, "loss": 1.4622, "step": 16552 }, { "epoch": 0.7002707504865048, "grad_norm": 0.28890854120254517, "learning_rate": 0.001, "loss": 2.0939, "step": 16553 }, { "epoch": 0.7003130552500212, "grad_norm": 0.1795245260000229, "learning_rate": 0.001, "loss": 1.9428, "step": 16554 }, { "epoch": 0.7003553600135375, "grad_norm": 0.178712397813797, "learning_rate": 0.001, "loss": 1.6748, "step": 16555 }, { "epoch": 0.7003976647770539, "grad_norm": 0.16266508400440216, "learning_rate": 0.001, "loss": 1.7466, "step": 16556 }, { "epoch": 0.7004399695405703, "grad_norm": 0.18018513917922974, "learning_rate": 0.001, "loss": 1.5014, "step": 16557 }, { "epoch": 0.7004822743040866, "grad_norm": 0.20177042484283447, "learning_rate": 0.001, "loss": 1.8159, "step": 16558 }, { "epoch": 0.700524579067603, "grad_norm": 0.4257431626319885, "learning_rate": 0.001, "loss": 2.8354, "step": 16559 }, { "epoch": 0.7005668838311194, "grad_norm": 0.3945598006248474, "learning_rate": 0.001, "loss": 2.5141, "step": 16560 }, { "epoch": 0.7006091885946357, "grad_norm": 0.1487802118062973, "learning_rate": 0.001, "loss": 1.9653, "step": 16561 }, { "epoch": 0.7006514933581521, "grad_norm": 0.1524672657251358, "learning_rate": 0.001, "loss": 2.3534, "step": 16562 }, { "epoch": 0.7006937981216685, "grad_norm": 0.1571507751941681, "learning_rate": 0.001, "loss": 3.0126, "step": 16563 }, { "epoch": 0.7007361028851848, "grad_norm": 1.0442800521850586, "learning_rate": 0.001, "loss": 2.0008, "step": 16564 }, { "epoch": 0.7007784076487013, "grad_norm": 0.13998109102249146, "learning_rate": 0.001, "loss": 2.0278, "step": 16565 }, { "epoch": 0.7008207124122177, "grad_norm": 0.1480410099029541, "learning_rate": 0.001, "loss": 1.6822, "step": 16566 }, { "epoch": 0.700863017175734, "grad_norm": 0.2611854374408722, "learning_rate": 0.001, "loss": 1.9182, "step": 16567 }, { "epoch": 0.7009053219392504, "grad_norm": 1.3647289276123047, "learning_rate": 0.001, "loss": 1.8359, "step": 16568 }, { "epoch": 0.7009476267027668, "grad_norm": 0.15801845490932465, "learning_rate": 0.001, "loss": 1.8454, "step": 16569 }, { "epoch": 0.7009899314662831, "grad_norm": 0.18349795043468475, "learning_rate": 0.001, "loss": 1.8095, "step": 16570 }, { "epoch": 0.7010322362297995, "grad_norm": 0.16039691865444183, "learning_rate": 0.001, "loss": 2.1951, "step": 16571 }, { "epoch": 0.7010745409933159, "grad_norm": 0.21815955638885498, "learning_rate": 0.001, "loss": 1.767, "step": 16572 }, { "epoch": 0.7011168457568322, "grad_norm": 0.22475208342075348, "learning_rate": 0.001, "loss": 3.201, "step": 16573 }, { "epoch": 0.7011591505203486, "grad_norm": 0.1533900797367096, "learning_rate": 0.001, "loss": 1.5355, "step": 16574 }, { "epoch": 0.701201455283865, "grad_norm": 0.18003559112548828, "learning_rate": 0.001, "loss": 2.124, "step": 16575 }, { "epoch": 0.7012437600473813, "grad_norm": 2.3861000537872314, "learning_rate": 0.001, "loss": 2.4133, "step": 16576 }, { "epoch": 0.7012860648108977, "grad_norm": 0.18636606633663177, "learning_rate": 0.001, "loss": 1.7297, "step": 16577 }, { "epoch": 0.7013283695744141, "grad_norm": 0.1709393411874771, "learning_rate": 0.001, "loss": 2.7423, "step": 16578 }, { "epoch": 0.7013706743379304, "grad_norm": 0.1964663714170456, "learning_rate": 0.001, "loss": 2.2989, "step": 16579 }, { "epoch": 0.7014129791014468, "grad_norm": 0.13408979773521423, "learning_rate": 0.001, "loss": 1.2903, "step": 16580 }, { "epoch": 0.7014552838649631, "grad_norm": 0.17530257999897003, "learning_rate": 0.001, "loss": 2.0109, "step": 16581 }, { "epoch": 0.7014975886284796, "grad_norm": 0.44883960485458374, "learning_rate": 0.001, "loss": 3.1007, "step": 16582 }, { "epoch": 0.701539893391996, "grad_norm": 4.928041458129883, "learning_rate": 0.001, "loss": 2.222, "step": 16583 }, { "epoch": 0.7015821981555123, "grad_norm": 0.1666925698518753, "learning_rate": 0.001, "loss": 2.2696, "step": 16584 }, { "epoch": 0.7016245029190287, "grad_norm": 0.44181501865386963, "learning_rate": 0.001, "loss": 1.9347, "step": 16585 }, { "epoch": 0.7016668076825451, "grad_norm": 0.17727473378181458, "learning_rate": 0.001, "loss": 2.1058, "step": 16586 }, { "epoch": 0.7017091124460614, "grad_norm": 0.1318918913602829, "learning_rate": 0.001, "loss": 2.1093, "step": 16587 }, { "epoch": 0.7017514172095778, "grad_norm": 0.1690230816602707, "learning_rate": 0.001, "loss": 2.2003, "step": 16588 }, { "epoch": 0.7017937219730942, "grad_norm": 0.29973065853118896, "learning_rate": 0.001, "loss": 2.0176, "step": 16589 }, { "epoch": 0.7018360267366105, "grad_norm": 0.5178530216217041, "learning_rate": 0.001, "loss": 3.1915, "step": 16590 }, { "epoch": 0.7018783315001269, "grad_norm": 0.1572248637676239, "learning_rate": 0.001, "loss": 1.943, "step": 16591 }, { "epoch": 0.7019206362636433, "grad_norm": 0.187464639544487, "learning_rate": 0.001, "loss": 2.0762, "step": 16592 }, { "epoch": 0.7019629410271596, "grad_norm": 0.21802839636802673, "learning_rate": 0.001, "loss": 1.9435, "step": 16593 }, { "epoch": 0.702005245790676, "grad_norm": 0.20468945801258087, "learning_rate": 0.001, "loss": 3.3978, "step": 16594 }, { "epoch": 0.7020475505541924, "grad_norm": 2.5467371940612793, "learning_rate": 0.001, "loss": 2.1057, "step": 16595 }, { "epoch": 0.7020898553177087, "grad_norm": 0.198502317070961, "learning_rate": 0.001, "loss": 1.8921, "step": 16596 }, { "epoch": 0.7021321600812251, "grad_norm": 0.18070580065250397, "learning_rate": 0.001, "loss": 2.1183, "step": 16597 }, { "epoch": 0.7021744648447416, "grad_norm": 0.4479145109653473, "learning_rate": 0.001, "loss": 2.0895, "step": 16598 }, { "epoch": 0.7022167696082579, "grad_norm": 0.16668649017810822, "learning_rate": 0.001, "loss": 1.7609, "step": 16599 }, { "epoch": 0.7022590743717743, "grad_norm": 0.17309440672397614, "learning_rate": 0.001, "loss": 1.5591, "step": 16600 }, { "epoch": 0.7023013791352907, "grad_norm": 1.5597689151763916, "learning_rate": 0.001, "loss": 2.1839, "step": 16601 }, { "epoch": 0.702343683898807, "grad_norm": 0.2020406872034073, "learning_rate": 0.001, "loss": 2.4463, "step": 16602 }, { "epoch": 0.7023859886623234, "grad_norm": 0.17011041939258575, "learning_rate": 0.001, "loss": 2.2795, "step": 16603 }, { "epoch": 0.7024282934258398, "grad_norm": 0.1457245647907257, "learning_rate": 0.001, "loss": 2.8202, "step": 16604 }, { "epoch": 0.7024705981893561, "grad_norm": 0.1977822333574295, "learning_rate": 0.001, "loss": 2.1196, "step": 16605 }, { "epoch": 0.7025129029528725, "grad_norm": 0.14688114821910858, "learning_rate": 0.001, "loss": 1.9252, "step": 16606 }, { "epoch": 0.7025552077163889, "grad_norm": 0.308075487613678, "learning_rate": 0.001, "loss": 2.4381, "step": 16607 }, { "epoch": 0.7025975124799052, "grad_norm": 0.1714773029088974, "learning_rate": 0.001, "loss": 1.698, "step": 16608 }, { "epoch": 0.7026398172434216, "grad_norm": 0.18509937822818756, "learning_rate": 0.001, "loss": 2.676, "step": 16609 }, { "epoch": 0.702682122006938, "grad_norm": 0.37178945541381836, "learning_rate": 0.001, "loss": 3.245, "step": 16610 }, { "epoch": 0.7027244267704543, "grad_norm": 0.15938927233219147, "learning_rate": 0.001, "loss": 1.8862, "step": 16611 }, { "epoch": 0.7027667315339707, "grad_norm": 0.25779882073402405, "learning_rate": 0.001, "loss": 3.5429, "step": 16612 }, { "epoch": 0.7028090362974871, "grad_norm": 0.1717577427625656, "learning_rate": 0.001, "loss": 1.811, "step": 16613 }, { "epoch": 0.7028513410610034, "grad_norm": 0.25885605812072754, "learning_rate": 0.001, "loss": 2.4719, "step": 16614 }, { "epoch": 0.7028936458245199, "grad_norm": 0.15932494401931763, "learning_rate": 0.001, "loss": 2.5687, "step": 16615 }, { "epoch": 0.7029359505880363, "grad_norm": 0.15366816520690918, "learning_rate": 0.001, "loss": 2.3769, "step": 16616 }, { "epoch": 0.7029782553515526, "grad_norm": 0.4392596483230591, "learning_rate": 0.001, "loss": 2.3932, "step": 16617 }, { "epoch": 0.703020560115069, "grad_norm": 0.18615837395191193, "learning_rate": 0.001, "loss": 2.1175, "step": 16618 }, { "epoch": 0.7030628648785854, "grad_norm": 0.20948411524295807, "learning_rate": 0.001, "loss": 2.4846, "step": 16619 }, { "epoch": 0.7031051696421017, "grad_norm": 0.7131271362304688, "learning_rate": 0.001, "loss": 2.928, "step": 16620 }, { "epoch": 0.7031474744056181, "grad_norm": 0.23972147703170776, "learning_rate": 0.001, "loss": 2.3654, "step": 16621 }, { "epoch": 0.7031897791691345, "grad_norm": 0.23003694415092468, "learning_rate": 0.001, "loss": 2.846, "step": 16622 }, { "epoch": 0.7032320839326508, "grad_norm": 0.14880388975143433, "learning_rate": 0.001, "loss": 1.8853, "step": 16623 }, { "epoch": 0.7032743886961672, "grad_norm": 0.1396287977695465, "learning_rate": 0.001, "loss": 2.4475, "step": 16624 }, { "epoch": 0.7033166934596835, "grad_norm": 0.146676704287529, "learning_rate": 0.001, "loss": 2.1553, "step": 16625 }, { "epoch": 0.7033589982231999, "grad_norm": 0.17937001585960388, "learning_rate": 0.001, "loss": 2.787, "step": 16626 }, { "epoch": 0.7034013029867163, "grad_norm": 0.18604117631912231, "learning_rate": 0.001, "loss": 2.3641, "step": 16627 }, { "epoch": 0.7034436077502326, "grad_norm": 3.127412796020508, "learning_rate": 0.001, "loss": 1.5014, "step": 16628 }, { "epoch": 0.703485912513749, "grad_norm": 0.6706067323684692, "learning_rate": 0.001, "loss": 2.0265, "step": 16629 }, { "epoch": 0.7035282172772654, "grad_norm": 0.13607746362686157, "learning_rate": 0.001, "loss": 1.5039, "step": 16630 }, { "epoch": 0.7035705220407817, "grad_norm": 0.2080031931400299, "learning_rate": 0.001, "loss": 2.7118, "step": 16631 }, { "epoch": 0.7036128268042982, "grad_norm": 0.2915956377983093, "learning_rate": 0.001, "loss": 1.9805, "step": 16632 }, { "epoch": 0.7036551315678146, "grad_norm": 1.015211582183838, "learning_rate": 0.001, "loss": 2.4291, "step": 16633 }, { "epoch": 0.7036974363313309, "grad_norm": 0.15686677396297455, "learning_rate": 0.001, "loss": 1.4743, "step": 16634 }, { "epoch": 0.7037397410948473, "grad_norm": 0.3258562386035919, "learning_rate": 0.001, "loss": 2.1383, "step": 16635 }, { "epoch": 0.7037820458583637, "grad_norm": 0.21752215921878815, "learning_rate": 0.001, "loss": 2.0518, "step": 16636 }, { "epoch": 0.70382435062188, "grad_norm": 3.988706111907959, "learning_rate": 0.001, "loss": 3.0142, "step": 16637 }, { "epoch": 0.7038666553853964, "grad_norm": 0.15791606903076172, "learning_rate": 0.001, "loss": 2.3269, "step": 16638 }, { "epoch": 0.7039089601489128, "grad_norm": 0.16375815868377686, "learning_rate": 0.001, "loss": 1.5517, "step": 16639 }, { "epoch": 0.7039512649124291, "grad_norm": 2.8950631618499756, "learning_rate": 0.001, "loss": 1.831, "step": 16640 }, { "epoch": 0.7039935696759455, "grad_norm": 0.5850285887718201, "learning_rate": 0.001, "loss": 2.3522, "step": 16641 }, { "epoch": 0.7040358744394619, "grad_norm": 0.25037726759910583, "learning_rate": 0.001, "loss": 2.2778, "step": 16642 }, { "epoch": 0.7040781792029782, "grad_norm": 0.17441709339618683, "learning_rate": 0.001, "loss": 2.4771, "step": 16643 }, { "epoch": 0.7041204839664946, "grad_norm": 0.15096983313560486, "learning_rate": 0.001, "loss": 1.4872, "step": 16644 }, { "epoch": 0.704162788730011, "grad_norm": 1.1566590070724487, "learning_rate": 0.001, "loss": 1.9335, "step": 16645 }, { "epoch": 0.7042050934935273, "grad_norm": 0.21050888299942017, "learning_rate": 0.001, "loss": 1.6326, "step": 16646 }, { "epoch": 0.7042473982570437, "grad_norm": 0.24145705997943878, "learning_rate": 0.001, "loss": 3.1265, "step": 16647 }, { "epoch": 0.7042897030205602, "grad_norm": 0.20239047706127167, "learning_rate": 0.001, "loss": 1.7814, "step": 16648 }, { "epoch": 0.7043320077840765, "grad_norm": 0.5811108946800232, "learning_rate": 0.001, "loss": 2.4936, "step": 16649 }, { "epoch": 0.7043743125475929, "grad_norm": 0.42122969031333923, "learning_rate": 0.001, "loss": 2.2731, "step": 16650 }, { "epoch": 0.7044166173111093, "grad_norm": 0.3062237501144409, "learning_rate": 0.001, "loss": 1.9344, "step": 16651 }, { "epoch": 0.7044589220746256, "grad_norm": 1.1125757694244385, "learning_rate": 0.001, "loss": 2.0033, "step": 16652 }, { "epoch": 0.704501226838142, "grad_norm": 0.20193973183631897, "learning_rate": 0.001, "loss": 2.0509, "step": 16653 }, { "epoch": 0.7045435316016584, "grad_norm": 5.243770122528076, "learning_rate": 0.001, "loss": 1.7479, "step": 16654 }, { "epoch": 0.7045858363651747, "grad_norm": 0.1896519958972931, "learning_rate": 0.001, "loss": 1.9381, "step": 16655 }, { "epoch": 0.7046281411286911, "grad_norm": 0.21094900369644165, "learning_rate": 0.001, "loss": 1.6246, "step": 16656 }, { "epoch": 0.7046704458922075, "grad_norm": 0.24396421015262604, "learning_rate": 0.001, "loss": 3.4794, "step": 16657 }, { "epoch": 0.7047127506557238, "grad_norm": 0.30104437470436096, "learning_rate": 0.001, "loss": 3.0927, "step": 16658 }, { "epoch": 0.7047550554192402, "grad_norm": 0.4394260346889496, "learning_rate": 0.001, "loss": 2.2789, "step": 16659 }, { "epoch": 0.7047973601827566, "grad_norm": 0.23813438415527344, "learning_rate": 0.001, "loss": 1.8201, "step": 16660 }, { "epoch": 0.7048396649462729, "grad_norm": 0.2260584682226181, "learning_rate": 0.001, "loss": 2.8637, "step": 16661 }, { "epoch": 0.7048819697097893, "grad_norm": 0.17535214126110077, "learning_rate": 0.001, "loss": 1.7847, "step": 16662 }, { "epoch": 0.7049242744733057, "grad_norm": 0.342427134513855, "learning_rate": 0.001, "loss": 2.5649, "step": 16663 }, { "epoch": 0.704966579236822, "grad_norm": 0.2312489151954651, "learning_rate": 0.001, "loss": 2.3301, "step": 16664 }, { "epoch": 0.7050088840003385, "grad_norm": 0.21483004093170166, "learning_rate": 0.001, "loss": 2.0729, "step": 16665 }, { "epoch": 0.7050511887638549, "grad_norm": 2.1229746341705322, "learning_rate": 0.001, "loss": 2.058, "step": 16666 }, { "epoch": 0.7050934935273712, "grad_norm": 0.20138874650001526, "learning_rate": 0.001, "loss": 2.5361, "step": 16667 }, { "epoch": 0.7051357982908876, "grad_norm": 0.1767900288105011, "learning_rate": 0.001, "loss": 2.0497, "step": 16668 }, { "epoch": 0.7051781030544039, "grad_norm": 0.23017890751361847, "learning_rate": 0.001, "loss": 1.6551, "step": 16669 }, { "epoch": 0.7052204078179203, "grad_norm": 0.3998371362686157, "learning_rate": 0.001, "loss": 2.6177, "step": 16670 }, { "epoch": 0.7052627125814367, "grad_norm": 0.1956208050251007, "learning_rate": 0.001, "loss": 2.4729, "step": 16671 }, { "epoch": 0.705305017344953, "grad_norm": 0.13961687684059143, "learning_rate": 0.001, "loss": 2.5719, "step": 16672 }, { "epoch": 0.7053473221084694, "grad_norm": 0.581203281879425, "learning_rate": 0.001, "loss": 3.0076, "step": 16673 }, { "epoch": 0.7053896268719858, "grad_norm": 0.18303988873958588, "learning_rate": 0.001, "loss": 1.9536, "step": 16674 }, { "epoch": 0.7054319316355021, "grad_norm": 0.18635371327400208, "learning_rate": 0.001, "loss": 1.5738, "step": 16675 }, { "epoch": 0.7054742363990185, "grad_norm": 0.17525175213813782, "learning_rate": 0.001, "loss": 2.3192, "step": 16676 }, { "epoch": 0.7055165411625349, "grad_norm": 1.5950613021850586, "learning_rate": 0.001, "loss": 2.6333, "step": 16677 }, { "epoch": 0.7055588459260512, "grad_norm": 0.2035263627767563, "learning_rate": 0.001, "loss": 1.9577, "step": 16678 }, { "epoch": 0.7056011506895676, "grad_norm": 0.16556823253631592, "learning_rate": 0.001, "loss": 2.3302, "step": 16679 }, { "epoch": 0.705643455453084, "grad_norm": 6.488144397735596, "learning_rate": 0.001, "loss": 2.0294, "step": 16680 }, { "epoch": 0.7056857602166003, "grad_norm": 0.21068356931209564, "learning_rate": 0.001, "loss": 3.265, "step": 16681 }, { "epoch": 0.7057280649801168, "grad_norm": 0.1684761941432953, "learning_rate": 0.001, "loss": 2.9163, "step": 16682 }, { "epoch": 0.7057703697436332, "grad_norm": 2.740279197692871, "learning_rate": 0.001, "loss": 1.8363, "step": 16683 }, { "epoch": 0.7058126745071495, "grad_norm": 69.86604309082031, "learning_rate": 0.001, "loss": 2.5254, "step": 16684 }, { "epoch": 0.7058549792706659, "grad_norm": 0.2736927568912506, "learning_rate": 0.001, "loss": 2.2184, "step": 16685 }, { "epoch": 0.7058972840341823, "grad_norm": 0.21192248165607452, "learning_rate": 0.001, "loss": 2.7321, "step": 16686 }, { "epoch": 0.7059395887976986, "grad_norm": 0.3148384988307953, "learning_rate": 0.001, "loss": 1.9477, "step": 16687 }, { "epoch": 0.705981893561215, "grad_norm": 0.24907708168029785, "learning_rate": 0.001, "loss": 2.7906, "step": 16688 }, { "epoch": 0.7060241983247314, "grad_norm": 0.3226729929447174, "learning_rate": 0.001, "loss": 2.8049, "step": 16689 }, { "epoch": 0.7060665030882477, "grad_norm": 3.953677177429199, "learning_rate": 0.001, "loss": 2.0564, "step": 16690 }, { "epoch": 0.7061088078517641, "grad_norm": 0.24923500418663025, "learning_rate": 0.001, "loss": 1.3582, "step": 16691 }, { "epoch": 0.7061511126152805, "grad_norm": 0.9128309488296509, "learning_rate": 0.001, "loss": 3.0, "step": 16692 }, { "epoch": 0.7061934173787968, "grad_norm": 0.30432406067848206, "learning_rate": 0.001, "loss": 2.2832, "step": 16693 }, { "epoch": 0.7062357221423132, "grad_norm": 0.27826377749443054, "learning_rate": 0.001, "loss": 3.6967, "step": 16694 }, { "epoch": 0.7062780269058296, "grad_norm": 1.1862016916275024, "learning_rate": 0.001, "loss": 2.8519, "step": 16695 }, { "epoch": 0.7063203316693459, "grad_norm": 0.27995389699935913, "learning_rate": 0.001, "loss": 2.0341, "step": 16696 }, { "epoch": 0.7063626364328623, "grad_norm": 0.20460890233516693, "learning_rate": 0.001, "loss": 2.0071, "step": 16697 }, { "epoch": 0.7064049411963788, "grad_norm": 0.20291836559772491, "learning_rate": 0.001, "loss": 2.3107, "step": 16698 }, { "epoch": 0.706447245959895, "grad_norm": 0.2692174017429352, "learning_rate": 0.001, "loss": 2.5738, "step": 16699 }, { "epoch": 0.7064895507234115, "grad_norm": 0.1654096394777298, "learning_rate": 0.001, "loss": 2.286, "step": 16700 }, { "epoch": 0.7065318554869279, "grad_norm": 0.23862436413764954, "learning_rate": 0.001, "loss": 2.321, "step": 16701 }, { "epoch": 0.7065741602504442, "grad_norm": 0.1421448439359665, "learning_rate": 0.001, "loss": 2.7367, "step": 16702 }, { "epoch": 0.7066164650139606, "grad_norm": 0.5097910165786743, "learning_rate": 0.001, "loss": 2.5246, "step": 16703 }, { "epoch": 0.706658769777477, "grad_norm": 0.19460159540176392, "learning_rate": 0.001, "loss": 2.2217, "step": 16704 }, { "epoch": 0.7067010745409933, "grad_norm": 0.17561720311641693, "learning_rate": 0.001, "loss": 1.7901, "step": 16705 }, { "epoch": 0.7067433793045097, "grad_norm": 0.16379234194755554, "learning_rate": 0.001, "loss": 2.0123, "step": 16706 }, { "epoch": 0.7067856840680261, "grad_norm": 0.29534009099006653, "learning_rate": 0.001, "loss": 1.7291, "step": 16707 }, { "epoch": 0.7068279888315424, "grad_norm": 0.16030588746070862, "learning_rate": 0.001, "loss": 2.4982, "step": 16708 }, { "epoch": 0.7068702935950588, "grad_norm": 0.19317007064819336, "learning_rate": 0.001, "loss": 1.7916, "step": 16709 }, { "epoch": 0.7069125983585752, "grad_norm": 16.575075149536133, "learning_rate": 0.001, "loss": 1.7214, "step": 16710 }, { "epoch": 0.7069549031220915, "grad_norm": 0.6508944630622864, "learning_rate": 0.001, "loss": 3.3014, "step": 16711 }, { "epoch": 0.7069972078856079, "grad_norm": 0.17529930174350739, "learning_rate": 0.001, "loss": 2.833, "step": 16712 }, { "epoch": 0.7070395126491243, "grad_norm": 0.18948234617710114, "learning_rate": 0.001, "loss": 2.3365, "step": 16713 }, { "epoch": 0.7070818174126406, "grad_norm": 0.19644437730312347, "learning_rate": 0.001, "loss": 3.7717, "step": 16714 }, { "epoch": 0.707124122176157, "grad_norm": 0.36067622900009155, "learning_rate": 0.001, "loss": 2.016, "step": 16715 }, { "epoch": 0.7071664269396734, "grad_norm": 0.1735169142484665, "learning_rate": 0.001, "loss": 2.1556, "step": 16716 }, { "epoch": 0.7072087317031898, "grad_norm": 0.7500831484794617, "learning_rate": 0.001, "loss": 2.5418, "step": 16717 }, { "epoch": 0.7072510364667062, "grad_norm": 0.1380908042192459, "learning_rate": 0.001, "loss": 2.0679, "step": 16718 }, { "epoch": 0.7072933412302225, "grad_norm": 1.2244987487792969, "learning_rate": 0.001, "loss": 2.6043, "step": 16719 }, { "epoch": 0.7073356459937389, "grad_norm": 0.14987333118915558, "learning_rate": 0.001, "loss": 2.4967, "step": 16720 }, { "epoch": 0.7073779507572553, "grad_norm": 0.17000074684619904, "learning_rate": 0.001, "loss": 2.2771, "step": 16721 }, { "epoch": 0.7074202555207716, "grad_norm": 2.4218010902404785, "learning_rate": 0.001, "loss": 2.0412, "step": 16722 }, { "epoch": 0.707462560284288, "grad_norm": 0.24326300621032715, "learning_rate": 0.001, "loss": 2.3567, "step": 16723 }, { "epoch": 0.7075048650478044, "grad_norm": 0.5091056227684021, "learning_rate": 0.001, "loss": 2.88, "step": 16724 }, { "epoch": 0.7075471698113207, "grad_norm": 0.16985633969306946, "learning_rate": 0.001, "loss": 1.6534, "step": 16725 }, { "epoch": 0.7075894745748371, "grad_norm": 0.20579244196414948, "learning_rate": 0.001, "loss": 1.6615, "step": 16726 }, { "epoch": 0.7076317793383535, "grad_norm": 0.19666582345962524, "learning_rate": 0.001, "loss": 2.5436, "step": 16727 }, { "epoch": 0.7076740841018698, "grad_norm": 0.1708710938692093, "learning_rate": 0.001, "loss": 1.5373, "step": 16728 }, { "epoch": 0.7077163888653862, "grad_norm": 0.17626847326755524, "learning_rate": 0.001, "loss": 1.9095, "step": 16729 }, { "epoch": 0.7077586936289026, "grad_norm": 0.19842320680618286, "learning_rate": 0.001, "loss": 2.8567, "step": 16730 }, { "epoch": 0.707800998392419, "grad_norm": 0.17139360308647156, "learning_rate": 0.001, "loss": 2.3247, "step": 16731 }, { "epoch": 0.7078433031559354, "grad_norm": 0.14886251091957092, "learning_rate": 0.001, "loss": 1.6924, "step": 16732 }, { "epoch": 0.7078856079194518, "grad_norm": 0.182938814163208, "learning_rate": 0.001, "loss": 1.2942, "step": 16733 }, { "epoch": 0.7079279126829681, "grad_norm": 0.21765394508838654, "learning_rate": 0.001, "loss": 2.992, "step": 16734 }, { "epoch": 0.7079702174464845, "grad_norm": 49.08909606933594, "learning_rate": 0.001, "loss": 2.9176, "step": 16735 }, { "epoch": 0.7080125222100009, "grad_norm": 0.15202230215072632, "learning_rate": 0.001, "loss": 1.8571, "step": 16736 }, { "epoch": 0.7080548269735172, "grad_norm": 0.2932276129722595, "learning_rate": 0.001, "loss": 2.6166, "step": 16737 }, { "epoch": 0.7080971317370336, "grad_norm": 0.15039752423763275, "learning_rate": 0.001, "loss": 1.5913, "step": 16738 }, { "epoch": 0.70813943650055, "grad_norm": 0.1468665897846222, "learning_rate": 0.001, "loss": 2.4612, "step": 16739 }, { "epoch": 0.7081817412640663, "grad_norm": 0.17689596116542816, "learning_rate": 0.001, "loss": 2.5566, "step": 16740 }, { "epoch": 0.7082240460275827, "grad_norm": 0.22001603245735168, "learning_rate": 0.001, "loss": 2.2724, "step": 16741 }, { "epoch": 0.7082663507910991, "grad_norm": 0.24464015662670135, "learning_rate": 0.001, "loss": 2.3597, "step": 16742 }, { "epoch": 0.7083086555546154, "grad_norm": 0.1501077562570572, "learning_rate": 0.001, "loss": 2.4862, "step": 16743 }, { "epoch": 0.7083509603181318, "grad_norm": 0.22457842528820038, "learning_rate": 0.001, "loss": 1.9533, "step": 16744 }, { "epoch": 0.7083932650816482, "grad_norm": 0.48851919174194336, "learning_rate": 0.001, "loss": 1.8844, "step": 16745 }, { "epoch": 0.7084355698451645, "grad_norm": 0.16567832231521606, "learning_rate": 0.001, "loss": 2.3328, "step": 16746 }, { "epoch": 0.708477874608681, "grad_norm": 0.16666191816329956, "learning_rate": 0.001, "loss": 3.0872, "step": 16747 }, { "epoch": 0.7085201793721974, "grad_norm": 0.1672544926404953, "learning_rate": 0.001, "loss": 2.2649, "step": 16748 }, { "epoch": 0.7085624841357137, "grad_norm": 0.1558084934949875, "learning_rate": 0.001, "loss": 1.8319, "step": 16749 }, { "epoch": 0.7086047888992301, "grad_norm": 0.14037245512008667, "learning_rate": 0.001, "loss": 2.3805, "step": 16750 }, { "epoch": 0.7086470936627465, "grad_norm": 0.16425585746765137, "learning_rate": 0.001, "loss": 1.7784, "step": 16751 }, { "epoch": 0.7086893984262628, "grad_norm": 0.7935085892677307, "learning_rate": 0.001, "loss": 2.1095, "step": 16752 }, { "epoch": 0.7087317031897792, "grad_norm": 0.16057053208351135, "learning_rate": 0.001, "loss": 1.8058, "step": 16753 }, { "epoch": 0.7087740079532956, "grad_norm": 0.16115844249725342, "learning_rate": 0.001, "loss": 1.6998, "step": 16754 }, { "epoch": 0.7088163127168119, "grad_norm": 1.6587260961532593, "learning_rate": 0.001, "loss": 2.3739, "step": 16755 }, { "epoch": 0.7088586174803283, "grad_norm": 0.11960441619157791, "learning_rate": 0.001, "loss": 1.2877, "step": 16756 }, { "epoch": 0.7089009222438447, "grad_norm": 0.28413790464401245, "learning_rate": 0.001, "loss": 2.0965, "step": 16757 }, { "epoch": 0.708943227007361, "grad_norm": 0.18682576715946198, "learning_rate": 0.001, "loss": 2.3906, "step": 16758 }, { "epoch": 0.7089855317708774, "grad_norm": 0.15329422056674957, "learning_rate": 0.001, "loss": 2.0463, "step": 16759 }, { "epoch": 0.7090278365343937, "grad_norm": 0.18216441571712494, "learning_rate": 0.001, "loss": 2.1109, "step": 16760 }, { "epoch": 0.7090701412979101, "grad_norm": 0.38540518283843994, "learning_rate": 0.001, "loss": 1.7257, "step": 16761 }, { "epoch": 0.7091124460614265, "grad_norm": 0.2819367051124573, "learning_rate": 0.001, "loss": 1.708, "step": 16762 }, { "epoch": 0.7091547508249428, "grad_norm": 0.16718243062496185, "learning_rate": 0.001, "loss": 1.9525, "step": 16763 }, { "epoch": 0.7091970555884592, "grad_norm": 0.2087067812681198, "learning_rate": 0.001, "loss": 2.1724, "step": 16764 }, { "epoch": 0.7092393603519757, "grad_norm": 0.46061161160469055, "learning_rate": 0.001, "loss": 3.8056, "step": 16765 }, { "epoch": 0.709281665115492, "grad_norm": 0.20006658136844635, "learning_rate": 0.001, "loss": 3.0642, "step": 16766 }, { "epoch": 0.7093239698790084, "grad_norm": 0.15130923688411713, "learning_rate": 0.001, "loss": 2.0505, "step": 16767 }, { "epoch": 0.7093662746425248, "grad_norm": 0.20808841288089752, "learning_rate": 0.001, "loss": 2.407, "step": 16768 }, { "epoch": 0.7094085794060411, "grad_norm": 0.17218559980392456, "learning_rate": 0.001, "loss": 2.0906, "step": 16769 }, { "epoch": 0.7094508841695575, "grad_norm": 0.14409933984279633, "learning_rate": 0.001, "loss": 1.674, "step": 16770 }, { "epoch": 0.7094931889330739, "grad_norm": 0.19326108694076538, "learning_rate": 0.001, "loss": 2.979, "step": 16771 }, { "epoch": 0.7095354936965902, "grad_norm": 0.1743086278438568, "learning_rate": 0.001, "loss": 3.2637, "step": 16772 }, { "epoch": 0.7095777984601066, "grad_norm": 0.17006252706050873, "learning_rate": 0.001, "loss": 1.9035, "step": 16773 }, { "epoch": 0.709620103223623, "grad_norm": 0.1594936102628708, "learning_rate": 0.001, "loss": 1.8383, "step": 16774 }, { "epoch": 0.7096624079871393, "grad_norm": 0.1599188596010208, "learning_rate": 0.001, "loss": 2.2926, "step": 16775 }, { "epoch": 0.7097047127506557, "grad_norm": 0.22632001340389252, "learning_rate": 0.001, "loss": 2.0174, "step": 16776 }, { "epoch": 0.7097470175141721, "grad_norm": 0.151228666305542, "learning_rate": 0.001, "loss": 3.3681, "step": 16777 }, { "epoch": 0.7097893222776884, "grad_norm": 0.154108464717865, "learning_rate": 0.001, "loss": 2.6175, "step": 16778 }, { "epoch": 0.7098316270412048, "grad_norm": 0.23178794980049133, "learning_rate": 0.001, "loss": 2.6288, "step": 16779 }, { "epoch": 0.7098739318047212, "grad_norm": 0.18430373072624207, "learning_rate": 0.001, "loss": 1.8585, "step": 16780 }, { "epoch": 0.7099162365682375, "grad_norm": 1.811462640762329, "learning_rate": 0.001, "loss": 2.4217, "step": 16781 }, { "epoch": 0.709958541331754, "grad_norm": 0.14598025381565094, "learning_rate": 0.001, "loss": 1.4804, "step": 16782 }, { "epoch": 0.7100008460952704, "grad_norm": 0.162042036652565, "learning_rate": 0.001, "loss": 2.7988, "step": 16783 }, { "epoch": 0.7100431508587867, "grad_norm": 0.24106714129447937, "learning_rate": 0.001, "loss": 2.4054, "step": 16784 }, { "epoch": 0.7100854556223031, "grad_norm": 0.2140771746635437, "learning_rate": 0.001, "loss": 3.0376, "step": 16785 }, { "epoch": 0.7101277603858195, "grad_norm": 0.24649621546268463, "learning_rate": 0.001, "loss": 1.7422, "step": 16786 }, { "epoch": 0.7101700651493358, "grad_norm": 0.5776690244674683, "learning_rate": 0.001, "loss": 2.1208, "step": 16787 }, { "epoch": 0.7102123699128522, "grad_norm": 0.21545056998729706, "learning_rate": 0.001, "loss": 2.2494, "step": 16788 }, { "epoch": 0.7102546746763686, "grad_norm": 0.16463403403759003, "learning_rate": 0.001, "loss": 2.7351, "step": 16789 }, { "epoch": 0.7102969794398849, "grad_norm": 0.20469972491264343, "learning_rate": 0.001, "loss": 2.3172, "step": 16790 }, { "epoch": 0.7103392842034013, "grad_norm": 0.1745065301656723, "learning_rate": 0.001, "loss": 2.9764, "step": 16791 }, { "epoch": 0.7103815889669177, "grad_norm": 0.15587376058101654, "learning_rate": 0.001, "loss": 2.395, "step": 16792 }, { "epoch": 0.710423893730434, "grad_norm": 0.21353621780872345, "learning_rate": 0.001, "loss": 2.1667, "step": 16793 }, { "epoch": 0.7104661984939504, "grad_norm": 1.4838371276855469, "learning_rate": 0.001, "loss": 2.7724, "step": 16794 }, { "epoch": 0.7105085032574668, "grad_norm": 0.18408633768558502, "learning_rate": 0.001, "loss": 1.7745, "step": 16795 }, { "epoch": 0.7105508080209831, "grad_norm": 0.20254147052764893, "learning_rate": 0.001, "loss": 3.6371, "step": 16796 }, { "epoch": 0.7105931127844995, "grad_norm": 0.1831352263689041, "learning_rate": 0.001, "loss": 1.6486, "step": 16797 }, { "epoch": 0.710635417548016, "grad_norm": 0.18303878605365753, "learning_rate": 0.001, "loss": 1.9915, "step": 16798 }, { "epoch": 0.7106777223115323, "grad_norm": 8.696745872497559, "learning_rate": 0.001, "loss": 2.9612, "step": 16799 }, { "epoch": 0.7107200270750487, "grad_norm": 0.16578525304794312, "learning_rate": 0.001, "loss": 2.5333, "step": 16800 }, { "epoch": 0.7107623318385651, "grad_norm": 0.19835564494132996, "learning_rate": 0.001, "loss": 2.5376, "step": 16801 }, { "epoch": 0.7108046366020814, "grad_norm": 0.18402199447155, "learning_rate": 0.001, "loss": 2.1194, "step": 16802 }, { "epoch": 0.7108469413655978, "grad_norm": 0.39828217029571533, "learning_rate": 0.001, "loss": 2.7564, "step": 16803 }, { "epoch": 0.7108892461291141, "grad_norm": 0.17361795902252197, "learning_rate": 0.001, "loss": 2.9527, "step": 16804 }, { "epoch": 0.7109315508926305, "grad_norm": 0.2756977379322052, "learning_rate": 0.001, "loss": 2.1633, "step": 16805 }, { "epoch": 0.7109738556561469, "grad_norm": 0.22067061066627502, "learning_rate": 0.001, "loss": 2.6841, "step": 16806 }, { "epoch": 0.7110161604196632, "grad_norm": 0.1891237497329712, "learning_rate": 0.001, "loss": 1.4142, "step": 16807 }, { "epoch": 0.7110584651831796, "grad_norm": 1.683262586593628, "learning_rate": 0.001, "loss": 2.0285, "step": 16808 }, { "epoch": 0.711100769946696, "grad_norm": 0.16583381593227386, "learning_rate": 0.001, "loss": 1.7941, "step": 16809 }, { "epoch": 0.7111430747102123, "grad_norm": 2.930053472518921, "learning_rate": 0.001, "loss": 1.8499, "step": 16810 }, { "epoch": 0.7111853794737287, "grad_norm": 0.1657000333070755, "learning_rate": 0.001, "loss": 1.7437, "step": 16811 }, { "epoch": 0.7112276842372451, "grad_norm": 0.16928398609161377, "learning_rate": 0.001, "loss": 1.7506, "step": 16812 }, { "epoch": 0.7112699890007614, "grad_norm": 0.1839533895254135, "learning_rate": 0.001, "loss": 1.7639, "step": 16813 }, { "epoch": 0.7113122937642778, "grad_norm": 0.15033124387264252, "learning_rate": 0.001, "loss": 2.3976, "step": 16814 }, { "epoch": 0.7113545985277943, "grad_norm": 0.21158377826213837, "learning_rate": 0.001, "loss": 2.7739, "step": 16815 }, { "epoch": 0.7113969032913106, "grad_norm": 0.14168700575828552, "learning_rate": 0.001, "loss": 1.7022, "step": 16816 }, { "epoch": 0.711439208054827, "grad_norm": 0.17619457840919495, "learning_rate": 0.001, "loss": 3.0258, "step": 16817 }, { "epoch": 0.7114815128183434, "grad_norm": 2.186882972717285, "learning_rate": 0.001, "loss": 1.669, "step": 16818 }, { "epoch": 0.7115238175818597, "grad_norm": 0.37411370873451233, "learning_rate": 0.001, "loss": 1.7232, "step": 16819 }, { "epoch": 0.7115661223453761, "grad_norm": 0.16331219673156738, "learning_rate": 0.001, "loss": 1.4706, "step": 16820 }, { "epoch": 0.7116084271088925, "grad_norm": 0.9669393301010132, "learning_rate": 0.001, "loss": 2.8231, "step": 16821 }, { "epoch": 0.7116507318724088, "grad_norm": 1.7849273681640625, "learning_rate": 0.001, "loss": 2.0226, "step": 16822 }, { "epoch": 0.7116930366359252, "grad_norm": 5.401309013366699, "learning_rate": 0.001, "loss": 2.1295, "step": 16823 }, { "epoch": 0.7117353413994416, "grad_norm": 4.94477653503418, "learning_rate": 0.001, "loss": 3.141, "step": 16824 }, { "epoch": 0.7117776461629579, "grad_norm": 0.1646738350391388, "learning_rate": 0.001, "loss": 2.3503, "step": 16825 }, { "epoch": 0.7118199509264743, "grad_norm": 0.2677449584007263, "learning_rate": 0.001, "loss": 1.6298, "step": 16826 }, { "epoch": 0.7118622556899907, "grad_norm": 0.19275929033756256, "learning_rate": 0.001, "loss": 1.8281, "step": 16827 }, { "epoch": 0.711904560453507, "grad_norm": 0.9130849242210388, "learning_rate": 0.001, "loss": 2.2454, "step": 16828 }, { "epoch": 0.7119468652170234, "grad_norm": 0.16374343633651733, "learning_rate": 0.001, "loss": 1.355, "step": 16829 }, { "epoch": 0.7119891699805398, "grad_norm": 0.19432541728019714, "learning_rate": 0.001, "loss": 3.3869, "step": 16830 }, { "epoch": 0.7120314747440561, "grad_norm": 0.8363915681838989, "learning_rate": 0.001, "loss": 2.2074, "step": 16831 }, { "epoch": 0.7120737795075726, "grad_norm": 0.18882228434085846, "learning_rate": 0.001, "loss": 2.0904, "step": 16832 }, { "epoch": 0.712116084271089, "grad_norm": 0.16533978283405304, "learning_rate": 0.001, "loss": 1.8895, "step": 16833 }, { "epoch": 0.7121583890346053, "grad_norm": 2.9509565830230713, "learning_rate": 0.001, "loss": 2.142, "step": 16834 }, { "epoch": 0.7122006937981217, "grad_norm": 0.2698115408420563, "learning_rate": 0.001, "loss": 3.958, "step": 16835 }, { "epoch": 0.7122429985616381, "grad_norm": 0.1884506493806839, "learning_rate": 0.001, "loss": 2.1897, "step": 16836 }, { "epoch": 0.7122853033251544, "grad_norm": 59.53257751464844, "learning_rate": 0.001, "loss": 2.1418, "step": 16837 }, { "epoch": 0.7123276080886708, "grad_norm": 4.942716121673584, "learning_rate": 0.001, "loss": 2.5548, "step": 16838 }, { "epoch": 0.7123699128521872, "grad_norm": 0.3117887079715729, "learning_rate": 0.001, "loss": 1.6967, "step": 16839 }, { "epoch": 0.7124122176157035, "grad_norm": 0.2594894766807556, "learning_rate": 0.001, "loss": 1.6713, "step": 16840 }, { "epoch": 0.7124545223792199, "grad_norm": 0.37897738814353943, "learning_rate": 0.001, "loss": 1.9001, "step": 16841 }, { "epoch": 0.7124968271427363, "grad_norm": 0.3171361982822418, "learning_rate": 0.001, "loss": 2.6247, "step": 16842 }, { "epoch": 0.7125391319062526, "grad_norm": 0.15616539120674133, "learning_rate": 0.001, "loss": 2.1841, "step": 16843 }, { "epoch": 0.712581436669769, "grad_norm": 0.363093763589859, "learning_rate": 0.001, "loss": 1.9907, "step": 16844 }, { "epoch": 0.7126237414332854, "grad_norm": 0.20196084678173065, "learning_rate": 0.001, "loss": 2.3817, "step": 16845 }, { "epoch": 0.7126660461968017, "grad_norm": 0.19358810782432556, "learning_rate": 0.001, "loss": 1.6368, "step": 16846 }, { "epoch": 0.7127083509603181, "grad_norm": 0.24825577437877655, "learning_rate": 0.001, "loss": 2.6596, "step": 16847 }, { "epoch": 0.7127506557238346, "grad_norm": 0.20185577869415283, "learning_rate": 0.001, "loss": 2.1362, "step": 16848 }, { "epoch": 0.7127929604873509, "grad_norm": 0.3401681184768677, "learning_rate": 0.001, "loss": 1.8744, "step": 16849 }, { "epoch": 0.7128352652508673, "grad_norm": 0.2052794247865677, "learning_rate": 0.001, "loss": 1.8685, "step": 16850 }, { "epoch": 0.7128775700143836, "grad_norm": 0.17829133570194244, "learning_rate": 0.001, "loss": 2.681, "step": 16851 }, { "epoch": 0.7129198747779, "grad_norm": 0.2980566918849945, "learning_rate": 0.001, "loss": 2.1763, "step": 16852 }, { "epoch": 0.7129621795414164, "grad_norm": 0.22543774545192719, "learning_rate": 0.001, "loss": 1.9288, "step": 16853 }, { "epoch": 0.7130044843049327, "grad_norm": 1.7789720296859741, "learning_rate": 0.001, "loss": 2.1398, "step": 16854 }, { "epoch": 0.7130467890684491, "grad_norm": 0.2461077868938446, "learning_rate": 0.001, "loss": 1.7258, "step": 16855 }, { "epoch": 0.7130890938319655, "grad_norm": 0.1980772763490677, "learning_rate": 0.001, "loss": 2.8052, "step": 16856 }, { "epoch": 0.7131313985954818, "grad_norm": 0.19356103241443634, "learning_rate": 0.001, "loss": 2.0402, "step": 16857 }, { "epoch": 0.7131737033589982, "grad_norm": 0.21868008375167847, "learning_rate": 0.001, "loss": 2.9919, "step": 16858 }, { "epoch": 0.7132160081225146, "grad_norm": 0.22265969216823578, "learning_rate": 0.001, "loss": 1.8722, "step": 16859 }, { "epoch": 0.7132583128860309, "grad_norm": 0.9610694646835327, "learning_rate": 0.001, "loss": 1.7194, "step": 16860 }, { "epoch": 0.7133006176495473, "grad_norm": 1.113793134689331, "learning_rate": 0.001, "loss": 2.0243, "step": 16861 }, { "epoch": 0.7133429224130637, "grad_norm": 3.1779723167419434, "learning_rate": 0.001, "loss": 1.7827, "step": 16862 }, { "epoch": 0.71338522717658, "grad_norm": 0.1677677184343338, "learning_rate": 0.001, "loss": 2.1601, "step": 16863 }, { "epoch": 0.7134275319400964, "grad_norm": 0.7339451313018799, "learning_rate": 0.001, "loss": 1.9814, "step": 16864 }, { "epoch": 0.7134698367036129, "grad_norm": 0.4018705487251282, "learning_rate": 0.001, "loss": 2.3781, "step": 16865 }, { "epoch": 0.7135121414671292, "grad_norm": 9.572480201721191, "learning_rate": 0.001, "loss": 1.9837, "step": 16866 }, { "epoch": 0.7135544462306456, "grad_norm": 0.26376014947891235, "learning_rate": 0.001, "loss": 1.7696, "step": 16867 }, { "epoch": 0.713596750994162, "grad_norm": 1.0289099216461182, "learning_rate": 0.001, "loss": 2.5283, "step": 16868 }, { "epoch": 0.7136390557576783, "grad_norm": 0.1844884753227234, "learning_rate": 0.001, "loss": 1.9274, "step": 16869 }, { "epoch": 0.7136813605211947, "grad_norm": 0.19893215596675873, "learning_rate": 0.001, "loss": 2.1862, "step": 16870 }, { "epoch": 0.7137236652847111, "grad_norm": 0.239218607544899, "learning_rate": 0.001, "loss": 1.6428, "step": 16871 }, { "epoch": 0.7137659700482274, "grad_norm": 0.3456837832927704, "learning_rate": 0.001, "loss": 1.6866, "step": 16872 }, { "epoch": 0.7138082748117438, "grad_norm": 0.20002485811710358, "learning_rate": 0.001, "loss": 2.0121, "step": 16873 }, { "epoch": 0.7138505795752602, "grad_norm": 0.16409829258918762, "learning_rate": 0.001, "loss": 2.7127, "step": 16874 }, { "epoch": 0.7138928843387765, "grad_norm": 0.2976275086402893, "learning_rate": 0.001, "loss": 2.683, "step": 16875 }, { "epoch": 0.7139351891022929, "grad_norm": 0.1813412308692932, "learning_rate": 0.001, "loss": 1.4256, "step": 16876 }, { "epoch": 0.7139774938658093, "grad_norm": 0.28270435333251953, "learning_rate": 0.001, "loss": 1.8264, "step": 16877 }, { "epoch": 0.7140197986293256, "grad_norm": 0.5542454719543457, "learning_rate": 0.001, "loss": 2.6197, "step": 16878 }, { "epoch": 0.714062103392842, "grad_norm": 0.17437031865119934, "learning_rate": 0.001, "loss": 2.1137, "step": 16879 }, { "epoch": 0.7141044081563585, "grad_norm": 0.3185507357120514, "learning_rate": 0.001, "loss": 2.0136, "step": 16880 }, { "epoch": 0.7141467129198747, "grad_norm": 0.8088506460189819, "learning_rate": 0.001, "loss": 3.2072, "step": 16881 }, { "epoch": 0.7141890176833912, "grad_norm": 0.15809717774391174, "learning_rate": 0.001, "loss": 1.428, "step": 16882 }, { "epoch": 0.7142313224469076, "grad_norm": 0.16368800401687622, "learning_rate": 0.001, "loss": 2.1529, "step": 16883 }, { "epoch": 0.7142736272104239, "grad_norm": 0.2843414843082428, "learning_rate": 0.001, "loss": 1.4441, "step": 16884 }, { "epoch": 0.7143159319739403, "grad_norm": 0.20295199751853943, "learning_rate": 0.001, "loss": 2.4748, "step": 16885 }, { "epoch": 0.7143582367374567, "grad_norm": 0.15212145447731018, "learning_rate": 0.001, "loss": 2.8044, "step": 16886 }, { "epoch": 0.714400541500973, "grad_norm": 0.1592477262020111, "learning_rate": 0.001, "loss": 2.3869, "step": 16887 }, { "epoch": 0.7144428462644894, "grad_norm": 0.18488934636116028, "learning_rate": 0.001, "loss": 2.8091, "step": 16888 }, { "epoch": 0.7144851510280058, "grad_norm": 0.2524031698703766, "learning_rate": 0.001, "loss": 3.1258, "step": 16889 }, { "epoch": 0.7145274557915221, "grad_norm": 0.19174166023731232, "learning_rate": 0.001, "loss": 2.3683, "step": 16890 }, { "epoch": 0.7145697605550385, "grad_norm": 0.7119539380073547, "learning_rate": 0.001, "loss": 1.8931, "step": 16891 }, { "epoch": 0.7146120653185549, "grad_norm": 0.14731153845787048, "learning_rate": 0.001, "loss": 3.5619, "step": 16892 }, { "epoch": 0.7146543700820712, "grad_norm": 0.25515803694725037, "learning_rate": 0.001, "loss": 1.8279, "step": 16893 }, { "epoch": 0.7146966748455876, "grad_norm": 0.1940464973449707, "learning_rate": 0.001, "loss": 2.2902, "step": 16894 }, { "epoch": 0.7147389796091039, "grad_norm": 0.3229266405105591, "learning_rate": 0.001, "loss": 1.7167, "step": 16895 }, { "epoch": 0.7147812843726203, "grad_norm": 0.166666179895401, "learning_rate": 0.001, "loss": 2.0199, "step": 16896 }, { "epoch": 0.7148235891361367, "grad_norm": 0.14919742941856384, "learning_rate": 0.001, "loss": 1.6522, "step": 16897 }, { "epoch": 0.714865893899653, "grad_norm": 0.1620715856552124, "learning_rate": 0.001, "loss": 2.5642, "step": 16898 }, { "epoch": 0.7149081986631695, "grad_norm": 1.447795033454895, "learning_rate": 0.001, "loss": 2.2744, "step": 16899 }, { "epoch": 0.7149505034266859, "grad_norm": 0.4487624764442444, "learning_rate": 0.001, "loss": 2.0419, "step": 16900 }, { "epoch": 0.7149928081902022, "grad_norm": 0.21174852550029755, "learning_rate": 0.001, "loss": 3.2943, "step": 16901 }, { "epoch": 0.7150351129537186, "grad_norm": 0.16627584397792816, "learning_rate": 0.001, "loss": 2.8095, "step": 16902 }, { "epoch": 0.715077417717235, "grad_norm": 0.6215983629226685, "learning_rate": 0.001, "loss": 1.8961, "step": 16903 }, { "epoch": 0.7151197224807513, "grad_norm": 0.3979867696762085, "learning_rate": 0.001, "loss": 2.1163, "step": 16904 }, { "epoch": 0.7151620272442677, "grad_norm": 0.17654618620872498, "learning_rate": 0.001, "loss": 2.0656, "step": 16905 }, { "epoch": 0.7152043320077841, "grad_norm": 0.15997301042079926, "learning_rate": 0.001, "loss": 1.4513, "step": 16906 }, { "epoch": 0.7152466367713004, "grad_norm": 0.21495701372623444, "learning_rate": 0.001, "loss": 2.0596, "step": 16907 }, { "epoch": 0.7152889415348168, "grad_norm": 0.3844386339187622, "learning_rate": 0.001, "loss": 1.82, "step": 16908 }, { "epoch": 0.7153312462983332, "grad_norm": 1.2073756456375122, "learning_rate": 0.001, "loss": 1.7744, "step": 16909 }, { "epoch": 0.7153735510618495, "grad_norm": 0.272298663854599, "learning_rate": 0.001, "loss": 3.1949, "step": 16910 }, { "epoch": 0.7154158558253659, "grad_norm": 0.18787261843681335, "learning_rate": 0.001, "loss": 2.01, "step": 16911 }, { "epoch": 0.7154581605888823, "grad_norm": 0.16972093284130096, "learning_rate": 0.001, "loss": 2.3674, "step": 16912 }, { "epoch": 0.7155004653523986, "grad_norm": 0.20305274426937103, "learning_rate": 0.001, "loss": 2.1895, "step": 16913 }, { "epoch": 0.715542770115915, "grad_norm": 0.16325430572032928, "learning_rate": 0.001, "loss": 1.6928, "step": 16914 }, { "epoch": 0.7155850748794315, "grad_norm": 0.17961114645004272, "learning_rate": 0.001, "loss": 2.2898, "step": 16915 }, { "epoch": 0.7156273796429478, "grad_norm": 0.15911836922168732, "learning_rate": 0.001, "loss": 2.2762, "step": 16916 }, { "epoch": 0.7156696844064642, "grad_norm": 0.1571052074432373, "learning_rate": 0.001, "loss": 2.0235, "step": 16917 }, { "epoch": 0.7157119891699806, "grad_norm": 0.17579875886440277, "learning_rate": 0.001, "loss": 2.8934, "step": 16918 }, { "epoch": 0.7157542939334969, "grad_norm": 0.24277016520500183, "learning_rate": 0.001, "loss": 3.4171, "step": 16919 }, { "epoch": 0.7157965986970133, "grad_norm": 1.3790236711502075, "learning_rate": 0.001, "loss": 3.0564, "step": 16920 }, { "epoch": 0.7158389034605297, "grad_norm": 0.1847652643918991, "learning_rate": 0.001, "loss": 2.5091, "step": 16921 }, { "epoch": 0.715881208224046, "grad_norm": 0.18262675404548645, "learning_rate": 0.001, "loss": 2.8952, "step": 16922 }, { "epoch": 0.7159235129875624, "grad_norm": 0.5282437205314636, "learning_rate": 0.001, "loss": 2.3116, "step": 16923 }, { "epoch": 0.7159658177510788, "grad_norm": 0.17553091049194336, "learning_rate": 0.001, "loss": 1.662, "step": 16924 }, { "epoch": 0.7160081225145951, "grad_norm": 0.15575726330280304, "learning_rate": 0.001, "loss": 1.5341, "step": 16925 }, { "epoch": 0.7160504272781115, "grad_norm": 0.1408965289592743, "learning_rate": 0.001, "loss": 1.2918, "step": 16926 }, { "epoch": 0.7160927320416279, "grad_norm": 0.18358954787254333, "learning_rate": 0.001, "loss": 2.8717, "step": 16927 }, { "epoch": 0.7161350368051442, "grad_norm": 0.15843123197555542, "learning_rate": 0.001, "loss": 2.5845, "step": 16928 }, { "epoch": 0.7161773415686606, "grad_norm": 0.1516776978969574, "learning_rate": 0.001, "loss": 2.7124, "step": 16929 }, { "epoch": 0.716219646332177, "grad_norm": 0.2799467146396637, "learning_rate": 0.001, "loss": 3.124, "step": 16930 }, { "epoch": 0.7162619510956933, "grad_norm": 0.16782619059085846, "learning_rate": 0.001, "loss": 2.856, "step": 16931 }, { "epoch": 0.7163042558592098, "grad_norm": 0.17226386070251465, "learning_rate": 0.001, "loss": 2.282, "step": 16932 }, { "epoch": 0.7163465606227262, "grad_norm": 0.1673172563314438, "learning_rate": 0.001, "loss": 2.0755, "step": 16933 }, { "epoch": 0.7163888653862425, "grad_norm": 0.20048165321350098, "learning_rate": 0.001, "loss": 2.2061, "step": 16934 }, { "epoch": 0.7164311701497589, "grad_norm": 0.3704041838645935, "learning_rate": 0.001, "loss": 1.8703, "step": 16935 }, { "epoch": 0.7164734749132753, "grad_norm": 0.17238663136959076, "learning_rate": 0.001, "loss": 1.8328, "step": 16936 }, { "epoch": 0.7165157796767916, "grad_norm": 0.5309063196182251, "learning_rate": 0.001, "loss": 2.7206, "step": 16937 }, { "epoch": 0.716558084440308, "grad_norm": 0.28528085350990295, "learning_rate": 0.001, "loss": 1.8669, "step": 16938 }, { "epoch": 0.7166003892038244, "grad_norm": 0.34017452597618103, "learning_rate": 0.001, "loss": 2.7997, "step": 16939 }, { "epoch": 0.7166426939673407, "grad_norm": 0.19825142621994019, "learning_rate": 0.001, "loss": 3.2758, "step": 16940 }, { "epoch": 0.7166849987308571, "grad_norm": 0.18345898389816284, "learning_rate": 0.001, "loss": 1.6755, "step": 16941 }, { "epoch": 0.7167273034943734, "grad_norm": 0.15514928102493286, "learning_rate": 0.001, "loss": 1.7149, "step": 16942 }, { "epoch": 0.7167696082578898, "grad_norm": 0.19425754249095917, "learning_rate": 0.001, "loss": 2.3292, "step": 16943 }, { "epoch": 0.7168119130214062, "grad_norm": 0.21819639205932617, "learning_rate": 0.001, "loss": 2.918, "step": 16944 }, { "epoch": 0.7168542177849225, "grad_norm": 0.16114571690559387, "learning_rate": 0.001, "loss": 1.7553, "step": 16945 }, { "epoch": 0.7168965225484389, "grad_norm": 0.1726664900779724, "learning_rate": 0.001, "loss": 1.8447, "step": 16946 }, { "epoch": 0.7169388273119554, "grad_norm": 0.17410875856876373, "learning_rate": 0.001, "loss": 1.649, "step": 16947 }, { "epoch": 0.7169811320754716, "grad_norm": 0.17056071758270264, "learning_rate": 0.001, "loss": 2.7856, "step": 16948 }, { "epoch": 0.7170234368389881, "grad_norm": 0.2786010801792145, "learning_rate": 0.001, "loss": 2.5549, "step": 16949 }, { "epoch": 0.7170657416025045, "grad_norm": 3.383892774581909, "learning_rate": 0.001, "loss": 3.8689, "step": 16950 }, { "epoch": 0.7171080463660208, "grad_norm": 0.8111663460731506, "learning_rate": 0.001, "loss": 1.7525, "step": 16951 }, { "epoch": 0.7171503511295372, "grad_norm": 0.22883149981498718, "learning_rate": 0.001, "loss": 2.36, "step": 16952 }, { "epoch": 0.7171926558930536, "grad_norm": 0.17972877621650696, "learning_rate": 0.001, "loss": 2.6088, "step": 16953 }, { "epoch": 0.7172349606565699, "grad_norm": 0.2092818170785904, "learning_rate": 0.001, "loss": 1.8192, "step": 16954 }, { "epoch": 0.7172772654200863, "grad_norm": 0.16795043647289276, "learning_rate": 0.001, "loss": 2.887, "step": 16955 }, { "epoch": 0.7173195701836027, "grad_norm": 0.16794079542160034, "learning_rate": 0.001, "loss": 2.2538, "step": 16956 }, { "epoch": 0.717361874947119, "grad_norm": 0.1680205762386322, "learning_rate": 0.001, "loss": 1.7101, "step": 16957 }, { "epoch": 0.7174041797106354, "grad_norm": 0.17401909828186035, "learning_rate": 0.001, "loss": 1.853, "step": 16958 }, { "epoch": 0.7174464844741518, "grad_norm": 0.49490857124328613, "learning_rate": 0.001, "loss": 2.6701, "step": 16959 }, { "epoch": 0.7174887892376681, "grad_norm": 0.17487838864326477, "learning_rate": 0.001, "loss": 1.2779, "step": 16960 }, { "epoch": 0.7175310940011845, "grad_norm": 0.23202207684516907, "learning_rate": 0.001, "loss": 2.4539, "step": 16961 }, { "epoch": 0.7175733987647009, "grad_norm": 0.4263302981853485, "learning_rate": 0.001, "loss": 1.7071, "step": 16962 }, { "epoch": 0.7176157035282172, "grad_norm": 0.22260010242462158, "learning_rate": 0.001, "loss": 2.4392, "step": 16963 }, { "epoch": 0.7176580082917337, "grad_norm": 0.15869098901748657, "learning_rate": 0.001, "loss": 1.7775, "step": 16964 }, { "epoch": 0.7177003130552501, "grad_norm": 0.19737808406352997, "learning_rate": 0.001, "loss": 2.0257, "step": 16965 }, { "epoch": 0.7177426178187664, "grad_norm": 0.15930262207984924, "learning_rate": 0.001, "loss": 3.0723, "step": 16966 }, { "epoch": 0.7177849225822828, "grad_norm": 0.1594778150320053, "learning_rate": 0.001, "loss": 2.2068, "step": 16967 }, { "epoch": 0.7178272273457992, "grad_norm": 0.17711618542671204, "learning_rate": 0.001, "loss": 1.7199, "step": 16968 }, { "epoch": 0.7178695321093155, "grad_norm": 0.7712798118591309, "learning_rate": 0.001, "loss": 2.6924, "step": 16969 }, { "epoch": 0.7179118368728319, "grad_norm": 0.16748692095279694, "learning_rate": 0.001, "loss": 2.1455, "step": 16970 }, { "epoch": 0.7179541416363483, "grad_norm": 0.2054317146539688, "learning_rate": 0.001, "loss": 2.3641, "step": 16971 }, { "epoch": 0.7179964463998646, "grad_norm": 1.4810477495193481, "learning_rate": 0.001, "loss": 2.5664, "step": 16972 }, { "epoch": 0.718038751163381, "grad_norm": 0.3210020363330841, "learning_rate": 0.001, "loss": 2.4745, "step": 16973 }, { "epoch": 0.7180810559268974, "grad_norm": 0.14637017250061035, "learning_rate": 0.001, "loss": 2.0775, "step": 16974 }, { "epoch": 0.7181233606904137, "grad_norm": 0.1749821901321411, "learning_rate": 0.001, "loss": 2.4121, "step": 16975 }, { "epoch": 0.7181656654539301, "grad_norm": 0.1919098049402237, "learning_rate": 0.001, "loss": 1.879, "step": 16976 }, { "epoch": 0.7182079702174465, "grad_norm": 0.14838182926177979, "learning_rate": 0.001, "loss": 2.2794, "step": 16977 }, { "epoch": 0.7182502749809628, "grad_norm": 0.3306111991405487, "learning_rate": 0.001, "loss": 1.7571, "step": 16978 }, { "epoch": 0.7182925797444792, "grad_norm": 0.1290099322795868, "learning_rate": 0.001, "loss": 2.0743, "step": 16979 }, { "epoch": 0.7183348845079957, "grad_norm": 0.2530765235424042, "learning_rate": 0.001, "loss": 1.92, "step": 16980 }, { "epoch": 0.718377189271512, "grad_norm": 0.15586386620998383, "learning_rate": 0.001, "loss": 2.5149, "step": 16981 }, { "epoch": 0.7184194940350284, "grad_norm": 0.4855019748210907, "learning_rate": 0.001, "loss": 1.9419, "step": 16982 }, { "epoch": 0.7184617987985448, "grad_norm": 6.393121719360352, "learning_rate": 0.001, "loss": 1.7564, "step": 16983 }, { "epoch": 0.7185041035620611, "grad_norm": 0.1833300143480301, "learning_rate": 0.001, "loss": 2.1363, "step": 16984 }, { "epoch": 0.7185464083255775, "grad_norm": 0.1298752874135971, "learning_rate": 0.001, "loss": 2.2444, "step": 16985 }, { "epoch": 0.7185887130890938, "grad_norm": 0.16792719066143036, "learning_rate": 0.001, "loss": 1.7757, "step": 16986 }, { "epoch": 0.7186310178526102, "grad_norm": 1.4320098161697388, "learning_rate": 0.001, "loss": 2.647, "step": 16987 }, { "epoch": 0.7186733226161266, "grad_norm": 0.13900575041770935, "learning_rate": 0.001, "loss": 1.6843, "step": 16988 }, { "epoch": 0.7187156273796429, "grad_norm": 0.1738477498292923, "learning_rate": 0.001, "loss": 1.4185, "step": 16989 }, { "epoch": 0.7187579321431593, "grad_norm": 0.5591545701026917, "learning_rate": 0.001, "loss": 2.4475, "step": 16990 }, { "epoch": 0.7188002369066757, "grad_norm": 0.20126506686210632, "learning_rate": 0.001, "loss": 2.3001, "step": 16991 }, { "epoch": 0.718842541670192, "grad_norm": 0.3284417986869812, "learning_rate": 0.001, "loss": 2.2221, "step": 16992 }, { "epoch": 0.7188848464337084, "grad_norm": 0.1933998167514801, "learning_rate": 0.001, "loss": 1.9981, "step": 16993 }, { "epoch": 0.7189271511972248, "grad_norm": 1.0560952425003052, "learning_rate": 0.001, "loss": 2.3462, "step": 16994 }, { "epoch": 0.7189694559607411, "grad_norm": 0.16365966200828552, "learning_rate": 0.001, "loss": 2.6972, "step": 16995 }, { "epoch": 0.7190117607242575, "grad_norm": 0.21969819068908691, "learning_rate": 0.001, "loss": 2.3659, "step": 16996 }, { "epoch": 0.719054065487774, "grad_norm": 9.586769104003906, "learning_rate": 0.001, "loss": 1.9068, "step": 16997 }, { "epoch": 0.7190963702512903, "grad_norm": 0.15527045726776123, "learning_rate": 0.001, "loss": 1.8663, "step": 16998 }, { "epoch": 0.7191386750148067, "grad_norm": 0.25316473841667175, "learning_rate": 0.001, "loss": 2.7527, "step": 16999 }, { "epoch": 0.7191809797783231, "grad_norm": 0.20590928196907043, "learning_rate": 0.001, "loss": 2.1561, "step": 17000 }, { "epoch": 0.7192232845418394, "grad_norm": 0.23334155976772308, "learning_rate": 0.001, "loss": 1.6148, "step": 17001 }, { "epoch": 0.7192655893053558, "grad_norm": 0.1720026433467865, "learning_rate": 0.001, "loss": 2.4956, "step": 17002 }, { "epoch": 0.7193078940688722, "grad_norm": 0.18993832170963287, "learning_rate": 0.001, "loss": 2.4176, "step": 17003 }, { "epoch": 0.7193501988323885, "grad_norm": 1.2263435125350952, "learning_rate": 0.001, "loss": 1.695, "step": 17004 }, { "epoch": 0.7193925035959049, "grad_norm": 0.20992887020111084, "learning_rate": 0.001, "loss": 3.1313, "step": 17005 }, { "epoch": 0.7194348083594213, "grad_norm": 0.2623100280761719, "learning_rate": 0.001, "loss": 1.8112, "step": 17006 }, { "epoch": 0.7194771131229376, "grad_norm": 0.2040976881980896, "learning_rate": 0.001, "loss": 2.8483, "step": 17007 }, { "epoch": 0.719519417886454, "grad_norm": 0.1961153894662857, "learning_rate": 0.001, "loss": 3.994, "step": 17008 }, { "epoch": 0.7195617226499704, "grad_norm": 0.1991744339466095, "learning_rate": 0.001, "loss": 1.9362, "step": 17009 }, { "epoch": 0.7196040274134867, "grad_norm": 0.1773693561553955, "learning_rate": 0.001, "loss": 2.3754, "step": 17010 }, { "epoch": 0.7196463321770031, "grad_norm": 0.322916716337204, "learning_rate": 0.001, "loss": 2.6735, "step": 17011 }, { "epoch": 0.7196886369405195, "grad_norm": 0.1525036096572876, "learning_rate": 0.001, "loss": 2.265, "step": 17012 }, { "epoch": 0.7197309417040358, "grad_norm": 0.15779347717761993, "learning_rate": 0.001, "loss": 1.6281, "step": 17013 }, { "epoch": 0.7197732464675523, "grad_norm": 0.15959423780441284, "learning_rate": 0.001, "loss": 1.3413, "step": 17014 }, { "epoch": 0.7198155512310687, "grad_norm": 0.3631104826927185, "learning_rate": 0.001, "loss": 1.6842, "step": 17015 }, { "epoch": 0.719857855994585, "grad_norm": 0.474289208650589, "learning_rate": 0.001, "loss": 2.0346, "step": 17016 }, { "epoch": 0.7199001607581014, "grad_norm": 0.19243498146533966, "learning_rate": 0.001, "loss": 2.3207, "step": 17017 }, { "epoch": 0.7199424655216178, "grad_norm": 0.17244835197925568, "learning_rate": 0.001, "loss": 2.3529, "step": 17018 }, { "epoch": 0.7199847702851341, "grad_norm": 0.16670437157154083, "learning_rate": 0.001, "loss": 2.4943, "step": 17019 }, { "epoch": 0.7200270750486505, "grad_norm": 0.1707116663455963, "learning_rate": 0.001, "loss": 2.6172, "step": 17020 }, { "epoch": 0.7200693798121669, "grad_norm": 0.17514576017856598, "learning_rate": 0.001, "loss": 1.9051, "step": 17021 }, { "epoch": 0.7201116845756832, "grad_norm": 0.17995479702949524, "learning_rate": 0.001, "loss": 2.3423, "step": 17022 }, { "epoch": 0.7201539893391996, "grad_norm": 0.1695273518562317, "learning_rate": 0.001, "loss": 1.9609, "step": 17023 }, { "epoch": 0.720196294102716, "grad_norm": 0.18186168372631073, "learning_rate": 0.001, "loss": 1.8993, "step": 17024 }, { "epoch": 0.7202385988662323, "grad_norm": 0.17733162641525269, "learning_rate": 0.001, "loss": 2.2507, "step": 17025 }, { "epoch": 0.7202809036297487, "grad_norm": 0.17569288611412048, "learning_rate": 0.001, "loss": 2.4908, "step": 17026 }, { "epoch": 0.7203232083932651, "grad_norm": 0.1461903601884842, "learning_rate": 0.001, "loss": 2.3501, "step": 17027 }, { "epoch": 0.7203655131567814, "grad_norm": 0.13951033353805542, "learning_rate": 0.001, "loss": 1.5234, "step": 17028 }, { "epoch": 0.7204078179202978, "grad_norm": 0.19020099937915802, "learning_rate": 0.001, "loss": 2.583, "step": 17029 }, { "epoch": 0.7204501226838141, "grad_norm": 0.26185300946235657, "learning_rate": 0.001, "loss": 2.0379, "step": 17030 }, { "epoch": 0.7204924274473306, "grad_norm": 0.15246087312698364, "learning_rate": 0.001, "loss": 1.6983, "step": 17031 }, { "epoch": 0.720534732210847, "grad_norm": 0.21648335456848145, "learning_rate": 0.001, "loss": 2.4807, "step": 17032 }, { "epoch": 0.7205770369743633, "grad_norm": 0.15605172514915466, "learning_rate": 0.001, "loss": 1.7671, "step": 17033 }, { "epoch": 0.7206193417378797, "grad_norm": 0.1461627036333084, "learning_rate": 0.001, "loss": 1.6648, "step": 17034 }, { "epoch": 0.7206616465013961, "grad_norm": 0.15967121720314026, "learning_rate": 0.001, "loss": 2.1308, "step": 17035 }, { "epoch": 0.7207039512649124, "grad_norm": 0.17863650619983673, "learning_rate": 0.001, "loss": 2.646, "step": 17036 }, { "epoch": 0.7207462560284288, "grad_norm": 0.15027737617492676, "learning_rate": 0.001, "loss": 2.302, "step": 17037 }, { "epoch": 0.7207885607919452, "grad_norm": 0.30569708347320557, "learning_rate": 0.001, "loss": 3.3331, "step": 17038 }, { "epoch": 0.7208308655554615, "grad_norm": 0.18736647069454193, "learning_rate": 0.001, "loss": 1.5433, "step": 17039 }, { "epoch": 0.7208731703189779, "grad_norm": 0.16825266182422638, "learning_rate": 0.001, "loss": 1.511, "step": 17040 }, { "epoch": 0.7209154750824943, "grad_norm": 0.1982458382844925, "learning_rate": 0.001, "loss": 2.066, "step": 17041 }, { "epoch": 0.7209577798460106, "grad_norm": 0.1793079972267151, "learning_rate": 0.001, "loss": 1.6284, "step": 17042 }, { "epoch": 0.721000084609527, "grad_norm": 0.1480327844619751, "learning_rate": 0.001, "loss": 1.4117, "step": 17043 }, { "epoch": 0.7210423893730434, "grad_norm": 0.3210713267326355, "learning_rate": 0.001, "loss": 1.739, "step": 17044 }, { "epoch": 0.7210846941365597, "grad_norm": 0.2352566421031952, "learning_rate": 0.001, "loss": 1.7573, "step": 17045 }, { "epoch": 0.7211269989000761, "grad_norm": 0.1587694138288498, "learning_rate": 0.001, "loss": 2.3439, "step": 17046 }, { "epoch": 0.7211693036635926, "grad_norm": 0.9405317306518555, "learning_rate": 0.001, "loss": 1.9967, "step": 17047 }, { "epoch": 0.7212116084271089, "grad_norm": 0.1904739886522293, "learning_rate": 0.001, "loss": 1.9261, "step": 17048 }, { "epoch": 0.7212539131906253, "grad_norm": 0.12879721820354462, "learning_rate": 0.001, "loss": 2.6484, "step": 17049 }, { "epoch": 0.7212962179541417, "grad_norm": 0.17692124843597412, "learning_rate": 0.001, "loss": 2.0815, "step": 17050 }, { "epoch": 0.721338522717658, "grad_norm": 0.16509714722633362, "learning_rate": 0.001, "loss": 2.9258, "step": 17051 }, { "epoch": 0.7213808274811744, "grad_norm": 0.1631636917591095, "learning_rate": 0.001, "loss": 2.8743, "step": 17052 }, { "epoch": 0.7214231322446908, "grad_norm": 0.174580916762352, "learning_rate": 0.001, "loss": 3.0192, "step": 17053 }, { "epoch": 0.7214654370082071, "grad_norm": 0.8501453399658203, "learning_rate": 0.001, "loss": 1.8267, "step": 17054 }, { "epoch": 0.7215077417717235, "grad_norm": 0.15166904032230377, "learning_rate": 0.001, "loss": 2.4201, "step": 17055 }, { "epoch": 0.7215500465352399, "grad_norm": 0.16179314255714417, "learning_rate": 0.001, "loss": 1.9094, "step": 17056 }, { "epoch": 0.7215923512987562, "grad_norm": 0.14122450351715088, "learning_rate": 0.001, "loss": 1.327, "step": 17057 }, { "epoch": 0.7216346560622726, "grad_norm": 0.16468989849090576, "learning_rate": 0.001, "loss": 1.9848, "step": 17058 }, { "epoch": 0.721676960825789, "grad_norm": 0.14296945929527283, "learning_rate": 0.001, "loss": 1.6847, "step": 17059 }, { "epoch": 0.7217192655893053, "grad_norm": 0.1540268510580063, "learning_rate": 0.001, "loss": 1.4727, "step": 17060 }, { "epoch": 0.7217615703528217, "grad_norm": 0.14178214967250824, "learning_rate": 0.001, "loss": 1.8472, "step": 17061 }, { "epoch": 0.7218038751163381, "grad_norm": 0.1965574473142624, "learning_rate": 0.001, "loss": 2.9151, "step": 17062 }, { "epoch": 0.7218461798798544, "grad_norm": 0.1626625508069992, "learning_rate": 0.001, "loss": 1.8456, "step": 17063 }, { "epoch": 0.7218884846433709, "grad_norm": 1.8781062364578247, "learning_rate": 0.001, "loss": 2.1349, "step": 17064 }, { "epoch": 0.7219307894068873, "grad_norm": 0.1811523288488388, "learning_rate": 0.001, "loss": 3.1633, "step": 17065 }, { "epoch": 0.7219730941704036, "grad_norm": 0.1420523077249527, "learning_rate": 0.001, "loss": 2.1598, "step": 17066 }, { "epoch": 0.72201539893392, "grad_norm": 0.23266121745109558, "learning_rate": 0.001, "loss": 2.5496, "step": 17067 }, { "epoch": 0.7220577036974364, "grad_norm": 0.15366718173027039, "learning_rate": 0.001, "loss": 1.6903, "step": 17068 }, { "epoch": 0.7221000084609527, "grad_norm": 0.1720408946275711, "learning_rate": 0.001, "loss": 2.101, "step": 17069 }, { "epoch": 0.7221423132244691, "grad_norm": 0.13053718209266663, "learning_rate": 0.001, "loss": 2.9885, "step": 17070 }, { "epoch": 0.7221846179879855, "grad_norm": 0.1682283878326416, "learning_rate": 0.001, "loss": 2.0249, "step": 17071 }, { "epoch": 0.7222269227515018, "grad_norm": 0.1607704609632492, "learning_rate": 0.001, "loss": 2.3205, "step": 17072 }, { "epoch": 0.7222692275150182, "grad_norm": 0.14773066341876984, "learning_rate": 0.001, "loss": 2.3242, "step": 17073 }, { "epoch": 0.7223115322785346, "grad_norm": 2.609527349472046, "learning_rate": 0.001, "loss": 1.6714, "step": 17074 }, { "epoch": 0.7223538370420509, "grad_norm": 0.22098509967327118, "learning_rate": 0.001, "loss": 2.6751, "step": 17075 }, { "epoch": 0.7223961418055673, "grad_norm": 0.27444925904273987, "learning_rate": 0.001, "loss": 2.977, "step": 17076 }, { "epoch": 0.7224384465690836, "grad_norm": 1.863974690437317, "learning_rate": 0.001, "loss": 1.914, "step": 17077 }, { "epoch": 0.7224807513326, "grad_norm": 0.18942175805568695, "learning_rate": 0.001, "loss": 1.7231, "step": 17078 }, { "epoch": 0.7225230560961164, "grad_norm": 20.305295944213867, "learning_rate": 0.001, "loss": 1.9652, "step": 17079 }, { "epoch": 0.7225653608596327, "grad_norm": 1.116580843925476, "learning_rate": 0.001, "loss": 2.388, "step": 17080 }, { "epoch": 0.7226076656231492, "grad_norm": 0.2017759084701538, "learning_rate": 0.001, "loss": 2.4368, "step": 17081 }, { "epoch": 0.7226499703866656, "grad_norm": 0.20675040781497955, "learning_rate": 0.001, "loss": 1.8265, "step": 17082 }, { "epoch": 0.7226922751501819, "grad_norm": 0.2491598129272461, "learning_rate": 0.001, "loss": 1.6281, "step": 17083 }, { "epoch": 0.7227345799136983, "grad_norm": 0.7096131443977356, "learning_rate": 0.001, "loss": 2.6187, "step": 17084 }, { "epoch": 0.7227768846772147, "grad_norm": 0.5838418006896973, "learning_rate": 0.001, "loss": 3.2109, "step": 17085 }, { "epoch": 0.722819189440731, "grad_norm": 0.2073265165090561, "learning_rate": 0.001, "loss": 1.8221, "step": 17086 }, { "epoch": 0.7228614942042474, "grad_norm": 0.13714540004730225, "learning_rate": 0.001, "loss": 2.1434, "step": 17087 }, { "epoch": 0.7229037989677638, "grad_norm": 0.16703394055366516, "learning_rate": 0.001, "loss": 2.1473, "step": 17088 }, { "epoch": 0.7229461037312801, "grad_norm": 1.2047275304794312, "learning_rate": 0.001, "loss": 2.9204, "step": 17089 }, { "epoch": 0.7229884084947965, "grad_norm": 0.22018975019454956, "learning_rate": 0.001, "loss": 3.5448, "step": 17090 }, { "epoch": 0.7230307132583129, "grad_norm": 0.4262462854385376, "learning_rate": 0.001, "loss": 2.1982, "step": 17091 }, { "epoch": 0.7230730180218292, "grad_norm": 0.1653733253479004, "learning_rate": 0.001, "loss": 3.1182, "step": 17092 }, { "epoch": 0.7231153227853456, "grad_norm": 0.20591098070144653, "learning_rate": 0.001, "loss": 3.3591, "step": 17093 }, { "epoch": 0.723157627548862, "grad_norm": 0.177374005317688, "learning_rate": 0.001, "loss": 2.7171, "step": 17094 }, { "epoch": 0.7231999323123783, "grad_norm": 0.17736870050430298, "learning_rate": 0.001, "loss": 1.6567, "step": 17095 }, { "epoch": 0.7232422370758947, "grad_norm": 0.16085311770439148, "learning_rate": 0.001, "loss": 1.6649, "step": 17096 }, { "epoch": 0.7232845418394112, "grad_norm": 0.1982753872871399, "learning_rate": 0.001, "loss": 1.6603, "step": 17097 }, { "epoch": 0.7233268466029275, "grad_norm": 0.1861245036125183, "learning_rate": 0.001, "loss": 2.91, "step": 17098 }, { "epoch": 0.7233691513664439, "grad_norm": 0.7297300100326538, "learning_rate": 0.001, "loss": 1.8701, "step": 17099 }, { "epoch": 0.7234114561299603, "grad_norm": 0.16953283548355103, "learning_rate": 0.001, "loss": 3.1791, "step": 17100 }, { "epoch": 0.7234537608934766, "grad_norm": 0.16749191284179688, "learning_rate": 0.001, "loss": 2.5591, "step": 17101 }, { "epoch": 0.723496065656993, "grad_norm": 0.19384531676769257, "learning_rate": 0.001, "loss": 2.2332, "step": 17102 }, { "epoch": 0.7235383704205094, "grad_norm": 2.1700196266174316, "learning_rate": 0.001, "loss": 3.0336, "step": 17103 }, { "epoch": 0.7235806751840257, "grad_norm": 0.1668250858783722, "learning_rate": 0.001, "loss": 2.2885, "step": 17104 }, { "epoch": 0.7236229799475421, "grad_norm": 0.17841757833957672, "learning_rate": 0.001, "loss": 2.3188, "step": 17105 }, { "epoch": 0.7236652847110585, "grad_norm": 0.15527507662773132, "learning_rate": 0.001, "loss": 1.8841, "step": 17106 }, { "epoch": 0.7237075894745748, "grad_norm": 0.5220174789428711, "learning_rate": 0.001, "loss": 1.6185, "step": 17107 }, { "epoch": 0.7237498942380912, "grad_norm": 0.14695467054843903, "learning_rate": 0.001, "loss": 2.0267, "step": 17108 }, { "epoch": 0.7237921990016076, "grad_norm": 0.39175304770469666, "learning_rate": 0.001, "loss": 1.7712, "step": 17109 }, { "epoch": 0.7238345037651239, "grad_norm": 0.15665383636951447, "learning_rate": 0.001, "loss": 2.5027, "step": 17110 }, { "epoch": 0.7238768085286403, "grad_norm": 0.17397943139076233, "learning_rate": 0.001, "loss": 1.9146, "step": 17111 }, { "epoch": 0.7239191132921567, "grad_norm": 0.18213899433612823, "learning_rate": 0.001, "loss": 2.6108, "step": 17112 }, { "epoch": 0.723961418055673, "grad_norm": 0.1579909473657608, "learning_rate": 0.001, "loss": 1.4951, "step": 17113 }, { "epoch": 0.7240037228191895, "grad_norm": 0.15795019268989563, "learning_rate": 0.001, "loss": 1.5673, "step": 17114 }, { "epoch": 0.7240460275827059, "grad_norm": 0.24189157783985138, "learning_rate": 0.001, "loss": 3.2042, "step": 17115 }, { "epoch": 0.7240883323462222, "grad_norm": 0.1958611160516739, "learning_rate": 0.001, "loss": 2.8201, "step": 17116 }, { "epoch": 0.7241306371097386, "grad_norm": 0.18128181993961334, "learning_rate": 0.001, "loss": 2.0228, "step": 17117 }, { "epoch": 0.724172941873255, "grad_norm": 0.1512492448091507, "learning_rate": 0.001, "loss": 2.3753, "step": 17118 }, { "epoch": 0.7242152466367713, "grad_norm": 0.15748295187950134, "learning_rate": 0.001, "loss": 1.9492, "step": 17119 }, { "epoch": 0.7242575514002877, "grad_norm": 15.251922607421875, "learning_rate": 0.001, "loss": 3.1576, "step": 17120 }, { "epoch": 0.724299856163804, "grad_norm": 0.17477959394454956, "learning_rate": 0.001, "loss": 1.827, "step": 17121 }, { "epoch": 0.7243421609273204, "grad_norm": 0.16635996103286743, "learning_rate": 0.001, "loss": 2.5203, "step": 17122 }, { "epoch": 0.7243844656908368, "grad_norm": 0.12794765830039978, "learning_rate": 0.001, "loss": 2.4801, "step": 17123 }, { "epoch": 0.7244267704543531, "grad_norm": 0.1665220856666565, "learning_rate": 0.001, "loss": 1.6915, "step": 17124 }, { "epoch": 0.7244690752178695, "grad_norm": 0.20942756533622742, "learning_rate": 0.001, "loss": 2.1367, "step": 17125 }, { "epoch": 0.7245113799813859, "grad_norm": 0.1444472074508667, "learning_rate": 0.001, "loss": 1.4025, "step": 17126 }, { "epoch": 0.7245536847449022, "grad_norm": 0.5599656105041504, "learning_rate": 0.001, "loss": 1.6415, "step": 17127 }, { "epoch": 0.7245959895084186, "grad_norm": 0.17753274738788605, "learning_rate": 0.001, "loss": 2.3513, "step": 17128 }, { "epoch": 0.724638294271935, "grad_norm": 0.18549706041812897, "learning_rate": 0.001, "loss": 2.4151, "step": 17129 }, { "epoch": 0.7246805990354513, "grad_norm": 0.17280645668506622, "learning_rate": 0.001, "loss": 2.5004, "step": 17130 }, { "epoch": 0.7247229037989678, "grad_norm": 0.16694018244743347, "learning_rate": 0.001, "loss": 2.1255, "step": 17131 }, { "epoch": 0.7247652085624842, "grad_norm": 0.1705455332994461, "learning_rate": 0.001, "loss": 1.967, "step": 17132 }, { "epoch": 0.7248075133260005, "grad_norm": 0.1580754816532135, "learning_rate": 0.001, "loss": 1.9559, "step": 17133 }, { "epoch": 0.7248498180895169, "grad_norm": 0.9335688352584839, "learning_rate": 0.001, "loss": 2.4301, "step": 17134 }, { "epoch": 0.7248921228530333, "grad_norm": 0.1776943802833557, "learning_rate": 0.001, "loss": 1.4292, "step": 17135 }, { "epoch": 0.7249344276165496, "grad_norm": 0.15362563729286194, "learning_rate": 0.001, "loss": 2.3276, "step": 17136 }, { "epoch": 0.724976732380066, "grad_norm": 0.14412017166614532, "learning_rate": 0.001, "loss": 1.8388, "step": 17137 }, { "epoch": 0.7250190371435824, "grad_norm": 0.2188391387462616, "learning_rate": 0.001, "loss": 2.7813, "step": 17138 }, { "epoch": 0.7250613419070987, "grad_norm": 0.18921682238578796, "learning_rate": 0.001, "loss": 2.1525, "step": 17139 }, { "epoch": 0.7251036466706151, "grad_norm": 0.1472938060760498, "learning_rate": 0.001, "loss": 2.2099, "step": 17140 }, { "epoch": 0.7251459514341315, "grad_norm": 0.1493636518716812, "learning_rate": 0.001, "loss": 2.0206, "step": 17141 }, { "epoch": 0.7251882561976478, "grad_norm": 0.16733522713184357, "learning_rate": 0.001, "loss": 2.0764, "step": 17142 }, { "epoch": 0.7252305609611642, "grad_norm": 0.18534940481185913, "learning_rate": 0.001, "loss": 2.0909, "step": 17143 }, { "epoch": 0.7252728657246806, "grad_norm": 0.25004249811172485, "learning_rate": 0.001, "loss": 1.9715, "step": 17144 }, { "epoch": 0.7253151704881969, "grad_norm": 0.1925039291381836, "learning_rate": 0.001, "loss": 1.8828, "step": 17145 }, { "epoch": 0.7253574752517133, "grad_norm": 0.19968828558921814, "learning_rate": 0.001, "loss": 1.8314, "step": 17146 }, { "epoch": 0.7253997800152298, "grad_norm": 0.19976693391799927, "learning_rate": 0.001, "loss": 2.0041, "step": 17147 }, { "epoch": 0.725442084778746, "grad_norm": 0.16204243898391724, "learning_rate": 0.001, "loss": 1.5418, "step": 17148 }, { "epoch": 0.7254843895422625, "grad_norm": 0.6488047242164612, "learning_rate": 0.001, "loss": 1.5626, "step": 17149 }, { "epoch": 0.7255266943057789, "grad_norm": 0.16644714772701263, "learning_rate": 0.001, "loss": 2.0278, "step": 17150 }, { "epoch": 0.7255689990692952, "grad_norm": 0.16991443932056427, "learning_rate": 0.001, "loss": 1.7933, "step": 17151 }, { "epoch": 0.7256113038328116, "grad_norm": 0.23698163032531738, "learning_rate": 0.001, "loss": 2.3938, "step": 17152 }, { "epoch": 0.725653608596328, "grad_norm": 1.1459918022155762, "learning_rate": 0.001, "loss": 2.0805, "step": 17153 }, { "epoch": 0.7256959133598443, "grad_norm": 0.19081348180770874, "learning_rate": 0.001, "loss": 1.9892, "step": 17154 }, { "epoch": 0.7257382181233607, "grad_norm": 0.20583724975585938, "learning_rate": 0.001, "loss": 1.9256, "step": 17155 }, { "epoch": 0.7257805228868771, "grad_norm": 0.15794368088245392, "learning_rate": 0.001, "loss": 1.7678, "step": 17156 }, { "epoch": 0.7258228276503934, "grad_norm": 0.22491112351417542, "learning_rate": 0.001, "loss": 2.1716, "step": 17157 }, { "epoch": 0.7258651324139098, "grad_norm": 0.37008705735206604, "learning_rate": 0.001, "loss": 3.8031, "step": 17158 }, { "epoch": 0.7259074371774262, "grad_norm": 0.2206166684627533, "learning_rate": 0.001, "loss": 2.6122, "step": 17159 }, { "epoch": 0.7259497419409425, "grad_norm": 0.1652143895626068, "learning_rate": 0.001, "loss": 2.2458, "step": 17160 }, { "epoch": 0.7259920467044589, "grad_norm": 0.16952086985111237, "learning_rate": 0.001, "loss": 1.978, "step": 17161 }, { "epoch": 0.7260343514679753, "grad_norm": 0.15790002048015594, "learning_rate": 0.001, "loss": 1.9818, "step": 17162 }, { "epoch": 0.7260766562314916, "grad_norm": 0.18033292889595032, "learning_rate": 0.001, "loss": 2.0203, "step": 17163 }, { "epoch": 0.7261189609950081, "grad_norm": 0.19904761016368866, "learning_rate": 0.001, "loss": 3.5056, "step": 17164 }, { "epoch": 0.7261612657585244, "grad_norm": 0.18638011813163757, "learning_rate": 0.001, "loss": 2.5683, "step": 17165 }, { "epoch": 0.7262035705220408, "grad_norm": 0.1429300308227539, "learning_rate": 0.001, "loss": 1.5911, "step": 17166 }, { "epoch": 0.7262458752855572, "grad_norm": 0.18004530668258667, "learning_rate": 0.001, "loss": 2.3988, "step": 17167 }, { "epoch": 0.7262881800490735, "grad_norm": 0.16043299436569214, "learning_rate": 0.001, "loss": 1.6417, "step": 17168 }, { "epoch": 0.7263304848125899, "grad_norm": 0.15215149521827698, "learning_rate": 0.001, "loss": 2.1354, "step": 17169 }, { "epoch": 0.7263727895761063, "grad_norm": 0.2153785228729248, "learning_rate": 0.001, "loss": 3.3095, "step": 17170 }, { "epoch": 0.7264150943396226, "grad_norm": 0.17379812896251678, "learning_rate": 0.001, "loss": 1.6931, "step": 17171 }, { "epoch": 0.726457399103139, "grad_norm": 0.14409448206424713, "learning_rate": 0.001, "loss": 1.8867, "step": 17172 }, { "epoch": 0.7264997038666554, "grad_norm": 0.16144266724586487, "learning_rate": 0.001, "loss": 1.6323, "step": 17173 }, { "epoch": 0.7265420086301717, "grad_norm": 0.16007545590400696, "learning_rate": 0.001, "loss": 1.6542, "step": 17174 }, { "epoch": 0.7265843133936881, "grad_norm": 0.12861347198486328, "learning_rate": 0.001, "loss": 1.838, "step": 17175 }, { "epoch": 0.7266266181572045, "grad_norm": 0.1386764645576477, "learning_rate": 0.001, "loss": 1.7484, "step": 17176 }, { "epoch": 0.7266689229207208, "grad_norm": 0.18406988680362701, "learning_rate": 0.001, "loss": 2.2947, "step": 17177 }, { "epoch": 0.7267112276842372, "grad_norm": 0.1446983367204666, "learning_rate": 0.001, "loss": 2.2782, "step": 17178 }, { "epoch": 0.7267535324477536, "grad_norm": 0.17378035187721252, "learning_rate": 0.001, "loss": 4.0799, "step": 17179 }, { "epoch": 0.72679583721127, "grad_norm": 0.5141298174858093, "learning_rate": 0.001, "loss": 1.748, "step": 17180 }, { "epoch": 0.7268381419747864, "grad_norm": 0.16872873902320862, "learning_rate": 0.001, "loss": 2.1087, "step": 17181 }, { "epoch": 0.7268804467383028, "grad_norm": 0.1536315381526947, "learning_rate": 0.001, "loss": 2.7305, "step": 17182 }, { "epoch": 0.7269227515018191, "grad_norm": 17.74666404724121, "learning_rate": 0.001, "loss": 1.3806, "step": 17183 }, { "epoch": 0.7269650562653355, "grad_norm": 0.17030657827854156, "learning_rate": 0.001, "loss": 2.2526, "step": 17184 }, { "epoch": 0.7270073610288519, "grad_norm": 0.1639617383480072, "learning_rate": 0.001, "loss": 1.9684, "step": 17185 }, { "epoch": 0.7270496657923682, "grad_norm": 0.18141882121562958, "learning_rate": 0.001, "loss": 1.8486, "step": 17186 }, { "epoch": 0.7270919705558846, "grad_norm": 0.18478603661060333, "learning_rate": 0.001, "loss": 2.4985, "step": 17187 }, { "epoch": 0.727134275319401, "grad_norm": 0.1748218983411789, "learning_rate": 0.001, "loss": 1.778, "step": 17188 }, { "epoch": 0.7271765800829173, "grad_norm": 2.296082019805908, "learning_rate": 0.001, "loss": 2.8597, "step": 17189 }, { "epoch": 0.7272188848464337, "grad_norm": 0.18864890933036804, "learning_rate": 0.001, "loss": 2.4064, "step": 17190 }, { "epoch": 0.7272611896099501, "grad_norm": 0.17584924399852753, "learning_rate": 0.001, "loss": 2.0013, "step": 17191 }, { "epoch": 0.7273034943734664, "grad_norm": 0.1358068883419037, "learning_rate": 0.001, "loss": 1.7993, "step": 17192 }, { "epoch": 0.7273457991369828, "grad_norm": 0.17698509991168976, "learning_rate": 0.001, "loss": 2.1616, "step": 17193 }, { "epoch": 0.7273881039004992, "grad_norm": 0.4649740159511566, "learning_rate": 0.001, "loss": 2.0258, "step": 17194 }, { "epoch": 0.7274304086640155, "grad_norm": 0.15231867134571075, "learning_rate": 0.001, "loss": 2.1438, "step": 17195 }, { "epoch": 0.727472713427532, "grad_norm": 0.19217731058597565, "learning_rate": 0.001, "loss": 3.0598, "step": 17196 }, { "epoch": 0.7275150181910484, "grad_norm": 0.18122124671936035, "learning_rate": 0.001, "loss": 1.54, "step": 17197 }, { "epoch": 0.7275573229545647, "grad_norm": 0.13571122288703918, "learning_rate": 0.001, "loss": 1.2435, "step": 17198 }, { "epoch": 0.7275996277180811, "grad_norm": 0.14728273451328278, "learning_rate": 0.001, "loss": 1.6244, "step": 17199 }, { "epoch": 0.7276419324815975, "grad_norm": 0.17009060084819794, "learning_rate": 0.001, "loss": 2.5318, "step": 17200 }, { "epoch": 0.7276842372451138, "grad_norm": 0.17915566265583038, "learning_rate": 0.001, "loss": 2.2632, "step": 17201 }, { "epoch": 0.7277265420086302, "grad_norm": 0.19644324481487274, "learning_rate": 0.001, "loss": 1.6086, "step": 17202 }, { "epoch": 0.7277688467721466, "grad_norm": 0.16370825469493866, "learning_rate": 0.001, "loss": 1.992, "step": 17203 }, { "epoch": 0.7278111515356629, "grad_norm": 0.219704270362854, "learning_rate": 0.001, "loss": 3.0066, "step": 17204 }, { "epoch": 0.7278534562991793, "grad_norm": 0.2036728709936142, "learning_rate": 0.001, "loss": 2.1629, "step": 17205 }, { "epoch": 0.7278957610626957, "grad_norm": 0.5341696739196777, "learning_rate": 0.001, "loss": 1.8417, "step": 17206 }, { "epoch": 0.727938065826212, "grad_norm": 0.23344549536705017, "learning_rate": 0.001, "loss": 2.0511, "step": 17207 }, { "epoch": 0.7279803705897284, "grad_norm": 0.1574479639530182, "learning_rate": 0.001, "loss": 2.1478, "step": 17208 }, { "epoch": 0.7280226753532448, "grad_norm": 0.14581382274627686, "learning_rate": 0.001, "loss": 3.0699, "step": 17209 }, { "epoch": 0.7280649801167611, "grad_norm": 0.18210309743881226, "learning_rate": 0.001, "loss": 3.2913, "step": 17210 }, { "epoch": 0.7281072848802775, "grad_norm": 0.2757953107357025, "learning_rate": 0.001, "loss": 2.9062, "step": 17211 }, { "epoch": 0.7281495896437938, "grad_norm": 0.19155064225196838, "learning_rate": 0.001, "loss": 1.9776, "step": 17212 }, { "epoch": 0.7281918944073102, "grad_norm": 0.17974048852920532, "learning_rate": 0.001, "loss": 2.187, "step": 17213 }, { "epoch": 0.7282341991708267, "grad_norm": 0.16559724509716034, "learning_rate": 0.001, "loss": 1.7224, "step": 17214 }, { "epoch": 0.728276503934343, "grad_norm": 0.1683637499809265, "learning_rate": 0.001, "loss": 3.5013, "step": 17215 }, { "epoch": 0.7283188086978594, "grad_norm": 0.1559930294752121, "learning_rate": 0.001, "loss": 2.0094, "step": 17216 }, { "epoch": 0.7283611134613758, "grad_norm": 0.17992942035198212, "learning_rate": 0.001, "loss": 2.7717, "step": 17217 }, { "epoch": 0.7284034182248921, "grad_norm": 0.1760255992412567, "learning_rate": 0.001, "loss": 2.0994, "step": 17218 }, { "epoch": 0.7284457229884085, "grad_norm": 0.15609480440616608, "learning_rate": 0.001, "loss": 2.3692, "step": 17219 }, { "epoch": 0.7284880277519249, "grad_norm": 0.27221378684043884, "learning_rate": 0.001, "loss": 1.5019, "step": 17220 }, { "epoch": 0.7285303325154412, "grad_norm": 0.1502402275800705, "learning_rate": 0.001, "loss": 2.3058, "step": 17221 }, { "epoch": 0.7285726372789576, "grad_norm": 0.886763334274292, "learning_rate": 0.001, "loss": 2.2973, "step": 17222 }, { "epoch": 0.728614942042474, "grad_norm": 0.1851658821105957, "learning_rate": 0.001, "loss": 1.7426, "step": 17223 }, { "epoch": 0.7286572468059903, "grad_norm": 0.13750481605529785, "learning_rate": 0.001, "loss": 2.0439, "step": 17224 }, { "epoch": 0.7286995515695067, "grad_norm": 0.3507780134677887, "learning_rate": 0.001, "loss": 2.1812, "step": 17225 }, { "epoch": 0.7287418563330231, "grad_norm": 0.2409433126449585, "learning_rate": 0.001, "loss": 1.9943, "step": 17226 }, { "epoch": 0.7287841610965394, "grad_norm": 1.0777748823165894, "learning_rate": 0.001, "loss": 1.5415, "step": 17227 }, { "epoch": 0.7288264658600558, "grad_norm": 0.5831173062324524, "learning_rate": 0.001, "loss": 2.9824, "step": 17228 }, { "epoch": 0.7288687706235722, "grad_norm": 0.16224071383476257, "learning_rate": 0.001, "loss": 1.6938, "step": 17229 }, { "epoch": 0.7289110753870885, "grad_norm": 0.3064275085926056, "learning_rate": 0.001, "loss": 1.9604, "step": 17230 }, { "epoch": 0.728953380150605, "grad_norm": 0.16514526307582855, "learning_rate": 0.001, "loss": 2.3135, "step": 17231 }, { "epoch": 0.7289956849141214, "grad_norm": 0.23485006392002106, "learning_rate": 0.001, "loss": 2.1271, "step": 17232 }, { "epoch": 0.7290379896776377, "grad_norm": 0.1845911592245102, "learning_rate": 0.001, "loss": 3.1486, "step": 17233 }, { "epoch": 0.7290802944411541, "grad_norm": 0.23723876476287842, "learning_rate": 0.001, "loss": 3.0125, "step": 17234 }, { "epoch": 0.7291225992046705, "grad_norm": 0.16877859830856323, "learning_rate": 0.001, "loss": 1.6634, "step": 17235 }, { "epoch": 0.7291649039681868, "grad_norm": 0.1759745180606842, "learning_rate": 0.001, "loss": 1.8636, "step": 17236 }, { "epoch": 0.7292072087317032, "grad_norm": 0.14648011326789856, "learning_rate": 0.001, "loss": 1.6105, "step": 17237 }, { "epoch": 0.7292495134952196, "grad_norm": 0.18712373077869415, "learning_rate": 0.001, "loss": 2.1293, "step": 17238 }, { "epoch": 0.7292918182587359, "grad_norm": 0.1619027853012085, "learning_rate": 0.001, "loss": 1.3905, "step": 17239 }, { "epoch": 0.7293341230222523, "grad_norm": 0.16455508768558502, "learning_rate": 0.001, "loss": 2.3091, "step": 17240 }, { "epoch": 0.7293764277857687, "grad_norm": 0.173019140958786, "learning_rate": 0.001, "loss": 1.8305, "step": 17241 }, { "epoch": 0.729418732549285, "grad_norm": 0.14237745106220245, "learning_rate": 0.001, "loss": 2.0488, "step": 17242 }, { "epoch": 0.7294610373128014, "grad_norm": 0.1654769331216812, "learning_rate": 0.001, "loss": 1.6978, "step": 17243 }, { "epoch": 0.7295033420763178, "grad_norm": 0.13907228410243988, "learning_rate": 0.001, "loss": 1.6468, "step": 17244 }, { "epoch": 0.7295456468398341, "grad_norm": 0.15198984742164612, "learning_rate": 0.001, "loss": 1.644, "step": 17245 }, { "epoch": 0.7295879516033505, "grad_norm": 0.3324761688709259, "learning_rate": 0.001, "loss": 1.8737, "step": 17246 }, { "epoch": 0.729630256366867, "grad_norm": 0.16085344552993774, "learning_rate": 0.001, "loss": 1.588, "step": 17247 }, { "epoch": 0.7296725611303833, "grad_norm": 0.21202977001667023, "learning_rate": 0.001, "loss": 2.3259, "step": 17248 }, { "epoch": 0.7297148658938997, "grad_norm": 0.15043148398399353, "learning_rate": 0.001, "loss": 1.9283, "step": 17249 }, { "epoch": 0.7297571706574161, "grad_norm": 0.1499723643064499, "learning_rate": 0.001, "loss": 1.2876, "step": 17250 }, { "epoch": 0.7297994754209324, "grad_norm": 0.18416666984558105, "learning_rate": 0.001, "loss": 1.9966, "step": 17251 }, { "epoch": 0.7298417801844488, "grad_norm": 0.19342102110385895, "learning_rate": 0.001, "loss": 2.1563, "step": 17252 }, { "epoch": 0.7298840849479652, "grad_norm": 0.16368161141872406, "learning_rate": 0.001, "loss": 2.5488, "step": 17253 }, { "epoch": 0.7299263897114815, "grad_norm": 0.19238623976707458, "learning_rate": 0.001, "loss": 2.4449, "step": 17254 }, { "epoch": 0.7299686944749979, "grad_norm": 0.1476447880268097, "learning_rate": 0.001, "loss": 2.1851, "step": 17255 }, { "epoch": 0.7300109992385142, "grad_norm": 2.3451356887817383, "learning_rate": 0.001, "loss": 1.8425, "step": 17256 }, { "epoch": 0.7300533040020306, "grad_norm": 0.172628253698349, "learning_rate": 0.001, "loss": 2.2727, "step": 17257 }, { "epoch": 0.730095608765547, "grad_norm": 0.23679950833320618, "learning_rate": 0.001, "loss": 2.0103, "step": 17258 }, { "epoch": 0.7301379135290633, "grad_norm": 0.16247250139713287, "learning_rate": 0.001, "loss": 1.9596, "step": 17259 }, { "epoch": 0.7301802182925797, "grad_norm": 0.9013334512710571, "learning_rate": 0.001, "loss": 3.134, "step": 17260 }, { "epoch": 0.7302225230560961, "grad_norm": 0.15641777217388153, "learning_rate": 0.001, "loss": 2.2646, "step": 17261 }, { "epoch": 0.7302648278196124, "grad_norm": 0.15012650191783905, "learning_rate": 0.001, "loss": 1.8738, "step": 17262 }, { "epoch": 0.7303071325831288, "grad_norm": 0.16362197697162628, "learning_rate": 0.001, "loss": 2.5672, "step": 17263 }, { "epoch": 0.7303494373466453, "grad_norm": 4.925112724304199, "learning_rate": 0.001, "loss": 3.0045, "step": 17264 }, { "epoch": 0.7303917421101616, "grad_norm": 0.1557965725660324, "learning_rate": 0.001, "loss": 1.6906, "step": 17265 }, { "epoch": 0.730434046873678, "grad_norm": 0.1717749536037445, "learning_rate": 0.001, "loss": 1.9104, "step": 17266 }, { "epoch": 0.7304763516371944, "grad_norm": 0.17836451530456543, "learning_rate": 0.001, "loss": 2.1333, "step": 17267 }, { "epoch": 0.7305186564007107, "grad_norm": 0.15282116830348969, "learning_rate": 0.001, "loss": 1.9864, "step": 17268 }, { "epoch": 0.7305609611642271, "grad_norm": 0.16762694716453552, "learning_rate": 0.001, "loss": 2.6978, "step": 17269 }, { "epoch": 0.7306032659277435, "grad_norm": 0.3359089493751526, "learning_rate": 0.001, "loss": 2.244, "step": 17270 }, { "epoch": 0.7306455706912598, "grad_norm": 0.6835538148880005, "learning_rate": 0.001, "loss": 3.3602, "step": 17271 }, { "epoch": 0.7306878754547762, "grad_norm": 0.16285963356494904, "learning_rate": 0.001, "loss": 1.4811, "step": 17272 }, { "epoch": 0.7307301802182926, "grad_norm": 1.3391896486282349, "learning_rate": 0.001, "loss": 2.6051, "step": 17273 }, { "epoch": 0.7307724849818089, "grad_norm": 0.17385242879390717, "learning_rate": 0.001, "loss": 3.2361, "step": 17274 }, { "epoch": 0.7308147897453253, "grad_norm": 5.107343673706055, "learning_rate": 0.001, "loss": 2.1832, "step": 17275 }, { "epoch": 0.7308570945088417, "grad_norm": 0.14723435044288635, "learning_rate": 0.001, "loss": 1.8271, "step": 17276 }, { "epoch": 0.730899399272358, "grad_norm": 0.20319636166095734, "learning_rate": 0.001, "loss": 2.0419, "step": 17277 }, { "epoch": 0.7309417040358744, "grad_norm": 0.21243970096111298, "learning_rate": 0.001, "loss": 2.0175, "step": 17278 }, { "epoch": 0.7309840087993908, "grad_norm": 0.16713847219944, "learning_rate": 0.001, "loss": 1.5741, "step": 17279 }, { "epoch": 0.7310263135629071, "grad_norm": 0.16459450125694275, "learning_rate": 0.001, "loss": 2.6241, "step": 17280 }, { "epoch": 0.7310686183264236, "grad_norm": 0.13629010319709778, "learning_rate": 0.001, "loss": 2.9519, "step": 17281 }, { "epoch": 0.73111092308994, "grad_norm": 0.1602662056684494, "learning_rate": 0.001, "loss": 1.6162, "step": 17282 }, { "epoch": 0.7311532278534563, "grad_norm": 0.36450186371803284, "learning_rate": 0.001, "loss": 1.7024, "step": 17283 }, { "epoch": 0.7311955326169727, "grad_norm": 0.1558094173669815, "learning_rate": 0.001, "loss": 2.2161, "step": 17284 }, { "epoch": 0.7312378373804891, "grad_norm": 0.161569282412529, "learning_rate": 0.001, "loss": 1.6652, "step": 17285 }, { "epoch": 0.7312801421440054, "grad_norm": 0.394671767950058, "learning_rate": 0.001, "loss": 2.9097, "step": 17286 }, { "epoch": 0.7313224469075218, "grad_norm": 0.1987708956003189, "learning_rate": 0.001, "loss": 2.3612, "step": 17287 }, { "epoch": 0.7313647516710382, "grad_norm": 0.19308647513389587, "learning_rate": 0.001, "loss": 2.5338, "step": 17288 }, { "epoch": 0.7314070564345545, "grad_norm": 0.15085269510746002, "learning_rate": 0.001, "loss": 2.0816, "step": 17289 }, { "epoch": 0.7314493611980709, "grad_norm": 0.15625354647636414, "learning_rate": 0.001, "loss": 1.6518, "step": 17290 }, { "epoch": 0.7314916659615873, "grad_norm": 6.045670032501221, "learning_rate": 0.001, "loss": 2.2589, "step": 17291 }, { "epoch": 0.7315339707251036, "grad_norm": 1.6492812633514404, "learning_rate": 0.001, "loss": 2.7467, "step": 17292 }, { "epoch": 0.73157627548862, "grad_norm": 0.18798352777957916, "learning_rate": 0.001, "loss": 1.8589, "step": 17293 }, { "epoch": 0.7316185802521364, "grad_norm": 0.1604667603969574, "learning_rate": 0.001, "loss": 2.51, "step": 17294 }, { "epoch": 0.7316608850156527, "grad_norm": 0.15313079953193665, "learning_rate": 0.001, "loss": 2.0193, "step": 17295 }, { "epoch": 0.7317031897791691, "grad_norm": 0.28037911653518677, "learning_rate": 0.001, "loss": 1.8532, "step": 17296 }, { "epoch": 0.7317454945426856, "grad_norm": 0.21967746317386627, "learning_rate": 0.001, "loss": 2.5508, "step": 17297 }, { "epoch": 0.7317877993062019, "grad_norm": 0.16519500315189362, "learning_rate": 0.001, "loss": 1.861, "step": 17298 }, { "epoch": 0.7318301040697183, "grad_norm": 0.22370240092277527, "learning_rate": 0.001, "loss": 3.3633, "step": 17299 }, { "epoch": 0.7318724088332346, "grad_norm": 0.18574631214141846, "learning_rate": 0.001, "loss": 2.219, "step": 17300 }, { "epoch": 0.731914713596751, "grad_norm": 6.258697986602783, "learning_rate": 0.001, "loss": 2.1701, "step": 17301 }, { "epoch": 0.7319570183602674, "grad_norm": 0.16539275646209717, "learning_rate": 0.001, "loss": 1.8636, "step": 17302 }, { "epoch": 0.7319993231237837, "grad_norm": 0.47777655720710754, "learning_rate": 0.001, "loss": 1.445, "step": 17303 }, { "epoch": 0.7320416278873001, "grad_norm": 0.16214773058891296, "learning_rate": 0.001, "loss": 2.2202, "step": 17304 }, { "epoch": 0.7320839326508165, "grad_norm": 0.16900905966758728, "learning_rate": 0.001, "loss": 2.3208, "step": 17305 }, { "epoch": 0.7321262374143328, "grad_norm": 0.2838803231716156, "learning_rate": 0.001, "loss": 3.2839, "step": 17306 }, { "epoch": 0.7321685421778492, "grad_norm": 0.19927377998828888, "learning_rate": 0.001, "loss": 1.9397, "step": 17307 }, { "epoch": 0.7322108469413656, "grad_norm": 0.2335241436958313, "learning_rate": 0.001, "loss": 2.9701, "step": 17308 }, { "epoch": 0.7322531517048819, "grad_norm": 0.1455473154783249, "learning_rate": 0.001, "loss": 2.0696, "step": 17309 }, { "epoch": 0.7322954564683983, "grad_norm": 0.17584875226020813, "learning_rate": 0.001, "loss": 1.9355, "step": 17310 }, { "epoch": 0.7323377612319147, "grad_norm": 0.1485612541437149, "learning_rate": 0.001, "loss": 1.7784, "step": 17311 }, { "epoch": 0.732380065995431, "grad_norm": 0.17357254028320312, "learning_rate": 0.001, "loss": 2.162, "step": 17312 }, { "epoch": 0.7324223707589474, "grad_norm": 0.322561651468277, "learning_rate": 0.001, "loss": 1.7913, "step": 17313 }, { "epoch": 0.7324646755224639, "grad_norm": 0.17470252513885498, "learning_rate": 0.001, "loss": 1.835, "step": 17314 }, { "epoch": 0.7325069802859802, "grad_norm": 0.1539514809846878, "learning_rate": 0.001, "loss": 1.8439, "step": 17315 }, { "epoch": 0.7325492850494966, "grad_norm": 0.13950100541114807, "learning_rate": 0.001, "loss": 1.9242, "step": 17316 }, { "epoch": 0.732591589813013, "grad_norm": 1.2174321413040161, "learning_rate": 0.001, "loss": 2.0929, "step": 17317 }, { "epoch": 0.7326338945765293, "grad_norm": 0.6627561450004578, "learning_rate": 0.001, "loss": 2.2273, "step": 17318 }, { "epoch": 0.7326761993400457, "grad_norm": 0.17448411881923676, "learning_rate": 0.001, "loss": 1.911, "step": 17319 }, { "epoch": 0.7327185041035621, "grad_norm": 0.20414674282073975, "learning_rate": 0.001, "loss": 2.4904, "step": 17320 }, { "epoch": 0.7327608088670784, "grad_norm": 3.205564022064209, "learning_rate": 0.001, "loss": 2.7201, "step": 17321 }, { "epoch": 0.7328031136305948, "grad_norm": 0.17250360548496246, "learning_rate": 0.001, "loss": 2.3204, "step": 17322 }, { "epoch": 0.7328454183941112, "grad_norm": 0.1782556027173996, "learning_rate": 0.001, "loss": 1.8324, "step": 17323 }, { "epoch": 0.7328877231576275, "grad_norm": 0.15511076152324677, "learning_rate": 0.001, "loss": 1.9694, "step": 17324 }, { "epoch": 0.7329300279211439, "grad_norm": 0.49949324131011963, "learning_rate": 0.001, "loss": 3.1143, "step": 17325 }, { "epoch": 0.7329723326846603, "grad_norm": 0.1534297913312912, "learning_rate": 0.001, "loss": 1.7866, "step": 17326 }, { "epoch": 0.7330146374481766, "grad_norm": 0.15869884192943573, "learning_rate": 0.001, "loss": 1.9116, "step": 17327 }, { "epoch": 0.733056942211693, "grad_norm": 0.14846599102020264, "learning_rate": 0.001, "loss": 2.6805, "step": 17328 }, { "epoch": 0.7330992469752095, "grad_norm": 0.15433016419410706, "learning_rate": 0.001, "loss": 2.7788, "step": 17329 }, { "epoch": 0.7331415517387257, "grad_norm": 0.17470060288906097, "learning_rate": 0.001, "loss": 1.5982, "step": 17330 }, { "epoch": 0.7331838565022422, "grad_norm": 0.1676022708415985, "learning_rate": 0.001, "loss": 3.3472, "step": 17331 }, { "epoch": 0.7332261612657586, "grad_norm": 0.16653110086917877, "learning_rate": 0.001, "loss": 1.7236, "step": 17332 }, { "epoch": 0.7332684660292749, "grad_norm": 0.1555767059326172, "learning_rate": 0.001, "loss": 2.201, "step": 17333 }, { "epoch": 0.7333107707927913, "grad_norm": 3.2302515506744385, "learning_rate": 0.001, "loss": 2.3596, "step": 17334 }, { "epoch": 0.7333530755563077, "grad_norm": 0.13147684931755066, "learning_rate": 0.001, "loss": 1.4347, "step": 17335 }, { "epoch": 0.733395380319824, "grad_norm": 0.1869000345468521, "learning_rate": 0.001, "loss": 1.6992, "step": 17336 }, { "epoch": 0.7334376850833404, "grad_norm": 1.2183090448379517, "learning_rate": 0.001, "loss": 2.0567, "step": 17337 }, { "epoch": 0.7334799898468568, "grad_norm": 0.1941007673740387, "learning_rate": 0.001, "loss": 1.8982, "step": 17338 }, { "epoch": 0.7335222946103731, "grad_norm": 0.16951008141040802, "learning_rate": 0.001, "loss": 2.2301, "step": 17339 }, { "epoch": 0.7335645993738895, "grad_norm": 0.19473901391029358, "learning_rate": 0.001, "loss": 2.7647, "step": 17340 }, { "epoch": 0.7336069041374059, "grad_norm": 0.22168588638305664, "learning_rate": 0.001, "loss": 1.8121, "step": 17341 }, { "epoch": 0.7336492089009222, "grad_norm": 0.9966937303543091, "learning_rate": 0.001, "loss": 1.607, "step": 17342 }, { "epoch": 0.7336915136644386, "grad_norm": 0.17989544570446014, "learning_rate": 0.001, "loss": 1.695, "step": 17343 }, { "epoch": 0.733733818427955, "grad_norm": 0.2009655088186264, "learning_rate": 0.001, "loss": 2.5365, "step": 17344 }, { "epoch": 0.7337761231914713, "grad_norm": 0.17318382859230042, "learning_rate": 0.001, "loss": 2.3375, "step": 17345 }, { "epoch": 0.7338184279549878, "grad_norm": 0.5182844996452332, "learning_rate": 0.001, "loss": 1.7107, "step": 17346 }, { "epoch": 0.733860732718504, "grad_norm": 0.15933315455913544, "learning_rate": 0.001, "loss": 2.7681, "step": 17347 }, { "epoch": 0.7339030374820205, "grad_norm": 0.17201949656009674, "learning_rate": 0.001, "loss": 2.4166, "step": 17348 }, { "epoch": 0.7339453422455369, "grad_norm": 0.15629622340202332, "learning_rate": 0.001, "loss": 1.4808, "step": 17349 }, { "epoch": 0.7339876470090532, "grad_norm": 0.17875443398952484, "learning_rate": 0.001, "loss": 1.7621, "step": 17350 }, { "epoch": 0.7340299517725696, "grad_norm": 0.18575279414653778, "learning_rate": 0.001, "loss": 2.5072, "step": 17351 }, { "epoch": 0.734072256536086, "grad_norm": 0.17435061931610107, "learning_rate": 0.001, "loss": 1.8393, "step": 17352 }, { "epoch": 0.7341145612996023, "grad_norm": 0.18890979886054993, "learning_rate": 0.001, "loss": 2.0328, "step": 17353 }, { "epoch": 0.7341568660631187, "grad_norm": 0.41785550117492676, "learning_rate": 0.001, "loss": 2.1083, "step": 17354 }, { "epoch": 0.7341991708266351, "grad_norm": 0.16583463549613953, "learning_rate": 0.001, "loss": 1.7583, "step": 17355 }, { "epoch": 0.7342414755901514, "grad_norm": 0.19213053584098816, "learning_rate": 0.001, "loss": 1.8698, "step": 17356 }, { "epoch": 0.7342837803536678, "grad_norm": 0.7036476731300354, "learning_rate": 0.001, "loss": 2.8578, "step": 17357 }, { "epoch": 0.7343260851171842, "grad_norm": 0.48419860005378723, "learning_rate": 0.001, "loss": 2.4056, "step": 17358 }, { "epoch": 0.7343683898807005, "grad_norm": 0.16891148686408997, "learning_rate": 0.001, "loss": 2.6222, "step": 17359 }, { "epoch": 0.7344106946442169, "grad_norm": 0.19437286257743835, "learning_rate": 0.001, "loss": 1.9693, "step": 17360 }, { "epoch": 0.7344529994077333, "grad_norm": 0.1752423644065857, "learning_rate": 0.001, "loss": 2.8185, "step": 17361 }, { "epoch": 0.7344953041712496, "grad_norm": 0.19541260600090027, "learning_rate": 0.001, "loss": 2.6349, "step": 17362 }, { "epoch": 0.734537608934766, "grad_norm": 0.19351324439048767, "learning_rate": 0.001, "loss": 2.8542, "step": 17363 }, { "epoch": 0.7345799136982825, "grad_norm": 0.17461591958999634, "learning_rate": 0.001, "loss": 2.2905, "step": 17364 }, { "epoch": 0.7346222184617988, "grad_norm": 0.16678562760353088, "learning_rate": 0.001, "loss": 1.2962, "step": 17365 }, { "epoch": 0.7346645232253152, "grad_norm": 0.16038046777248383, "learning_rate": 0.001, "loss": 1.9527, "step": 17366 }, { "epoch": 0.7347068279888316, "grad_norm": 0.1805296242237091, "learning_rate": 0.001, "loss": 3.3531, "step": 17367 }, { "epoch": 0.7347491327523479, "grad_norm": 0.8957823514938354, "learning_rate": 0.001, "loss": 1.7126, "step": 17368 }, { "epoch": 0.7347914375158643, "grad_norm": 0.1552397757768631, "learning_rate": 0.001, "loss": 1.6727, "step": 17369 }, { "epoch": 0.7348337422793807, "grad_norm": 0.2678411900997162, "learning_rate": 0.001, "loss": 1.7442, "step": 17370 }, { "epoch": 0.734876047042897, "grad_norm": 2.7068116664886475, "learning_rate": 0.001, "loss": 2.5148, "step": 17371 }, { "epoch": 0.7349183518064134, "grad_norm": 0.18146149814128876, "learning_rate": 0.001, "loss": 2.6581, "step": 17372 }, { "epoch": 0.7349606565699298, "grad_norm": 0.18075819313526154, "learning_rate": 0.001, "loss": 1.3142, "step": 17373 }, { "epoch": 0.7350029613334461, "grad_norm": 0.2121601551771164, "learning_rate": 0.001, "loss": 1.9923, "step": 17374 }, { "epoch": 0.7350452660969625, "grad_norm": 0.15848374366760254, "learning_rate": 0.001, "loss": 1.6758, "step": 17375 }, { "epoch": 0.7350875708604789, "grad_norm": 0.17913897335529327, "learning_rate": 0.001, "loss": 2.6544, "step": 17376 }, { "epoch": 0.7351298756239952, "grad_norm": 0.6060518026351929, "learning_rate": 0.001, "loss": 2.1991, "step": 17377 }, { "epoch": 0.7351721803875116, "grad_norm": 0.16152165830135345, "learning_rate": 0.001, "loss": 1.9824, "step": 17378 }, { "epoch": 0.735214485151028, "grad_norm": 0.21107393503189087, "learning_rate": 0.001, "loss": 2.0665, "step": 17379 }, { "epoch": 0.7352567899145444, "grad_norm": 0.26977473497390747, "learning_rate": 0.001, "loss": 2.0408, "step": 17380 }, { "epoch": 0.7352990946780608, "grad_norm": 0.18134251236915588, "learning_rate": 0.001, "loss": 2.6027, "step": 17381 }, { "epoch": 0.7353413994415772, "grad_norm": 0.1814247965812683, "learning_rate": 0.001, "loss": 2.2769, "step": 17382 }, { "epoch": 0.7353837042050935, "grad_norm": 0.1722179353237152, "learning_rate": 0.001, "loss": 2.6387, "step": 17383 }, { "epoch": 0.7354260089686099, "grad_norm": 0.6016577482223511, "learning_rate": 0.001, "loss": 3.3811, "step": 17384 }, { "epoch": 0.7354683137321263, "grad_norm": 0.16259095072746277, "learning_rate": 0.001, "loss": 2.2258, "step": 17385 }, { "epoch": 0.7355106184956426, "grad_norm": 74.26570892333984, "learning_rate": 0.001, "loss": 1.5017, "step": 17386 }, { "epoch": 0.735552923259159, "grad_norm": 0.2374870926141739, "learning_rate": 0.001, "loss": 2.5826, "step": 17387 }, { "epoch": 0.7355952280226754, "grad_norm": 0.15083283185958862, "learning_rate": 0.001, "loss": 1.9046, "step": 17388 }, { "epoch": 0.7356375327861917, "grad_norm": 0.2028711885213852, "learning_rate": 0.001, "loss": 2.3847, "step": 17389 }, { "epoch": 0.7356798375497081, "grad_norm": 0.20516479015350342, "learning_rate": 0.001, "loss": 2.4515, "step": 17390 }, { "epoch": 0.7357221423132244, "grad_norm": 0.2230457216501236, "learning_rate": 0.001, "loss": 2.0254, "step": 17391 }, { "epoch": 0.7357644470767408, "grad_norm": 0.18468298017978668, "learning_rate": 0.001, "loss": 3.509, "step": 17392 }, { "epoch": 0.7358067518402572, "grad_norm": 0.4097122848033905, "learning_rate": 0.001, "loss": 2.2616, "step": 17393 }, { "epoch": 0.7358490566037735, "grad_norm": 0.17995667457580566, "learning_rate": 0.001, "loss": 2.5077, "step": 17394 }, { "epoch": 0.7358913613672899, "grad_norm": 0.5382487177848816, "learning_rate": 0.001, "loss": 2.278, "step": 17395 }, { "epoch": 0.7359336661308064, "grad_norm": 7.614555358886719, "learning_rate": 0.001, "loss": 2.148, "step": 17396 }, { "epoch": 0.7359759708943227, "grad_norm": 0.22902828454971313, "learning_rate": 0.001, "loss": 3.9575, "step": 17397 }, { "epoch": 0.7360182756578391, "grad_norm": 0.193025603890419, "learning_rate": 0.001, "loss": 2.3315, "step": 17398 }, { "epoch": 0.7360605804213555, "grad_norm": 0.164814755320549, "learning_rate": 0.001, "loss": 3.175, "step": 17399 }, { "epoch": 0.7361028851848718, "grad_norm": 0.16597630083560944, "learning_rate": 0.001, "loss": 2.1436, "step": 17400 }, { "epoch": 0.7361451899483882, "grad_norm": 0.264950692653656, "learning_rate": 0.001, "loss": 2.5343, "step": 17401 }, { "epoch": 0.7361874947119046, "grad_norm": 0.23448620736598969, "learning_rate": 0.001, "loss": 2.1109, "step": 17402 }, { "epoch": 0.7362297994754209, "grad_norm": 1.8346327543258667, "learning_rate": 0.001, "loss": 2.1264, "step": 17403 }, { "epoch": 0.7362721042389373, "grad_norm": 0.3137005865573883, "learning_rate": 0.001, "loss": 3.8748, "step": 17404 }, { "epoch": 0.7363144090024537, "grad_norm": 0.15299704670906067, "learning_rate": 0.001, "loss": 1.8221, "step": 17405 }, { "epoch": 0.73635671376597, "grad_norm": 0.3703068494796753, "learning_rate": 0.001, "loss": 2.0435, "step": 17406 }, { "epoch": 0.7363990185294864, "grad_norm": 0.15267716348171234, "learning_rate": 0.001, "loss": 2.9499, "step": 17407 }, { "epoch": 0.7364413232930028, "grad_norm": 0.2238219976425171, "learning_rate": 0.001, "loss": 1.8026, "step": 17408 }, { "epoch": 0.7364836280565191, "grad_norm": 0.22296573221683502, "learning_rate": 0.001, "loss": 2.8872, "step": 17409 }, { "epoch": 0.7365259328200355, "grad_norm": 0.3025241494178772, "learning_rate": 0.001, "loss": 3.631, "step": 17410 }, { "epoch": 0.7365682375835519, "grad_norm": 0.19598890841007233, "learning_rate": 0.001, "loss": 3.2347, "step": 17411 }, { "epoch": 0.7366105423470682, "grad_norm": 0.15584322810173035, "learning_rate": 0.001, "loss": 2.8041, "step": 17412 }, { "epoch": 0.7366528471105847, "grad_norm": 10.525110244750977, "learning_rate": 0.001, "loss": 2.9718, "step": 17413 }, { "epoch": 0.7366951518741011, "grad_norm": 0.19018413126468658, "learning_rate": 0.001, "loss": 1.9068, "step": 17414 }, { "epoch": 0.7367374566376174, "grad_norm": 0.1911727637052536, "learning_rate": 0.001, "loss": 2.2898, "step": 17415 }, { "epoch": 0.7367797614011338, "grad_norm": 0.2422974407672882, "learning_rate": 0.001, "loss": 1.8923, "step": 17416 }, { "epoch": 0.7368220661646502, "grad_norm": 0.18192513287067413, "learning_rate": 0.001, "loss": 3.3953, "step": 17417 }, { "epoch": 0.7368643709281665, "grad_norm": 1.0662686824798584, "learning_rate": 0.001, "loss": 3.1413, "step": 17418 }, { "epoch": 0.7369066756916829, "grad_norm": 0.20433665812015533, "learning_rate": 0.001, "loss": 2.1175, "step": 17419 }, { "epoch": 0.7369489804551993, "grad_norm": 0.2949641942977905, "learning_rate": 0.001, "loss": 1.5695, "step": 17420 }, { "epoch": 0.7369912852187156, "grad_norm": 0.21626880764961243, "learning_rate": 0.001, "loss": 2.4829, "step": 17421 }, { "epoch": 0.737033589982232, "grad_norm": 0.18955782055854797, "learning_rate": 0.001, "loss": 2.3706, "step": 17422 }, { "epoch": 0.7370758947457484, "grad_norm": 0.19381113350391388, "learning_rate": 0.001, "loss": 1.5407, "step": 17423 }, { "epoch": 0.7371181995092647, "grad_norm": 0.5135764479637146, "learning_rate": 0.001, "loss": 2.4144, "step": 17424 }, { "epoch": 0.7371605042727811, "grad_norm": 0.15292295813560486, "learning_rate": 0.001, "loss": 2.3463, "step": 17425 }, { "epoch": 0.7372028090362975, "grad_norm": 5.058320045471191, "learning_rate": 0.001, "loss": 1.8338, "step": 17426 }, { "epoch": 0.7372451137998138, "grad_norm": 0.22380977869033813, "learning_rate": 0.001, "loss": 2.2025, "step": 17427 }, { "epoch": 0.7372874185633302, "grad_norm": 0.17026227712631226, "learning_rate": 0.001, "loss": 2.0961, "step": 17428 }, { "epoch": 0.7373297233268467, "grad_norm": 0.1634519249200821, "learning_rate": 0.001, "loss": 3.1614, "step": 17429 }, { "epoch": 0.737372028090363, "grad_norm": 0.17133593559265137, "learning_rate": 0.001, "loss": 2.1654, "step": 17430 }, { "epoch": 0.7374143328538794, "grad_norm": 2.215588092803955, "learning_rate": 0.001, "loss": 1.7608, "step": 17431 }, { "epoch": 0.7374566376173958, "grad_norm": 3.688143730163574, "learning_rate": 0.001, "loss": 2.6195, "step": 17432 }, { "epoch": 0.7374989423809121, "grad_norm": 0.18635474145412445, "learning_rate": 0.001, "loss": 2.0476, "step": 17433 }, { "epoch": 0.7375412471444285, "grad_norm": 0.17662550508975983, "learning_rate": 0.001, "loss": 2.4137, "step": 17434 }, { "epoch": 0.7375835519079449, "grad_norm": 0.20594938099384308, "learning_rate": 0.001, "loss": 2.4498, "step": 17435 }, { "epoch": 0.7376258566714612, "grad_norm": 0.33524230122566223, "learning_rate": 0.001, "loss": 2.8723, "step": 17436 }, { "epoch": 0.7376681614349776, "grad_norm": 0.2059050351381302, "learning_rate": 0.001, "loss": 2.2312, "step": 17437 }, { "epoch": 0.7377104661984939, "grad_norm": 0.2377101182937622, "learning_rate": 0.001, "loss": 2.8656, "step": 17438 }, { "epoch": 0.7377527709620103, "grad_norm": 0.18826983869075775, "learning_rate": 0.001, "loss": 1.5286, "step": 17439 }, { "epoch": 0.7377950757255267, "grad_norm": 0.16893354058265686, "learning_rate": 0.001, "loss": 1.7591, "step": 17440 }, { "epoch": 0.737837380489043, "grad_norm": 0.20331987738609314, "learning_rate": 0.001, "loss": 1.9848, "step": 17441 }, { "epoch": 0.7378796852525594, "grad_norm": 0.291287362575531, "learning_rate": 0.001, "loss": 2.1354, "step": 17442 }, { "epoch": 0.7379219900160758, "grad_norm": 0.17716658115386963, "learning_rate": 0.001, "loss": 2.696, "step": 17443 }, { "epoch": 0.7379642947795921, "grad_norm": 0.18112552165985107, "learning_rate": 0.001, "loss": 2.3412, "step": 17444 }, { "epoch": 0.7380065995431085, "grad_norm": 0.2917419672012329, "learning_rate": 0.001, "loss": 1.7166, "step": 17445 }, { "epoch": 0.738048904306625, "grad_norm": 0.17572566866874695, "learning_rate": 0.001, "loss": 1.9647, "step": 17446 }, { "epoch": 0.7380912090701413, "grad_norm": 0.27557796239852905, "learning_rate": 0.001, "loss": 2.2187, "step": 17447 }, { "epoch": 0.7381335138336577, "grad_norm": 10.137809753417969, "learning_rate": 0.001, "loss": 1.7207, "step": 17448 }, { "epoch": 0.7381758185971741, "grad_norm": 0.20233531296253204, "learning_rate": 0.001, "loss": 1.4421, "step": 17449 }, { "epoch": 0.7382181233606904, "grad_norm": 0.2746541202068329, "learning_rate": 0.001, "loss": 1.8292, "step": 17450 }, { "epoch": 0.7382604281242068, "grad_norm": 0.2038559764623642, "learning_rate": 0.001, "loss": 2.6724, "step": 17451 }, { "epoch": 0.7383027328877232, "grad_norm": 0.22853845357894897, "learning_rate": 0.001, "loss": 2.7177, "step": 17452 }, { "epoch": 0.7383450376512395, "grad_norm": 0.24598032236099243, "learning_rate": 0.001, "loss": 2.0161, "step": 17453 }, { "epoch": 0.7383873424147559, "grad_norm": 0.24328875541687012, "learning_rate": 0.001, "loss": 3.1901, "step": 17454 }, { "epoch": 0.7384296471782723, "grad_norm": 0.18322333693504333, "learning_rate": 0.001, "loss": 1.7166, "step": 17455 }, { "epoch": 0.7384719519417886, "grad_norm": 0.2122056633234024, "learning_rate": 0.001, "loss": 2.1863, "step": 17456 }, { "epoch": 0.738514256705305, "grad_norm": 0.22805139422416687, "learning_rate": 0.001, "loss": 2.4584, "step": 17457 }, { "epoch": 0.7385565614688214, "grad_norm": 2.5409958362579346, "learning_rate": 0.001, "loss": 2.2642, "step": 17458 }, { "epoch": 0.7385988662323377, "grad_norm": 0.2093057632446289, "learning_rate": 0.001, "loss": 1.5797, "step": 17459 }, { "epoch": 0.7386411709958541, "grad_norm": 0.1915285587310791, "learning_rate": 0.001, "loss": 2.1698, "step": 17460 }, { "epoch": 0.7386834757593705, "grad_norm": 1.6814018487930298, "learning_rate": 0.001, "loss": 1.5954, "step": 17461 }, { "epoch": 0.7387257805228868, "grad_norm": 0.19684362411499023, "learning_rate": 0.001, "loss": 1.8076, "step": 17462 }, { "epoch": 0.7387680852864033, "grad_norm": 0.18094508349895477, "learning_rate": 0.001, "loss": 1.9718, "step": 17463 }, { "epoch": 0.7388103900499197, "grad_norm": 1.161989450454712, "learning_rate": 0.001, "loss": 2.7361, "step": 17464 }, { "epoch": 0.738852694813436, "grad_norm": 0.21886514127254486, "learning_rate": 0.001, "loss": 2.6973, "step": 17465 }, { "epoch": 0.7388949995769524, "grad_norm": 0.201472669839859, "learning_rate": 0.001, "loss": 2.3263, "step": 17466 }, { "epoch": 0.7389373043404688, "grad_norm": 0.17126241326332092, "learning_rate": 0.001, "loss": 2.0101, "step": 17467 }, { "epoch": 0.7389796091039851, "grad_norm": 0.18553845584392548, "learning_rate": 0.001, "loss": 1.9736, "step": 17468 }, { "epoch": 0.7390219138675015, "grad_norm": 0.17792284488677979, "learning_rate": 0.001, "loss": 2.87, "step": 17469 }, { "epoch": 0.7390642186310179, "grad_norm": 0.14445818960666656, "learning_rate": 0.001, "loss": 1.7331, "step": 17470 }, { "epoch": 0.7391065233945342, "grad_norm": 0.15313506126403809, "learning_rate": 0.001, "loss": 1.9839, "step": 17471 }, { "epoch": 0.7391488281580506, "grad_norm": 0.17701950669288635, "learning_rate": 0.001, "loss": 2.0847, "step": 17472 }, { "epoch": 0.739191132921567, "grad_norm": 0.1592811495065689, "learning_rate": 0.001, "loss": 2.9718, "step": 17473 }, { "epoch": 0.7392334376850833, "grad_norm": 0.1532157063484192, "learning_rate": 0.001, "loss": 2.1228, "step": 17474 }, { "epoch": 0.7392757424485997, "grad_norm": 0.1617754101753235, "learning_rate": 0.001, "loss": 2.5941, "step": 17475 }, { "epoch": 0.7393180472121161, "grad_norm": 0.1594044715166092, "learning_rate": 0.001, "loss": 2.0657, "step": 17476 }, { "epoch": 0.7393603519756324, "grad_norm": 0.18134772777557373, "learning_rate": 0.001, "loss": 1.7252, "step": 17477 }, { "epoch": 0.7394026567391488, "grad_norm": 0.14515188336372375, "learning_rate": 0.001, "loss": 1.3929, "step": 17478 }, { "epoch": 0.7394449615026653, "grad_norm": 0.2504208981990814, "learning_rate": 0.001, "loss": 2.7328, "step": 17479 }, { "epoch": 0.7394872662661816, "grad_norm": 0.24100682139396667, "learning_rate": 0.001, "loss": 1.7722, "step": 17480 }, { "epoch": 0.739529571029698, "grad_norm": 0.19070883095264435, "learning_rate": 0.001, "loss": 1.6493, "step": 17481 }, { "epoch": 0.7395718757932143, "grad_norm": 0.2634289264678955, "learning_rate": 0.001, "loss": 2.8336, "step": 17482 }, { "epoch": 0.7396141805567307, "grad_norm": 0.1580793410539627, "learning_rate": 0.001, "loss": 2.444, "step": 17483 }, { "epoch": 0.7396564853202471, "grad_norm": 0.17331601679325104, "learning_rate": 0.001, "loss": 3.8579, "step": 17484 }, { "epoch": 0.7396987900837634, "grad_norm": 0.13026723265647888, "learning_rate": 0.001, "loss": 2.681, "step": 17485 }, { "epoch": 0.7397410948472798, "grad_norm": 0.16975976526737213, "learning_rate": 0.001, "loss": 1.8355, "step": 17486 }, { "epoch": 0.7397833996107962, "grad_norm": 0.1425335556268692, "learning_rate": 0.001, "loss": 2.1688, "step": 17487 }, { "epoch": 0.7398257043743125, "grad_norm": 0.16109095513820648, "learning_rate": 0.001, "loss": 2.1205, "step": 17488 }, { "epoch": 0.7398680091378289, "grad_norm": 0.14170965552330017, "learning_rate": 0.001, "loss": 1.7205, "step": 17489 }, { "epoch": 0.7399103139013453, "grad_norm": 0.1703367382287979, "learning_rate": 0.001, "loss": 2.2542, "step": 17490 }, { "epoch": 0.7399526186648616, "grad_norm": 0.18292082846164703, "learning_rate": 0.001, "loss": 2.0744, "step": 17491 }, { "epoch": 0.739994923428378, "grad_norm": 0.15937143564224243, "learning_rate": 0.001, "loss": 2.5822, "step": 17492 }, { "epoch": 0.7400372281918944, "grad_norm": 0.14507554471492767, "learning_rate": 0.001, "loss": 1.8052, "step": 17493 }, { "epoch": 0.7400795329554107, "grad_norm": 0.4141727387905121, "learning_rate": 0.001, "loss": 2.2241, "step": 17494 }, { "epoch": 0.7401218377189271, "grad_norm": 0.14581911265850067, "learning_rate": 0.001, "loss": 1.9695, "step": 17495 }, { "epoch": 0.7401641424824436, "grad_norm": 0.1397586613893509, "learning_rate": 0.001, "loss": 2.4452, "step": 17496 }, { "epoch": 0.7402064472459599, "grad_norm": 0.21478283405303955, "learning_rate": 0.001, "loss": 2.4327, "step": 17497 }, { "epoch": 0.7402487520094763, "grad_norm": 0.16876371204853058, "learning_rate": 0.001, "loss": 2.7402, "step": 17498 }, { "epoch": 0.7402910567729927, "grad_norm": 0.3392346203327179, "learning_rate": 0.001, "loss": 2.3049, "step": 17499 }, { "epoch": 0.740333361536509, "grad_norm": 0.1360718309879303, "learning_rate": 0.001, "loss": 2.0013, "step": 17500 }, { "epoch": 0.7403756663000254, "grad_norm": 0.14087393879890442, "learning_rate": 0.001, "loss": 1.7843, "step": 17501 }, { "epoch": 0.7404179710635418, "grad_norm": 0.15217366814613342, "learning_rate": 0.001, "loss": 3.6722, "step": 17502 }, { "epoch": 0.7404602758270581, "grad_norm": 0.17477324604988098, "learning_rate": 0.001, "loss": 2.2989, "step": 17503 }, { "epoch": 0.7405025805905745, "grad_norm": 16.218931198120117, "learning_rate": 0.001, "loss": 1.676, "step": 17504 }, { "epoch": 0.7405448853540909, "grad_norm": 0.1638619303703308, "learning_rate": 0.001, "loss": 2.3842, "step": 17505 }, { "epoch": 0.7405871901176072, "grad_norm": 31.428306579589844, "learning_rate": 0.001, "loss": 3.2131, "step": 17506 }, { "epoch": 0.7406294948811236, "grad_norm": 0.1608552634716034, "learning_rate": 0.001, "loss": 1.9849, "step": 17507 }, { "epoch": 0.74067179964464, "grad_norm": 0.20988480746746063, "learning_rate": 0.001, "loss": 2.2174, "step": 17508 }, { "epoch": 0.7407141044081563, "grad_norm": 3.444148302078247, "learning_rate": 0.001, "loss": 2.4545, "step": 17509 }, { "epoch": 0.7407564091716727, "grad_norm": 0.17780986428260803, "learning_rate": 0.001, "loss": 2.6699, "step": 17510 }, { "epoch": 0.7407987139351891, "grad_norm": 0.19687338173389435, "learning_rate": 0.001, "loss": 2.1274, "step": 17511 }, { "epoch": 0.7408410186987054, "grad_norm": 0.3244110643863678, "learning_rate": 0.001, "loss": 1.981, "step": 17512 }, { "epoch": 0.7408833234622219, "grad_norm": 0.19059693813323975, "learning_rate": 0.001, "loss": 2.4835, "step": 17513 }, { "epoch": 0.7409256282257383, "grad_norm": 0.2618594169616699, "learning_rate": 0.001, "loss": 3.4605, "step": 17514 }, { "epoch": 0.7409679329892546, "grad_norm": 0.4329042136669159, "learning_rate": 0.001, "loss": 2.3207, "step": 17515 }, { "epoch": 0.741010237752771, "grad_norm": 0.45828118920326233, "learning_rate": 0.001, "loss": 2.6254, "step": 17516 }, { "epoch": 0.7410525425162874, "grad_norm": 0.17340798676013947, "learning_rate": 0.001, "loss": 3.2398, "step": 17517 }, { "epoch": 0.7410948472798037, "grad_norm": 0.1793540120124817, "learning_rate": 0.001, "loss": 2.3224, "step": 17518 }, { "epoch": 0.7411371520433201, "grad_norm": 0.1443507820367813, "learning_rate": 0.001, "loss": 1.4805, "step": 17519 }, { "epoch": 0.7411794568068365, "grad_norm": 0.1604100912809372, "learning_rate": 0.001, "loss": 1.6807, "step": 17520 }, { "epoch": 0.7412217615703528, "grad_norm": 0.17397022247314453, "learning_rate": 0.001, "loss": 1.6518, "step": 17521 }, { "epoch": 0.7412640663338692, "grad_norm": 0.49950161576271057, "learning_rate": 0.001, "loss": 2.6769, "step": 17522 }, { "epoch": 0.7413063710973856, "grad_norm": 0.15705753862857819, "learning_rate": 0.001, "loss": 1.4069, "step": 17523 }, { "epoch": 0.7413486758609019, "grad_norm": 0.14743894338607788, "learning_rate": 0.001, "loss": 2.8272, "step": 17524 }, { "epoch": 0.7413909806244183, "grad_norm": 0.1543998420238495, "learning_rate": 0.001, "loss": 3.0686, "step": 17525 }, { "epoch": 0.7414332853879346, "grad_norm": 0.16085979342460632, "learning_rate": 0.001, "loss": 1.5601, "step": 17526 }, { "epoch": 0.741475590151451, "grad_norm": 0.22067369520664215, "learning_rate": 0.001, "loss": 3.7326, "step": 17527 }, { "epoch": 0.7415178949149674, "grad_norm": 0.17484331130981445, "learning_rate": 0.001, "loss": 2.4677, "step": 17528 }, { "epoch": 0.7415601996784837, "grad_norm": 0.3703678548336029, "learning_rate": 0.001, "loss": 2.2413, "step": 17529 }, { "epoch": 0.7416025044420002, "grad_norm": 0.2962591350078583, "learning_rate": 0.001, "loss": 1.7069, "step": 17530 }, { "epoch": 0.7416448092055166, "grad_norm": 0.656477689743042, "learning_rate": 0.001, "loss": 2.4666, "step": 17531 }, { "epoch": 0.7416871139690329, "grad_norm": 0.36826154589653015, "learning_rate": 0.001, "loss": 1.8836, "step": 17532 }, { "epoch": 0.7417294187325493, "grad_norm": 0.17669114470481873, "learning_rate": 0.001, "loss": 1.9258, "step": 17533 }, { "epoch": 0.7417717234960657, "grad_norm": 0.14634403586387634, "learning_rate": 0.001, "loss": 1.8056, "step": 17534 }, { "epoch": 0.741814028259582, "grad_norm": 0.15235856175422668, "learning_rate": 0.001, "loss": 3.3709, "step": 17535 }, { "epoch": 0.7418563330230984, "grad_norm": 0.1723131686449051, "learning_rate": 0.001, "loss": 2.7196, "step": 17536 }, { "epoch": 0.7418986377866148, "grad_norm": 0.1826736032962799, "learning_rate": 0.001, "loss": 2.494, "step": 17537 }, { "epoch": 0.7419409425501311, "grad_norm": 0.16118574142456055, "learning_rate": 0.001, "loss": 2.9765, "step": 17538 }, { "epoch": 0.7419832473136475, "grad_norm": 0.4343286454677582, "learning_rate": 0.001, "loss": 1.7836, "step": 17539 }, { "epoch": 0.7420255520771639, "grad_norm": 1.067611575126648, "learning_rate": 0.001, "loss": 2.923, "step": 17540 }, { "epoch": 0.7420678568406802, "grad_norm": 0.178229421377182, "learning_rate": 0.001, "loss": 2.2143, "step": 17541 }, { "epoch": 0.7421101616041966, "grad_norm": 0.15373486280441284, "learning_rate": 0.001, "loss": 1.7178, "step": 17542 }, { "epoch": 0.742152466367713, "grad_norm": 0.16188517212867737, "learning_rate": 0.001, "loss": 2.612, "step": 17543 }, { "epoch": 0.7421947711312293, "grad_norm": 0.1258399486541748, "learning_rate": 0.001, "loss": 1.7355, "step": 17544 }, { "epoch": 0.7422370758947457, "grad_norm": 0.14767125248908997, "learning_rate": 0.001, "loss": 1.7827, "step": 17545 }, { "epoch": 0.7422793806582622, "grad_norm": 0.8984226584434509, "learning_rate": 0.001, "loss": 2.3823, "step": 17546 }, { "epoch": 0.7423216854217785, "grad_norm": 0.12772268056869507, "learning_rate": 0.001, "loss": 2.3786, "step": 17547 }, { "epoch": 0.7423639901852949, "grad_norm": 0.14280395209789276, "learning_rate": 0.001, "loss": 2.6982, "step": 17548 }, { "epoch": 0.7424062949488113, "grad_norm": 0.15735042095184326, "learning_rate": 0.001, "loss": 2.907, "step": 17549 }, { "epoch": 0.7424485997123276, "grad_norm": 0.22708289325237274, "learning_rate": 0.001, "loss": 2.9517, "step": 17550 }, { "epoch": 0.742490904475844, "grad_norm": 0.5488585233688354, "learning_rate": 0.001, "loss": 3.046, "step": 17551 }, { "epoch": 0.7425332092393604, "grad_norm": 0.15782161056995392, "learning_rate": 0.001, "loss": 2.2142, "step": 17552 }, { "epoch": 0.7425755140028767, "grad_norm": 0.147671177983284, "learning_rate": 0.001, "loss": 2.2613, "step": 17553 }, { "epoch": 0.7426178187663931, "grad_norm": 0.16425222158432007, "learning_rate": 0.001, "loss": 1.8327, "step": 17554 }, { "epoch": 0.7426601235299095, "grad_norm": 0.15929697453975677, "learning_rate": 0.001, "loss": 2.6455, "step": 17555 }, { "epoch": 0.7427024282934258, "grad_norm": 0.16624753177165985, "learning_rate": 0.001, "loss": 1.5187, "step": 17556 }, { "epoch": 0.7427447330569422, "grad_norm": 0.36340129375457764, "learning_rate": 0.001, "loss": 2.103, "step": 17557 }, { "epoch": 0.7427870378204586, "grad_norm": 0.20203012228012085, "learning_rate": 0.001, "loss": 3.6063, "step": 17558 }, { "epoch": 0.7428293425839749, "grad_norm": 0.2196560502052307, "learning_rate": 0.001, "loss": 2.2121, "step": 17559 }, { "epoch": 0.7428716473474913, "grad_norm": 0.16001419723033905, "learning_rate": 0.001, "loss": 2.8535, "step": 17560 }, { "epoch": 0.7429139521110077, "grad_norm": 0.2432798594236374, "learning_rate": 0.001, "loss": 2.1841, "step": 17561 }, { "epoch": 0.742956256874524, "grad_norm": 0.1755453497171402, "learning_rate": 0.001, "loss": 2.4195, "step": 17562 }, { "epoch": 0.7429985616380405, "grad_norm": 0.16746820509433746, "learning_rate": 0.001, "loss": 1.645, "step": 17563 }, { "epoch": 0.7430408664015569, "grad_norm": 0.1702737808227539, "learning_rate": 0.001, "loss": 2.0672, "step": 17564 }, { "epoch": 0.7430831711650732, "grad_norm": 0.15115857124328613, "learning_rate": 0.001, "loss": 1.7634, "step": 17565 }, { "epoch": 0.7431254759285896, "grad_norm": 0.192986860871315, "learning_rate": 0.001, "loss": 2.5294, "step": 17566 }, { "epoch": 0.743167780692106, "grad_norm": 0.1629578322172165, "learning_rate": 0.001, "loss": 1.551, "step": 17567 }, { "epoch": 0.7432100854556223, "grad_norm": 2.344882011413574, "learning_rate": 0.001, "loss": 3.6963, "step": 17568 }, { "epoch": 0.7432523902191387, "grad_norm": 0.19861623644828796, "learning_rate": 0.001, "loss": 2.5566, "step": 17569 }, { "epoch": 0.7432946949826551, "grad_norm": 0.1944287270307541, "learning_rate": 0.001, "loss": 1.4486, "step": 17570 }, { "epoch": 0.7433369997461714, "grad_norm": 0.2017359435558319, "learning_rate": 0.001, "loss": 2.2659, "step": 17571 }, { "epoch": 0.7433793045096878, "grad_norm": 0.19808664917945862, "learning_rate": 0.001, "loss": 2.0332, "step": 17572 }, { "epoch": 0.7434216092732041, "grad_norm": 0.1653938591480255, "learning_rate": 0.001, "loss": 2.2198, "step": 17573 }, { "epoch": 0.7434639140367205, "grad_norm": 0.20208589732646942, "learning_rate": 0.001, "loss": 2.2754, "step": 17574 }, { "epoch": 0.7435062188002369, "grad_norm": 0.1551780104637146, "learning_rate": 0.001, "loss": 1.8926, "step": 17575 }, { "epoch": 0.7435485235637532, "grad_norm": 0.20212167501449585, "learning_rate": 0.001, "loss": 2.2995, "step": 17576 }, { "epoch": 0.7435908283272696, "grad_norm": 0.2041611671447754, "learning_rate": 0.001, "loss": 2.7081, "step": 17577 }, { "epoch": 0.743633133090786, "grad_norm": 0.18079808354377747, "learning_rate": 0.001, "loss": 3.0791, "step": 17578 }, { "epoch": 0.7436754378543023, "grad_norm": 0.15058568120002747, "learning_rate": 0.001, "loss": 2.3326, "step": 17579 }, { "epoch": 0.7437177426178188, "grad_norm": 0.18063566088676453, "learning_rate": 0.001, "loss": 2.0293, "step": 17580 }, { "epoch": 0.7437600473813352, "grad_norm": 0.16308987140655518, "learning_rate": 0.001, "loss": 2.3896, "step": 17581 }, { "epoch": 0.7438023521448515, "grad_norm": 0.14598752558231354, "learning_rate": 0.001, "loss": 1.7756, "step": 17582 }, { "epoch": 0.7438446569083679, "grad_norm": 0.17939159274101257, "learning_rate": 0.001, "loss": 2.0577, "step": 17583 }, { "epoch": 0.7438869616718843, "grad_norm": 0.2707332670688629, "learning_rate": 0.001, "loss": 2.4434, "step": 17584 }, { "epoch": 0.7439292664354006, "grad_norm": 0.15140265226364136, "learning_rate": 0.001, "loss": 1.9017, "step": 17585 }, { "epoch": 0.743971571198917, "grad_norm": 0.18124867975711823, "learning_rate": 0.001, "loss": 2.7051, "step": 17586 }, { "epoch": 0.7440138759624334, "grad_norm": 0.20102323591709137, "learning_rate": 0.001, "loss": 2.3188, "step": 17587 }, { "epoch": 0.7440561807259497, "grad_norm": 1.4687132835388184, "learning_rate": 0.001, "loss": 2.4702, "step": 17588 }, { "epoch": 0.7440984854894661, "grad_norm": 0.4242343604564667, "learning_rate": 0.001, "loss": 1.6438, "step": 17589 }, { "epoch": 0.7441407902529825, "grad_norm": 0.1698838770389557, "learning_rate": 0.001, "loss": 1.8073, "step": 17590 }, { "epoch": 0.7441830950164988, "grad_norm": 0.3089980483055115, "learning_rate": 0.001, "loss": 2.1405, "step": 17591 }, { "epoch": 0.7442253997800152, "grad_norm": 0.8378577828407288, "learning_rate": 0.001, "loss": 1.7228, "step": 17592 }, { "epoch": 0.7442677045435316, "grad_norm": 0.14282885193824768, "learning_rate": 0.001, "loss": 2.7357, "step": 17593 }, { "epoch": 0.7443100093070479, "grad_norm": 0.6160475015640259, "learning_rate": 0.001, "loss": 1.9501, "step": 17594 }, { "epoch": 0.7443523140705643, "grad_norm": 0.17935483157634735, "learning_rate": 0.001, "loss": 2.9295, "step": 17595 }, { "epoch": 0.7443946188340808, "grad_norm": 0.15372052788734436, "learning_rate": 0.001, "loss": 1.741, "step": 17596 }, { "epoch": 0.7444369235975971, "grad_norm": 0.13836802542209625, "learning_rate": 0.001, "loss": 2.3167, "step": 17597 }, { "epoch": 0.7444792283611135, "grad_norm": 0.4473693370819092, "learning_rate": 0.001, "loss": 1.2818, "step": 17598 }, { "epoch": 0.7445215331246299, "grad_norm": 0.1744404435157776, "learning_rate": 0.001, "loss": 1.7035, "step": 17599 }, { "epoch": 0.7445638378881462, "grad_norm": 0.1443067491054535, "learning_rate": 0.001, "loss": 2.1968, "step": 17600 }, { "epoch": 0.7446061426516626, "grad_norm": 0.14389725029468536, "learning_rate": 0.001, "loss": 1.8681, "step": 17601 }, { "epoch": 0.744648447415179, "grad_norm": 0.1405625343322754, "learning_rate": 0.001, "loss": 2.3588, "step": 17602 }, { "epoch": 0.7446907521786953, "grad_norm": 0.16592542827129364, "learning_rate": 0.001, "loss": 1.8942, "step": 17603 }, { "epoch": 0.7447330569422117, "grad_norm": 0.23292699456214905, "learning_rate": 0.001, "loss": 3.1274, "step": 17604 }, { "epoch": 0.7447753617057281, "grad_norm": 0.23173336684703827, "learning_rate": 0.001, "loss": 2.626, "step": 17605 }, { "epoch": 0.7448176664692444, "grad_norm": 0.1321907639503479, "learning_rate": 0.001, "loss": 1.5545, "step": 17606 }, { "epoch": 0.7448599712327608, "grad_norm": 0.15251246094703674, "learning_rate": 0.001, "loss": 1.5877, "step": 17607 }, { "epoch": 0.7449022759962772, "grad_norm": 0.16497798264026642, "learning_rate": 0.001, "loss": 2.6468, "step": 17608 }, { "epoch": 0.7449445807597935, "grad_norm": 0.15258750319480896, "learning_rate": 0.001, "loss": 2.7736, "step": 17609 }, { "epoch": 0.7449868855233099, "grad_norm": 0.1986195147037506, "learning_rate": 0.001, "loss": 2.0389, "step": 17610 }, { "epoch": 0.7450291902868263, "grad_norm": 0.13651925325393677, "learning_rate": 0.001, "loss": 1.9696, "step": 17611 }, { "epoch": 0.7450714950503426, "grad_norm": 0.17349354922771454, "learning_rate": 0.001, "loss": 1.5452, "step": 17612 }, { "epoch": 0.7451137998138591, "grad_norm": 0.18067964911460876, "learning_rate": 0.001, "loss": 1.5918, "step": 17613 }, { "epoch": 0.7451561045773755, "grad_norm": 91.02615356445312, "learning_rate": 0.001, "loss": 1.9013, "step": 17614 }, { "epoch": 0.7451984093408918, "grad_norm": 0.17014148831367493, "learning_rate": 0.001, "loss": 2.085, "step": 17615 }, { "epoch": 0.7452407141044082, "grad_norm": 0.6066452860832214, "learning_rate": 0.001, "loss": 2.2974, "step": 17616 }, { "epoch": 0.7452830188679245, "grad_norm": 0.1638554334640503, "learning_rate": 0.001, "loss": 1.9358, "step": 17617 }, { "epoch": 0.7453253236314409, "grad_norm": 0.17447148263454437, "learning_rate": 0.001, "loss": 2.4924, "step": 17618 }, { "epoch": 0.7453676283949573, "grad_norm": 0.21377401053905487, "learning_rate": 0.001, "loss": 1.7172, "step": 17619 }, { "epoch": 0.7454099331584736, "grad_norm": 0.21663278341293335, "learning_rate": 0.001, "loss": 1.6387, "step": 17620 }, { "epoch": 0.74545223792199, "grad_norm": 0.1757373809814453, "learning_rate": 0.001, "loss": 2.1301, "step": 17621 }, { "epoch": 0.7454945426855064, "grad_norm": 0.2108677327632904, "learning_rate": 0.001, "loss": 1.9894, "step": 17622 }, { "epoch": 0.7455368474490227, "grad_norm": 0.23199456930160522, "learning_rate": 0.001, "loss": 1.837, "step": 17623 }, { "epoch": 0.7455791522125391, "grad_norm": 0.2354292869567871, "learning_rate": 0.001, "loss": 1.8365, "step": 17624 }, { "epoch": 0.7456214569760555, "grad_norm": 0.2139493077993393, "learning_rate": 0.001, "loss": 2.3974, "step": 17625 }, { "epoch": 0.7456637617395718, "grad_norm": 0.21422770619392395, "learning_rate": 0.001, "loss": 3.0719, "step": 17626 }, { "epoch": 0.7457060665030882, "grad_norm": 0.28964513540267944, "learning_rate": 0.001, "loss": 1.8213, "step": 17627 }, { "epoch": 0.7457483712666046, "grad_norm": 0.148183211684227, "learning_rate": 0.001, "loss": 1.9446, "step": 17628 }, { "epoch": 0.745790676030121, "grad_norm": 0.18475840985774994, "learning_rate": 0.001, "loss": 2.2334, "step": 17629 }, { "epoch": 0.7458329807936374, "grad_norm": 0.15556128323078156, "learning_rate": 0.001, "loss": 2.723, "step": 17630 }, { "epoch": 0.7458752855571538, "grad_norm": 0.23832905292510986, "learning_rate": 0.001, "loss": 2.1825, "step": 17631 }, { "epoch": 0.7459175903206701, "grad_norm": 0.4453236162662506, "learning_rate": 0.001, "loss": 2.9713, "step": 17632 }, { "epoch": 0.7459598950841865, "grad_norm": 0.17969189584255219, "learning_rate": 0.001, "loss": 2.0573, "step": 17633 }, { "epoch": 0.7460021998477029, "grad_norm": 0.32808932662010193, "learning_rate": 0.001, "loss": 1.9398, "step": 17634 }, { "epoch": 0.7460445046112192, "grad_norm": 0.1659063994884491, "learning_rate": 0.001, "loss": 1.8874, "step": 17635 }, { "epoch": 0.7460868093747356, "grad_norm": 0.14386507868766785, "learning_rate": 0.001, "loss": 2.0049, "step": 17636 }, { "epoch": 0.746129114138252, "grad_norm": 0.174132838845253, "learning_rate": 0.001, "loss": 2.1084, "step": 17637 }, { "epoch": 0.7461714189017683, "grad_norm": 15.538298606872559, "learning_rate": 0.001, "loss": 2.0439, "step": 17638 }, { "epoch": 0.7462137236652847, "grad_norm": 2.191509962081909, "learning_rate": 0.001, "loss": 2.0302, "step": 17639 }, { "epoch": 0.7462560284288011, "grad_norm": 0.2102295458316803, "learning_rate": 0.001, "loss": 2.0219, "step": 17640 }, { "epoch": 0.7462983331923174, "grad_norm": 0.4853608310222626, "learning_rate": 0.001, "loss": 1.6501, "step": 17641 }, { "epoch": 0.7463406379558338, "grad_norm": 0.36355969309806824, "learning_rate": 0.001, "loss": 1.8691, "step": 17642 }, { "epoch": 0.7463829427193502, "grad_norm": 0.1756628006696701, "learning_rate": 0.001, "loss": 2.27, "step": 17643 }, { "epoch": 0.7464252474828665, "grad_norm": 0.1729697287082672, "learning_rate": 0.001, "loss": 3.3898, "step": 17644 }, { "epoch": 0.746467552246383, "grad_norm": 0.17120568454265594, "learning_rate": 0.001, "loss": 2.0535, "step": 17645 }, { "epoch": 0.7465098570098994, "grad_norm": 0.18185213208198547, "learning_rate": 0.001, "loss": 1.8239, "step": 17646 }, { "epoch": 0.7465521617734157, "grad_norm": 0.13639383018016815, "learning_rate": 0.001, "loss": 1.7961, "step": 17647 }, { "epoch": 0.7465944665369321, "grad_norm": 2.5754916667938232, "learning_rate": 0.001, "loss": 1.9791, "step": 17648 }, { "epoch": 0.7466367713004485, "grad_norm": 0.13714253902435303, "learning_rate": 0.001, "loss": 1.497, "step": 17649 }, { "epoch": 0.7466790760639648, "grad_norm": 1.1888046264648438, "learning_rate": 0.001, "loss": 1.7414, "step": 17650 }, { "epoch": 0.7467213808274812, "grad_norm": 0.1794203370809555, "learning_rate": 0.001, "loss": 1.3829, "step": 17651 }, { "epoch": 0.7467636855909976, "grad_norm": 2.2134206295013428, "learning_rate": 0.001, "loss": 1.8633, "step": 17652 }, { "epoch": 0.7468059903545139, "grad_norm": 0.1946529895067215, "learning_rate": 0.001, "loss": 2.1527, "step": 17653 }, { "epoch": 0.7468482951180303, "grad_norm": 0.18843619525432587, "learning_rate": 0.001, "loss": 1.8838, "step": 17654 }, { "epoch": 0.7468905998815467, "grad_norm": 0.30551382899284363, "learning_rate": 0.001, "loss": 2.2091, "step": 17655 }, { "epoch": 0.746932904645063, "grad_norm": 0.18397283554077148, "learning_rate": 0.001, "loss": 1.8819, "step": 17656 }, { "epoch": 0.7469752094085794, "grad_norm": 0.4012526273727417, "learning_rate": 0.001, "loss": 2.8932, "step": 17657 }, { "epoch": 0.7470175141720958, "grad_norm": 0.1578911393880844, "learning_rate": 0.001, "loss": 2.5474, "step": 17658 }, { "epoch": 0.7470598189356121, "grad_norm": 0.16640107333660126, "learning_rate": 0.001, "loss": 1.9356, "step": 17659 }, { "epoch": 0.7471021236991285, "grad_norm": 9.117905616760254, "learning_rate": 0.001, "loss": 2.1185, "step": 17660 }, { "epoch": 0.7471444284626448, "grad_norm": 0.7601824998855591, "learning_rate": 0.001, "loss": 2.4204, "step": 17661 }, { "epoch": 0.7471867332261612, "grad_norm": 0.2028346061706543, "learning_rate": 0.001, "loss": 2.3328, "step": 17662 }, { "epoch": 0.7472290379896777, "grad_norm": 0.4436508119106293, "learning_rate": 0.001, "loss": 2.5131, "step": 17663 }, { "epoch": 0.747271342753194, "grad_norm": 0.19675542414188385, "learning_rate": 0.001, "loss": 2.5793, "step": 17664 }, { "epoch": 0.7473136475167104, "grad_norm": 0.22706976532936096, "learning_rate": 0.001, "loss": 2.364, "step": 17665 }, { "epoch": 0.7473559522802268, "grad_norm": 0.19858993589878082, "learning_rate": 0.001, "loss": 2.0174, "step": 17666 }, { "epoch": 0.7473982570437431, "grad_norm": 0.20804426074028015, "learning_rate": 0.001, "loss": 1.8808, "step": 17667 }, { "epoch": 0.7474405618072595, "grad_norm": 0.21756567060947418, "learning_rate": 0.001, "loss": 1.7364, "step": 17668 }, { "epoch": 0.7474828665707759, "grad_norm": 17.11835479736328, "learning_rate": 0.001, "loss": 2.8228, "step": 17669 }, { "epoch": 0.7475251713342922, "grad_norm": 0.1679975390434265, "learning_rate": 0.001, "loss": 2.387, "step": 17670 }, { "epoch": 0.7475674760978086, "grad_norm": 0.16715805232524872, "learning_rate": 0.001, "loss": 2.7919, "step": 17671 }, { "epoch": 0.747609780861325, "grad_norm": 0.17563515901565552, "learning_rate": 0.001, "loss": 2.2313, "step": 17672 }, { "epoch": 0.7476520856248413, "grad_norm": 0.16387246549129486, "learning_rate": 0.001, "loss": 1.6362, "step": 17673 }, { "epoch": 0.7476943903883577, "grad_norm": 0.1781141310930252, "learning_rate": 0.001, "loss": 4.1604, "step": 17674 }, { "epoch": 0.7477366951518741, "grad_norm": 0.17480358481407166, "learning_rate": 0.001, "loss": 1.872, "step": 17675 }, { "epoch": 0.7477789999153904, "grad_norm": 0.192658469080925, "learning_rate": 0.001, "loss": 1.8116, "step": 17676 }, { "epoch": 0.7478213046789068, "grad_norm": 0.2537101209163666, "learning_rate": 0.001, "loss": 3.9484, "step": 17677 }, { "epoch": 0.7478636094424232, "grad_norm": 1.3975545167922974, "learning_rate": 0.001, "loss": 2.4941, "step": 17678 }, { "epoch": 0.7479059142059395, "grad_norm": 0.21007265150547028, "learning_rate": 0.001, "loss": 2.1936, "step": 17679 }, { "epoch": 0.747948218969456, "grad_norm": 0.20976990461349487, "learning_rate": 0.001, "loss": 2.3444, "step": 17680 }, { "epoch": 0.7479905237329724, "grad_norm": 0.28432410955429077, "learning_rate": 0.001, "loss": 2.4682, "step": 17681 }, { "epoch": 0.7480328284964887, "grad_norm": 0.17756503820419312, "learning_rate": 0.001, "loss": 3.1183, "step": 17682 }, { "epoch": 0.7480751332600051, "grad_norm": 0.148085817694664, "learning_rate": 0.001, "loss": 1.5892, "step": 17683 }, { "epoch": 0.7481174380235215, "grad_norm": 0.15588833391666412, "learning_rate": 0.001, "loss": 2.1042, "step": 17684 }, { "epoch": 0.7481597427870378, "grad_norm": 0.16696330904960632, "learning_rate": 0.001, "loss": 1.4378, "step": 17685 }, { "epoch": 0.7482020475505542, "grad_norm": 0.14883705973625183, "learning_rate": 0.001, "loss": 1.5751, "step": 17686 }, { "epoch": 0.7482443523140706, "grad_norm": 0.8387157320976257, "learning_rate": 0.001, "loss": 4.0032, "step": 17687 }, { "epoch": 0.7482866570775869, "grad_norm": 0.16816678643226624, "learning_rate": 0.001, "loss": 2.0232, "step": 17688 }, { "epoch": 0.7483289618411033, "grad_norm": 0.17581240832805634, "learning_rate": 0.001, "loss": 2.221, "step": 17689 }, { "epoch": 0.7483712666046197, "grad_norm": 0.20603400468826294, "learning_rate": 0.001, "loss": 1.6247, "step": 17690 }, { "epoch": 0.748413571368136, "grad_norm": 1.0062761306762695, "learning_rate": 0.001, "loss": 2.0145, "step": 17691 }, { "epoch": 0.7484558761316524, "grad_norm": 0.1723836064338684, "learning_rate": 0.001, "loss": 2.6105, "step": 17692 }, { "epoch": 0.7484981808951688, "grad_norm": 0.1439078003168106, "learning_rate": 0.001, "loss": 2.2332, "step": 17693 }, { "epoch": 0.7485404856586851, "grad_norm": 0.19162747263908386, "learning_rate": 0.001, "loss": 2.2636, "step": 17694 }, { "epoch": 0.7485827904222015, "grad_norm": 0.2729785740375519, "learning_rate": 0.001, "loss": 2.4435, "step": 17695 }, { "epoch": 0.748625095185718, "grad_norm": 0.9558886885643005, "learning_rate": 0.001, "loss": 2.0409, "step": 17696 }, { "epoch": 0.7486673999492343, "grad_norm": 0.1588534116744995, "learning_rate": 0.001, "loss": 2.9943, "step": 17697 }, { "epoch": 0.7487097047127507, "grad_norm": 0.21727414429187775, "learning_rate": 0.001, "loss": 1.9491, "step": 17698 }, { "epoch": 0.7487520094762671, "grad_norm": 0.18474645912647247, "learning_rate": 0.001, "loss": 2.6354, "step": 17699 }, { "epoch": 0.7487943142397834, "grad_norm": 0.23161320388317108, "learning_rate": 0.001, "loss": 2.2559, "step": 17700 }, { "epoch": 0.7488366190032998, "grad_norm": 0.16369740664958954, "learning_rate": 0.001, "loss": 1.8356, "step": 17701 }, { "epoch": 0.7488789237668162, "grad_norm": 0.17817635834217072, "learning_rate": 0.001, "loss": 1.8854, "step": 17702 }, { "epoch": 0.7489212285303325, "grad_norm": 0.18831977248191833, "learning_rate": 0.001, "loss": 1.8879, "step": 17703 }, { "epoch": 0.7489635332938489, "grad_norm": 0.4837913513183594, "learning_rate": 0.001, "loss": 1.9171, "step": 17704 }, { "epoch": 0.7490058380573653, "grad_norm": 0.5666935443878174, "learning_rate": 0.001, "loss": 3.624, "step": 17705 }, { "epoch": 0.7490481428208816, "grad_norm": 0.16191402077674866, "learning_rate": 0.001, "loss": 3.664, "step": 17706 }, { "epoch": 0.749090447584398, "grad_norm": 0.16512976586818695, "learning_rate": 0.001, "loss": 1.9419, "step": 17707 }, { "epoch": 0.7491327523479143, "grad_norm": 0.15622422099113464, "learning_rate": 0.001, "loss": 2.814, "step": 17708 }, { "epoch": 0.7491750571114307, "grad_norm": 0.17258132994174957, "learning_rate": 0.001, "loss": 2.0253, "step": 17709 }, { "epoch": 0.7492173618749471, "grad_norm": 0.19456447660923004, "learning_rate": 0.001, "loss": 2.9622, "step": 17710 }, { "epoch": 0.7492596666384634, "grad_norm": 2.774994134902954, "learning_rate": 0.001, "loss": 2.2187, "step": 17711 }, { "epoch": 0.7493019714019798, "grad_norm": 0.17640100419521332, "learning_rate": 0.001, "loss": 1.976, "step": 17712 }, { "epoch": 0.7493442761654963, "grad_norm": 0.17278911173343658, "learning_rate": 0.001, "loss": 2.401, "step": 17713 }, { "epoch": 0.7493865809290126, "grad_norm": 0.20537172257900238, "learning_rate": 0.001, "loss": 3.0728, "step": 17714 }, { "epoch": 0.749428885692529, "grad_norm": 0.35584142804145813, "learning_rate": 0.001, "loss": 3.3195, "step": 17715 }, { "epoch": 0.7494711904560454, "grad_norm": 0.20729830861091614, "learning_rate": 0.001, "loss": 2.5783, "step": 17716 }, { "epoch": 0.7495134952195617, "grad_norm": 0.1861492097377777, "learning_rate": 0.001, "loss": 1.9174, "step": 17717 }, { "epoch": 0.7495557999830781, "grad_norm": 0.18184538185596466, "learning_rate": 0.001, "loss": 2.2683, "step": 17718 }, { "epoch": 0.7495981047465945, "grad_norm": 0.15846717357635498, "learning_rate": 0.001, "loss": 1.5975, "step": 17719 }, { "epoch": 0.7496404095101108, "grad_norm": 0.15116605162620544, "learning_rate": 0.001, "loss": 1.2862, "step": 17720 }, { "epoch": 0.7496827142736272, "grad_norm": 0.15482908487319946, "learning_rate": 0.001, "loss": 2.0605, "step": 17721 }, { "epoch": 0.7497250190371436, "grad_norm": 0.1819373369216919, "learning_rate": 0.001, "loss": 2.8881, "step": 17722 }, { "epoch": 0.7497673238006599, "grad_norm": 0.16253606975078583, "learning_rate": 0.001, "loss": 1.448, "step": 17723 }, { "epoch": 0.7498096285641763, "grad_norm": 0.15486584603786469, "learning_rate": 0.001, "loss": 2.7328, "step": 17724 }, { "epoch": 0.7498519333276927, "grad_norm": 0.14722788333892822, "learning_rate": 0.001, "loss": 2.3268, "step": 17725 }, { "epoch": 0.749894238091209, "grad_norm": 0.928293764591217, "learning_rate": 0.001, "loss": 2.1948, "step": 17726 }, { "epoch": 0.7499365428547254, "grad_norm": 0.14269466698169708, "learning_rate": 0.001, "loss": 1.9547, "step": 17727 }, { "epoch": 0.7499788476182419, "grad_norm": 0.7089377641677856, "learning_rate": 0.001, "loss": 2.996, "step": 17728 }, { "epoch": 0.7500211523817581, "grad_norm": 2.6846911907196045, "learning_rate": 0.001, "loss": 1.8741, "step": 17729 }, { "epoch": 0.7500634571452746, "grad_norm": 0.13764113187789917, "learning_rate": 0.001, "loss": 2.2695, "step": 17730 }, { "epoch": 0.750105761908791, "grad_norm": 0.16455744206905365, "learning_rate": 0.001, "loss": 1.9604, "step": 17731 }, { "epoch": 0.7501480666723073, "grad_norm": 1.5582802295684814, "learning_rate": 0.001, "loss": 2.6966, "step": 17732 }, { "epoch": 0.7501903714358237, "grad_norm": 0.15412558615207672, "learning_rate": 0.001, "loss": 1.8025, "step": 17733 }, { "epoch": 0.7502326761993401, "grad_norm": 0.17679987847805023, "learning_rate": 0.001, "loss": 3.0607, "step": 17734 }, { "epoch": 0.7502749809628564, "grad_norm": 0.2634994685649872, "learning_rate": 0.001, "loss": 2.326, "step": 17735 }, { "epoch": 0.7503172857263728, "grad_norm": 0.18038220703601837, "learning_rate": 0.001, "loss": 1.9938, "step": 17736 }, { "epoch": 0.7503595904898892, "grad_norm": 0.1706598848104477, "learning_rate": 0.001, "loss": 2.8744, "step": 17737 }, { "epoch": 0.7504018952534055, "grad_norm": 0.16422058641910553, "learning_rate": 0.001, "loss": 2.5134, "step": 17738 }, { "epoch": 0.7504442000169219, "grad_norm": 0.1558268815279007, "learning_rate": 0.001, "loss": 2.8615, "step": 17739 }, { "epoch": 0.7504865047804383, "grad_norm": 0.15481281280517578, "learning_rate": 0.001, "loss": 2.537, "step": 17740 }, { "epoch": 0.7505288095439546, "grad_norm": 0.1353859305381775, "learning_rate": 0.001, "loss": 1.9273, "step": 17741 }, { "epoch": 0.750571114307471, "grad_norm": 0.1670721471309662, "learning_rate": 0.001, "loss": 2.0898, "step": 17742 }, { "epoch": 0.7506134190709874, "grad_norm": 0.17640924453735352, "learning_rate": 0.001, "loss": 1.8176, "step": 17743 }, { "epoch": 0.7506557238345037, "grad_norm": 0.18629953265190125, "learning_rate": 0.001, "loss": 2.1797, "step": 17744 }, { "epoch": 0.7506980285980202, "grad_norm": 0.2514755129814148, "learning_rate": 0.001, "loss": 3.1144, "step": 17745 }, { "epoch": 0.7507403333615366, "grad_norm": 2.408249855041504, "learning_rate": 0.001, "loss": 1.7701, "step": 17746 }, { "epoch": 0.7507826381250529, "grad_norm": 1.3865883350372314, "learning_rate": 0.001, "loss": 2.3281, "step": 17747 }, { "epoch": 0.7508249428885693, "grad_norm": 0.16517174243927002, "learning_rate": 0.001, "loss": 2.4022, "step": 17748 }, { "epoch": 0.7508672476520857, "grad_norm": 7.395416736602783, "learning_rate": 0.001, "loss": 2.9588, "step": 17749 }, { "epoch": 0.750909552415602, "grad_norm": 0.24712392687797546, "learning_rate": 0.001, "loss": 2.1324, "step": 17750 }, { "epoch": 0.7509518571791184, "grad_norm": 0.14635969698429108, "learning_rate": 0.001, "loss": 1.5613, "step": 17751 }, { "epoch": 0.7509941619426347, "grad_norm": 0.16035181283950806, "learning_rate": 0.001, "loss": 2.7492, "step": 17752 }, { "epoch": 0.7510364667061511, "grad_norm": 0.17972306907176971, "learning_rate": 0.001, "loss": 2.2771, "step": 17753 }, { "epoch": 0.7510787714696675, "grad_norm": 0.2305939793586731, "learning_rate": 0.001, "loss": 1.5971, "step": 17754 }, { "epoch": 0.7511210762331838, "grad_norm": 0.21389088034629822, "learning_rate": 0.001, "loss": 2.2783, "step": 17755 }, { "epoch": 0.7511633809967002, "grad_norm": 0.17007265985012054, "learning_rate": 0.001, "loss": 2.0106, "step": 17756 }, { "epoch": 0.7512056857602166, "grad_norm": 0.18640732765197754, "learning_rate": 0.001, "loss": 1.8577, "step": 17757 }, { "epoch": 0.7512479905237329, "grad_norm": 15.663119316101074, "learning_rate": 0.001, "loss": 2.9581, "step": 17758 }, { "epoch": 0.7512902952872493, "grad_norm": 4.945131778717041, "learning_rate": 0.001, "loss": 2.8615, "step": 17759 }, { "epoch": 0.7513326000507657, "grad_norm": 0.16905252635478973, "learning_rate": 0.001, "loss": 2.2082, "step": 17760 }, { "epoch": 0.751374904814282, "grad_norm": 0.18479637801647186, "learning_rate": 0.001, "loss": 1.8339, "step": 17761 }, { "epoch": 0.7514172095777985, "grad_norm": 0.16893810033798218, "learning_rate": 0.001, "loss": 3.0538, "step": 17762 }, { "epoch": 0.7514595143413149, "grad_norm": 0.4891713559627533, "learning_rate": 0.001, "loss": 2.1883, "step": 17763 }, { "epoch": 0.7515018191048312, "grad_norm": 2.8719229698181152, "learning_rate": 0.001, "loss": 2.5028, "step": 17764 }, { "epoch": 0.7515441238683476, "grad_norm": 0.335845947265625, "learning_rate": 0.001, "loss": 1.8828, "step": 17765 }, { "epoch": 0.751586428631864, "grad_norm": 0.17647729814052582, "learning_rate": 0.001, "loss": 1.4358, "step": 17766 }, { "epoch": 0.7516287333953803, "grad_norm": 38.300811767578125, "learning_rate": 0.001, "loss": 2.461, "step": 17767 }, { "epoch": 0.7516710381588967, "grad_norm": 0.25352421402931213, "learning_rate": 0.001, "loss": 2.5402, "step": 17768 }, { "epoch": 0.7517133429224131, "grad_norm": 0.1729395091533661, "learning_rate": 0.001, "loss": 1.8336, "step": 17769 }, { "epoch": 0.7517556476859294, "grad_norm": 0.2317625731229782, "learning_rate": 0.001, "loss": 2.022, "step": 17770 }, { "epoch": 0.7517979524494458, "grad_norm": 23.282867431640625, "learning_rate": 0.001, "loss": 3.9744, "step": 17771 }, { "epoch": 0.7518402572129622, "grad_norm": 0.25208935141563416, "learning_rate": 0.001, "loss": 2.7925, "step": 17772 }, { "epoch": 0.7518825619764785, "grad_norm": 0.16493536531925201, "learning_rate": 0.001, "loss": 1.7565, "step": 17773 }, { "epoch": 0.7519248667399949, "grad_norm": 0.8458042144775391, "learning_rate": 0.001, "loss": 2.9931, "step": 17774 }, { "epoch": 0.7519671715035113, "grad_norm": 0.22725796699523926, "learning_rate": 0.001, "loss": 2.1438, "step": 17775 }, { "epoch": 0.7520094762670276, "grad_norm": 0.15165364742279053, "learning_rate": 0.001, "loss": 1.9869, "step": 17776 }, { "epoch": 0.752051781030544, "grad_norm": 0.1665864735841751, "learning_rate": 0.001, "loss": 1.4288, "step": 17777 }, { "epoch": 0.7520940857940605, "grad_norm": 0.20887833833694458, "learning_rate": 0.001, "loss": 2.0971, "step": 17778 }, { "epoch": 0.7521363905575768, "grad_norm": 0.6601106524467468, "learning_rate": 0.001, "loss": 1.7179, "step": 17779 }, { "epoch": 0.7521786953210932, "grad_norm": 1.797553300857544, "learning_rate": 0.001, "loss": 1.86, "step": 17780 }, { "epoch": 0.7522210000846096, "grad_norm": 0.6821169853210449, "learning_rate": 0.001, "loss": 1.9908, "step": 17781 }, { "epoch": 0.7522633048481259, "grad_norm": 0.24313047528266907, "learning_rate": 0.001, "loss": 1.7132, "step": 17782 }, { "epoch": 0.7523056096116423, "grad_norm": 0.16890186071395874, "learning_rate": 0.001, "loss": 2.7889, "step": 17783 }, { "epoch": 0.7523479143751587, "grad_norm": 0.3041240870952606, "learning_rate": 0.001, "loss": 1.5615, "step": 17784 }, { "epoch": 0.752390219138675, "grad_norm": 0.17701970040798187, "learning_rate": 0.001, "loss": 1.9374, "step": 17785 }, { "epoch": 0.7524325239021914, "grad_norm": 0.16033196449279785, "learning_rate": 0.001, "loss": 1.8036, "step": 17786 }, { "epoch": 0.7524748286657078, "grad_norm": 0.18691828846931458, "learning_rate": 0.001, "loss": 2.0472, "step": 17787 }, { "epoch": 0.7525171334292241, "grad_norm": 0.20954419672489166, "learning_rate": 0.001, "loss": 1.2313, "step": 17788 }, { "epoch": 0.7525594381927405, "grad_norm": 0.17444777488708496, "learning_rate": 0.001, "loss": 2.1319, "step": 17789 }, { "epoch": 0.7526017429562569, "grad_norm": 0.1553867757320404, "learning_rate": 0.001, "loss": 2.1579, "step": 17790 }, { "epoch": 0.7526440477197732, "grad_norm": 0.16180811822414398, "learning_rate": 0.001, "loss": 1.8497, "step": 17791 }, { "epoch": 0.7526863524832896, "grad_norm": 0.16590484976768494, "learning_rate": 0.001, "loss": 1.7005, "step": 17792 }, { "epoch": 0.752728657246806, "grad_norm": 0.161569282412529, "learning_rate": 0.001, "loss": 1.9812, "step": 17793 }, { "epoch": 0.7527709620103223, "grad_norm": 0.1726117730140686, "learning_rate": 0.001, "loss": 1.6706, "step": 17794 }, { "epoch": 0.7528132667738388, "grad_norm": 1.3740065097808838, "learning_rate": 0.001, "loss": 3.4836, "step": 17795 }, { "epoch": 0.7528555715373552, "grad_norm": 0.20311777293682098, "learning_rate": 0.001, "loss": 3.3955, "step": 17796 }, { "epoch": 0.7528978763008715, "grad_norm": 0.1724340319633484, "learning_rate": 0.001, "loss": 2.1475, "step": 17797 }, { "epoch": 0.7529401810643879, "grad_norm": 0.15523028373718262, "learning_rate": 0.001, "loss": 2.4205, "step": 17798 }, { "epoch": 0.7529824858279042, "grad_norm": 0.16621935367584229, "learning_rate": 0.001, "loss": 2.1048, "step": 17799 }, { "epoch": 0.7530247905914206, "grad_norm": 0.1458512395620346, "learning_rate": 0.001, "loss": 2.0226, "step": 17800 }, { "epoch": 0.753067095354937, "grad_norm": 1.5626475811004639, "learning_rate": 0.001, "loss": 1.4764, "step": 17801 }, { "epoch": 0.7531094001184533, "grad_norm": 0.17712949216365814, "learning_rate": 0.001, "loss": 2.0474, "step": 17802 }, { "epoch": 0.7531517048819697, "grad_norm": 0.184153750538826, "learning_rate": 0.001, "loss": 1.932, "step": 17803 }, { "epoch": 0.7531940096454861, "grad_norm": 0.17124585807323456, "learning_rate": 0.001, "loss": 1.8677, "step": 17804 }, { "epoch": 0.7532363144090024, "grad_norm": 0.18082095682621002, "learning_rate": 0.001, "loss": 2.0149, "step": 17805 }, { "epoch": 0.7532786191725188, "grad_norm": 0.4178064465522766, "learning_rate": 0.001, "loss": 2.1771, "step": 17806 }, { "epoch": 0.7533209239360352, "grad_norm": 0.16732126474380493, "learning_rate": 0.001, "loss": 2.033, "step": 17807 }, { "epoch": 0.7533632286995515, "grad_norm": 0.6002020239830017, "learning_rate": 0.001, "loss": 2.7392, "step": 17808 }, { "epoch": 0.7534055334630679, "grad_norm": 0.1773737519979477, "learning_rate": 0.001, "loss": 1.8149, "step": 17809 }, { "epoch": 0.7534478382265843, "grad_norm": 1.7740546464920044, "learning_rate": 0.001, "loss": 2.1816, "step": 17810 }, { "epoch": 0.7534901429901006, "grad_norm": 0.6229245066642761, "learning_rate": 0.001, "loss": 1.4621, "step": 17811 }, { "epoch": 0.753532447753617, "grad_norm": 1.3649619817733765, "learning_rate": 0.001, "loss": 1.8576, "step": 17812 }, { "epoch": 0.7535747525171335, "grad_norm": 0.14693179726600647, "learning_rate": 0.001, "loss": 1.8112, "step": 17813 }, { "epoch": 0.7536170572806498, "grad_norm": 0.5923940539360046, "learning_rate": 0.001, "loss": 2.9757, "step": 17814 }, { "epoch": 0.7536593620441662, "grad_norm": 0.2543085217475891, "learning_rate": 0.001, "loss": 3.2552, "step": 17815 }, { "epoch": 0.7537016668076826, "grad_norm": 0.1711706966161728, "learning_rate": 0.001, "loss": 1.7678, "step": 17816 }, { "epoch": 0.7537439715711989, "grad_norm": 0.1597229242324829, "learning_rate": 0.001, "loss": 2.3738, "step": 17817 }, { "epoch": 0.7537862763347153, "grad_norm": 1.7806466817855835, "learning_rate": 0.001, "loss": 3.6486, "step": 17818 }, { "epoch": 0.7538285810982317, "grad_norm": 0.1596599519252777, "learning_rate": 0.001, "loss": 3.2813, "step": 17819 }, { "epoch": 0.753870885861748, "grad_norm": 0.4388027787208557, "learning_rate": 0.001, "loss": 2.2502, "step": 17820 }, { "epoch": 0.7539131906252644, "grad_norm": 0.15271490812301636, "learning_rate": 0.001, "loss": 3.0926, "step": 17821 }, { "epoch": 0.7539554953887808, "grad_norm": 0.19142945110797882, "learning_rate": 0.001, "loss": 1.8214, "step": 17822 }, { "epoch": 0.7539978001522971, "grad_norm": 0.18499502539634705, "learning_rate": 0.001, "loss": 1.8132, "step": 17823 }, { "epoch": 0.7540401049158135, "grad_norm": 0.21583931148052216, "learning_rate": 0.001, "loss": 2.8415, "step": 17824 }, { "epoch": 0.7540824096793299, "grad_norm": 0.12202106416225433, "learning_rate": 0.001, "loss": 1.5671, "step": 17825 }, { "epoch": 0.7541247144428462, "grad_norm": 0.17914700508117676, "learning_rate": 0.001, "loss": 2.0909, "step": 17826 }, { "epoch": 0.7541670192063626, "grad_norm": 0.17888757586479187, "learning_rate": 0.001, "loss": 1.953, "step": 17827 }, { "epoch": 0.754209323969879, "grad_norm": 0.16106991469860077, "learning_rate": 0.001, "loss": 1.9447, "step": 17828 }, { "epoch": 0.7542516287333954, "grad_norm": 0.13767673075199127, "learning_rate": 0.001, "loss": 2.0734, "step": 17829 }, { "epoch": 0.7542939334969118, "grad_norm": 0.20107002556324005, "learning_rate": 0.001, "loss": 2.2336, "step": 17830 }, { "epoch": 0.7543362382604282, "grad_norm": 0.1628139615058899, "learning_rate": 0.001, "loss": 2.8548, "step": 17831 }, { "epoch": 0.7543785430239445, "grad_norm": 0.20057493448257446, "learning_rate": 0.001, "loss": 1.914, "step": 17832 }, { "epoch": 0.7544208477874609, "grad_norm": 0.1650390923023224, "learning_rate": 0.001, "loss": 2.3508, "step": 17833 }, { "epoch": 0.7544631525509773, "grad_norm": 0.14668241143226624, "learning_rate": 0.001, "loss": 2.1428, "step": 17834 }, { "epoch": 0.7545054573144936, "grad_norm": 0.24032439291477203, "learning_rate": 0.001, "loss": 2.4814, "step": 17835 }, { "epoch": 0.75454776207801, "grad_norm": 0.1620781570672989, "learning_rate": 0.001, "loss": 2.1885, "step": 17836 }, { "epoch": 0.7545900668415264, "grad_norm": 0.5074538588523865, "learning_rate": 0.001, "loss": 2.2945, "step": 17837 }, { "epoch": 0.7546323716050427, "grad_norm": 0.1588655561208725, "learning_rate": 0.001, "loss": 1.9829, "step": 17838 }, { "epoch": 0.7546746763685591, "grad_norm": 0.13773521780967712, "learning_rate": 0.001, "loss": 2.0773, "step": 17839 }, { "epoch": 0.7547169811320755, "grad_norm": 0.16354626417160034, "learning_rate": 0.001, "loss": 1.9614, "step": 17840 }, { "epoch": 0.7547592858955918, "grad_norm": 0.15736053884029388, "learning_rate": 0.001, "loss": 1.4906, "step": 17841 }, { "epoch": 0.7548015906591082, "grad_norm": 0.13895869255065918, "learning_rate": 0.001, "loss": 1.3762, "step": 17842 }, { "epoch": 0.7548438954226245, "grad_norm": 0.16241589188575745, "learning_rate": 0.001, "loss": 2.8513, "step": 17843 }, { "epoch": 0.7548862001861409, "grad_norm": 0.1497146338224411, "learning_rate": 0.001, "loss": 1.982, "step": 17844 }, { "epoch": 0.7549285049496574, "grad_norm": 0.6093889474868774, "learning_rate": 0.001, "loss": 2.6218, "step": 17845 }, { "epoch": 0.7549708097131737, "grad_norm": 0.2828693091869354, "learning_rate": 0.001, "loss": 2.0783, "step": 17846 }, { "epoch": 0.7550131144766901, "grad_norm": 0.17641054093837738, "learning_rate": 0.001, "loss": 2.1691, "step": 17847 }, { "epoch": 0.7550554192402065, "grad_norm": 0.4624340534210205, "learning_rate": 0.001, "loss": 1.5838, "step": 17848 }, { "epoch": 0.7550977240037228, "grad_norm": 0.18794561922550201, "learning_rate": 0.001, "loss": 2.8615, "step": 17849 }, { "epoch": 0.7551400287672392, "grad_norm": 0.1454625278711319, "learning_rate": 0.001, "loss": 2.3494, "step": 17850 }, { "epoch": 0.7551823335307556, "grad_norm": 0.16596491634845734, "learning_rate": 0.001, "loss": 2.1871, "step": 17851 }, { "epoch": 0.7552246382942719, "grad_norm": 0.1360815316438675, "learning_rate": 0.001, "loss": 1.9442, "step": 17852 }, { "epoch": 0.7552669430577883, "grad_norm": 0.12390490621328354, "learning_rate": 0.001, "loss": 1.8553, "step": 17853 }, { "epoch": 0.7553092478213047, "grad_norm": 0.14053763449192047, "learning_rate": 0.001, "loss": 1.8414, "step": 17854 }, { "epoch": 0.755351552584821, "grad_norm": 1.1963753700256348, "learning_rate": 0.001, "loss": 2.295, "step": 17855 }, { "epoch": 0.7553938573483374, "grad_norm": 0.17305585741996765, "learning_rate": 0.001, "loss": 2.9799, "step": 17856 }, { "epoch": 0.7554361621118538, "grad_norm": 0.9570670127868652, "learning_rate": 0.001, "loss": 3.0922, "step": 17857 }, { "epoch": 0.7554784668753701, "grad_norm": 0.1583283543586731, "learning_rate": 0.001, "loss": 2.4881, "step": 17858 }, { "epoch": 0.7555207716388865, "grad_norm": 0.18101347982883453, "learning_rate": 0.001, "loss": 2.2682, "step": 17859 }, { "epoch": 0.7555630764024029, "grad_norm": 0.17083586752414703, "learning_rate": 0.001, "loss": 2.5757, "step": 17860 }, { "epoch": 0.7556053811659192, "grad_norm": 0.2182934582233429, "learning_rate": 0.001, "loss": 2.3445, "step": 17861 }, { "epoch": 0.7556476859294357, "grad_norm": 0.13147765398025513, "learning_rate": 0.001, "loss": 1.6128, "step": 17862 }, { "epoch": 0.7556899906929521, "grad_norm": 0.21211394667625427, "learning_rate": 0.001, "loss": 2.4639, "step": 17863 }, { "epoch": 0.7557322954564684, "grad_norm": 0.29777416586875916, "learning_rate": 0.001, "loss": 2.3622, "step": 17864 }, { "epoch": 0.7557746002199848, "grad_norm": 0.20672816038131714, "learning_rate": 0.001, "loss": 2.1961, "step": 17865 }, { "epoch": 0.7558169049835012, "grad_norm": 2.0547661781311035, "learning_rate": 0.001, "loss": 4.3223, "step": 17866 }, { "epoch": 0.7558592097470175, "grad_norm": 0.40489253401756287, "learning_rate": 0.001, "loss": 2.754, "step": 17867 }, { "epoch": 0.7559015145105339, "grad_norm": 0.18186049163341522, "learning_rate": 0.001, "loss": 1.9801, "step": 17868 }, { "epoch": 0.7559438192740503, "grad_norm": 0.1969180703163147, "learning_rate": 0.001, "loss": 1.7136, "step": 17869 }, { "epoch": 0.7559861240375666, "grad_norm": 0.16497890651226044, "learning_rate": 0.001, "loss": 1.6463, "step": 17870 }, { "epoch": 0.756028428801083, "grad_norm": 0.16345803439617157, "learning_rate": 0.001, "loss": 2.0088, "step": 17871 }, { "epoch": 0.7560707335645994, "grad_norm": 0.2046748548746109, "learning_rate": 0.001, "loss": 2.7136, "step": 17872 }, { "epoch": 0.7561130383281157, "grad_norm": 0.3788432776927948, "learning_rate": 0.001, "loss": 3.172, "step": 17873 }, { "epoch": 0.7561553430916321, "grad_norm": 5.223359107971191, "learning_rate": 0.001, "loss": 1.9627, "step": 17874 }, { "epoch": 0.7561976478551485, "grad_norm": 0.1771371066570282, "learning_rate": 0.001, "loss": 2.7354, "step": 17875 }, { "epoch": 0.7562399526186648, "grad_norm": 0.19962891936302185, "learning_rate": 0.001, "loss": 2.2917, "step": 17876 }, { "epoch": 0.7562822573821812, "grad_norm": 0.1908496767282486, "learning_rate": 0.001, "loss": 3.2174, "step": 17877 }, { "epoch": 0.7563245621456977, "grad_norm": 0.15644274652004242, "learning_rate": 0.001, "loss": 2.0214, "step": 17878 }, { "epoch": 0.756366866909214, "grad_norm": 0.3016762435436249, "learning_rate": 0.001, "loss": 1.9582, "step": 17879 }, { "epoch": 0.7564091716727304, "grad_norm": 0.14754663407802582, "learning_rate": 0.001, "loss": 2.2023, "step": 17880 }, { "epoch": 0.7564514764362468, "grad_norm": 0.14365266263484955, "learning_rate": 0.001, "loss": 2.4457, "step": 17881 }, { "epoch": 0.7564937811997631, "grad_norm": 0.15732894837856293, "learning_rate": 0.001, "loss": 2.6365, "step": 17882 }, { "epoch": 0.7565360859632795, "grad_norm": 0.17526346445083618, "learning_rate": 0.001, "loss": 2.0218, "step": 17883 }, { "epoch": 0.7565783907267959, "grad_norm": 0.15322791039943695, "learning_rate": 0.001, "loss": 1.6749, "step": 17884 }, { "epoch": 0.7566206954903122, "grad_norm": 0.24172990024089813, "learning_rate": 0.001, "loss": 2.5218, "step": 17885 }, { "epoch": 0.7566630002538286, "grad_norm": 0.15460051596164703, "learning_rate": 0.001, "loss": 2.4132, "step": 17886 }, { "epoch": 0.7567053050173449, "grad_norm": 0.13354411721229553, "learning_rate": 0.001, "loss": 2.9439, "step": 17887 }, { "epoch": 0.7567476097808613, "grad_norm": 0.13748109340667725, "learning_rate": 0.001, "loss": 1.4254, "step": 17888 }, { "epoch": 0.7567899145443777, "grad_norm": 0.24838584661483765, "learning_rate": 0.001, "loss": 2.0226, "step": 17889 }, { "epoch": 0.756832219307894, "grad_norm": 0.15621478855609894, "learning_rate": 0.001, "loss": 3.084, "step": 17890 }, { "epoch": 0.7568745240714104, "grad_norm": 0.543929934501648, "learning_rate": 0.001, "loss": 1.88, "step": 17891 }, { "epoch": 0.7569168288349268, "grad_norm": 0.4589284062385559, "learning_rate": 0.001, "loss": 2.0766, "step": 17892 }, { "epoch": 0.7569591335984431, "grad_norm": 0.4541616141796112, "learning_rate": 0.001, "loss": 2.6497, "step": 17893 }, { "epoch": 0.7570014383619595, "grad_norm": 0.1465660035610199, "learning_rate": 0.001, "loss": 2.4708, "step": 17894 }, { "epoch": 0.757043743125476, "grad_norm": 0.2618861198425293, "learning_rate": 0.001, "loss": 2.753, "step": 17895 }, { "epoch": 0.7570860478889923, "grad_norm": 0.15626366436481476, "learning_rate": 0.001, "loss": 2.0046, "step": 17896 }, { "epoch": 0.7571283526525087, "grad_norm": 0.15386773645877838, "learning_rate": 0.001, "loss": 2.168, "step": 17897 }, { "epoch": 0.7571706574160251, "grad_norm": 0.15330027043819427, "learning_rate": 0.001, "loss": 2.3295, "step": 17898 }, { "epoch": 0.7572129621795414, "grad_norm": 0.15845079720020294, "learning_rate": 0.001, "loss": 2.0764, "step": 17899 }, { "epoch": 0.7572552669430578, "grad_norm": 0.5372272729873657, "learning_rate": 0.001, "loss": 2.3282, "step": 17900 }, { "epoch": 0.7572975717065742, "grad_norm": 0.1655784249305725, "learning_rate": 0.001, "loss": 1.5967, "step": 17901 }, { "epoch": 0.7573398764700905, "grad_norm": 0.17060010135173798, "learning_rate": 0.001, "loss": 3.1258, "step": 17902 }, { "epoch": 0.7573821812336069, "grad_norm": 0.16431990265846252, "learning_rate": 0.001, "loss": 1.9269, "step": 17903 }, { "epoch": 0.7574244859971233, "grad_norm": 0.17108014225959778, "learning_rate": 0.001, "loss": 2.9287, "step": 17904 }, { "epoch": 0.7574667907606396, "grad_norm": 0.14799624681472778, "learning_rate": 0.001, "loss": 1.8449, "step": 17905 }, { "epoch": 0.757509095524156, "grad_norm": 0.3792171776294708, "learning_rate": 0.001, "loss": 2.2688, "step": 17906 }, { "epoch": 0.7575514002876724, "grad_norm": 0.2001536786556244, "learning_rate": 0.001, "loss": 2.1087, "step": 17907 }, { "epoch": 0.7575937050511887, "grad_norm": 0.1881311982870102, "learning_rate": 0.001, "loss": 2.8072, "step": 17908 }, { "epoch": 0.7576360098147051, "grad_norm": 0.16043172776699066, "learning_rate": 0.001, "loss": 2.4464, "step": 17909 }, { "epoch": 0.7576783145782215, "grad_norm": 0.16389212012290955, "learning_rate": 0.001, "loss": 1.8501, "step": 17910 }, { "epoch": 0.7577206193417378, "grad_norm": 0.14742010831832886, "learning_rate": 0.001, "loss": 1.5688, "step": 17911 }, { "epoch": 0.7577629241052543, "grad_norm": 0.3455764949321747, "learning_rate": 0.001, "loss": 1.6951, "step": 17912 }, { "epoch": 0.7578052288687707, "grad_norm": 0.15572752058506012, "learning_rate": 0.001, "loss": 2.8535, "step": 17913 }, { "epoch": 0.757847533632287, "grad_norm": 0.14037249982357025, "learning_rate": 0.001, "loss": 2.1352, "step": 17914 }, { "epoch": 0.7578898383958034, "grad_norm": 0.14692841470241547, "learning_rate": 0.001, "loss": 2.1657, "step": 17915 }, { "epoch": 0.7579321431593198, "grad_norm": 0.15857268869876862, "learning_rate": 0.001, "loss": 2.1919, "step": 17916 }, { "epoch": 0.7579744479228361, "grad_norm": 0.20824623107910156, "learning_rate": 0.001, "loss": 2.8121, "step": 17917 }, { "epoch": 0.7580167526863525, "grad_norm": 0.1485569328069687, "learning_rate": 0.001, "loss": 3.027, "step": 17918 }, { "epoch": 0.7580590574498689, "grad_norm": 0.3441106081008911, "learning_rate": 0.001, "loss": 1.4367, "step": 17919 }, { "epoch": 0.7581013622133852, "grad_norm": 0.9153411984443665, "learning_rate": 0.001, "loss": 1.6852, "step": 17920 }, { "epoch": 0.7581436669769016, "grad_norm": 0.15342696011066437, "learning_rate": 0.001, "loss": 2.0896, "step": 17921 }, { "epoch": 0.758185971740418, "grad_norm": 0.1278454065322876, "learning_rate": 0.001, "loss": 1.6947, "step": 17922 }, { "epoch": 0.7582282765039343, "grad_norm": 0.22508245706558228, "learning_rate": 0.001, "loss": 2.1324, "step": 17923 }, { "epoch": 0.7582705812674507, "grad_norm": 0.1679566353559494, "learning_rate": 0.001, "loss": 2.0825, "step": 17924 }, { "epoch": 0.7583128860309671, "grad_norm": 0.18976467847824097, "learning_rate": 0.001, "loss": 2.0653, "step": 17925 }, { "epoch": 0.7583551907944834, "grad_norm": 0.13648661971092224, "learning_rate": 0.001, "loss": 1.651, "step": 17926 }, { "epoch": 0.7583974955579998, "grad_norm": 0.14440205693244934, "learning_rate": 0.001, "loss": 1.8385, "step": 17927 }, { "epoch": 0.7584398003215163, "grad_norm": 0.14819002151489258, "learning_rate": 0.001, "loss": 1.4996, "step": 17928 }, { "epoch": 0.7584821050850326, "grad_norm": 0.17857134342193604, "learning_rate": 0.001, "loss": 1.8688, "step": 17929 }, { "epoch": 0.758524409848549, "grad_norm": 0.17260822653770447, "learning_rate": 0.001, "loss": 1.8572, "step": 17930 }, { "epoch": 0.7585667146120654, "grad_norm": 0.15363721549510956, "learning_rate": 0.001, "loss": 2.2281, "step": 17931 }, { "epoch": 0.7586090193755817, "grad_norm": 0.2327968031167984, "learning_rate": 0.001, "loss": 2.4439, "step": 17932 }, { "epoch": 0.7586513241390981, "grad_norm": 0.1449955552816391, "learning_rate": 0.001, "loss": 1.5978, "step": 17933 }, { "epoch": 0.7586936289026144, "grad_norm": 0.14919523894786835, "learning_rate": 0.001, "loss": 1.8171, "step": 17934 }, { "epoch": 0.7587359336661308, "grad_norm": 0.12412866950035095, "learning_rate": 0.001, "loss": 2.6799, "step": 17935 }, { "epoch": 0.7587782384296472, "grad_norm": 0.16862471401691437, "learning_rate": 0.001, "loss": 2.93, "step": 17936 }, { "epoch": 0.7588205431931635, "grad_norm": 0.16229136288166046, "learning_rate": 0.001, "loss": 1.2241, "step": 17937 }, { "epoch": 0.7588628479566799, "grad_norm": 0.12667950987815857, "learning_rate": 0.001, "loss": 1.3335, "step": 17938 }, { "epoch": 0.7589051527201963, "grad_norm": 0.1758270263671875, "learning_rate": 0.001, "loss": 2.1729, "step": 17939 }, { "epoch": 0.7589474574837126, "grad_norm": 0.17729204893112183, "learning_rate": 0.001, "loss": 3.5026, "step": 17940 }, { "epoch": 0.758989762247229, "grad_norm": 0.16940808296203613, "learning_rate": 0.001, "loss": 2.3168, "step": 17941 }, { "epoch": 0.7590320670107454, "grad_norm": 0.14315323531627655, "learning_rate": 0.001, "loss": 1.6935, "step": 17942 }, { "epoch": 0.7590743717742617, "grad_norm": 0.14111851155757904, "learning_rate": 0.001, "loss": 1.6618, "step": 17943 }, { "epoch": 0.7591166765377781, "grad_norm": 0.28591257333755493, "learning_rate": 0.001, "loss": 3.0757, "step": 17944 }, { "epoch": 0.7591589813012946, "grad_norm": 0.15771640837192535, "learning_rate": 0.001, "loss": 2.0072, "step": 17945 }, { "epoch": 0.7592012860648109, "grad_norm": 0.15648822486400604, "learning_rate": 0.001, "loss": 2.8095, "step": 17946 }, { "epoch": 0.7592435908283273, "grad_norm": 0.20096895098686218, "learning_rate": 0.001, "loss": 2.3453, "step": 17947 }, { "epoch": 0.7592858955918437, "grad_norm": 0.17997436225414276, "learning_rate": 0.001, "loss": 1.9256, "step": 17948 }, { "epoch": 0.75932820035536, "grad_norm": 0.1645924299955368, "learning_rate": 0.001, "loss": 1.9429, "step": 17949 }, { "epoch": 0.7593705051188764, "grad_norm": 0.14926403760910034, "learning_rate": 0.001, "loss": 2.39, "step": 17950 }, { "epoch": 0.7594128098823928, "grad_norm": 0.17297221720218658, "learning_rate": 0.001, "loss": 1.5502, "step": 17951 }, { "epoch": 0.7594551146459091, "grad_norm": 0.1334647685289383, "learning_rate": 0.001, "loss": 1.7092, "step": 17952 }, { "epoch": 0.7594974194094255, "grad_norm": 0.7537721395492554, "learning_rate": 0.001, "loss": 1.3184, "step": 17953 }, { "epoch": 0.7595397241729419, "grad_norm": 0.2796461880207062, "learning_rate": 0.001, "loss": 2.0563, "step": 17954 }, { "epoch": 0.7595820289364582, "grad_norm": 0.2787031829357147, "learning_rate": 0.001, "loss": 2.0524, "step": 17955 }, { "epoch": 0.7596243336999746, "grad_norm": 0.172209694981575, "learning_rate": 0.001, "loss": 1.8967, "step": 17956 }, { "epoch": 0.759666638463491, "grad_norm": 0.15456169843673706, "learning_rate": 0.001, "loss": 2.1134, "step": 17957 }, { "epoch": 0.7597089432270073, "grad_norm": 0.16440053284168243, "learning_rate": 0.001, "loss": 1.9277, "step": 17958 }, { "epoch": 0.7597512479905237, "grad_norm": 0.2558080554008484, "learning_rate": 0.001, "loss": 2.6084, "step": 17959 }, { "epoch": 0.7597935527540401, "grad_norm": 0.1783752590417862, "learning_rate": 0.001, "loss": 2.1082, "step": 17960 }, { "epoch": 0.7598358575175564, "grad_norm": 0.14168411493301392, "learning_rate": 0.001, "loss": 1.8182, "step": 17961 }, { "epoch": 0.7598781622810729, "grad_norm": 0.1725158542394638, "learning_rate": 0.001, "loss": 2.5628, "step": 17962 }, { "epoch": 0.7599204670445893, "grad_norm": 0.13498811423778534, "learning_rate": 0.001, "loss": 2.1323, "step": 17963 }, { "epoch": 0.7599627718081056, "grad_norm": 0.15643754601478577, "learning_rate": 0.001, "loss": 2.3276, "step": 17964 }, { "epoch": 0.760005076571622, "grad_norm": 0.16019053757190704, "learning_rate": 0.001, "loss": 2.2398, "step": 17965 }, { "epoch": 0.7600473813351384, "grad_norm": 0.1496978998184204, "learning_rate": 0.001, "loss": 3.8534, "step": 17966 }, { "epoch": 0.7600896860986547, "grad_norm": 0.18137343227863312, "learning_rate": 0.001, "loss": 2.4502, "step": 17967 }, { "epoch": 0.7601319908621711, "grad_norm": 0.14350903034210205, "learning_rate": 0.001, "loss": 2.6869, "step": 17968 }, { "epoch": 0.7601742956256875, "grad_norm": 0.17714965343475342, "learning_rate": 0.001, "loss": 1.9266, "step": 17969 }, { "epoch": 0.7602166003892038, "grad_norm": 0.13317923247814178, "learning_rate": 0.001, "loss": 1.5833, "step": 17970 }, { "epoch": 0.7602589051527202, "grad_norm": 0.13361962139606476, "learning_rate": 0.001, "loss": 1.6225, "step": 17971 }, { "epoch": 0.7603012099162366, "grad_norm": 0.1693313717842102, "learning_rate": 0.001, "loss": 1.9779, "step": 17972 }, { "epoch": 0.7603435146797529, "grad_norm": 1.043839454650879, "learning_rate": 0.001, "loss": 1.9982, "step": 17973 }, { "epoch": 0.7603858194432693, "grad_norm": 0.19669762253761292, "learning_rate": 0.001, "loss": 2.6891, "step": 17974 }, { "epoch": 0.7604281242067857, "grad_norm": 0.1441902369260788, "learning_rate": 0.001, "loss": 2.0312, "step": 17975 }, { "epoch": 0.760470428970302, "grad_norm": 0.13222302496433258, "learning_rate": 0.001, "loss": 2.0124, "step": 17976 }, { "epoch": 0.7605127337338184, "grad_norm": 0.16443274915218353, "learning_rate": 0.001, "loss": 3.1681, "step": 17977 }, { "epoch": 0.7605550384973347, "grad_norm": 0.1863120049238205, "learning_rate": 0.001, "loss": 2.4867, "step": 17978 }, { "epoch": 0.7605973432608512, "grad_norm": 0.15816029906272888, "learning_rate": 0.001, "loss": 3.2455, "step": 17979 }, { "epoch": 0.7606396480243676, "grad_norm": 0.20944538712501526, "learning_rate": 0.001, "loss": 2.5672, "step": 17980 }, { "epoch": 0.7606819527878839, "grad_norm": 0.13495473563671112, "learning_rate": 0.001, "loss": 1.9507, "step": 17981 }, { "epoch": 0.7607242575514003, "grad_norm": 0.160276859998703, "learning_rate": 0.001, "loss": 2.5909, "step": 17982 }, { "epoch": 0.7607665623149167, "grad_norm": 0.14563147723674774, "learning_rate": 0.001, "loss": 1.6701, "step": 17983 }, { "epoch": 0.760808867078433, "grad_norm": 0.16634686291217804, "learning_rate": 0.001, "loss": 2.6882, "step": 17984 }, { "epoch": 0.7608511718419494, "grad_norm": 0.17471922934055328, "learning_rate": 0.001, "loss": 3.1525, "step": 17985 }, { "epoch": 0.7608934766054658, "grad_norm": 0.18728522956371307, "learning_rate": 0.001, "loss": 3.5754, "step": 17986 }, { "epoch": 0.7609357813689821, "grad_norm": 0.13889671862125397, "learning_rate": 0.001, "loss": 2.6335, "step": 17987 }, { "epoch": 0.7609780861324985, "grad_norm": 0.1883813589811325, "learning_rate": 0.001, "loss": 2.5539, "step": 17988 }, { "epoch": 0.7610203908960149, "grad_norm": 0.16442306339740753, "learning_rate": 0.001, "loss": 2.6422, "step": 17989 }, { "epoch": 0.7610626956595312, "grad_norm": 0.154715433716774, "learning_rate": 0.001, "loss": 2.8743, "step": 17990 }, { "epoch": 0.7611050004230476, "grad_norm": 1.595334529876709, "learning_rate": 0.001, "loss": 2.4011, "step": 17991 }, { "epoch": 0.761147305186564, "grad_norm": 0.14046059548854828, "learning_rate": 0.001, "loss": 2.2203, "step": 17992 }, { "epoch": 0.7611896099500803, "grad_norm": 0.16517886519432068, "learning_rate": 0.001, "loss": 1.8345, "step": 17993 }, { "epoch": 0.7612319147135967, "grad_norm": 0.15693698823451996, "learning_rate": 0.001, "loss": 2.506, "step": 17994 }, { "epoch": 0.7612742194771132, "grad_norm": 0.2535119950771332, "learning_rate": 0.001, "loss": 2.2293, "step": 17995 }, { "epoch": 0.7613165242406295, "grad_norm": 0.30919238924980164, "learning_rate": 0.001, "loss": 2.0888, "step": 17996 }, { "epoch": 0.7613588290041459, "grad_norm": 0.21086013317108154, "learning_rate": 0.001, "loss": 1.4487, "step": 17997 }, { "epoch": 0.7614011337676623, "grad_norm": 0.14983509480953217, "learning_rate": 0.001, "loss": 1.9109, "step": 17998 }, { "epoch": 0.7614434385311786, "grad_norm": 0.16807793080806732, "learning_rate": 0.001, "loss": 4.2247, "step": 17999 }, { "epoch": 0.761485743294695, "grad_norm": 0.15390969812870026, "learning_rate": 0.001, "loss": 1.5664, "step": 18000 }, { "epoch": 0.7615280480582114, "grad_norm": 0.16306841373443604, "learning_rate": 0.001, "loss": 1.6577, "step": 18001 }, { "epoch": 0.7615703528217277, "grad_norm": 0.16728578507900238, "learning_rate": 0.001, "loss": 1.9575, "step": 18002 }, { "epoch": 0.7616126575852441, "grad_norm": 0.16129907965660095, "learning_rate": 0.001, "loss": 1.9566, "step": 18003 }, { "epoch": 0.7616549623487605, "grad_norm": 0.1789942979812622, "learning_rate": 0.001, "loss": 1.806, "step": 18004 }, { "epoch": 0.7616972671122768, "grad_norm": 0.15799731016159058, "learning_rate": 0.001, "loss": 2.8574, "step": 18005 }, { "epoch": 0.7617395718757932, "grad_norm": 0.13212554156780243, "learning_rate": 0.001, "loss": 1.6819, "step": 18006 }, { "epoch": 0.7617818766393096, "grad_norm": 0.13850046694278717, "learning_rate": 0.001, "loss": 2.0633, "step": 18007 }, { "epoch": 0.7618241814028259, "grad_norm": 0.22076542675495148, "learning_rate": 0.001, "loss": 2.0944, "step": 18008 }, { "epoch": 0.7618664861663423, "grad_norm": 0.18561388552188873, "learning_rate": 0.001, "loss": 1.7448, "step": 18009 }, { "epoch": 0.7619087909298587, "grad_norm": 0.15947283804416656, "learning_rate": 0.001, "loss": 1.9042, "step": 18010 }, { "epoch": 0.761951095693375, "grad_norm": 0.49783751368522644, "learning_rate": 0.001, "loss": 2.8313, "step": 18011 }, { "epoch": 0.7619934004568915, "grad_norm": 0.16959191858768463, "learning_rate": 0.001, "loss": 2.4751, "step": 18012 }, { "epoch": 0.7620357052204079, "grad_norm": 0.1717720329761505, "learning_rate": 0.001, "loss": 3.4214, "step": 18013 }, { "epoch": 0.7620780099839242, "grad_norm": 0.1780117154121399, "learning_rate": 0.001, "loss": 2.4375, "step": 18014 }, { "epoch": 0.7621203147474406, "grad_norm": 0.1493871957063675, "learning_rate": 0.001, "loss": 1.8541, "step": 18015 }, { "epoch": 0.762162619510957, "grad_norm": 0.1525392383337021, "learning_rate": 0.001, "loss": 1.7101, "step": 18016 }, { "epoch": 0.7622049242744733, "grad_norm": 0.14318202435970306, "learning_rate": 0.001, "loss": 3.5063, "step": 18017 }, { "epoch": 0.7622472290379897, "grad_norm": 0.5359983444213867, "learning_rate": 0.001, "loss": 1.6701, "step": 18018 }, { "epoch": 0.7622895338015061, "grad_norm": 0.19831795990467072, "learning_rate": 0.001, "loss": 2.242, "step": 18019 }, { "epoch": 0.7623318385650224, "grad_norm": 0.4935970604419708, "learning_rate": 0.001, "loss": 2.697, "step": 18020 }, { "epoch": 0.7623741433285388, "grad_norm": 0.19180017709732056, "learning_rate": 0.001, "loss": 2.217, "step": 18021 }, { "epoch": 0.7624164480920551, "grad_norm": 0.25960850715637207, "learning_rate": 0.001, "loss": 1.561, "step": 18022 }, { "epoch": 0.7624587528555715, "grad_norm": 0.25631117820739746, "learning_rate": 0.001, "loss": 2.6779, "step": 18023 }, { "epoch": 0.7625010576190879, "grad_norm": 2.3049187660217285, "learning_rate": 0.001, "loss": 2.1273, "step": 18024 }, { "epoch": 0.7625433623826042, "grad_norm": 0.1636704057455063, "learning_rate": 0.001, "loss": 1.6586, "step": 18025 }, { "epoch": 0.7625856671461206, "grad_norm": 0.17722514271736145, "learning_rate": 0.001, "loss": 1.6602, "step": 18026 }, { "epoch": 0.762627971909637, "grad_norm": 0.22410543262958527, "learning_rate": 0.001, "loss": 2.5346, "step": 18027 }, { "epoch": 0.7626702766731533, "grad_norm": 0.15916574001312256, "learning_rate": 0.001, "loss": 2.2563, "step": 18028 }, { "epoch": 0.7627125814366698, "grad_norm": 0.17527827620506287, "learning_rate": 0.001, "loss": 2.4785, "step": 18029 }, { "epoch": 0.7627548862001862, "grad_norm": 0.2694207727909088, "learning_rate": 0.001, "loss": 2.3781, "step": 18030 }, { "epoch": 0.7627971909637025, "grad_norm": 0.2880823314189911, "learning_rate": 0.001, "loss": 2.6001, "step": 18031 }, { "epoch": 0.7628394957272189, "grad_norm": 0.165915384888649, "learning_rate": 0.001, "loss": 2.18, "step": 18032 }, { "epoch": 0.7628818004907353, "grad_norm": 0.19809134304523468, "learning_rate": 0.001, "loss": 2.8755, "step": 18033 }, { "epoch": 0.7629241052542516, "grad_norm": 0.18514703214168549, "learning_rate": 0.001, "loss": 1.9967, "step": 18034 }, { "epoch": 0.762966410017768, "grad_norm": 0.17875197529792786, "learning_rate": 0.001, "loss": 2.2859, "step": 18035 }, { "epoch": 0.7630087147812844, "grad_norm": 0.1806037873029709, "learning_rate": 0.001, "loss": 2.6236, "step": 18036 }, { "epoch": 0.7630510195448007, "grad_norm": 0.15905499458312988, "learning_rate": 0.001, "loss": 2.1554, "step": 18037 }, { "epoch": 0.7630933243083171, "grad_norm": 0.1544172763824463, "learning_rate": 0.001, "loss": 1.8756, "step": 18038 }, { "epoch": 0.7631356290718335, "grad_norm": 0.20902614295482635, "learning_rate": 0.001, "loss": 2.6106, "step": 18039 }, { "epoch": 0.7631779338353498, "grad_norm": 0.15740123391151428, "learning_rate": 0.001, "loss": 2.3193, "step": 18040 }, { "epoch": 0.7632202385988662, "grad_norm": 0.15449416637420654, "learning_rate": 0.001, "loss": 1.6584, "step": 18041 }, { "epoch": 0.7632625433623826, "grad_norm": 0.17526280879974365, "learning_rate": 0.001, "loss": 1.9062, "step": 18042 }, { "epoch": 0.7633048481258989, "grad_norm": 0.19311073422431946, "learning_rate": 0.001, "loss": 2.3969, "step": 18043 }, { "epoch": 0.7633471528894153, "grad_norm": 0.17864413559436798, "learning_rate": 0.001, "loss": 1.7516, "step": 18044 }, { "epoch": 0.7633894576529318, "grad_norm": 0.16350750625133514, "learning_rate": 0.001, "loss": 1.8934, "step": 18045 }, { "epoch": 0.7634317624164481, "grad_norm": 0.29366812109947205, "learning_rate": 0.001, "loss": 3.6984, "step": 18046 }, { "epoch": 0.7634740671799645, "grad_norm": 0.8536985516548157, "learning_rate": 0.001, "loss": 1.4539, "step": 18047 }, { "epoch": 0.7635163719434809, "grad_norm": 0.1530151069164276, "learning_rate": 0.001, "loss": 1.9095, "step": 18048 }, { "epoch": 0.7635586767069972, "grad_norm": 0.1519416868686676, "learning_rate": 0.001, "loss": 1.5347, "step": 18049 }, { "epoch": 0.7636009814705136, "grad_norm": 0.31431666016578674, "learning_rate": 0.001, "loss": 2.056, "step": 18050 }, { "epoch": 0.76364328623403, "grad_norm": 0.9129075407981873, "learning_rate": 0.001, "loss": 1.9324, "step": 18051 }, { "epoch": 0.7636855909975463, "grad_norm": 0.1397624909877777, "learning_rate": 0.001, "loss": 1.7459, "step": 18052 }, { "epoch": 0.7637278957610627, "grad_norm": 0.1634640395641327, "learning_rate": 0.001, "loss": 2.0907, "step": 18053 }, { "epoch": 0.7637702005245791, "grad_norm": 0.18420040607452393, "learning_rate": 0.001, "loss": 2.1298, "step": 18054 }, { "epoch": 0.7638125052880954, "grad_norm": 1.812959909439087, "learning_rate": 0.001, "loss": 3.1896, "step": 18055 }, { "epoch": 0.7638548100516118, "grad_norm": 0.40835338830947876, "learning_rate": 0.001, "loss": 2.3623, "step": 18056 }, { "epoch": 0.7638971148151282, "grad_norm": 5.703685283660889, "learning_rate": 0.001, "loss": 2.2481, "step": 18057 }, { "epoch": 0.7639394195786445, "grad_norm": 0.18355365097522736, "learning_rate": 0.001, "loss": 2.5109, "step": 18058 }, { "epoch": 0.7639817243421609, "grad_norm": 38.65842056274414, "learning_rate": 0.001, "loss": 2.5208, "step": 18059 }, { "epoch": 0.7640240291056773, "grad_norm": 0.1402842104434967, "learning_rate": 0.001, "loss": 2.0796, "step": 18060 }, { "epoch": 0.7640663338691936, "grad_norm": 0.28925231099128723, "learning_rate": 0.001, "loss": 2.1025, "step": 18061 }, { "epoch": 0.7641086386327101, "grad_norm": 0.1598457396030426, "learning_rate": 0.001, "loss": 1.9011, "step": 18062 }, { "epoch": 0.7641509433962265, "grad_norm": 0.21429701149463654, "learning_rate": 0.001, "loss": 1.6832, "step": 18063 }, { "epoch": 0.7641932481597428, "grad_norm": 0.1933804154396057, "learning_rate": 0.001, "loss": 2.435, "step": 18064 }, { "epoch": 0.7642355529232592, "grad_norm": 0.21492686867713928, "learning_rate": 0.001, "loss": 1.7783, "step": 18065 }, { "epoch": 0.7642778576867756, "grad_norm": 0.19295653700828552, "learning_rate": 0.001, "loss": 1.6563, "step": 18066 }, { "epoch": 0.7643201624502919, "grad_norm": 0.22416500747203827, "learning_rate": 0.001, "loss": 1.7468, "step": 18067 }, { "epoch": 0.7643624672138083, "grad_norm": 0.1974020004272461, "learning_rate": 0.001, "loss": 2.6946, "step": 18068 }, { "epoch": 0.7644047719773246, "grad_norm": 0.1901572346687317, "learning_rate": 0.001, "loss": 2.1431, "step": 18069 }, { "epoch": 0.764447076740841, "grad_norm": 0.1712511032819748, "learning_rate": 0.001, "loss": 2.1096, "step": 18070 }, { "epoch": 0.7644893815043574, "grad_norm": 0.1516590565443039, "learning_rate": 0.001, "loss": 1.7473, "step": 18071 }, { "epoch": 0.7645316862678737, "grad_norm": 0.7932908535003662, "learning_rate": 0.001, "loss": 1.8818, "step": 18072 }, { "epoch": 0.7645739910313901, "grad_norm": 0.12099297344684601, "learning_rate": 0.001, "loss": 1.6472, "step": 18073 }, { "epoch": 0.7646162957949065, "grad_norm": 0.1378883421421051, "learning_rate": 0.001, "loss": 1.8047, "step": 18074 }, { "epoch": 0.7646586005584228, "grad_norm": 0.1538868397474289, "learning_rate": 0.001, "loss": 2.1053, "step": 18075 }, { "epoch": 0.7647009053219392, "grad_norm": 2.756589889526367, "learning_rate": 0.001, "loss": 2.4575, "step": 18076 }, { "epoch": 0.7647432100854556, "grad_norm": 0.16562233865261078, "learning_rate": 0.001, "loss": 1.8964, "step": 18077 }, { "epoch": 0.764785514848972, "grad_norm": 0.17701025307178497, "learning_rate": 0.001, "loss": 2.101, "step": 18078 }, { "epoch": 0.7648278196124884, "grad_norm": 0.2576713562011719, "learning_rate": 0.001, "loss": 1.7816, "step": 18079 }, { "epoch": 0.7648701243760048, "grad_norm": 0.20448629558086395, "learning_rate": 0.001, "loss": 2.3767, "step": 18080 }, { "epoch": 0.7649124291395211, "grad_norm": 0.18108433485031128, "learning_rate": 0.001, "loss": 1.4452, "step": 18081 }, { "epoch": 0.7649547339030375, "grad_norm": 0.17302542924880981, "learning_rate": 0.001, "loss": 2.3699, "step": 18082 }, { "epoch": 0.7649970386665539, "grad_norm": 0.16477636992931366, "learning_rate": 0.001, "loss": 2.436, "step": 18083 }, { "epoch": 0.7650393434300702, "grad_norm": 0.1961863785982132, "learning_rate": 0.001, "loss": 2.1719, "step": 18084 }, { "epoch": 0.7650816481935866, "grad_norm": 0.14665809273719788, "learning_rate": 0.001, "loss": 1.5338, "step": 18085 }, { "epoch": 0.765123952957103, "grad_norm": 0.16945761442184448, "learning_rate": 0.001, "loss": 3.4643, "step": 18086 }, { "epoch": 0.7651662577206193, "grad_norm": 0.16046762466430664, "learning_rate": 0.001, "loss": 1.7755, "step": 18087 }, { "epoch": 0.7652085624841357, "grad_norm": 0.15023651719093323, "learning_rate": 0.001, "loss": 2.3674, "step": 18088 }, { "epoch": 0.7652508672476521, "grad_norm": 0.18328256905078888, "learning_rate": 0.001, "loss": 3.5899, "step": 18089 }, { "epoch": 0.7652931720111684, "grad_norm": 0.164375901222229, "learning_rate": 0.001, "loss": 2.2567, "step": 18090 }, { "epoch": 0.7653354767746848, "grad_norm": 0.15443533658981323, "learning_rate": 0.001, "loss": 1.6702, "step": 18091 }, { "epoch": 0.7653777815382012, "grad_norm": 0.1532910019159317, "learning_rate": 0.001, "loss": 1.7926, "step": 18092 }, { "epoch": 0.7654200863017175, "grad_norm": 0.17233017086982727, "learning_rate": 0.001, "loss": 1.9303, "step": 18093 }, { "epoch": 0.765462391065234, "grad_norm": 0.3771326243877411, "learning_rate": 0.001, "loss": 2.6317, "step": 18094 }, { "epoch": 0.7655046958287504, "grad_norm": 0.15392722189426422, "learning_rate": 0.001, "loss": 2.2798, "step": 18095 }, { "epoch": 0.7655470005922667, "grad_norm": 0.15488563477993011, "learning_rate": 0.001, "loss": 2.1111, "step": 18096 }, { "epoch": 0.7655893053557831, "grad_norm": 0.17814667522907257, "learning_rate": 0.001, "loss": 3.3561, "step": 18097 }, { "epoch": 0.7656316101192995, "grad_norm": 0.26778027415275574, "learning_rate": 0.001, "loss": 1.7169, "step": 18098 }, { "epoch": 0.7656739148828158, "grad_norm": 0.18762215971946716, "learning_rate": 0.001, "loss": 2.769, "step": 18099 }, { "epoch": 0.7657162196463322, "grad_norm": 20.262495040893555, "learning_rate": 0.001, "loss": 2.2304, "step": 18100 }, { "epoch": 0.7657585244098486, "grad_norm": 0.15660881996154785, "learning_rate": 0.001, "loss": 2.1479, "step": 18101 }, { "epoch": 0.7658008291733649, "grad_norm": 0.1680443435907364, "learning_rate": 0.001, "loss": 1.9905, "step": 18102 }, { "epoch": 0.7658431339368813, "grad_norm": 0.16119442880153656, "learning_rate": 0.001, "loss": 2.5656, "step": 18103 }, { "epoch": 0.7658854387003977, "grad_norm": 0.1531805545091629, "learning_rate": 0.001, "loss": 1.4154, "step": 18104 }, { "epoch": 0.765927743463914, "grad_norm": 1.081006407737732, "learning_rate": 0.001, "loss": 2.4958, "step": 18105 }, { "epoch": 0.7659700482274304, "grad_norm": 0.24466001987457275, "learning_rate": 0.001, "loss": 2.9361, "step": 18106 }, { "epoch": 0.7660123529909468, "grad_norm": 0.17916032671928406, "learning_rate": 0.001, "loss": 1.9019, "step": 18107 }, { "epoch": 0.7660546577544631, "grad_norm": 0.19994664192199707, "learning_rate": 0.001, "loss": 1.9385, "step": 18108 }, { "epoch": 0.7660969625179795, "grad_norm": 0.2043987214565277, "learning_rate": 0.001, "loss": 2.122, "step": 18109 }, { "epoch": 0.766139267281496, "grad_norm": 0.18687529861927032, "learning_rate": 0.001, "loss": 1.7168, "step": 18110 }, { "epoch": 0.7661815720450122, "grad_norm": 0.32144445180892944, "learning_rate": 0.001, "loss": 2.2964, "step": 18111 }, { "epoch": 0.7662238768085287, "grad_norm": 0.1715632677078247, "learning_rate": 0.001, "loss": 1.7487, "step": 18112 }, { "epoch": 0.766266181572045, "grad_norm": 0.22985003888607025, "learning_rate": 0.001, "loss": 2.0248, "step": 18113 }, { "epoch": 0.7663084863355614, "grad_norm": 1.1345404386520386, "learning_rate": 0.001, "loss": 2.2758, "step": 18114 }, { "epoch": 0.7663507910990778, "grad_norm": 0.18999895453453064, "learning_rate": 0.001, "loss": 1.8452, "step": 18115 }, { "epoch": 0.7663930958625941, "grad_norm": 0.8503357768058777, "learning_rate": 0.001, "loss": 2.1866, "step": 18116 }, { "epoch": 0.7664354006261105, "grad_norm": 0.19124074280261993, "learning_rate": 0.001, "loss": 2.4042, "step": 18117 }, { "epoch": 0.7664777053896269, "grad_norm": 0.20431570708751678, "learning_rate": 0.001, "loss": 1.7436, "step": 18118 }, { "epoch": 0.7665200101531432, "grad_norm": 0.2346670776605606, "learning_rate": 0.001, "loss": 2.3448, "step": 18119 }, { "epoch": 0.7665623149166596, "grad_norm": 0.19955824315547943, "learning_rate": 0.001, "loss": 3.1449, "step": 18120 }, { "epoch": 0.766604619680176, "grad_norm": 0.1841670423746109, "learning_rate": 0.001, "loss": 1.9185, "step": 18121 }, { "epoch": 0.7666469244436923, "grad_norm": 0.44874322414398193, "learning_rate": 0.001, "loss": 2.127, "step": 18122 }, { "epoch": 0.7666892292072087, "grad_norm": 0.1544332355260849, "learning_rate": 0.001, "loss": 2.2697, "step": 18123 }, { "epoch": 0.7667315339707251, "grad_norm": 0.1674651801586151, "learning_rate": 0.001, "loss": 1.9383, "step": 18124 }, { "epoch": 0.7667738387342414, "grad_norm": 3.113783359527588, "learning_rate": 0.001, "loss": 1.4092, "step": 18125 }, { "epoch": 0.7668161434977578, "grad_norm": 0.1927240639925003, "learning_rate": 0.001, "loss": 3.3805, "step": 18126 }, { "epoch": 0.7668584482612743, "grad_norm": 0.21248434484004974, "learning_rate": 0.001, "loss": 2.3386, "step": 18127 }, { "epoch": 0.7669007530247905, "grad_norm": 0.2704300582408905, "learning_rate": 0.001, "loss": 2.011, "step": 18128 }, { "epoch": 0.766943057788307, "grad_norm": 0.33151501417160034, "learning_rate": 0.001, "loss": 1.9698, "step": 18129 }, { "epoch": 0.7669853625518234, "grad_norm": 0.2982079088687897, "learning_rate": 0.001, "loss": 2.9232, "step": 18130 }, { "epoch": 0.7670276673153397, "grad_norm": 0.185440793633461, "learning_rate": 0.001, "loss": 2.1382, "step": 18131 }, { "epoch": 0.7670699720788561, "grad_norm": 0.1537931114435196, "learning_rate": 0.001, "loss": 3.2961, "step": 18132 }, { "epoch": 0.7671122768423725, "grad_norm": 0.19790029525756836, "learning_rate": 0.001, "loss": 1.857, "step": 18133 }, { "epoch": 0.7671545816058888, "grad_norm": 0.16664250195026398, "learning_rate": 0.001, "loss": 2.3612, "step": 18134 }, { "epoch": 0.7671968863694052, "grad_norm": 0.19958829879760742, "learning_rate": 0.001, "loss": 1.444, "step": 18135 }, { "epoch": 0.7672391911329216, "grad_norm": 0.25182950496673584, "learning_rate": 0.001, "loss": 2.2047, "step": 18136 }, { "epoch": 0.7672814958964379, "grad_norm": 0.19357016682624817, "learning_rate": 0.001, "loss": 2.5632, "step": 18137 }, { "epoch": 0.7673238006599543, "grad_norm": 0.20123781263828278, "learning_rate": 0.001, "loss": 3.6875, "step": 18138 }, { "epoch": 0.7673661054234707, "grad_norm": 0.21408236026763916, "learning_rate": 0.001, "loss": 2.7204, "step": 18139 }, { "epoch": 0.767408410186987, "grad_norm": 0.1696946918964386, "learning_rate": 0.001, "loss": 2.0886, "step": 18140 }, { "epoch": 0.7674507149505034, "grad_norm": 0.8207197189331055, "learning_rate": 0.001, "loss": 2.7853, "step": 18141 }, { "epoch": 0.7674930197140198, "grad_norm": 2.7755489349365234, "learning_rate": 0.001, "loss": 4.5532, "step": 18142 }, { "epoch": 0.7675353244775361, "grad_norm": 0.20465411245822906, "learning_rate": 0.001, "loss": 1.803, "step": 18143 }, { "epoch": 0.7675776292410526, "grad_norm": 0.16455033421516418, "learning_rate": 0.001, "loss": 2.0663, "step": 18144 }, { "epoch": 0.767619934004569, "grad_norm": 3.680520534515381, "learning_rate": 0.001, "loss": 1.9112, "step": 18145 }, { "epoch": 0.7676622387680853, "grad_norm": 0.20432768762111664, "learning_rate": 0.001, "loss": 1.8184, "step": 18146 }, { "epoch": 0.7677045435316017, "grad_norm": 0.193999245762825, "learning_rate": 0.001, "loss": 2.0712, "step": 18147 }, { "epoch": 0.7677468482951181, "grad_norm": 0.18594130873680115, "learning_rate": 0.001, "loss": 1.8831, "step": 18148 }, { "epoch": 0.7677891530586344, "grad_norm": 5.5671491622924805, "learning_rate": 0.001, "loss": 2.2832, "step": 18149 }, { "epoch": 0.7678314578221508, "grad_norm": 0.22034242749214172, "learning_rate": 0.001, "loss": 3.3777, "step": 18150 }, { "epoch": 0.7678737625856672, "grad_norm": 0.16389696300029755, "learning_rate": 0.001, "loss": 2.6919, "step": 18151 }, { "epoch": 0.7679160673491835, "grad_norm": 4.396114349365234, "learning_rate": 0.001, "loss": 2.7745, "step": 18152 }, { "epoch": 0.7679583721126999, "grad_norm": 0.18078528344631195, "learning_rate": 0.001, "loss": 2.5654, "step": 18153 }, { "epoch": 0.7680006768762163, "grad_norm": 0.2645280361175537, "learning_rate": 0.001, "loss": 1.831, "step": 18154 }, { "epoch": 0.7680429816397326, "grad_norm": 0.2649464011192322, "learning_rate": 0.001, "loss": 2.6965, "step": 18155 }, { "epoch": 0.768085286403249, "grad_norm": 0.22064730525016785, "learning_rate": 0.001, "loss": 2.3275, "step": 18156 }, { "epoch": 0.7681275911667654, "grad_norm": 0.22132393717765808, "learning_rate": 0.001, "loss": 1.9728, "step": 18157 }, { "epoch": 0.7681698959302817, "grad_norm": 0.20887607336044312, "learning_rate": 0.001, "loss": 2.3195, "step": 18158 }, { "epoch": 0.7682122006937981, "grad_norm": 1.3075305223464966, "learning_rate": 0.001, "loss": 2.5001, "step": 18159 }, { "epoch": 0.7682545054573144, "grad_norm": 0.395717978477478, "learning_rate": 0.001, "loss": 2.147, "step": 18160 }, { "epoch": 0.7682968102208309, "grad_norm": 0.18617072701454163, "learning_rate": 0.001, "loss": 2.0584, "step": 18161 }, { "epoch": 0.7683391149843473, "grad_norm": 2.8809750080108643, "learning_rate": 0.001, "loss": 3.424, "step": 18162 }, { "epoch": 0.7683814197478636, "grad_norm": 0.1813565492630005, "learning_rate": 0.001, "loss": 2.0456, "step": 18163 }, { "epoch": 0.76842372451138, "grad_norm": 0.15641596913337708, "learning_rate": 0.001, "loss": 2.2862, "step": 18164 }, { "epoch": 0.7684660292748964, "grad_norm": 0.1734810769557953, "learning_rate": 0.001, "loss": 3.9354, "step": 18165 }, { "epoch": 0.7685083340384127, "grad_norm": 0.17491766810417175, "learning_rate": 0.001, "loss": 2.1404, "step": 18166 }, { "epoch": 0.7685506388019291, "grad_norm": 0.1915895938873291, "learning_rate": 0.001, "loss": 2.9, "step": 18167 }, { "epoch": 0.7685929435654455, "grad_norm": 0.1676090508699417, "learning_rate": 0.001, "loss": 2.2399, "step": 18168 }, { "epoch": 0.7686352483289618, "grad_norm": 0.19483682513237, "learning_rate": 0.001, "loss": 2.1038, "step": 18169 }, { "epoch": 0.7686775530924782, "grad_norm": 0.19385400414466858, "learning_rate": 0.001, "loss": 1.5603, "step": 18170 }, { "epoch": 0.7687198578559946, "grad_norm": 0.15440934896469116, "learning_rate": 0.001, "loss": 1.5675, "step": 18171 }, { "epoch": 0.7687621626195109, "grad_norm": 0.1513241082429886, "learning_rate": 0.001, "loss": 1.92, "step": 18172 }, { "epoch": 0.7688044673830273, "grad_norm": 0.1512671858072281, "learning_rate": 0.001, "loss": 2.1714, "step": 18173 }, { "epoch": 0.7688467721465437, "grad_norm": 0.31254085898399353, "learning_rate": 0.001, "loss": 4.2346, "step": 18174 }, { "epoch": 0.76888907691006, "grad_norm": 0.14157380163669586, "learning_rate": 0.001, "loss": 1.5382, "step": 18175 }, { "epoch": 0.7689313816735764, "grad_norm": 0.1927814781665802, "learning_rate": 0.001, "loss": 2.2692, "step": 18176 }, { "epoch": 0.7689736864370929, "grad_norm": 0.6695175766944885, "learning_rate": 0.001, "loss": 2.4544, "step": 18177 }, { "epoch": 0.7690159912006092, "grad_norm": 0.15355220437049866, "learning_rate": 0.001, "loss": 1.2551, "step": 18178 }, { "epoch": 0.7690582959641256, "grad_norm": 1.621734380722046, "learning_rate": 0.001, "loss": 2.1838, "step": 18179 }, { "epoch": 0.769100600727642, "grad_norm": 0.13713295757770538, "learning_rate": 0.001, "loss": 1.9986, "step": 18180 }, { "epoch": 0.7691429054911583, "grad_norm": 0.3079672157764435, "learning_rate": 0.001, "loss": 2.9259, "step": 18181 }, { "epoch": 0.7691852102546747, "grad_norm": 0.7089863419532776, "learning_rate": 0.001, "loss": 2.9016, "step": 18182 }, { "epoch": 0.7692275150181911, "grad_norm": 1.244018316268921, "learning_rate": 0.001, "loss": 3.3858, "step": 18183 }, { "epoch": 0.7692698197817074, "grad_norm": 0.19387851655483246, "learning_rate": 0.001, "loss": 3.4741, "step": 18184 }, { "epoch": 0.7693121245452238, "grad_norm": 0.20450355112552643, "learning_rate": 0.001, "loss": 1.7754, "step": 18185 }, { "epoch": 0.7693544293087402, "grad_norm": 0.18945342302322388, "learning_rate": 0.001, "loss": 1.9047, "step": 18186 }, { "epoch": 0.7693967340722565, "grad_norm": 0.191238135099411, "learning_rate": 0.001, "loss": 2.1089, "step": 18187 }, { "epoch": 0.7694390388357729, "grad_norm": 0.2367229014635086, "learning_rate": 0.001, "loss": 2.7953, "step": 18188 }, { "epoch": 0.7694813435992893, "grad_norm": 2.5353989601135254, "learning_rate": 0.001, "loss": 1.9474, "step": 18189 }, { "epoch": 0.7695236483628056, "grad_norm": 2.5594029426574707, "learning_rate": 0.001, "loss": 1.7098, "step": 18190 }, { "epoch": 0.769565953126322, "grad_norm": 0.21357464790344238, "learning_rate": 0.001, "loss": 3.2049, "step": 18191 }, { "epoch": 0.7696082578898384, "grad_norm": 0.1735057681798935, "learning_rate": 0.001, "loss": 3.6089, "step": 18192 }, { "epoch": 0.7696505626533547, "grad_norm": 0.18362459540367126, "learning_rate": 0.001, "loss": 1.9546, "step": 18193 }, { "epoch": 0.7696928674168712, "grad_norm": 2.1002511978149414, "learning_rate": 0.001, "loss": 2.7012, "step": 18194 }, { "epoch": 0.7697351721803876, "grad_norm": 0.1482006460428238, "learning_rate": 0.001, "loss": 1.4662, "step": 18195 }, { "epoch": 0.7697774769439039, "grad_norm": 0.6873133778572083, "learning_rate": 0.001, "loss": 4.3751, "step": 18196 }, { "epoch": 0.7698197817074203, "grad_norm": 0.19415785372257233, "learning_rate": 0.001, "loss": 2.9213, "step": 18197 }, { "epoch": 0.7698620864709367, "grad_norm": 0.18056610226631165, "learning_rate": 0.001, "loss": 2.1502, "step": 18198 }, { "epoch": 0.769904391234453, "grad_norm": 0.15959571301937103, "learning_rate": 0.001, "loss": 2.5785, "step": 18199 }, { "epoch": 0.7699466959979694, "grad_norm": 1.567895770072937, "learning_rate": 0.001, "loss": 1.4776, "step": 18200 }, { "epoch": 0.7699890007614858, "grad_norm": 0.17681176960468292, "learning_rate": 0.001, "loss": 1.8845, "step": 18201 }, { "epoch": 0.7700313055250021, "grad_norm": 0.16770051419734955, "learning_rate": 0.001, "loss": 1.8349, "step": 18202 }, { "epoch": 0.7700736102885185, "grad_norm": 2.6900062561035156, "learning_rate": 0.001, "loss": 2.6632, "step": 18203 }, { "epoch": 0.7701159150520348, "grad_norm": 0.18311475217342377, "learning_rate": 0.001, "loss": 1.4378, "step": 18204 }, { "epoch": 0.7701582198155512, "grad_norm": 0.19490399956703186, "learning_rate": 0.001, "loss": 2.3233, "step": 18205 }, { "epoch": 0.7702005245790676, "grad_norm": 0.17303673923015594, "learning_rate": 0.001, "loss": 1.9792, "step": 18206 }, { "epoch": 0.7702428293425839, "grad_norm": 0.18099454045295715, "learning_rate": 0.001, "loss": 2.1376, "step": 18207 }, { "epoch": 0.7702851341061003, "grad_norm": 0.1846335083246231, "learning_rate": 0.001, "loss": 2.6739, "step": 18208 }, { "epoch": 0.7703274388696167, "grad_norm": 0.9617498517036438, "learning_rate": 0.001, "loss": 2.0516, "step": 18209 }, { "epoch": 0.770369743633133, "grad_norm": 0.1959528774023056, "learning_rate": 0.001, "loss": 1.6241, "step": 18210 }, { "epoch": 0.7704120483966495, "grad_norm": 0.18654143810272217, "learning_rate": 0.001, "loss": 2.8616, "step": 18211 }, { "epoch": 0.7704543531601659, "grad_norm": 0.19656072556972504, "learning_rate": 0.001, "loss": 1.9619, "step": 18212 }, { "epoch": 0.7704966579236822, "grad_norm": 0.42158690094947815, "learning_rate": 0.001, "loss": 2.3077, "step": 18213 }, { "epoch": 0.7705389626871986, "grad_norm": 0.2899084687232971, "learning_rate": 0.001, "loss": 1.9099, "step": 18214 }, { "epoch": 0.770581267450715, "grad_norm": 0.19894026219844818, "learning_rate": 0.001, "loss": 2.0858, "step": 18215 }, { "epoch": 0.7706235722142313, "grad_norm": 0.16096381843090057, "learning_rate": 0.001, "loss": 1.724, "step": 18216 }, { "epoch": 0.7706658769777477, "grad_norm": 0.19218985736370087, "learning_rate": 0.001, "loss": 2.7003, "step": 18217 }, { "epoch": 0.7707081817412641, "grad_norm": 0.24575604498386383, "learning_rate": 0.001, "loss": 2.7877, "step": 18218 }, { "epoch": 0.7707504865047804, "grad_norm": 0.17563903331756592, "learning_rate": 0.001, "loss": 2.192, "step": 18219 }, { "epoch": 0.7707927912682968, "grad_norm": 0.18912796676158905, "learning_rate": 0.001, "loss": 2.3836, "step": 18220 }, { "epoch": 0.7708350960318132, "grad_norm": 0.16776569187641144, "learning_rate": 0.001, "loss": 1.9411, "step": 18221 }, { "epoch": 0.7708774007953295, "grad_norm": 0.18997344374656677, "learning_rate": 0.001, "loss": 2.5683, "step": 18222 }, { "epoch": 0.7709197055588459, "grad_norm": 3.814743757247925, "learning_rate": 0.001, "loss": 2.4773, "step": 18223 }, { "epoch": 0.7709620103223623, "grad_norm": 0.17335738241672516, "learning_rate": 0.001, "loss": 1.8626, "step": 18224 }, { "epoch": 0.7710043150858786, "grad_norm": 1.7937777042388916, "learning_rate": 0.001, "loss": 2.8774, "step": 18225 }, { "epoch": 0.771046619849395, "grad_norm": 0.16799603402614594, "learning_rate": 0.001, "loss": 1.7608, "step": 18226 }, { "epoch": 0.7710889246129115, "grad_norm": 0.17312444746494293, "learning_rate": 0.001, "loss": 2.4437, "step": 18227 }, { "epoch": 0.7711312293764278, "grad_norm": 1.968964695930481, "learning_rate": 0.001, "loss": 1.916, "step": 18228 }, { "epoch": 0.7711735341399442, "grad_norm": 0.1496216207742691, "learning_rate": 0.001, "loss": 1.789, "step": 18229 }, { "epoch": 0.7712158389034606, "grad_norm": 0.15248148143291473, "learning_rate": 0.001, "loss": 2.2559, "step": 18230 }, { "epoch": 0.7712581436669769, "grad_norm": 0.2088804543018341, "learning_rate": 0.001, "loss": 2.0968, "step": 18231 }, { "epoch": 0.7713004484304933, "grad_norm": 0.20815947651863098, "learning_rate": 0.001, "loss": 2.6797, "step": 18232 }, { "epoch": 0.7713427531940097, "grad_norm": 0.5374844670295715, "learning_rate": 0.001, "loss": 3.3856, "step": 18233 }, { "epoch": 0.771385057957526, "grad_norm": 0.26020729541778564, "learning_rate": 0.001, "loss": 1.9107, "step": 18234 }, { "epoch": 0.7714273627210424, "grad_norm": 0.27381160855293274, "learning_rate": 0.001, "loss": 2.0782, "step": 18235 }, { "epoch": 0.7714696674845588, "grad_norm": 0.23972102999687195, "learning_rate": 0.001, "loss": 2.4307, "step": 18236 }, { "epoch": 0.7715119722480751, "grad_norm": 0.1825583428144455, "learning_rate": 0.001, "loss": 1.7901, "step": 18237 }, { "epoch": 0.7715542770115915, "grad_norm": 0.28334465622901917, "learning_rate": 0.001, "loss": 1.9967, "step": 18238 }, { "epoch": 0.7715965817751079, "grad_norm": 1.4033657312393188, "learning_rate": 0.001, "loss": 3.312, "step": 18239 }, { "epoch": 0.7716388865386242, "grad_norm": 0.20028841495513916, "learning_rate": 0.001, "loss": 1.7072, "step": 18240 }, { "epoch": 0.7716811913021406, "grad_norm": 0.1848575472831726, "learning_rate": 0.001, "loss": 1.6997, "step": 18241 }, { "epoch": 0.771723496065657, "grad_norm": 0.1979037970304489, "learning_rate": 0.001, "loss": 2.4092, "step": 18242 }, { "epoch": 0.7717658008291733, "grad_norm": 0.2577193081378937, "learning_rate": 0.001, "loss": 2.3364, "step": 18243 }, { "epoch": 0.7718081055926898, "grad_norm": 0.21102125942707062, "learning_rate": 0.001, "loss": 2.8763, "step": 18244 }, { "epoch": 0.7718504103562062, "grad_norm": 0.17282043397426605, "learning_rate": 0.001, "loss": 1.6663, "step": 18245 }, { "epoch": 0.7718927151197225, "grad_norm": 0.2045888453722, "learning_rate": 0.001, "loss": 2.0012, "step": 18246 }, { "epoch": 0.7719350198832389, "grad_norm": 0.22302895784378052, "learning_rate": 0.001, "loss": 2.485, "step": 18247 }, { "epoch": 0.7719773246467552, "grad_norm": 0.19768020510673523, "learning_rate": 0.001, "loss": 2.3736, "step": 18248 }, { "epoch": 0.7720196294102716, "grad_norm": 0.15795622766017914, "learning_rate": 0.001, "loss": 1.9866, "step": 18249 }, { "epoch": 0.772061934173788, "grad_norm": 0.1597718894481659, "learning_rate": 0.001, "loss": 2.3061, "step": 18250 }, { "epoch": 0.7721042389373043, "grad_norm": 0.14455056190490723, "learning_rate": 0.001, "loss": 2.7711, "step": 18251 }, { "epoch": 0.7721465437008207, "grad_norm": 0.16666211187839508, "learning_rate": 0.001, "loss": 2.4853, "step": 18252 }, { "epoch": 0.7721888484643371, "grad_norm": 0.15804295241832733, "learning_rate": 0.001, "loss": 2.7146, "step": 18253 }, { "epoch": 0.7722311532278534, "grad_norm": 0.14087384939193726, "learning_rate": 0.001, "loss": 1.6901, "step": 18254 }, { "epoch": 0.7722734579913698, "grad_norm": 0.24747240543365479, "learning_rate": 0.001, "loss": 2.3874, "step": 18255 }, { "epoch": 0.7723157627548862, "grad_norm": 0.20662999153137207, "learning_rate": 0.001, "loss": 1.7758, "step": 18256 }, { "epoch": 0.7723580675184025, "grad_norm": 0.15056248009204865, "learning_rate": 0.001, "loss": 2.264, "step": 18257 }, { "epoch": 0.7724003722819189, "grad_norm": 0.1590527892112732, "learning_rate": 0.001, "loss": 2.5744, "step": 18258 }, { "epoch": 0.7724426770454353, "grad_norm": 0.18478628993034363, "learning_rate": 0.001, "loss": 2.4574, "step": 18259 }, { "epoch": 0.7724849818089516, "grad_norm": 0.47995075583457947, "learning_rate": 0.001, "loss": 2.2169, "step": 18260 }, { "epoch": 0.772527286572468, "grad_norm": 0.18007495999336243, "learning_rate": 0.001, "loss": 2.0898, "step": 18261 }, { "epoch": 0.7725695913359845, "grad_norm": 0.15627458691596985, "learning_rate": 0.001, "loss": 2.0773, "step": 18262 }, { "epoch": 0.7726118960995008, "grad_norm": 0.18800586462020874, "learning_rate": 0.001, "loss": 3.2193, "step": 18263 }, { "epoch": 0.7726542008630172, "grad_norm": 0.3073885440826416, "learning_rate": 0.001, "loss": 2.0712, "step": 18264 }, { "epoch": 0.7726965056265336, "grad_norm": 0.1837342083454132, "learning_rate": 0.001, "loss": 3.1809, "step": 18265 }, { "epoch": 0.7727388103900499, "grad_norm": 0.24296334385871887, "learning_rate": 0.001, "loss": 2.2377, "step": 18266 }, { "epoch": 0.7727811151535663, "grad_norm": 0.17990991473197937, "learning_rate": 0.001, "loss": 2.3957, "step": 18267 }, { "epoch": 0.7728234199170827, "grad_norm": 0.1581333428621292, "learning_rate": 0.001, "loss": 2.1276, "step": 18268 }, { "epoch": 0.772865724680599, "grad_norm": 0.1533811092376709, "learning_rate": 0.001, "loss": 1.9765, "step": 18269 }, { "epoch": 0.7729080294441154, "grad_norm": 0.3548398017883301, "learning_rate": 0.001, "loss": 3.269, "step": 18270 }, { "epoch": 0.7729503342076318, "grad_norm": 0.16134926676750183, "learning_rate": 0.001, "loss": 2.0527, "step": 18271 }, { "epoch": 0.7729926389711481, "grad_norm": 0.713190495967865, "learning_rate": 0.001, "loss": 1.8366, "step": 18272 }, { "epoch": 0.7730349437346645, "grad_norm": 0.8022021055221558, "learning_rate": 0.001, "loss": 1.7423, "step": 18273 }, { "epoch": 0.7730772484981809, "grad_norm": 0.38094237446784973, "learning_rate": 0.001, "loss": 2.4639, "step": 18274 }, { "epoch": 0.7731195532616972, "grad_norm": 0.1615244299173355, "learning_rate": 0.001, "loss": 2.2037, "step": 18275 }, { "epoch": 0.7731618580252136, "grad_norm": 0.23506537079811096, "learning_rate": 0.001, "loss": 2.9041, "step": 18276 }, { "epoch": 0.77320416278873, "grad_norm": 0.26342645287513733, "learning_rate": 0.001, "loss": 2.0227, "step": 18277 }, { "epoch": 0.7732464675522464, "grad_norm": 0.16665269434452057, "learning_rate": 0.001, "loss": 1.5072, "step": 18278 }, { "epoch": 0.7732887723157628, "grad_norm": 0.15748518705368042, "learning_rate": 0.001, "loss": 1.9506, "step": 18279 }, { "epoch": 0.7733310770792792, "grad_norm": 0.20437052845954895, "learning_rate": 0.001, "loss": 1.7317, "step": 18280 }, { "epoch": 0.7733733818427955, "grad_norm": 0.1785205602645874, "learning_rate": 0.001, "loss": 2.6008, "step": 18281 }, { "epoch": 0.7734156866063119, "grad_norm": 0.15185962617397308, "learning_rate": 0.001, "loss": 2.136, "step": 18282 }, { "epoch": 0.7734579913698283, "grad_norm": 0.1808384209871292, "learning_rate": 0.001, "loss": 1.8633, "step": 18283 }, { "epoch": 0.7735002961333446, "grad_norm": 0.2651410698890686, "learning_rate": 0.001, "loss": 2.2161, "step": 18284 }, { "epoch": 0.773542600896861, "grad_norm": 0.15414567291736603, "learning_rate": 0.001, "loss": 2.2774, "step": 18285 }, { "epoch": 0.7735849056603774, "grad_norm": 0.16589045524597168, "learning_rate": 0.001, "loss": 1.5533, "step": 18286 }, { "epoch": 0.7736272104238937, "grad_norm": 0.15032519400119781, "learning_rate": 0.001, "loss": 1.6888, "step": 18287 }, { "epoch": 0.7736695151874101, "grad_norm": 0.7096246480941772, "learning_rate": 0.001, "loss": 1.9838, "step": 18288 }, { "epoch": 0.7737118199509265, "grad_norm": 0.16361872851848602, "learning_rate": 0.001, "loss": 1.6044, "step": 18289 }, { "epoch": 0.7737541247144428, "grad_norm": 0.3765888810157776, "learning_rate": 0.001, "loss": 1.7739, "step": 18290 }, { "epoch": 0.7737964294779592, "grad_norm": 0.3084273934364319, "learning_rate": 0.001, "loss": 1.2695, "step": 18291 }, { "epoch": 0.7738387342414756, "grad_norm": 0.17378173768520355, "learning_rate": 0.001, "loss": 1.9922, "step": 18292 }, { "epoch": 0.7738810390049919, "grad_norm": 0.671561598777771, "learning_rate": 0.001, "loss": 2.7077, "step": 18293 }, { "epoch": 0.7739233437685084, "grad_norm": 0.17134688794612885, "learning_rate": 0.001, "loss": 2.1455, "step": 18294 }, { "epoch": 0.7739656485320247, "grad_norm": 0.15048515796661377, "learning_rate": 0.001, "loss": 2.2337, "step": 18295 }, { "epoch": 0.7740079532955411, "grad_norm": 0.14688226580619812, "learning_rate": 0.001, "loss": 2.2607, "step": 18296 }, { "epoch": 0.7740502580590575, "grad_norm": 0.15102264285087585, "learning_rate": 0.001, "loss": 2.3796, "step": 18297 }, { "epoch": 0.7740925628225738, "grad_norm": 0.15952768921852112, "learning_rate": 0.001, "loss": 2.2732, "step": 18298 }, { "epoch": 0.7741348675860902, "grad_norm": 0.15502522885799408, "learning_rate": 0.001, "loss": 2.3354, "step": 18299 }, { "epoch": 0.7741771723496066, "grad_norm": 0.17360159754753113, "learning_rate": 0.001, "loss": 1.4465, "step": 18300 }, { "epoch": 0.7742194771131229, "grad_norm": 0.14462324976921082, "learning_rate": 0.001, "loss": 1.5657, "step": 18301 }, { "epoch": 0.7742617818766393, "grad_norm": 0.22618313133716583, "learning_rate": 0.001, "loss": 1.497, "step": 18302 }, { "epoch": 0.7743040866401557, "grad_norm": 0.14733265340328217, "learning_rate": 0.001, "loss": 2.7168, "step": 18303 }, { "epoch": 0.774346391403672, "grad_norm": 0.16763785481452942, "learning_rate": 0.001, "loss": 2.0218, "step": 18304 }, { "epoch": 0.7743886961671884, "grad_norm": 0.14719419181346893, "learning_rate": 0.001, "loss": 1.8908, "step": 18305 }, { "epoch": 0.7744310009307048, "grad_norm": 0.14694784581661224, "learning_rate": 0.001, "loss": 2.3633, "step": 18306 }, { "epoch": 0.7744733056942211, "grad_norm": 0.1397048681974411, "learning_rate": 0.001, "loss": 1.574, "step": 18307 }, { "epoch": 0.7745156104577375, "grad_norm": 0.138363778591156, "learning_rate": 0.001, "loss": 2.1553, "step": 18308 }, { "epoch": 0.774557915221254, "grad_norm": 0.17978732287883759, "learning_rate": 0.001, "loss": 2.1659, "step": 18309 }, { "epoch": 0.7746002199847702, "grad_norm": 2.1564154624938965, "learning_rate": 0.001, "loss": 2.5153, "step": 18310 }, { "epoch": 0.7746425247482867, "grad_norm": 0.1512441188097, "learning_rate": 0.001, "loss": 1.7129, "step": 18311 }, { "epoch": 0.7746848295118031, "grad_norm": 0.21785925328731537, "learning_rate": 0.001, "loss": 2.0366, "step": 18312 }, { "epoch": 0.7747271342753194, "grad_norm": 0.5639128684997559, "learning_rate": 0.001, "loss": 1.4766, "step": 18313 }, { "epoch": 0.7747694390388358, "grad_norm": 0.2070891112089157, "learning_rate": 0.001, "loss": 2.2829, "step": 18314 }, { "epoch": 0.7748117438023522, "grad_norm": 0.16625727713108063, "learning_rate": 0.001, "loss": 1.5067, "step": 18315 }, { "epoch": 0.7748540485658685, "grad_norm": 0.5115146040916443, "learning_rate": 0.001, "loss": 2.7549, "step": 18316 }, { "epoch": 0.7748963533293849, "grad_norm": 0.17250525951385498, "learning_rate": 0.001, "loss": 1.7243, "step": 18317 }, { "epoch": 0.7749386580929013, "grad_norm": 1.3713905811309814, "learning_rate": 0.001, "loss": 2.225, "step": 18318 }, { "epoch": 0.7749809628564176, "grad_norm": 0.21677060425281525, "learning_rate": 0.001, "loss": 3.3116, "step": 18319 }, { "epoch": 0.775023267619934, "grad_norm": 0.21051226556301117, "learning_rate": 0.001, "loss": 2.2168, "step": 18320 }, { "epoch": 0.7750655723834504, "grad_norm": 0.15511994063854218, "learning_rate": 0.001, "loss": 1.6332, "step": 18321 }, { "epoch": 0.7751078771469667, "grad_norm": 0.19078783690929413, "learning_rate": 0.001, "loss": 2.149, "step": 18322 }, { "epoch": 0.7751501819104831, "grad_norm": 0.186100572347641, "learning_rate": 0.001, "loss": 2.3936, "step": 18323 }, { "epoch": 0.7751924866739995, "grad_norm": 0.19699545204639435, "learning_rate": 0.001, "loss": 1.6859, "step": 18324 }, { "epoch": 0.7752347914375158, "grad_norm": 0.1534021496772766, "learning_rate": 0.001, "loss": 1.7206, "step": 18325 }, { "epoch": 0.7752770962010322, "grad_norm": 0.1593978852033615, "learning_rate": 0.001, "loss": 2.3795, "step": 18326 }, { "epoch": 0.7753194009645487, "grad_norm": 0.9152129292488098, "learning_rate": 0.001, "loss": 2.5192, "step": 18327 }, { "epoch": 0.775361705728065, "grad_norm": 3.8987066745758057, "learning_rate": 0.001, "loss": 1.7634, "step": 18328 }, { "epoch": 0.7754040104915814, "grad_norm": 0.16911078989505768, "learning_rate": 0.001, "loss": 1.7858, "step": 18329 }, { "epoch": 0.7754463152550978, "grad_norm": 0.1814729869365692, "learning_rate": 0.001, "loss": 2.2558, "step": 18330 }, { "epoch": 0.7754886200186141, "grad_norm": 0.15336821973323822, "learning_rate": 0.001, "loss": 2.127, "step": 18331 }, { "epoch": 0.7755309247821305, "grad_norm": 0.7835121750831604, "learning_rate": 0.001, "loss": 2.1583, "step": 18332 }, { "epoch": 0.7755732295456469, "grad_norm": 0.19294503331184387, "learning_rate": 0.001, "loss": 2.2585, "step": 18333 }, { "epoch": 0.7756155343091632, "grad_norm": 6.45863151550293, "learning_rate": 0.001, "loss": 2.1715, "step": 18334 }, { "epoch": 0.7756578390726796, "grad_norm": 0.17118413746356964, "learning_rate": 0.001, "loss": 1.3514, "step": 18335 }, { "epoch": 0.775700143836196, "grad_norm": 0.24696171283721924, "learning_rate": 0.001, "loss": 2.2035, "step": 18336 }, { "epoch": 0.7757424485997123, "grad_norm": 0.33256638050079346, "learning_rate": 0.001, "loss": 2.5719, "step": 18337 }, { "epoch": 0.7757847533632287, "grad_norm": 0.19757241010665894, "learning_rate": 0.001, "loss": 2.4873, "step": 18338 }, { "epoch": 0.775827058126745, "grad_norm": 0.17525336146354675, "learning_rate": 0.001, "loss": 2.2333, "step": 18339 }, { "epoch": 0.7758693628902614, "grad_norm": 0.15870629251003265, "learning_rate": 0.001, "loss": 1.8791, "step": 18340 }, { "epoch": 0.7759116676537778, "grad_norm": 0.13413628935813904, "learning_rate": 0.001, "loss": 2.8583, "step": 18341 }, { "epoch": 0.7759539724172941, "grad_norm": 0.15136483311653137, "learning_rate": 0.001, "loss": 2.368, "step": 18342 }, { "epoch": 0.7759962771808105, "grad_norm": 0.15002518892288208, "learning_rate": 0.001, "loss": 1.6174, "step": 18343 }, { "epoch": 0.776038581944327, "grad_norm": 0.15149597823619843, "learning_rate": 0.001, "loss": 2.7551, "step": 18344 }, { "epoch": 0.7760808867078433, "grad_norm": 0.548881471157074, "learning_rate": 0.001, "loss": 1.8676, "step": 18345 }, { "epoch": 0.7761231914713597, "grad_norm": 0.1294621080160141, "learning_rate": 0.001, "loss": 1.3881, "step": 18346 }, { "epoch": 0.7761654962348761, "grad_norm": 0.1591734141111374, "learning_rate": 0.001, "loss": 2.6516, "step": 18347 }, { "epoch": 0.7762078009983924, "grad_norm": 0.7913060784339905, "learning_rate": 0.001, "loss": 2.4505, "step": 18348 }, { "epoch": 0.7762501057619088, "grad_norm": 8.328814506530762, "learning_rate": 0.001, "loss": 2.4966, "step": 18349 }, { "epoch": 0.7762924105254252, "grad_norm": 0.15105003118515015, "learning_rate": 0.001, "loss": 2.0033, "step": 18350 }, { "epoch": 0.7763347152889415, "grad_norm": 0.1463308483362198, "learning_rate": 0.001, "loss": 2.31, "step": 18351 }, { "epoch": 0.7763770200524579, "grad_norm": 1.781333327293396, "learning_rate": 0.001, "loss": 3.0765, "step": 18352 }, { "epoch": 0.7764193248159743, "grad_norm": 0.17749984562397003, "learning_rate": 0.001, "loss": 2.4567, "step": 18353 }, { "epoch": 0.7764616295794906, "grad_norm": 0.16004282236099243, "learning_rate": 0.001, "loss": 2.1417, "step": 18354 }, { "epoch": 0.776503934343007, "grad_norm": 0.18965484201908112, "learning_rate": 0.001, "loss": 2.5025, "step": 18355 }, { "epoch": 0.7765462391065234, "grad_norm": 0.4342174530029297, "learning_rate": 0.001, "loss": 1.8881, "step": 18356 }, { "epoch": 0.7765885438700397, "grad_norm": 1.2400356531143188, "learning_rate": 0.001, "loss": 3.1552, "step": 18357 }, { "epoch": 0.7766308486335561, "grad_norm": 0.33312109112739563, "learning_rate": 0.001, "loss": 2.3914, "step": 18358 }, { "epoch": 0.7766731533970725, "grad_norm": 0.1646675318479538, "learning_rate": 0.001, "loss": 2.3323, "step": 18359 }, { "epoch": 0.7767154581605888, "grad_norm": 0.16873008012771606, "learning_rate": 0.001, "loss": 2.777, "step": 18360 }, { "epoch": 0.7767577629241053, "grad_norm": 0.2983357906341553, "learning_rate": 0.001, "loss": 2.6237, "step": 18361 }, { "epoch": 0.7768000676876217, "grad_norm": 10.579153060913086, "learning_rate": 0.001, "loss": 1.5843, "step": 18362 }, { "epoch": 0.776842372451138, "grad_norm": 0.1804652363061905, "learning_rate": 0.001, "loss": 2.4111, "step": 18363 }, { "epoch": 0.7768846772146544, "grad_norm": 0.24087312817573547, "learning_rate": 0.001, "loss": 2.2012, "step": 18364 }, { "epoch": 0.7769269819781708, "grad_norm": 0.19113610684871674, "learning_rate": 0.001, "loss": 2.2822, "step": 18365 }, { "epoch": 0.7769692867416871, "grad_norm": 0.20484258234500885, "learning_rate": 0.001, "loss": 2.0493, "step": 18366 }, { "epoch": 0.7770115915052035, "grad_norm": 0.19894827902317047, "learning_rate": 0.001, "loss": 3.3502, "step": 18367 }, { "epoch": 0.7770538962687199, "grad_norm": 0.20970451831817627, "learning_rate": 0.001, "loss": 1.6449, "step": 18368 }, { "epoch": 0.7770962010322362, "grad_norm": 0.1429346650838852, "learning_rate": 0.001, "loss": 1.8039, "step": 18369 }, { "epoch": 0.7771385057957526, "grad_norm": 0.16938546299934387, "learning_rate": 0.001, "loss": 1.9748, "step": 18370 }, { "epoch": 0.777180810559269, "grad_norm": 0.9999366998672485, "learning_rate": 0.001, "loss": 1.5648, "step": 18371 }, { "epoch": 0.7772231153227853, "grad_norm": 0.16697566211223602, "learning_rate": 0.001, "loss": 1.9137, "step": 18372 }, { "epoch": 0.7772654200863017, "grad_norm": 0.1452540159225464, "learning_rate": 0.001, "loss": 2.1988, "step": 18373 }, { "epoch": 0.7773077248498181, "grad_norm": 0.19160769879817963, "learning_rate": 0.001, "loss": 3.139, "step": 18374 }, { "epoch": 0.7773500296133344, "grad_norm": 0.1582278609275818, "learning_rate": 0.001, "loss": 2.0894, "step": 18375 }, { "epoch": 0.7773923343768508, "grad_norm": 0.27617523074150085, "learning_rate": 0.001, "loss": 2.507, "step": 18376 }, { "epoch": 0.7774346391403673, "grad_norm": 0.15912631154060364, "learning_rate": 0.001, "loss": 1.2789, "step": 18377 }, { "epoch": 0.7774769439038836, "grad_norm": 0.13841427862644196, "learning_rate": 0.001, "loss": 1.6491, "step": 18378 }, { "epoch": 0.7775192486674, "grad_norm": 0.1486634612083435, "learning_rate": 0.001, "loss": 1.6654, "step": 18379 }, { "epoch": 0.7775615534309164, "grad_norm": 0.16134053468704224, "learning_rate": 0.001, "loss": 1.6808, "step": 18380 }, { "epoch": 0.7776038581944327, "grad_norm": 0.5859285593032837, "learning_rate": 0.001, "loss": 1.9378, "step": 18381 }, { "epoch": 0.7776461629579491, "grad_norm": 0.1989150196313858, "learning_rate": 0.001, "loss": 2.8664, "step": 18382 }, { "epoch": 0.7776884677214654, "grad_norm": 0.21168352663516998, "learning_rate": 0.001, "loss": 2.7791, "step": 18383 }, { "epoch": 0.7777307724849818, "grad_norm": 0.16079917550086975, "learning_rate": 0.001, "loss": 1.5344, "step": 18384 }, { "epoch": 0.7777730772484982, "grad_norm": 0.17647407948970795, "learning_rate": 0.001, "loss": 2.9867, "step": 18385 }, { "epoch": 0.7778153820120145, "grad_norm": 0.15070800483226776, "learning_rate": 0.001, "loss": 2.0682, "step": 18386 }, { "epoch": 0.7778576867755309, "grad_norm": 0.1633099466562271, "learning_rate": 0.001, "loss": 1.5573, "step": 18387 }, { "epoch": 0.7778999915390473, "grad_norm": 0.1457083374261856, "learning_rate": 0.001, "loss": 1.8068, "step": 18388 }, { "epoch": 0.7779422963025636, "grad_norm": 0.1393345296382904, "learning_rate": 0.001, "loss": 2.3662, "step": 18389 }, { "epoch": 0.77798460106608, "grad_norm": 0.17682814598083496, "learning_rate": 0.001, "loss": 2.6571, "step": 18390 }, { "epoch": 0.7780269058295964, "grad_norm": 0.1801498383283615, "learning_rate": 0.001, "loss": 2.6531, "step": 18391 }, { "epoch": 0.7780692105931127, "grad_norm": 0.14461541175842285, "learning_rate": 0.001, "loss": 1.6549, "step": 18392 }, { "epoch": 0.7781115153566291, "grad_norm": 0.16726653277873993, "learning_rate": 0.001, "loss": 2.2859, "step": 18393 }, { "epoch": 0.7781538201201456, "grad_norm": 0.1532205492258072, "learning_rate": 0.001, "loss": 1.4662, "step": 18394 }, { "epoch": 0.7781961248836619, "grad_norm": 0.13347402215003967, "learning_rate": 0.001, "loss": 2.1848, "step": 18395 }, { "epoch": 0.7782384296471783, "grad_norm": 0.20856840908527374, "learning_rate": 0.001, "loss": 2.0527, "step": 18396 }, { "epoch": 0.7782807344106947, "grad_norm": 0.15012764930725098, "learning_rate": 0.001, "loss": 2.0695, "step": 18397 }, { "epoch": 0.778323039174211, "grad_norm": 2.177178144454956, "learning_rate": 0.001, "loss": 1.4426, "step": 18398 }, { "epoch": 0.7783653439377274, "grad_norm": 0.22611618041992188, "learning_rate": 0.001, "loss": 2.052, "step": 18399 }, { "epoch": 0.7784076487012438, "grad_norm": 0.1328599900007248, "learning_rate": 0.001, "loss": 1.7561, "step": 18400 }, { "epoch": 0.7784499534647601, "grad_norm": 0.14337456226348877, "learning_rate": 0.001, "loss": 1.1991, "step": 18401 }, { "epoch": 0.7784922582282765, "grad_norm": 0.15648649632930756, "learning_rate": 0.001, "loss": 2.0966, "step": 18402 }, { "epoch": 0.7785345629917929, "grad_norm": 0.15187643468379974, "learning_rate": 0.001, "loss": 1.8629, "step": 18403 }, { "epoch": 0.7785768677553092, "grad_norm": 0.12914754450321198, "learning_rate": 0.001, "loss": 1.6909, "step": 18404 }, { "epoch": 0.7786191725188256, "grad_norm": 0.14733096957206726, "learning_rate": 0.001, "loss": 1.9314, "step": 18405 }, { "epoch": 0.778661477282342, "grad_norm": 0.19098341464996338, "learning_rate": 0.001, "loss": 2.2461, "step": 18406 }, { "epoch": 0.7787037820458583, "grad_norm": 0.14427435398101807, "learning_rate": 0.001, "loss": 1.5976, "step": 18407 }, { "epoch": 0.7787460868093747, "grad_norm": 0.14376568794250488, "learning_rate": 0.001, "loss": 1.7142, "step": 18408 }, { "epoch": 0.7787883915728911, "grad_norm": 0.1582573801279068, "learning_rate": 0.001, "loss": 3.5712, "step": 18409 }, { "epoch": 0.7788306963364074, "grad_norm": 0.16873016953468323, "learning_rate": 0.001, "loss": 2.0883, "step": 18410 }, { "epoch": 0.7788730010999239, "grad_norm": 0.14867006242275238, "learning_rate": 0.001, "loss": 1.5226, "step": 18411 }, { "epoch": 0.7789153058634403, "grad_norm": 0.1830413043498993, "learning_rate": 0.001, "loss": 1.5773, "step": 18412 }, { "epoch": 0.7789576106269566, "grad_norm": 0.21609345078468323, "learning_rate": 0.001, "loss": 2.2652, "step": 18413 }, { "epoch": 0.778999915390473, "grad_norm": 0.36571288108825684, "learning_rate": 0.001, "loss": 1.8297, "step": 18414 }, { "epoch": 0.7790422201539894, "grad_norm": 0.15142600238323212, "learning_rate": 0.001, "loss": 1.5149, "step": 18415 }, { "epoch": 0.7790845249175057, "grad_norm": 0.7442256212234497, "learning_rate": 0.001, "loss": 4.0185, "step": 18416 }, { "epoch": 0.7791268296810221, "grad_norm": 0.28748589754104614, "learning_rate": 0.001, "loss": 3.1263, "step": 18417 }, { "epoch": 0.7791691344445385, "grad_norm": 0.16540317237377167, "learning_rate": 0.001, "loss": 3.5016, "step": 18418 }, { "epoch": 0.7792114392080548, "grad_norm": 0.15089643001556396, "learning_rate": 0.001, "loss": 2.1887, "step": 18419 }, { "epoch": 0.7792537439715712, "grad_norm": 0.18112583458423615, "learning_rate": 0.001, "loss": 1.6532, "step": 18420 }, { "epoch": 0.7792960487350876, "grad_norm": 0.14614035189151764, "learning_rate": 0.001, "loss": 1.8557, "step": 18421 }, { "epoch": 0.7793383534986039, "grad_norm": 0.1359279751777649, "learning_rate": 0.001, "loss": 1.6901, "step": 18422 }, { "epoch": 0.7793806582621203, "grad_norm": 1.8162649869918823, "learning_rate": 0.001, "loss": 2.0045, "step": 18423 }, { "epoch": 0.7794229630256367, "grad_norm": 0.33419543504714966, "learning_rate": 0.001, "loss": 2.7161, "step": 18424 }, { "epoch": 0.779465267789153, "grad_norm": 0.14467467367649078, "learning_rate": 0.001, "loss": 1.7674, "step": 18425 }, { "epoch": 0.7795075725526694, "grad_norm": 0.17725689709186554, "learning_rate": 0.001, "loss": 1.6678, "step": 18426 }, { "epoch": 0.7795498773161859, "grad_norm": 0.1722497195005417, "learning_rate": 0.001, "loss": 1.9924, "step": 18427 }, { "epoch": 0.7795921820797022, "grad_norm": 0.17150290310382843, "learning_rate": 0.001, "loss": 1.5635, "step": 18428 }, { "epoch": 0.7796344868432186, "grad_norm": 0.14820575714111328, "learning_rate": 0.001, "loss": 2.9991, "step": 18429 }, { "epoch": 0.7796767916067349, "grad_norm": 0.18718744814395905, "learning_rate": 0.001, "loss": 2.1338, "step": 18430 }, { "epoch": 0.7797190963702513, "grad_norm": 0.17354276776313782, "learning_rate": 0.001, "loss": 2.0303, "step": 18431 }, { "epoch": 0.7797614011337677, "grad_norm": 0.17366386950016022, "learning_rate": 0.001, "loss": 1.5947, "step": 18432 }, { "epoch": 0.779803705897284, "grad_norm": 0.1608889102935791, "learning_rate": 0.001, "loss": 3.5028, "step": 18433 }, { "epoch": 0.7798460106608004, "grad_norm": 0.16302143037319183, "learning_rate": 0.001, "loss": 2.1618, "step": 18434 }, { "epoch": 0.7798883154243168, "grad_norm": 0.19998212158679962, "learning_rate": 0.001, "loss": 1.3854, "step": 18435 }, { "epoch": 0.7799306201878331, "grad_norm": 0.20762750506401062, "learning_rate": 0.001, "loss": 3.0683, "step": 18436 }, { "epoch": 0.7799729249513495, "grad_norm": 0.1632770299911499, "learning_rate": 0.001, "loss": 1.3391, "step": 18437 }, { "epoch": 0.7800152297148659, "grad_norm": 0.4506990909576416, "learning_rate": 0.001, "loss": 3.0315, "step": 18438 }, { "epoch": 0.7800575344783822, "grad_norm": 0.1689714938402176, "learning_rate": 0.001, "loss": 2.7837, "step": 18439 }, { "epoch": 0.7800998392418986, "grad_norm": 0.16053323447704315, "learning_rate": 0.001, "loss": 1.4578, "step": 18440 }, { "epoch": 0.780142144005415, "grad_norm": 0.14367254078388214, "learning_rate": 0.001, "loss": 2.418, "step": 18441 }, { "epoch": 0.7801844487689313, "grad_norm": 0.6091342568397522, "learning_rate": 0.001, "loss": 2.745, "step": 18442 }, { "epoch": 0.7802267535324477, "grad_norm": 0.2298995554447174, "learning_rate": 0.001, "loss": 3.9425, "step": 18443 }, { "epoch": 0.7802690582959642, "grad_norm": 0.15140703320503235, "learning_rate": 0.001, "loss": 2.7755, "step": 18444 }, { "epoch": 0.7803113630594805, "grad_norm": 0.29984918236732483, "learning_rate": 0.001, "loss": 2.1757, "step": 18445 }, { "epoch": 0.7803536678229969, "grad_norm": 0.18075025081634521, "learning_rate": 0.001, "loss": 1.7825, "step": 18446 }, { "epoch": 0.7803959725865133, "grad_norm": 0.23606182634830475, "learning_rate": 0.001, "loss": 1.9542, "step": 18447 }, { "epoch": 0.7804382773500296, "grad_norm": 0.17639142274856567, "learning_rate": 0.001, "loss": 2.5699, "step": 18448 }, { "epoch": 0.780480582113546, "grad_norm": 0.3127742111682892, "learning_rate": 0.001, "loss": 1.3408, "step": 18449 }, { "epoch": 0.7805228868770624, "grad_norm": 0.2586624026298523, "learning_rate": 0.001, "loss": 1.4728, "step": 18450 }, { "epoch": 0.7805651916405787, "grad_norm": 0.14627474546432495, "learning_rate": 0.001, "loss": 1.5882, "step": 18451 }, { "epoch": 0.7806074964040951, "grad_norm": 0.22566846013069153, "learning_rate": 0.001, "loss": 1.6143, "step": 18452 }, { "epoch": 0.7806498011676115, "grad_norm": 0.5345284342765808, "learning_rate": 0.001, "loss": 2.4135, "step": 18453 }, { "epoch": 0.7806921059311278, "grad_norm": 0.16989070177078247, "learning_rate": 0.001, "loss": 2.8811, "step": 18454 }, { "epoch": 0.7807344106946442, "grad_norm": 0.7486119866371155, "learning_rate": 0.001, "loss": 2.1102, "step": 18455 }, { "epoch": 0.7807767154581606, "grad_norm": 0.1473877727985382, "learning_rate": 0.001, "loss": 2.0649, "step": 18456 }, { "epoch": 0.7808190202216769, "grad_norm": 0.1684478372335434, "learning_rate": 0.001, "loss": 1.9911, "step": 18457 }, { "epoch": 0.7808613249851933, "grad_norm": 2.9781100749969482, "learning_rate": 0.001, "loss": 1.858, "step": 18458 }, { "epoch": 0.7809036297487097, "grad_norm": 0.13891150057315826, "learning_rate": 0.001, "loss": 1.716, "step": 18459 }, { "epoch": 0.780945934512226, "grad_norm": 0.16608332097530365, "learning_rate": 0.001, "loss": 1.6527, "step": 18460 }, { "epoch": 0.7809882392757425, "grad_norm": 0.161564901471138, "learning_rate": 0.001, "loss": 2.0179, "step": 18461 }, { "epoch": 0.7810305440392589, "grad_norm": 0.17590850591659546, "learning_rate": 0.001, "loss": 1.8993, "step": 18462 }, { "epoch": 0.7810728488027752, "grad_norm": 0.15447676181793213, "learning_rate": 0.001, "loss": 2.4161, "step": 18463 }, { "epoch": 0.7811151535662916, "grad_norm": 0.13972973823547363, "learning_rate": 0.001, "loss": 2.4988, "step": 18464 }, { "epoch": 0.781157458329808, "grad_norm": 0.1585020124912262, "learning_rate": 0.001, "loss": 2.4953, "step": 18465 }, { "epoch": 0.7811997630933243, "grad_norm": 0.7317923307418823, "learning_rate": 0.001, "loss": 2.9961, "step": 18466 }, { "epoch": 0.7812420678568407, "grad_norm": 0.1768050640821457, "learning_rate": 0.001, "loss": 3.3468, "step": 18467 }, { "epoch": 0.7812843726203571, "grad_norm": 0.22169825434684753, "learning_rate": 0.001, "loss": 2.0874, "step": 18468 }, { "epoch": 0.7813266773838734, "grad_norm": 0.16282053291797638, "learning_rate": 0.001, "loss": 2.3298, "step": 18469 }, { "epoch": 0.7813689821473898, "grad_norm": 0.13636824488639832, "learning_rate": 0.001, "loss": 1.5925, "step": 18470 }, { "epoch": 0.7814112869109062, "grad_norm": 0.1489405781030655, "learning_rate": 0.001, "loss": 2.6353, "step": 18471 }, { "epoch": 0.7814535916744225, "grad_norm": 0.19491471350193024, "learning_rate": 0.001, "loss": 3.0984, "step": 18472 }, { "epoch": 0.7814958964379389, "grad_norm": 0.22249197959899902, "learning_rate": 0.001, "loss": 2.7739, "step": 18473 }, { "epoch": 0.7815382012014552, "grad_norm": 0.21340025961399078, "learning_rate": 0.001, "loss": 2.2505, "step": 18474 }, { "epoch": 0.7815805059649716, "grad_norm": 0.16923391819000244, "learning_rate": 0.001, "loss": 2.5597, "step": 18475 }, { "epoch": 0.781622810728488, "grad_norm": 0.1564716249704361, "learning_rate": 0.001, "loss": 2.6755, "step": 18476 }, { "epoch": 0.7816651154920043, "grad_norm": 0.18122835457324982, "learning_rate": 0.001, "loss": 1.8659, "step": 18477 }, { "epoch": 0.7817074202555208, "grad_norm": 0.5826538801193237, "learning_rate": 0.001, "loss": 2.1115, "step": 18478 }, { "epoch": 0.7817497250190372, "grad_norm": 0.15686936676502228, "learning_rate": 0.001, "loss": 2.1577, "step": 18479 }, { "epoch": 0.7817920297825535, "grad_norm": 0.1563858985900879, "learning_rate": 0.001, "loss": 1.9294, "step": 18480 }, { "epoch": 0.7818343345460699, "grad_norm": 0.6455838680267334, "learning_rate": 0.001, "loss": 2.0729, "step": 18481 }, { "epoch": 0.7818766393095863, "grad_norm": 0.16011108458042145, "learning_rate": 0.001, "loss": 2.1436, "step": 18482 }, { "epoch": 0.7819189440731026, "grad_norm": 0.1725652515888214, "learning_rate": 0.001, "loss": 2.3672, "step": 18483 }, { "epoch": 0.781961248836619, "grad_norm": 0.17138071358203888, "learning_rate": 0.001, "loss": 2.05, "step": 18484 }, { "epoch": 0.7820035536001354, "grad_norm": 0.16126024723052979, "learning_rate": 0.001, "loss": 1.8446, "step": 18485 }, { "epoch": 0.7820458583636517, "grad_norm": 0.20708617568016052, "learning_rate": 0.001, "loss": 2.3597, "step": 18486 }, { "epoch": 0.7820881631271681, "grad_norm": 0.3041653037071228, "learning_rate": 0.001, "loss": 2.9242, "step": 18487 }, { "epoch": 0.7821304678906845, "grad_norm": 0.2294047325849533, "learning_rate": 0.001, "loss": 2.7331, "step": 18488 }, { "epoch": 0.7821727726542008, "grad_norm": 0.16051490604877472, "learning_rate": 0.001, "loss": 1.3815, "step": 18489 }, { "epoch": 0.7822150774177172, "grad_norm": 0.1533360332250595, "learning_rate": 0.001, "loss": 1.7686, "step": 18490 }, { "epoch": 0.7822573821812336, "grad_norm": 0.3606812655925751, "learning_rate": 0.001, "loss": 2.5197, "step": 18491 }, { "epoch": 0.7822996869447499, "grad_norm": 0.15387903153896332, "learning_rate": 0.001, "loss": 1.8732, "step": 18492 }, { "epoch": 0.7823419917082663, "grad_norm": 0.1506211757659912, "learning_rate": 0.001, "loss": 1.9354, "step": 18493 }, { "epoch": 0.7823842964717828, "grad_norm": 0.1458059847354889, "learning_rate": 0.001, "loss": 1.6647, "step": 18494 }, { "epoch": 0.7824266012352991, "grad_norm": 0.1346750259399414, "learning_rate": 0.001, "loss": 1.8682, "step": 18495 }, { "epoch": 0.7824689059988155, "grad_norm": 0.1503671556711197, "learning_rate": 0.001, "loss": 1.8601, "step": 18496 }, { "epoch": 0.7825112107623319, "grad_norm": 0.15590086579322815, "learning_rate": 0.001, "loss": 3.2885, "step": 18497 }, { "epoch": 0.7825535155258482, "grad_norm": 0.16297362744808197, "learning_rate": 0.001, "loss": 2.2588, "step": 18498 }, { "epoch": 0.7825958202893646, "grad_norm": 0.15889789164066315, "learning_rate": 0.001, "loss": 1.5667, "step": 18499 }, { "epoch": 0.782638125052881, "grad_norm": 0.16023693978786469, "learning_rate": 0.001, "loss": 2.3448, "step": 18500 }, { "epoch": 0.7826804298163973, "grad_norm": 0.19175249338150024, "learning_rate": 0.001, "loss": 2.4261, "step": 18501 }, { "epoch": 0.7827227345799137, "grad_norm": 0.14406760036945343, "learning_rate": 0.001, "loss": 1.5323, "step": 18502 }, { "epoch": 0.7827650393434301, "grad_norm": 0.14279572665691376, "learning_rate": 0.001, "loss": 1.4731, "step": 18503 }, { "epoch": 0.7828073441069464, "grad_norm": 7.251346111297607, "learning_rate": 0.001, "loss": 2.6161, "step": 18504 }, { "epoch": 0.7828496488704628, "grad_norm": 0.580826997756958, "learning_rate": 0.001, "loss": 2.6049, "step": 18505 }, { "epoch": 0.7828919536339792, "grad_norm": 2.745222330093384, "learning_rate": 0.001, "loss": 2.3253, "step": 18506 }, { "epoch": 0.7829342583974955, "grad_norm": 0.14414088428020477, "learning_rate": 0.001, "loss": 1.7045, "step": 18507 }, { "epoch": 0.7829765631610119, "grad_norm": 0.1275712102651596, "learning_rate": 0.001, "loss": 2.0115, "step": 18508 }, { "epoch": 0.7830188679245284, "grad_norm": 0.1394435614347458, "learning_rate": 0.001, "loss": 1.4622, "step": 18509 }, { "epoch": 0.7830611726880446, "grad_norm": 0.1480412781238556, "learning_rate": 0.001, "loss": 1.8277, "step": 18510 }, { "epoch": 0.7831034774515611, "grad_norm": 0.14259982109069824, "learning_rate": 0.001, "loss": 2.2549, "step": 18511 }, { "epoch": 0.7831457822150775, "grad_norm": 2.498133897781372, "learning_rate": 0.001, "loss": 3.1715, "step": 18512 }, { "epoch": 0.7831880869785938, "grad_norm": 0.19657468795776367, "learning_rate": 0.001, "loss": 2.4656, "step": 18513 }, { "epoch": 0.7832303917421102, "grad_norm": 0.1902858018875122, "learning_rate": 0.001, "loss": 2.3779, "step": 18514 }, { "epoch": 0.7832726965056266, "grad_norm": 2.73234486579895, "learning_rate": 0.001, "loss": 2.9337, "step": 18515 }, { "epoch": 0.7833150012691429, "grad_norm": 0.16122861206531525, "learning_rate": 0.001, "loss": 3.7783, "step": 18516 }, { "epoch": 0.7833573060326593, "grad_norm": 0.146601602435112, "learning_rate": 0.001, "loss": 1.7877, "step": 18517 }, { "epoch": 0.7833996107961756, "grad_norm": 0.1656588464975357, "learning_rate": 0.001, "loss": 2.4555, "step": 18518 }, { "epoch": 0.783441915559692, "grad_norm": 0.1633993536233902, "learning_rate": 0.001, "loss": 2.131, "step": 18519 }, { "epoch": 0.7834842203232084, "grad_norm": 0.13799798488616943, "learning_rate": 0.001, "loss": 1.6201, "step": 18520 }, { "epoch": 0.7835265250867247, "grad_norm": 0.1774521917104721, "learning_rate": 0.001, "loss": 2.4657, "step": 18521 }, { "epoch": 0.7835688298502411, "grad_norm": 0.17615440487861633, "learning_rate": 0.001, "loss": 2.1959, "step": 18522 }, { "epoch": 0.7836111346137575, "grad_norm": 0.18356232345104218, "learning_rate": 0.001, "loss": 2.6112, "step": 18523 }, { "epoch": 0.7836534393772738, "grad_norm": 0.1374877244234085, "learning_rate": 0.001, "loss": 1.8857, "step": 18524 }, { "epoch": 0.7836957441407902, "grad_norm": 0.2870613634586334, "learning_rate": 0.001, "loss": 1.7536, "step": 18525 }, { "epoch": 0.7837380489043067, "grad_norm": 0.16457685828208923, "learning_rate": 0.001, "loss": 1.7759, "step": 18526 }, { "epoch": 0.783780353667823, "grad_norm": 0.12657257914543152, "learning_rate": 0.001, "loss": 2.1712, "step": 18527 }, { "epoch": 0.7838226584313394, "grad_norm": 1.0247093439102173, "learning_rate": 0.001, "loss": 1.3212, "step": 18528 }, { "epoch": 0.7838649631948558, "grad_norm": 2.865370273590088, "learning_rate": 0.001, "loss": 2.1017, "step": 18529 }, { "epoch": 0.7839072679583721, "grad_norm": 0.2102050930261612, "learning_rate": 0.001, "loss": 1.917, "step": 18530 }, { "epoch": 0.7839495727218885, "grad_norm": 0.23753419518470764, "learning_rate": 0.001, "loss": 2.7787, "step": 18531 }, { "epoch": 0.7839918774854049, "grad_norm": 0.15850041806697845, "learning_rate": 0.001, "loss": 1.5583, "step": 18532 }, { "epoch": 0.7840341822489212, "grad_norm": 0.1486329436302185, "learning_rate": 0.001, "loss": 1.9008, "step": 18533 }, { "epoch": 0.7840764870124376, "grad_norm": 0.13608387112617493, "learning_rate": 0.001, "loss": 2.6181, "step": 18534 }, { "epoch": 0.784118791775954, "grad_norm": 0.1750655174255371, "learning_rate": 0.001, "loss": 3.3077, "step": 18535 }, { "epoch": 0.7841610965394703, "grad_norm": 0.15577921271324158, "learning_rate": 0.001, "loss": 2.3589, "step": 18536 }, { "epoch": 0.7842034013029867, "grad_norm": 0.16472339630126953, "learning_rate": 0.001, "loss": 2.7532, "step": 18537 }, { "epoch": 0.7842457060665031, "grad_norm": 0.8173422813415527, "learning_rate": 0.001, "loss": 2.1228, "step": 18538 }, { "epoch": 0.7842880108300194, "grad_norm": 0.16363029181957245, "learning_rate": 0.001, "loss": 3.242, "step": 18539 }, { "epoch": 0.7843303155935358, "grad_norm": 1.0863935947418213, "learning_rate": 0.001, "loss": 2.4159, "step": 18540 }, { "epoch": 0.7843726203570522, "grad_norm": 0.17449085414409637, "learning_rate": 0.001, "loss": 3.3047, "step": 18541 }, { "epoch": 0.7844149251205685, "grad_norm": 0.2527814507484436, "learning_rate": 0.001, "loss": 2.5967, "step": 18542 }, { "epoch": 0.784457229884085, "grad_norm": 0.13562943041324615, "learning_rate": 0.001, "loss": 2.5034, "step": 18543 }, { "epoch": 0.7844995346476014, "grad_norm": 0.22819682955741882, "learning_rate": 0.001, "loss": 1.9655, "step": 18544 }, { "epoch": 0.7845418394111177, "grad_norm": 0.7350271940231323, "learning_rate": 0.001, "loss": 2.8429, "step": 18545 }, { "epoch": 0.7845841441746341, "grad_norm": 0.16228371858596802, "learning_rate": 0.001, "loss": 1.4537, "step": 18546 }, { "epoch": 0.7846264489381505, "grad_norm": 0.5120057463645935, "learning_rate": 0.001, "loss": 1.5295, "step": 18547 }, { "epoch": 0.7846687537016668, "grad_norm": 0.17189784348011017, "learning_rate": 0.001, "loss": 2.5171, "step": 18548 }, { "epoch": 0.7847110584651832, "grad_norm": 0.2788395285606384, "learning_rate": 0.001, "loss": 1.7338, "step": 18549 }, { "epoch": 0.7847533632286996, "grad_norm": 0.17548993229866028, "learning_rate": 0.001, "loss": 2.0646, "step": 18550 }, { "epoch": 0.7847956679922159, "grad_norm": 0.15027286112308502, "learning_rate": 0.001, "loss": 1.6634, "step": 18551 }, { "epoch": 0.7848379727557323, "grad_norm": 0.14293362200260162, "learning_rate": 0.001, "loss": 1.7618, "step": 18552 }, { "epoch": 0.7848802775192487, "grad_norm": 0.17201675474643707, "learning_rate": 0.001, "loss": 2.2748, "step": 18553 }, { "epoch": 0.784922582282765, "grad_norm": 0.22527389228343964, "learning_rate": 0.001, "loss": 1.3851, "step": 18554 }, { "epoch": 0.7849648870462814, "grad_norm": 0.16594509780406952, "learning_rate": 0.001, "loss": 2.1222, "step": 18555 }, { "epoch": 0.7850071918097978, "grad_norm": 0.1816147267818451, "learning_rate": 0.001, "loss": 2.4502, "step": 18556 }, { "epoch": 0.7850494965733141, "grad_norm": 0.14666712284088135, "learning_rate": 0.001, "loss": 1.4992, "step": 18557 }, { "epoch": 0.7850918013368305, "grad_norm": 0.14625667035579681, "learning_rate": 0.001, "loss": 2.2672, "step": 18558 }, { "epoch": 0.785134106100347, "grad_norm": 0.5537320375442505, "learning_rate": 0.001, "loss": 1.6027, "step": 18559 }, { "epoch": 0.7851764108638633, "grad_norm": 0.17199261486530304, "learning_rate": 0.001, "loss": 2.5807, "step": 18560 }, { "epoch": 0.7852187156273797, "grad_norm": 0.14282803237438202, "learning_rate": 0.001, "loss": 2.0102, "step": 18561 }, { "epoch": 0.7852610203908961, "grad_norm": 0.7231976985931396, "learning_rate": 0.001, "loss": 2.3132, "step": 18562 }, { "epoch": 0.7853033251544124, "grad_norm": 0.15639185905456543, "learning_rate": 0.001, "loss": 1.7835, "step": 18563 }, { "epoch": 0.7853456299179288, "grad_norm": 0.1664103865623474, "learning_rate": 0.001, "loss": 2.3088, "step": 18564 }, { "epoch": 0.7853879346814451, "grad_norm": 0.18371713161468506, "learning_rate": 0.001, "loss": 2.4793, "step": 18565 }, { "epoch": 0.7854302394449615, "grad_norm": 1.8584163188934326, "learning_rate": 0.001, "loss": 1.5245, "step": 18566 }, { "epoch": 0.7854725442084779, "grad_norm": 0.44644030928611755, "learning_rate": 0.001, "loss": 2.4425, "step": 18567 }, { "epoch": 0.7855148489719942, "grad_norm": 0.15669460594654083, "learning_rate": 0.001, "loss": 1.7045, "step": 18568 }, { "epoch": 0.7855571537355106, "grad_norm": 0.24448037147521973, "learning_rate": 0.001, "loss": 1.8752, "step": 18569 }, { "epoch": 0.785599458499027, "grad_norm": 0.1537441611289978, "learning_rate": 0.001, "loss": 2.3786, "step": 18570 }, { "epoch": 0.7856417632625433, "grad_norm": 0.14563751220703125, "learning_rate": 0.001, "loss": 2.2926, "step": 18571 }, { "epoch": 0.7856840680260597, "grad_norm": 0.17651155591011047, "learning_rate": 0.001, "loss": 2.319, "step": 18572 }, { "epoch": 0.7857263727895761, "grad_norm": 0.17922982573509216, "learning_rate": 0.001, "loss": 2.112, "step": 18573 }, { "epoch": 0.7857686775530924, "grad_norm": 0.1861950010061264, "learning_rate": 0.001, "loss": 2.7733, "step": 18574 }, { "epoch": 0.7858109823166088, "grad_norm": 0.156846284866333, "learning_rate": 0.001, "loss": 2.2694, "step": 18575 }, { "epoch": 0.7858532870801253, "grad_norm": 0.4748559892177582, "learning_rate": 0.001, "loss": 2.1577, "step": 18576 }, { "epoch": 0.7858955918436415, "grad_norm": 0.14767950773239136, "learning_rate": 0.001, "loss": 2.1209, "step": 18577 }, { "epoch": 0.785937896607158, "grad_norm": 0.5291089415550232, "learning_rate": 0.001, "loss": 2.5013, "step": 18578 }, { "epoch": 0.7859802013706744, "grad_norm": 0.1678914725780487, "learning_rate": 0.001, "loss": 1.7286, "step": 18579 }, { "epoch": 0.7860225061341907, "grad_norm": 0.1400989443063736, "learning_rate": 0.001, "loss": 1.9519, "step": 18580 }, { "epoch": 0.7860648108977071, "grad_norm": 0.20776335895061493, "learning_rate": 0.001, "loss": 2.9314, "step": 18581 }, { "epoch": 0.7861071156612235, "grad_norm": 0.16925200819969177, "learning_rate": 0.001, "loss": 3.7282, "step": 18582 }, { "epoch": 0.7861494204247398, "grad_norm": 0.14598102867603302, "learning_rate": 0.001, "loss": 2.3061, "step": 18583 }, { "epoch": 0.7861917251882562, "grad_norm": 0.6664958000183105, "learning_rate": 0.001, "loss": 2.0033, "step": 18584 }, { "epoch": 0.7862340299517726, "grad_norm": 0.14569751918315887, "learning_rate": 0.001, "loss": 2.4761, "step": 18585 }, { "epoch": 0.7862763347152889, "grad_norm": 0.2870123088359833, "learning_rate": 0.001, "loss": 2.782, "step": 18586 }, { "epoch": 0.7863186394788053, "grad_norm": 0.16477970778942108, "learning_rate": 0.001, "loss": 1.6753, "step": 18587 }, { "epoch": 0.7863609442423217, "grad_norm": 0.18875035643577576, "learning_rate": 0.001, "loss": 1.7905, "step": 18588 }, { "epoch": 0.786403249005838, "grad_norm": 0.1353473663330078, "learning_rate": 0.001, "loss": 3.2058, "step": 18589 }, { "epoch": 0.7864455537693544, "grad_norm": 0.2390325963497162, "learning_rate": 0.001, "loss": 2.155, "step": 18590 }, { "epoch": 0.7864878585328708, "grad_norm": 0.1434008628129959, "learning_rate": 0.001, "loss": 2.0458, "step": 18591 }, { "epoch": 0.7865301632963871, "grad_norm": 0.1311628371477127, "learning_rate": 0.001, "loss": 1.5251, "step": 18592 }, { "epoch": 0.7865724680599036, "grad_norm": 0.20478565990924835, "learning_rate": 0.001, "loss": 2.3965, "step": 18593 }, { "epoch": 0.78661477282342, "grad_norm": 0.157118022441864, "learning_rate": 0.001, "loss": 2.0589, "step": 18594 }, { "epoch": 0.7866570775869363, "grad_norm": 0.17026092112064362, "learning_rate": 0.001, "loss": 2.3213, "step": 18595 }, { "epoch": 0.7866993823504527, "grad_norm": 0.12616808712482452, "learning_rate": 0.001, "loss": 1.5168, "step": 18596 }, { "epoch": 0.7867416871139691, "grad_norm": 0.12917254865169525, "learning_rate": 0.001, "loss": 1.4579, "step": 18597 }, { "epoch": 0.7867839918774854, "grad_norm": 0.18053403496742249, "learning_rate": 0.001, "loss": 2.1042, "step": 18598 }, { "epoch": 0.7868262966410018, "grad_norm": 0.17954039573669434, "learning_rate": 0.001, "loss": 2.2459, "step": 18599 }, { "epoch": 0.7868686014045182, "grad_norm": 0.23645149171352386, "learning_rate": 0.001, "loss": 2.0196, "step": 18600 }, { "epoch": 0.7869109061680345, "grad_norm": 0.1373950093984604, "learning_rate": 0.001, "loss": 1.6001, "step": 18601 }, { "epoch": 0.7869532109315509, "grad_norm": 0.17674638330936432, "learning_rate": 0.001, "loss": 3.6847, "step": 18602 }, { "epoch": 0.7869955156950673, "grad_norm": 0.1795940101146698, "learning_rate": 0.001, "loss": 2.7799, "step": 18603 }, { "epoch": 0.7870378204585836, "grad_norm": 0.16243141889572144, "learning_rate": 0.001, "loss": 1.7403, "step": 18604 }, { "epoch": 0.7870801252221, "grad_norm": 13.920958518981934, "learning_rate": 0.001, "loss": 1.9834, "step": 18605 }, { "epoch": 0.7871224299856164, "grad_norm": 0.16762857139110565, "learning_rate": 0.001, "loss": 1.9104, "step": 18606 }, { "epoch": 0.7871647347491327, "grad_norm": 0.15232449769973755, "learning_rate": 0.001, "loss": 1.9612, "step": 18607 }, { "epoch": 0.7872070395126491, "grad_norm": 0.1300850510597229, "learning_rate": 0.001, "loss": 1.663, "step": 18608 }, { "epoch": 0.7872493442761654, "grad_norm": 0.12953154742717743, "learning_rate": 0.001, "loss": 2.6655, "step": 18609 }, { "epoch": 0.7872916490396819, "grad_norm": 0.1451409012079239, "learning_rate": 0.001, "loss": 2.2023, "step": 18610 }, { "epoch": 0.7873339538031983, "grad_norm": 0.370164692401886, "learning_rate": 0.001, "loss": 2.1399, "step": 18611 }, { "epoch": 0.7873762585667146, "grad_norm": 0.3032169044017792, "learning_rate": 0.001, "loss": 1.439, "step": 18612 }, { "epoch": 0.787418563330231, "grad_norm": 0.15478359162807465, "learning_rate": 0.001, "loss": 1.7436, "step": 18613 }, { "epoch": 0.7874608680937474, "grad_norm": 0.1523745208978653, "learning_rate": 0.001, "loss": 1.7031, "step": 18614 }, { "epoch": 0.7875031728572637, "grad_norm": 0.1674836277961731, "learning_rate": 0.001, "loss": 1.8924, "step": 18615 }, { "epoch": 0.7875454776207801, "grad_norm": 0.16084153950214386, "learning_rate": 0.001, "loss": 3.0371, "step": 18616 }, { "epoch": 0.7875877823842965, "grad_norm": 0.15043555200099945, "learning_rate": 0.001, "loss": 1.669, "step": 18617 }, { "epoch": 0.7876300871478128, "grad_norm": 0.15042801201343536, "learning_rate": 0.001, "loss": 1.4761, "step": 18618 }, { "epoch": 0.7876723919113292, "grad_norm": 0.20407025516033173, "learning_rate": 0.001, "loss": 2.5795, "step": 18619 }, { "epoch": 0.7877146966748456, "grad_norm": 0.37123432755470276, "learning_rate": 0.001, "loss": 1.9516, "step": 18620 }, { "epoch": 0.7877570014383619, "grad_norm": 0.14106714725494385, "learning_rate": 0.001, "loss": 3.1985, "step": 18621 }, { "epoch": 0.7877993062018783, "grad_norm": 0.16978901624679565, "learning_rate": 0.001, "loss": 2.6184, "step": 18622 }, { "epoch": 0.7878416109653947, "grad_norm": 0.32933205366134644, "learning_rate": 0.001, "loss": 3.5388, "step": 18623 }, { "epoch": 0.787883915728911, "grad_norm": 0.16782082617282867, "learning_rate": 0.001, "loss": 2.385, "step": 18624 }, { "epoch": 0.7879262204924274, "grad_norm": 0.16868159174919128, "learning_rate": 0.001, "loss": 2.1474, "step": 18625 }, { "epoch": 0.7879685252559439, "grad_norm": 0.20239630341529846, "learning_rate": 0.001, "loss": 2.5787, "step": 18626 }, { "epoch": 0.7880108300194602, "grad_norm": 0.49413931369781494, "learning_rate": 0.001, "loss": 2.7789, "step": 18627 }, { "epoch": 0.7880531347829766, "grad_norm": 0.19046078622341156, "learning_rate": 0.001, "loss": 2.8731, "step": 18628 }, { "epoch": 0.788095439546493, "grad_norm": 0.6242032647132874, "learning_rate": 0.001, "loss": 3.4162, "step": 18629 }, { "epoch": 0.7881377443100093, "grad_norm": 0.29068413376808167, "learning_rate": 0.001, "loss": 2.5984, "step": 18630 }, { "epoch": 0.7881800490735257, "grad_norm": 0.159669429063797, "learning_rate": 0.001, "loss": 2.885, "step": 18631 }, { "epoch": 0.7882223538370421, "grad_norm": 0.14053291082382202, "learning_rate": 0.001, "loss": 1.7608, "step": 18632 }, { "epoch": 0.7882646586005584, "grad_norm": 0.34683123230934143, "learning_rate": 0.001, "loss": 1.8457, "step": 18633 }, { "epoch": 0.7883069633640748, "grad_norm": 0.1669548749923706, "learning_rate": 0.001, "loss": 1.6075, "step": 18634 }, { "epoch": 0.7883492681275912, "grad_norm": 0.19117628037929535, "learning_rate": 0.001, "loss": 2.7102, "step": 18635 }, { "epoch": 0.7883915728911075, "grad_norm": 0.13999809324741364, "learning_rate": 0.001, "loss": 1.5316, "step": 18636 }, { "epoch": 0.7884338776546239, "grad_norm": 0.3912406265735626, "learning_rate": 0.001, "loss": 2.5317, "step": 18637 }, { "epoch": 0.7884761824181403, "grad_norm": 0.20486436784267426, "learning_rate": 0.001, "loss": 3.0638, "step": 18638 }, { "epoch": 0.7885184871816566, "grad_norm": 0.18025173246860504, "learning_rate": 0.001, "loss": 1.9768, "step": 18639 }, { "epoch": 0.788560791945173, "grad_norm": 0.1740838885307312, "learning_rate": 0.001, "loss": 3.0294, "step": 18640 }, { "epoch": 0.7886030967086894, "grad_norm": 0.18480761349201202, "learning_rate": 0.001, "loss": 2.3447, "step": 18641 }, { "epoch": 0.7886454014722057, "grad_norm": 14.460678100585938, "learning_rate": 0.001, "loss": 2.7443, "step": 18642 }, { "epoch": 0.7886877062357222, "grad_norm": 0.19740672409534454, "learning_rate": 0.001, "loss": 1.882, "step": 18643 }, { "epoch": 0.7887300109992386, "grad_norm": 0.13065767288208008, "learning_rate": 0.001, "loss": 2.2647, "step": 18644 }, { "epoch": 0.7887723157627549, "grad_norm": 0.49033114314079285, "learning_rate": 0.001, "loss": 2.3273, "step": 18645 }, { "epoch": 0.7888146205262713, "grad_norm": 0.16719037294387817, "learning_rate": 0.001, "loss": 2.3332, "step": 18646 }, { "epoch": 0.7888569252897877, "grad_norm": 0.16855984926223755, "learning_rate": 0.001, "loss": 3.0683, "step": 18647 }, { "epoch": 0.788899230053304, "grad_norm": 0.1336914449930191, "learning_rate": 0.001, "loss": 1.381, "step": 18648 }, { "epoch": 0.7889415348168204, "grad_norm": 0.16387201845645905, "learning_rate": 0.001, "loss": 2.3596, "step": 18649 }, { "epoch": 0.7889838395803368, "grad_norm": 2.3988749980926514, "learning_rate": 0.001, "loss": 2.2531, "step": 18650 }, { "epoch": 0.7890261443438531, "grad_norm": 0.18819689750671387, "learning_rate": 0.001, "loss": 2.171, "step": 18651 }, { "epoch": 0.7890684491073695, "grad_norm": 0.16322028636932373, "learning_rate": 0.001, "loss": 2.2966, "step": 18652 }, { "epoch": 0.7891107538708859, "grad_norm": 0.15296301245689392, "learning_rate": 0.001, "loss": 1.9929, "step": 18653 }, { "epoch": 0.7891530586344022, "grad_norm": 0.14490869641304016, "learning_rate": 0.001, "loss": 1.8859, "step": 18654 }, { "epoch": 0.7891953633979186, "grad_norm": 0.16145136952400208, "learning_rate": 0.001, "loss": 1.9047, "step": 18655 }, { "epoch": 0.7892376681614349, "grad_norm": 0.15146324038505554, "learning_rate": 0.001, "loss": 1.7612, "step": 18656 }, { "epoch": 0.7892799729249513, "grad_norm": 0.15252503752708435, "learning_rate": 0.001, "loss": 3.3884, "step": 18657 }, { "epoch": 0.7893222776884677, "grad_norm": 1.6242767572402954, "learning_rate": 0.001, "loss": 1.7785, "step": 18658 }, { "epoch": 0.789364582451984, "grad_norm": 0.19868817925453186, "learning_rate": 0.001, "loss": 3.0005, "step": 18659 }, { "epoch": 0.7894068872155005, "grad_norm": 0.24456727504730225, "learning_rate": 0.001, "loss": 1.73, "step": 18660 }, { "epoch": 0.7894491919790169, "grad_norm": 0.23000819981098175, "learning_rate": 0.001, "loss": 2.2173, "step": 18661 }, { "epoch": 0.7894914967425332, "grad_norm": 0.220015287399292, "learning_rate": 0.001, "loss": 2.1518, "step": 18662 }, { "epoch": 0.7895338015060496, "grad_norm": 0.20717963576316833, "learning_rate": 0.001, "loss": 3.1284, "step": 18663 }, { "epoch": 0.789576106269566, "grad_norm": 0.2126576453447342, "learning_rate": 0.001, "loss": 2.8512, "step": 18664 }, { "epoch": 0.7896184110330823, "grad_norm": 0.2596065402030945, "learning_rate": 0.001, "loss": 2.0507, "step": 18665 }, { "epoch": 0.7896607157965987, "grad_norm": 0.22077849507331848, "learning_rate": 0.001, "loss": 1.6807, "step": 18666 }, { "epoch": 0.7897030205601151, "grad_norm": 0.1828576773405075, "learning_rate": 0.001, "loss": 2.0024, "step": 18667 }, { "epoch": 0.7897453253236314, "grad_norm": 0.5003877282142639, "learning_rate": 0.001, "loss": 3.519, "step": 18668 }, { "epoch": 0.7897876300871478, "grad_norm": 0.17294269800186157, "learning_rate": 0.001, "loss": 2.7761, "step": 18669 }, { "epoch": 0.7898299348506642, "grad_norm": 0.19300001859664917, "learning_rate": 0.001, "loss": 1.9447, "step": 18670 }, { "epoch": 0.7898722396141805, "grad_norm": 1.2025370597839355, "learning_rate": 0.001, "loss": 3.5368, "step": 18671 }, { "epoch": 0.7899145443776969, "grad_norm": 0.14662951231002808, "learning_rate": 0.001, "loss": 2.5346, "step": 18672 }, { "epoch": 0.7899568491412133, "grad_norm": 2.872141122817993, "learning_rate": 0.001, "loss": 1.9286, "step": 18673 }, { "epoch": 0.7899991539047296, "grad_norm": 0.16066762804985046, "learning_rate": 0.001, "loss": 2.2664, "step": 18674 }, { "epoch": 0.790041458668246, "grad_norm": 0.16592992842197418, "learning_rate": 0.001, "loss": 1.8831, "step": 18675 }, { "epoch": 0.7900837634317625, "grad_norm": 0.16850398480892181, "learning_rate": 0.001, "loss": 1.8177, "step": 18676 }, { "epoch": 0.7901260681952788, "grad_norm": 0.16996052861213684, "learning_rate": 0.001, "loss": 1.9456, "step": 18677 }, { "epoch": 0.7901683729587952, "grad_norm": 0.39866968989372253, "learning_rate": 0.001, "loss": 2.8809, "step": 18678 }, { "epoch": 0.7902106777223116, "grad_norm": 0.16642092168331146, "learning_rate": 0.001, "loss": 2.6154, "step": 18679 }, { "epoch": 0.7902529824858279, "grad_norm": 0.17593850195407867, "learning_rate": 0.001, "loss": 1.7041, "step": 18680 }, { "epoch": 0.7902952872493443, "grad_norm": 0.17078939080238342, "learning_rate": 0.001, "loss": 2.9029, "step": 18681 }, { "epoch": 0.7903375920128607, "grad_norm": 2.226186990737915, "learning_rate": 0.001, "loss": 1.6863, "step": 18682 }, { "epoch": 0.790379896776377, "grad_norm": 1.4724563360214233, "learning_rate": 0.001, "loss": 2.1862, "step": 18683 }, { "epoch": 0.7904222015398934, "grad_norm": 0.1770937144756317, "learning_rate": 0.001, "loss": 2.435, "step": 18684 }, { "epoch": 0.7904645063034098, "grad_norm": 0.16543549299240112, "learning_rate": 0.001, "loss": 1.5855, "step": 18685 }, { "epoch": 0.7905068110669261, "grad_norm": 0.19836363196372986, "learning_rate": 0.001, "loss": 1.9606, "step": 18686 }, { "epoch": 0.7905491158304425, "grad_norm": 0.1877475082874298, "learning_rate": 0.001, "loss": 1.8057, "step": 18687 }, { "epoch": 0.7905914205939589, "grad_norm": 0.14974345266819, "learning_rate": 0.001, "loss": 2.067, "step": 18688 }, { "epoch": 0.7906337253574752, "grad_norm": 0.21924450993537903, "learning_rate": 0.001, "loss": 3.1355, "step": 18689 }, { "epoch": 0.7906760301209916, "grad_norm": 0.17212629318237305, "learning_rate": 0.001, "loss": 1.5735, "step": 18690 }, { "epoch": 0.790718334884508, "grad_norm": 0.4152930676937103, "learning_rate": 0.001, "loss": 2.7388, "step": 18691 }, { "epoch": 0.7907606396480243, "grad_norm": 0.1306760460138321, "learning_rate": 0.001, "loss": 1.4797, "step": 18692 }, { "epoch": 0.7908029444115408, "grad_norm": 0.166093111038208, "learning_rate": 0.001, "loss": 2.0058, "step": 18693 }, { "epoch": 0.7908452491750572, "grad_norm": 1.3968002796173096, "learning_rate": 0.001, "loss": 1.5695, "step": 18694 }, { "epoch": 0.7908875539385735, "grad_norm": 0.18365883827209473, "learning_rate": 0.001, "loss": 2.3175, "step": 18695 }, { "epoch": 0.7909298587020899, "grad_norm": 0.14197491109371185, "learning_rate": 0.001, "loss": 1.7524, "step": 18696 }, { "epoch": 0.7909721634656063, "grad_norm": 0.19356654584407806, "learning_rate": 0.001, "loss": 3.7002, "step": 18697 }, { "epoch": 0.7910144682291226, "grad_norm": 0.1772981733083725, "learning_rate": 0.001, "loss": 2.2998, "step": 18698 }, { "epoch": 0.791056772992639, "grad_norm": 0.2028152197599411, "learning_rate": 0.001, "loss": 1.9209, "step": 18699 }, { "epoch": 0.7910990777561553, "grad_norm": 0.4528128504753113, "learning_rate": 0.001, "loss": 1.9742, "step": 18700 }, { "epoch": 0.7911413825196717, "grad_norm": 0.19012227654457092, "learning_rate": 0.001, "loss": 2.475, "step": 18701 }, { "epoch": 0.7911836872831881, "grad_norm": 0.17222073674201965, "learning_rate": 0.001, "loss": 1.9615, "step": 18702 }, { "epoch": 0.7912259920467044, "grad_norm": 0.19183796644210815, "learning_rate": 0.001, "loss": 2.1465, "step": 18703 }, { "epoch": 0.7912682968102208, "grad_norm": 0.1738269031047821, "learning_rate": 0.001, "loss": 2.6244, "step": 18704 }, { "epoch": 0.7913106015737372, "grad_norm": 0.18650266528129578, "learning_rate": 0.001, "loss": 2.0074, "step": 18705 }, { "epoch": 0.7913529063372535, "grad_norm": 0.1911695897579193, "learning_rate": 0.001, "loss": 2.1624, "step": 18706 }, { "epoch": 0.7913952111007699, "grad_norm": 0.2884248197078705, "learning_rate": 0.001, "loss": 2.7871, "step": 18707 }, { "epoch": 0.7914375158642863, "grad_norm": 0.15906710922718048, "learning_rate": 0.001, "loss": 2.9539, "step": 18708 }, { "epoch": 0.7914798206278026, "grad_norm": 0.19278644025325775, "learning_rate": 0.001, "loss": 1.6402, "step": 18709 }, { "epoch": 0.791522125391319, "grad_norm": 0.15218260884284973, "learning_rate": 0.001, "loss": 1.8649, "step": 18710 }, { "epoch": 0.7915644301548355, "grad_norm": 0.1600116640329361, "learning_rate": 0.001, "loss": 1.6598, "step": 18711 }, { "epoch": 0.7916067349183518, "grad_norm": 0.15089303255081177, "learning_rate": 0.001, "loss": 1.9441, "step": 18712 }, { "epoch": 0.7916490396818682, "grad_norm": 0.15738871693611145, "learning_rate": 0.001, "loss": 1.888, "step": 18713 }, { "epoch": 0.7916913444453846, "grad_norm": 0.15308193862438202, "learning_rate": 0.001, "loss": 1.8442, "step": 18714 }, { "epoch": 0.7917336492089009, "grad_norm": 0.1537911295890808, "learning_rate": 0.001, "loss": 1.8999, "step": 18715 }, { "epoch": 0.7917759539724173, "grad_norm": 0.16866432130336761, "learning_rate": 0.001, "loss": 2.5294, "step": 18716 }, { "epoch": 0.7918182587359337, "grad_norm": 1.7140220403671265, "learning_rate": 0.001, "loss": 1.9505, "step": 18717 }, { "epoch": 0.79186056349945, "grad_norm": 0.15509742498397827, "learning_rate": 0.001, "loss": 1.7273, "step": 18718 }, { "epoch": 0.7919028682629664, "grad_norm": 0.16215214133262634, "learning_rate": 0.001, "loss": 3.0471, "step": 18719 }, { "epoch": 0.7919451730264828, "grad_norm": 0.1872335970401764, "learning_rate": 0.001, "loss": 3.027, "step": 18720 }, { "epoch": 0.7919874777899991, "grad_norm": 0.14136208593845367, "learning_rate": 0.001, "loss": 2.0332, "step": 18721 }, { "epoch": 0.7920297825535155, "grad_norm": 0.2219260334968567, "learning_rate": 0.001, "loss": 2.2754, "step": 18722 }, { "epoch": 0.7920720873170319, "grad_norm": 0.21660354733467102, "learning_rate": 0.001, "loss": 1.7934, "step": 18723 }, { "epoch": 0.7921143920805482, "grad_norm": 0.16659744083881378, "learning_rate": 0.001, "loss": 2.2417, "step": 18724 }, { "epoch": 0.7921566968440646, "grad_norm": 0.1875210851430893, "learning_rate": 0.001, "loss": 2.3443, "step": 18725 }, { "epoch": 0.792199001607581, "grad_norm": 0.1662781685590744, "learning_rate": 0.001, "loss": 1.914, "step": 18726 }, { "epoch": 0.7922413063710974, "grad_norm": 0.24961335957050323, "learning_rate": 0.001, "loss": 1.9302, "step": 18727 }, { "epoch": 0.7922836111346138, "grad_norm": 0.17793142795562744, "learning_rate": 0.001, "loss": 2.1055, "step": 18728 }, { "epoch": 0.7923259158981302, "grad_norm": 0.7194470763206482, "learning_rate": 0.001, "loss": 2.9523, "step": 18729 }, { "epoch": 0.7923682206616465, "grad_norm": 0.19897310435771942, "learning_rate": 0.001, "loss": 1.8951, "step": 18730 }, { "epoch": 0.7924105254251629, "grad_norm": 0.16758960485458374, "learning_rate": 0.001, "loss": 3.3784, "step": 18731 }, { "epoch": 0.7924528301886793, "grad_norm": 0.18163642287254333, "learning_rate": 0.001, "loss": 2.5377, "step": 18732 }, { "epoch": 0.7924951349521956, "grad_norm": 0.1600625365972519, "learning_rate": 0.001, "loss": 2.8553, "step": 18733 }, { "epoch": 0.792537439715712, "grad_norm": 0.17819511890411377, "learning_rate": 0.001, "loss": 3.1244, "step": 18734 }, { "epoch": 0.7925797444792284, "grad_norm": 0.16769671440124512, "learning_rate": 0.001, "loss": 1.4566, "step": 18735 }, { "epoch": 0.7926220492427447, "grad_norm": 0.18893221020698547, "learning_rate": 0.001, "loss": 2.7459, "step": 18736 }, { "epoch": 0.7926643540062611, "grad_norm": 0.16248752176761627, "learning_rate": 0.001, "loss": 1.829, "step": 18737 }, { "epoch": 0.7927066587697775, "grad_norm": 0.1498391479253769, "learning_rate": 0.001, "loss": 2.218, "step": 18738 }, { "epoch": 0.7927489635332938, "grad_norm": 0.2448561042547226, "learning_rate": 0.001, "loss": 1.7601, "step": 18739 }, { "epoch": 0.7927912682968102, "grad_norm": 0.17693863809108734, "learning_rate": 0.001, "loss": 1.7627, "step": 18740 }, { "epoch": 0.7928335730603266, "grad_norm": 0.13363198935985565, "learning_rate": 0.001, "loss": 1.4754, "step": 18741 }, { "epoch": 0.792875877823843, "grad_norm": 0.1259775161743164, "learning_rate": 0.001, "loss": 1.5831, "step": 18742 }, { "epoch": 0.7929181825873594, "grad_norm": 0.18019893765449524, "learning_rate": 0.001, "loss": 2.3857, "step": 18743 }, { "epoch": 0.7929604873508757, "grad_norm": 0.2442205846309662, "learning_rate": 0.001, "loss": 2.0327, "step": 18744 }, { "epoch": 0.7930027921143921, "grad_norm": 1.5287914276123047, "learning_rate": 0.001, "loss": 2.4658, "step": 18745 }, { "epoch": 0.7930450968779085, "grad_norm": 0.15980815887451172, "learning_rate": 0.001, "loss": 1.9778, "step": 18746 }, { "epoch": 0.7930874016414248, "grad_norm": 0.1930980682373047, "learning_rate": 0.001, "loss": 2.2091, "step": 18747 }, { "epoch": 0.7931297064049412, "grad_norm": 0.17652654647827148, "learning_rate": 0.001, "loss": 2.2903, "step": 18748 }, { "epoch": 0.7931720111684576, "grad_norm": 0.22159257531166077, "learning_rate": 0.001, "loss": 2.6479, "step": 18749 }, { "epoch": 0.7932143159319739, "grad_norm": 4.054377555847168, "learning_rate": 0.001, "loss": 1.5173, "step": 18750 }, { "epoch": 0.7932566206954903, "grad_norm": 0.17010121047496796, "learning_rate": 0.001, "loss": 2.6045, "step": 18751 }, { "epoch": 0.7932989254590067, "grad_norm": 0.15084238350391388, "learning_rate": 0.001, "loss": 2.4313, "step": 18752 }, { "epoch": 0.793341230222523, "grad_norm": 0.18187488615512848, "learning_rate": 0.001, "loss": 2.0466, "step": 18753 }, { "epoch": 0.7933835349860394, "grad_norm": 2.5695083141326904, "learning_rate": 0.001, "loss": 1.9052, "step": 18754 }, { "epoch": 0.7934258397495558, "grad_norm": 0.1252131462097168, "learning_rate": 0.001, "loss": 2.476, "step": 18755 }, { "epoch": 0.7934681445130721, "grad_norm": 0.194590225815773, "learning_rate": 0.001, "loss": 1.5803, "step": 18756 }, { "epoch": 0.7935104492765885, "grad_norm": 0.2729746103286743, "learning_rate": 0.001, "loss": 2.2466, "step": 18757 }, { "epoch": 0.793552754040105, "grad_norm": 0.2805977165699005, "learning_rate": 0.001, "loss": 2.5365, "step": 18758 }, { "epoch": 0.7935950588036212, "grad_norm": 0.18586377799510956, "learning_rate": 0.001, "loss": 1.7274, "step": 18759 }, { "epoch": 0.7936373635671377, "grad_norm": 0.16096092760562897, "learning_rate": 0.001, "loss": 3.2489, "step": 18760 }, { "epoch": 0.7936796683306541, "grad_norm": 0.14794796705245972, "learning_rate": 0.001, "loss": 1.7881, "step": 18761 }, { "epoch": 0.7937219730941704, "grad_norm": 0.17098478972911835, "learning_rate": 0.001, "loss": 3.5723, "step": 18762 }, { "epoch": 0.7937642778576868, "grad_norm": 0.1851399689912796, "learning_rate": 0.001, "loss": 2.101, "step": 18763 }, { "epoch": 0.7938065826212032, "grad_norm": 0.16206832230091095, "learning_rate": 0.001, "loss": 2.5929, "step": 18764 }, { "epoch": 0.7938488873847195, "grad_norm": 0.16010499000549316, "learning_rate": 0.001, "loss": 1.6522, "step": 18765 }, { "epoch": 0.7938911921482359, "grad_norm": 0.1725982278585434, "learning_rate": 0.001, "loss": 2.5854, "step": 18766 }, { "epoch": 0.7939334969117523, "grad_norm": 0.13452595472335815, "learning_rate": 0.001, "loss": 1.9167, "step": 18767 }, { "epoch": 0.7939758016752686, "grad_norm": 0.12271041423082352, "learning_rate": 0.001, "loss": 2.1516, "step": 18768 }, { "epoch": 0.794018106438785, "grad_norm": 0.15723437070846558, "learning_rate": 0.001, "loss": 2.172, "step": 18769 }, { "epoch": 0.7940604112023014, "grad_norm": 0.1402629315853119, "learning_rate": 0.001, "loss": 1.7417, "step": 18770 }, { "epoch": 0.7941027159658177, "grad_norm": 0.11944486945867538, "learning_rate": 0.001, "loss": 1.5726, "step": 18771 }, { "epoch": 0.7941450207293341, "grad_norm": 0.12228669226169586, "learning_rate": 0.001, "loss": 2.0272, "step": 18772 }, { "epoch": 0.7941873254928505, "grad_norm": 0.18956278264522552, "learning_rate": 0.001, "loss": 3.0162, "step": 18773 }, { "epoch": 0.7942296302563668, "grad_norm": 0.6293951272964478, "learning_rate": 0.001, "loss": 2.1262, "step": 18774 }, { "epoch": 0.7942719350198832, "grad_norm": 0.14916300773620605, "learning_rate": 0.001, "loss": 1.9637, "step": 18775 }, { "epoch": 0.7943142397833997, "grad_norm": 0.14974258840084076, "learning_rate": 0.001, "loss": 2.385, "step": 18776 }, { "epoch": 0.794356544546916, "grad_norm": 0.151955246925354, "learning_rate": 0.001, "loss": 2.1005, "step": 18777 }, { "epoch": 0.7943988493104324, "grad_norm": 0.13923077285289764, "learning_rate": 0.001, "loss": 2.3116, "step": 18778 }, { "epoch": 0.7944411540739488, "grad_norm": 0.14543022215366364, "learning_rate": 0.001, "loss": 2.7402, "step": 18779 }, { "epoch": 0.7944834588374651, "grad_norm": 0.15743476152420044, "learning_rate": 0.001, "loss": 2.3271, "step": 18780 }, { "epoch": 0.7945257636009815, "grad_norm": 0.1735035479068756, "learning_rate": 0.001, "loss": 1.9675, "step": 18781 }, { "epoch": 0.7945680683644979, "grad_norm": 0.1507929265499115, "learning_rate": 0.001, "loss": 1.7867, "step": 18782 }, { "epoch": 0.7946103731280142, "grad_norm": 0.47875481843948364, "learning_rate": 0.001, "loss": 1.931, "step": 18783 }, { "epoch": 0.7946526778915306, "grad_norm": 1.3483095169067383, "learning_rate": 0.001, "loss": 1.6993, "step": 18784 }, { "epoch": 0.794694982655047, "grad_norm": 0.13868524134159088, "learning_rate": 0.001, "loss": 1.9223, "step": 18785 }, { "epoch": 0.7947372874185633, "grad_norm": 0.2855166494846344, "learning_rate": 0.001, "loss": 1.5803, "step": 18786 }, { "epoch": 0.7947795921820797, "grad_norm": 0.16588838398456573, "learning_rate": 0.001, "loss": 2.3712, "step": 18787 }, { "epoch": 0.7948218969455961, "grad_norm": 0.1705247461795807, "learning_rate": 0.001, "loss": 2.6876, "step": 18788 }, { "epoch": 0.7948642017091124, "grad_norm": 0.19775214791297913, "learning_rate": 0.001, "loss": 2.2817, "step": 18789 }, { "epoch": 0.7949065064726288, "grad_norm": 0.691185474395752, "learning_rate": 0.001, "loss": 1.8467, "step": 18790 }, { "epoch": 0.7949488112361451, "grad_norm": 0.1467396765947342, "learning_rate": 0.001, "loss": 3.6287, "step": 18791 }, { "epoch": 0.7949911159996615, "grad_norm": 0.1282058209180832, "learning_rate": 0.001, "loss": 2.5868, "step": 18792 }, { "epoch": 0.795033420763178, "grad_norm": 0.16036106646060944, "learning_rate": 0.001, "loss": 1.739, "step": 18793 }, { "epoch": 0.7950757255266943, "grad_norm": 0.17441564798355103, "learning_rate": 0.001, "loss": 1.7735, "step": 18794 }, { "epoch": 0.7951180302902107, "grad_norm": 0.15197812020778656, "learning_rate": 0.001, "loss": 1.4933, "step": 18795 }, { "epoch": 0.7951603350537271, "grad_norm": 0.16789105534553528, "learning_rate": 0.001, "loss": 1.5755, "step": 18796 }, { "epoch": 0.7952026398172434, "grad_norm": 0.34609055519104004, "learning_rate": 0.001, "loss": 2.2578, "step": 18797 }, { "epoch": 0.7952449445807598, "grad_norm": 0.1598827838897705, "learning_rate": 0.001, "loss": 2.316, "step": 18798 }, { "epoch": 0.7952872493442762, "grad_norm": 0.15156984329223633, "learning_rate": 0.001, "loss": 2.456, "step": 18799 }, { "epoch": 0.7953295541077925, "grad_norm": 0.151535302400589, "learning_rate": 0.001, "loss": 1.4494, "step": 18800 }, { "epoch": 0.7953718588713089, "grad_norm": 0.19072596728801727, "learning_rate": 0.001, "loss": 2.3097, "step": 18801 }, { "epoch": 0.7954141636348253, "grad_norm": 0.14297893643379211, "learning_rate": 0.001, "loss": 1.9724, "step": 18802 }, { "epoch": 0.7954564683983416, "grad_norm": 25.156417846679688, "learning_rate": 0.001, "loss": 2.0334, "step": 18803 }, { "epoch": 0.795498773161858, "grad_norm": 2.4311437606811523, "learning_rate": 0.001, "loss": 2.0976, "step": 18804 }, { "epoch": 0.7955410779253744, "grad_norm": 0.24896959960460663, "learning_rate": 0.001, "loss": 2.2145, "step": 18805 }, { "epoch": 0.7955833826888907, "grad_norm": 0.17482326924800873, "learning_rate": 0.001, "loss": 2.1033, "step": 18806 }, { "epoch": 0.7956256874524071, "grad_norm": 0.13971750438213348, "learning_rate": 0.001, "loss": 1.864, "step": 18807 }, { "epoch": 0.7956679922159235, "grad_norm": 0.227436363697052, "learning_rate": 0.001, "loss": 2.6199, "step": 18808 }, { "epoch": 0.7957102969794398, "grad_norm": 0.14270329475402832, "learning_rate": 0.001, "loss": 2.267, "step": 18809 }, { "epoch": 0.7957526017429563, "grad_norm": 0.17308275401592255, "learning_rate": 0.001, "loss": 2.0791, "step": 18810 }, { "epoch": 0.7957949065064727, "grad_norm": 0.16473335027694702, "learning_rate": 0.001, "loss": 1.7909, "step": 18811 }, { "epoch": 0.795837211269989, "grad_norm": 0.17767003178596497, "learning_rate": 0.001, "loss": 3.3289, "step": 18812 }, { "epoch": 0.7958795160335054, "grad_norm": 0.182156041264534, "learning_rate": 0.001, "loss": 2.5886, "step": 18813 }, { "epoch": 0.7959218207970218, "grad_norm": 0.6405094861984253, "learning_rate": 0.001, "loss": 2.5443, "step": 18814 }, { "epoch": 0.7959641255605381, "grad_norm": 0.20531894266605377, "learning_rate": 0.001, "loss": 2.3202, "step": 18815 }, { "epoch": 0.7960064303240545, "grad_norm": 0.1372930109500885, "learning_rate": 0.001, "loss": 2.0397, "step": 18816 }, { "epoch": 0.7960487350875709, "grad_norm": 0.15792180597782135, "learning_rate": 0.001, "loss": 2.4564, "step": 18817 }, { "epoch": 0.7960910398510872, "grad_norm": 1.705318808555603, "learning_rate": 0.001, "loss": 1.7707, "step": 18818 }, { "epoch": 0.7961333446146036, "grad_norm": 0.1559934914112091, "learning_rate": 0.001, "loss": 1.7459, "step": 18819 }, { "epoch": 0.79617564937812, "grad_norm": 0.1450294852256775, "learning_rate": 0.001, "loss": 2.4879, "step": 18820 }, { "epoch": 0.7962179541416363, "grad_norm": 0.1646071970462799, "learning_rate": 0.001, "loss": 2.2083, "step": 18821 }, { "epoch": 0.7962602589051527, "grad_norm": 6.388550758361816, "learning_rate": 0.001, "loss": 3.5271, "step": 18822 }, { "epoch": 0.7963025636686691, "grad_norm": 0.13386476039886475, "learning_rate": 0.001, "loss": 2.1901, "step": 18823 }, { "epoch": 0.7963448684321854, "grad_norm": 0.15483008325099945, "learning_rate": 0.001, "loss": 2.7349, "step": 18824 }, { "epoch": 0.7963871731957018, "grad_norm": 0.16752897202968597, "learning_rate": 0.001, "loss": 2.2906, "step": 18825 }, { "epoch": 0.7964294779592183, "grad_norm": 0.17875881493091583, "learning_rate": 0.001, "loss": 2.8077, "step": 18826 }, { "epoch": 0.7964717827227346, "grad_norm": 0.16410550475120544, "learning_rate": 0.001, "loss": 2.6807, "step": 18827 }, { "epoch": 0.796514087486251, "grad_norm": 0.15869401395320892, "learning_rate": 0.001, "loss": 2.016, "step": 18828 }, { "epoch": 0.7965563922497674, "grad_norm": 0.24351370334625244, "learning_rate": 0.001, "loss": 2.5587, "step": 18829 }, { "epoch": 0.7965986970132837, "grad_norm": 0.16663382947444916, "learning_rate": 0.001, "loss": 1.3997, "step": 18830 }, { "epoch": 0.7966410017768001, "grad_norm": 0.15760239958763123, "learning_rate": 0.001, "loss": 1.8689, "step": 18831 }, { "epoch": 0.7966833065403165, "grad_norm": 0.19611531496047974, "learning_rate": 0.001, "loss": 1.8662, "step": 18832 }, { "epoch": 0.7967256113038328, "grad_norm": 0.1665828973054886, "learning_rate": 0.001, "loss": 1.6031, "step": 18833 }, { "epoch": 0.7967679160673492, "grad_norm": 1.8677862882614136, "learning_rate": 0.001, "loss": 1.6596, "step": 18834 }, { "epoch": 0.7968102208308655, "grad_norm": 16.954547882080078, "learning_rate": 0.001, "loss": 1.8566, "step": 18835 }, { "epoch": 0.7968525255943819, "grad_norm": 0.18517085909843445, "learning_rate": 0.001, "loss": 2.7776, "step": 18836 }, { "epoch": 0.7968948303578983, "grad_norm": 0.15459203720092773, "learning_rate": 0.001, "loss": 1.4028, "step": 18837 }, { "epoch": 0.7969371351214146, "grad_norm": 1.6629470586776733, "learning_rate": 0.001, "loss": 2.8192, "step": 18838 }, { "epoch": 0.796979439884931, "grad_norm": 0.20710648596286774, "learning_rate": 0.001, "loss": 2.1015, "step": 18839 }, { "epoch": 0.7970217446484474, "grad_norm": 0.2469254583120346, "learning_rate": 0.001, "loss": 2.866, "step": 18840 }, { "epoch": 0.7970640494119637, "grad_norm": 0.8012136816978455, "learning_rate": 0.001, "loss": 3.8139, "step": 18841 }, { "epoch": 0.7971063541754801, "grad_norm": 0.4675610661506653, "learning_rate": 0.001, "loss": 2.2759, "step": 18842 }, { "epoch": 0.7971486589389966, "grad_norm": 0.2675013244152069, "learning_rate": 0.001, "loss": 2.0761, "step": 18843 }, { "epoch": 0.7971909637025129, "grad_norm": 0.18237780034542084, "learning_rate": 0.001, "loss": 2.3745, "step": 18844 }, { "epoch": 0.7972332684660293, "grad_norm": 3.2315402030944824, "learning_rate": 0.001, "loss": 2.6036, "step": 18845 }, { "epoch": 0.7972755732295457, "grad_norm": 0.17838868498802185, "learning_rate": 0.001, "loss": 2.091, "step": 18846 }, { "epoch": 0.797317877993062, "grad_norm": 1.5021365880966187, "learning_rate": 0.001, "loss": 2.3548, "step": 18847 }, { "epoch": 0.7973601827565784, "grad_norm": 0.1974629908800125, "learning_rate": 0.001, "loss": 1.9397, "step": 18848 }, { "epoch": 0.7974024875200948, "grad_norm": 0.8569322228431702, "learning_rate": 0.001, "loss": 2.618, "step": 18849 }, { "epoch": 0.7974447922836111, "grad_norm": 0.20803521573543549, "learning_rate": 0.001, "loss": 2.1371, "step": 18850 }, { "epoch": 0.7974870970471275, "grad_norm": 0.24137185513973236, "learning_rate": 0.001, "loss": 2.1892, "step": 18851 }, { "epoch": 0.7975294018106439, "grad_norm": 0.7290237545967102, "learning_rate": 0.001, "loss": 1.9481, "step": 18852 }, { "epoch": 0.7975717065741602, "grad_norm": 0.1846807599067688, "learning_rate": 0.001, "loss": 1.9675, "step": 18853 }, { "epoch": 0.7976140113376766, "grad_norm": 0.19551526010036469, "learning_rate": 0.001, "loss": 2.5217, "step": 18854 }, { "epoch": 0.797656316101193, "grad_norm": 0.2642076909542084, "learning_rate": 0.001, "loss": 2.4063, "step": 18855 }, { "epoch": 0.7976986208647093, "grad_norm": 0.15774700045585632, "learning_rate": 0.001, "loss": 2.3304, "step": 18856 }, { "epoch": 0.7977409256282257, "grad_norm": 0.23456871509552002, "learning_rate": 0.001, "loss": 2.4911, "step": 18857 }, { "epoch": 0.7977832303917421, "grad_norm": 0.15300516784191132, "learning_rate": 0.001, "loss": 1.6669, "step": 18858 }, { "epoch": 0.7978255351552584, "grad_norm": 0.171369269490242, "learning_rate": 0.001, "loss": 2.9436, "step": 18859 }, { "epoch": 0.7978678399187749, "grad_norm": 0.17169873416423798, "learning_rate": 0.001, "loss": 2.2797, "step": 18860 }, { "epoch": 0.7979101446822913, "grad_norm": 0.17338159680366516, "learning_rate": 0.001, "loss": 1.5475, "step": 18861 }, { "epoch": 0.7979524494458076, "grad_norm": 0.19149628281593323, "learning_rate": 0.001, "loss": 2.2884, "step": 18862 }, { "epoch": 0.797994754209324, "grad_norm": 0.3046054244041443, "learning_rate": 0.001, "loss": 2.6577, "step": 18863 }, { "epoch": 0.7980370589728404, "grad_norm": 0.19914156198501587, "learning_rate": 0.001, "loss": 3.0521, "step": 18864 }, { "epoch": 0.7980793637363567, "grad_norm": 0.14342699944972992, "learning_rate": 0.001, "loss": 2.2552, "step": 18865 }, { "epoch": 0.7981216684998731, "grad_norm": 0.16391606628894806, "learning_rate": 0.001, "loss": 1.8426, "step": 18866 }, { "epoch": 0.7981639732633895, "grad_norm": 0.17975471913814545, "learning_rate": 0.001, "loss": 2.6264, "step": 18867 }, { "epoch": 0.7982062780269058, "grad_norm": 0.2970002293586731, "learning_rate": 0.001, "loss": 2.2272, "step": 18868 }, { "epoch": 0.7982485827904222, "grad_norm": 4.976659774780273, "learning_rate": 0.001, "loss": 2.746, "step": 18869 }, { "epoch": 0.7982908875539386, "grad_norm": 0.34519144892692566, "learning_rate": 0.001, "loss": 2.1262, "step": 18870 }, { "epoch": 0.7983331923174549, "grad_norm": 0.14414265751838684, "learning_rate": 0.001, "loss": 2.1009, "step": 18871 }, { "epoch": 0.7983754970809713, "grad_norm": 0.15004147589206696, "learning_rate": 0.001, "loss": 1.9651, "step": 18872 }, { "epoch": 0.7984178018444877, "grad_norm": 0.1422182321548462, "learning_rate": 0.001, "loss": 1.8298, "step": 18873 }, { "epoch": 0.798460106608004, "grad_norm": 0.1388990730047226, "learning_rate": 0.001, "loss": 1.8034, "step": 18874 }, { "epoch": 0.7985024113715204, "grad_norm": 0.14638282358646393, "learning_rate": 0.001, "loss": 2.0963, "step": 18875 }, { "epoch": 0.7985447161350369, "grad_norm": 0.17456696927547455, "learning_rate": 0.001, "loss": 1.9739, "step": 18876 }, { "epoch": 0.7985870208985532, "grad_norm": 0.17205384373664856, "learning_rate": 0.001, "loss": 3.2143, "step": 18877 }, { "epoch": 0.7986293256620696, "grad_norm": 0.16180726885795593, "learning_rate": 0.001, "loss": 2.422, "step": 18878 }, { "epoch": 0.7986716304255859, "grad_norm": 8.663548469543457, "learning_rate": 0.001, "loss": 2.9214, "step": 18879 }, { "epoch": 0.7987139351891023, "grad_norm": 0.1585705280303955, "learning_rate": 0.001, "loss": 2.3293, "step": 18880 }, { "epoch": 0.7987562399526187, "grad_norm": 0.19919650256633759, "learning_rate": 0.001, "loss": 1.7659, "step": 18881 }, { "epoch": 0.798798544716135, "grad_norm": 0.18491598963737488, "learning_rate": 0.001, "loss": 1.8007, "step": 18882 }, { "epoch": 0.7988408494796514, "grad_norm": 4.440988540649414, "learning_rate": 0.001, "loss": 2.3786, "step": 18883 }, { "epoch": 0.7988831542431678, "grad_norm": 0.346278578042984, "learning_rate": 0.001, "loss": 2.312, "step": 18884 }, { "epoch": 0.7989254590066841, "grad_norm": 0.16744297742843628, "learning_rate": 0.001, "loss": 3.3667, "step": 18885 }, { "epoch": 0.7989677637702005, "grad_norm": 0.2730819880962372, "learning_rate": 0.001, "loss": 1.373, "step": 18886 }, { "epoch": 0.7990100685337169, "grad_norm": 0.1686711460351944, "learning_rate": 0.001, "loss": 2.4004, "step": 18887 }, { "epoch": 0.7990523732972332, "grad_norm": 0.32986345887184143, "learning_rate": 0.001, "loss": 2.1665, "step": 18888 }, { "epoch": 0.7990946780607496, "grad_norm": 0.23133182525634766, "learning_rate": 0.001, "loss": 2.0615, "step": 18889 }, { "epoch": 0.799136982824266, "grad_norm": 0.18338367342948914, "learning_rate": 0.001, "loss": 2.3496, "step": 18890 }, { "epoch": 0.7991792875877823, "grad_norm": 0.15058721601963043, "learning_rate": 0.001, "loss": 1.7269, "step": 18891 }, { "epoch": 0.7992215923512987, "grad_norm": 3.878596544265747, "learning_rate": 0.001, "loss": 2.3469, "step": 18892 }, { "epoch": 0.7992638971148152, "grad_norm": 0.15892788767814636, "learning_rate": 0.001, "loss": 2.3426, "step": 18893 }, { "epoch": 0.7993062018783315, "grad_norm": 0.18293994665145874, "learning_rate": 0.001, "loss": 1.5293, "step": 18894 }, { "epoch": 0.7993485066418479, "grad_norm": 0.1805136799812317, "learning_rate": 0.001, "loss": 2.0846, "step": 18895 }, { "epoch": 0.7993908114053643, "grad_norm": 0.16091568768024445, "learning_rate": 0.001, "loss": 1.4057, "step": 18896 }, { "epoch": 0.7994331161688806, "grad_norm": 0.20409560203552246, "learning_rate": 0.001, "loss": 3.3809, "step": 18897 }, { "epoch": 0.799475420932397, "grad_norm": 0.16749215126037598, "learning_rate": 0.001, "loss": 2.3456, "step": 18898 }, { "epoch": 0.7995177256959134, "grad_norm": 0.165800079703331, "learning_rate": 0.001, "loss": 2.5899, "step": 18899 }, { "epoch": 0.7995600304594297, "grad_norm": 0.17839626967906952, "learning_rate": 0.001, "loss": 1.6863, "step": 18900 }, { "epoch": 0.7996023352229461, "grad_norm": 0.15647844970226288, "learning_rate": 0.001, "loss": 2.4963, "step": 18901 }, { "epoch": 0.7996446399864625, "grad_norm": 0.22941941022872925, "learning_rate": 0.001, "loss": 2.7686, "step": 18902 }, { "epoch": 0.7996869447499788, "grad_norm": 0.17130149900913239, "learning_rate": 0.001, "loss": 2.7059, "step": 18903 }, { "epoch": 0.7997292495134952, "grad_norm": 0.1701461225748062, "learning_rate": 0.001, "loss": 1.87, "step": 18904 }, { "epoch": 0.7997715542770116, "grad_norm": 0.514470100402832, "learning_rate": 0.001, "loss": 2.4732, "step": 18905 }, { "epoch": 0.7998138590405279, "grad_norm": 0.13958191871643066, "learning_rate": 0.001, "loss": 2.6836, "step": 18906 }, { "epoch": 0.7998561638040443, "grad_norm": 0.15818491578102112, "learning_rate": 0.001, "loss": 2.7445, "step": 18907 }, { "epoch": 0.7998984685675607, "grad_norm": 0.18377156555652618, "learning_rate": 0.001, "loss": 1.9231, "step": 18908 }, { "epoch": 0.799940773331077, "grad_norm": 0.35791802406311035, "learning_rate": 0.001, "loss": 2.0333, "step": 18909 }, { "epoch": 0.7999830780945935, "grad_norm": 0.3409176170825958, "learning_rate": 0.001, "loss": 2.113, "step": 18910 }, { "epoch": 0.8000253828581099, "grad_norm": 0.1757473349571228, "learning_rate": 0.001, "loss": 1.5102, "step": 18911 }, { "epoch": 0.8000676876216262, "grad_norm": 1.6405963897705078, "learning_rate": 0.001, "loss": 2.0664, "step": 18912 }, { "epoch": 0.8001099923851426, "grad_norm": 0.1534256935119629, "learning_rate": 0.001, "loss": 2.2177, "step": 18913 }, { "epoch": 0.800152297148659, "grad_norm": 0.21698273718357086, "learning_rate": 0.001, "loss": 2.74, "step": 18914 }, { "epoch": 0.8001946019121753, "grad_norm": 0.1605890989303589, "learning_rate": 0.001, "loss": 1.9252, "step": 18915 }, { "epoch": 0.8002369066756917, "grad_norm": 0.16456551849842072, "learning_rate": 0.001, "loss": 1.993, "step": 18916 }, { "epoch": 0.8002792114392081, "grad_norm": 0.1534430980682373, "learning_rate": 0.001, "loss": 1.7507, "step": 18917 }, { "epoch": 0.8003215162027244, "grad_norm": 0.17245927453041077, "learning_rate": 0.001, "loss": 3.3313, "step": 18918 }, { "epoch": 0.8003638209662408, "grad_norm": 0.2450478971004486, "learning_rate": 0.001, "loss": 2.6292, "step": 18919 }, { "epoch": 0.8004061257297572, "grad_norm": 0.2959800958633423, "learning_rate": 0.001, "loss": 2.498, "step": 18920 }, { "epoch": 0.8004484304932735, "grad_norm": 0.15317165851593018, "learning_rate": 0.001, "loss": 2.6568, "step": 18921 }, { "epoch": 0.8004907352567899, "grad_norm": 0.1782752275466919, "learning_rate": 0.001, "loss": 1.7211, "step": 18922 }, { "epoch": 0.8005330400203063, "grad_norm": 0.1783091425895691, "learning_rate": 0.001, "loss": 2.6381, "step": 18923 }, { "epoch": 0.8005753447838226, "grad_norm": 0.17199842631816864, "learning_rate": 0.001, "loss": 2.553, "step": 18924 }, { "epoch": 0.800617649547339, "grad_norm": 1.3231291770935059, "learning_rate": 0.001, "loss": 2.695, "step": 18925 }, { "epoch": 0.8006599543108553, "grad_norm": 2.2281360626220703, "learning_rate": 0.001, "loss": 2.2209, "step": 18926 }, { "epoch": 0.8007022590743718, "grad_norm": 0.20170575380325317, "learning_rate": 0.001, "loss": 2.7078, "step": 18927 }, { "epoch": 0.8007445638378882, "grad_norm": 0.17073240876197815, "learning_rate": 0.001, "loss": 2.4441, "step": 18928 }, { "epoch": 0.8007868686014045, "grad_norm": 0.15086358785629272, "learning_rate": 0.001, "loss": 2.0499, "step": 18929 }, { "epoch": 0.8008291733649209, "grad_norm": 0.8255175352096558, "learning_rate": 0.001, "loss": 2.4192, "step": 18930 }, { "epoch": 0.8008714781284373, "grad_norm": 0.1681467890739441, "learning_rate": 0.001, "loss": 2.2557, "step": 18931 }, { "epoch": 0.8009137828919536, "grad_norm": 0.14386318624019623, "learning_rate": 0.001, "loss": 1.9512, "step": 18932 }, { "epoch": 0.80095608765547, "grad_norm": 0.36584439873695374, "learning_rate": 0.001, "loss": 1.7842, "step": 18933 }, { "epoch": 0.8009983924189864, "grad_norm": 0.14948461949825287, "learning_rate": 0.001, "loss": 1.7132, "step": 18934 }, { "epoch": 0.8010406971825027, "grad_norm": 0.1546405851840973, "learning_rate": 0.001, "loss": 1.8271, "step": 18935 }, { "epoch": 0.8010830019460191, "grad_norm": 0.15600258111953735, "learning_rate": 0.001, "loss": 2.0882, "step": 18936 }, { "epoch": 0.8011253067095355, "grad_norm": 0.20449069142341614, "learning_rate": 0.001, "loss": 1.752, "step": 18937 }, { "epoch": 0.8011676114730518, "grad_norm": 0.143666610121727, "learning_rate": 0.001, "loss": 2.4691, "step": 18938 }, { "epoch": 0.8012099162365682, "grad_norm": 0.17219458520412445, "learning_rate": 0.001, "loss": 1.9348, "step": 18939 }, { "epoch": 0.8012522210000846, "grad_norm": 0.14504066109657288, "learning_rate": 0.001, "loss": 2.5255, "step": 18940 }, { "epoch": 0.8012945257636009, "grad_norm": 0.1552887260913849, "learning_rate": 0.001, "loss": 1.8034, "step": 18941 }, { "epoch": 0.8013368305271173, "grad_norm": 0.3989625871181488, "learning_rate": 0.001, "loss": 1.6939, "step": 18942 }, { "epoch": 0.8013791352906338, "grad_norm": 0.15734554827213287, "learning_rate": 0.001, "loss": 1.8861, "step": 18943 }, { "epoch": 0.8014214400541501, "grad_norm": 0.159386545419693, "learning_rate": 0.001, "loss": 2.3796, "step": 18944 }, { "epoch": 0.8014637448176665, "grad_norm": 0.1559273898601532, "learning_rate": 0.001, "loss": 3.0239, "step": 18945 }, { "epoch": 0.8015060495811829, "grad_norm": 0.15622538328170776, "learning_rate": 0.001, "loss": 2.6106, "step": 18946 }, { "epoch": 0.8015483543446992, "grad_norm": 0.17876319587230682, "learning_rate": 0.001, "loss": 1.8505, "step": 18947 }, { "epoch": 0.8015906591082156, "grad_norm": 0.1366472691297531, "learning_rate": 0.001, "loss": 1.9985, "step": 18948 }, { "epoch": 0.801632963871732, "grad_norm": 0.17091487348079681, "learning_rate": 0.001, "loss": 2.836, "step": 18949 }, { "epoch": 0.8016752686352483, "grad_norm": 0.1562683880329132, "learning_rate": 0.001, "loss": 1.4412, "step": 18950 }, { "epoch": 0.8017175733987647, "grad_norm": 0.1484891027212143, "learning_rate": 0.001, "loss": 1.8152, "step": 18951 }, { "epoch": 0.8017598781622811, "grad_norm": 1.3076022863388062, "learning_rate": 0.001, "loss": 2.7306, "step": 18952 }, { "epoch": 0.8018021829257974, "grad_norm": 0.16072845458984375, "learning_rate": 0.001, "loss": 2.0295, "step": 18953 }, { "epoch": 0.8018444876893138, "grad_norm": 0.14599545300006866, "learning_rate": 0.001, "loss": 2.7667, "step": 18954 }, { "epoch": 0.8018867924528302, "grad_norm": 0.15150395035743713, "learning_rate": 0.001, "loss": 3.1838, "step": 18955 }, { "epoch": 0.8019290972163465, "grad_norm": 0.18270723521709442, "learning_rate": 0.001, "loss": 1.6511, "step": 18956 }, { "epoch": 0.8019714019798629, "grad_norm": 0.1671307235956192, "learning_rate": 0.001, "loss": 2.9056, "step": 18957 }, { "epoch": 0.8020137067433794, "grad_norm": 0.14215819537639618, "learning_rate": 0.001, "loss": 2.8229, "step": 18958 }, { "epoch": 0.8020560115068956, "grad_norm": 0.14553506672382355, "learning_rate": 0.001, "loss": 1.3122, "step": 18959 }, { "epoch": 0.8020983162704121, "grad_norm": 0.1251428723335266, "learning_rate": 0.001, "loss": 1.9551, "step": 18960 }, { "epoch": 0.8021406210339285, "grad_norm": 0.7137900590896606, "learning_rate": 0.001, "loss": 2.8314, "step": 18961 }, { "epoch": 0.8021829257974448, "grad_norm": 0.1495407074689865, "learning_rate": 0.001, "loss": 1.7774, "step": 18962 }, { "epoch": 0.8022252305609612, "grad_norm": 0.14792723953723907, "learning_rate": 0.001, "loss": 2.1685, "step": 18963 }, { "epoch": 0.8022675353244776, "grad_norm": 0.7943689823150635, "learning_rate": 0.001, "loss": 2.1938, "step": 18964 }, { "epoch": 0.8023098400879939, "grad_norm": 19.49368667602539, "learning_rate": 0.001, "loss": 3.6481, "step": 18965 }, { "epoch": 0.8023521448515103, "grad_norm": 2.110476493835449, "learning_rate": 0.001, "loss": 1.4227, "step": 18966 }, { "epoch": 0.8023944496150267, "grad_norm": 0.13607414066791534, "learning_rate": 0.001, "loss": 1.6882, "step": 18967 }, { "epoch": 0.802436754378543, "grad_norm": 1.1978744268417358, "learning_rate": 0.001, "loss": 1.8216, "step": 18968 }, { "epoch": 0.8024790591420594, "grad_norm": 0.19706468284130096, "learning_rate": 0.001, "loss": 2.8467, "step": 18969 }, { "epoch": 0.8025213639055757, "grad_norm": 0.14383366703987122, "learning_rate": 0.001, "loss": 1.8751, "step": 18970 }, { "epoch": 0.8025636686690921, "grad_norm": 0.20489506423473358, "learning_rate": 0.001, "loss": 2.8281, "step": 18971 }, { "epoch": 0.8026059734326085, "grad_norm": 1.1792211532592773, "learning_rate": 0.001, "loss": 2.9794, "step": 18972 }, { "epoch": 0.8026482781961248, "grad_norm": 0.5277860164642334, "learning_rate": 0.001, "loss": 1.6409, "step": 18973 }, { "epoch": 0.8026905829596412, "grad_norm": 0.13644924759864807, "learning_rate": 0.001, "loss": 1.4607, "step": 18974 }, { "epoch": 0.8027328877231577, "grad_norm": 0.17572204768657684, "learning_rate": 0.001, "loss": 2.3758, "step": 18975 }, { "epoch": 0.802775192486674, "grad_norm": 0.356366366147995, "learning_rate": 0.001, "loss": 1.5677, "step": 18976 }, { "epoch": 0.8028174972501904, "grad_norm": 0.1653011590242386, "learning_rate": 0.001, "loss": 2.6266, "step": 18977 }, { "epoch": 0.8028598020137068, "grad_norm": 0.1638076901435852, "learning_rate": 0.001, "loss": 1.9443, "step": 18978 }, { "epoch": 0.8029021067772231, "grad_norm": 0.26030609011650085, "learning_rate": 0.001, "loss": 1.6647, "step": 18979 }, { "epoch": 0.8029444115407395, "grad_norm": 0.16686412692070007, "learning_rate": 0.001, "loss": 2.2475, "step": 18980 }, { "epoch": 0.8029867163042559, "grad_norm": 0.2093593031167984, "learning_rate": 0.001, "loss": 2.0305, "step": 18981 }, { "epoch": 0.8030290210677722, "grad_norm": 0.17999699711799622, "learning_rate": 0.001, "loss": 2.2384, "step": 18982 }, { "epoch": 0.8030713258312886, "grad_norm": 0.9324764013290405, "learning_rate": 0.001, "loss": 2.6971, "step": 18983 }, { "epoch": 0.803113630594805, "grad_norm": 0.17794398963451385, "learning_rate": 0.001, "loss": 2.0274, "step": 18984 }, { "epoch": 0.8031559353583213, "grad_norm": 0.18477022647857666, "learning_rate": 0.001, "loss": 2.2527, "step": 18985 }, { "epoch": 0.8031982401218377, "grad_norm": 0.1455012857913971, "learning_rate": 0.001, "loss": 2.1315, "step": 18986 }, { "epoch": 0.8032405448853541, "grad_norm": 1.0563513040542603, "learning_rate": 0.001, "loss": 2.3651, "step": 18987 }, { "epoch": 0.8032828496488704, "grad_norm": 0.28026530146598816, "learning_rate": 0.001, "loss": 3.0058, "step": 18988 }, { "epoch": 0.8033251544123868, "grad_norm": 58.185855865478516, "learning_rate": 0.001, "loss": 1.8485, "step": 18989 }, { "epoch": 0.8033674591759032, "grad_norm": 0.35726267099380493, "learning_rate": 0.001, "loss": 1.8895, "step": 18990 }, { "epoch": 0.8034097639394195, "grad_norm": 0.2682846486568451, "learning_rate": 0.001, "loss": 1.7967, "step": 18991 }, { "epoch": 0.803452068702936, "grad_norm": 0.16665484011173248, "learning_rate": 0.001, "loss": 2.3617, "step": 18992 }, { "epoch": 0.8034943734664524, "grad_norm": 0.16260682046413422, "learning_rate": 0.001, "loss": 2.4323, "step": 18993 }, { "epoch": 0.8035366782299687, "grad_norm": 0.16122184693813324, "learning_rate": 0.001, "loss": 2.6508, "step": 18994 }, { "epoch": 0.8035789829934851, "grad_norm": 0.16327929496765137, "learning_rate": 0.001, "loss": 1.5013, "step": 18995 }, { "epoch": 0.8036212877570015, "grad_norm": 0.19706536829471588, "learning_rate": 0.001, "loss": 2.5097, "step": 18996 }, { "epoch": 0.8036635925205178, "grad_norm": 0.3086377680301666, "learning_rate": 0.001, "loss": 1.9712, "step": 18997 }, { "epoch": 0.8037058972840342, "grad_norm": 0.1745164543390274, "learning_rate": 0.001, "loss": 1.9054, "step": 18998 }, { "epoch": 0.8037482020475506, "grad_norm": 0.20080901682376862, "learning_rate": 0.001, "loss": 2.6199, "step": 18999 }, { "epoch": 0.8037905068110669, "grad_norm": 0.8673862814903259, "learning_rate": 0.001, "loss": 1.8435, "step": 19000 }, { "epoch": 0.8038328115745833, "grad_norm": 0.17495474219322205, "learning_rate": 0.001, "loss": 2.3335, "step": 19001 }, { "epoch": 0.8038751163380997, "grad_norm": 0.2492198944091797, "learning_rate": 0.001, "loss": 3.421, "step": 19002 }, { "epoch": 0.803917421101616, "grad_norm": 0.2866990864276886, "learning_rate": 0.001, "loss": 2.3914, "step": 19003 }, { "epoch": 0.8039597258651324, "grad_norm": 0.1283341497182846, "learning_rate": 0.001, "loss": 1.6294, "step": 19004 }, { "epoch": 0.8040020306286488, "grad_norm": 0.18748348951339722, "learning_rate": 0.001, "loss": 3.779, "step": 19005 }, { "epoch": 0.8040443353921651, "grad_norm": 0.2228836864233017, "learning_rate": 0.001, "loss": 2.1684, "step": 19006 }, { "epoch": 0.8040866401556815, "grad_norm": 0.21044659614562988, "learning_rate": 0.001, "loss": 2.5293, "step": 19007 }, { "epoch": 0.804128944919198, "grad_norm": 0.13109223544597626, "learning_rate": 0.001, "loss": 1.7461, "step": 19008 }, { "epoch": 0.8041712496827143, "grad_norm": 0.16634885966777802, "learning_rate": 0.001, "loss": 1.6819, "step": 19009 }, { "epoch": 0.8042135544462307, "grad_norm": 26.008522033691406, "learning_rate": 0.001, "loss": 1.8201, "step": 19010 }, { "epoch": 0.8042558592097471, "grad_norm": 0.14524191617965698, "learning_rate": 0.001, "loss": 1.7735, "step": 19011 }, { "epoch": 0.8042981639732634, "grad_norm": 0.14269325137138367, "learning_rate": 0.001, "loss": 2.4637, "step": 19012 }, { "epoch": 0.8043404687367798, "grad_norm": 0.31678712368011475, "learning_rate": 0.001, "loss": 2.9312, "step": 19013 }, { "epoch": 0.8043827735002962, "grad_norm": 0.16404123604297638, "learning_rate": 0.001, "loss": 1.8627, "step": 19014 }, { "epoch": 0.8044250782638125, "grad_norm": 0.16034923493862152, "learning_rate": 0.001, "loss": 2.3652, "step": 19015 }, { "epoch": 0.8044673830273289, "grad_norm": 0.1982741504907608, "learning_rate": 0.001, "loss": 2.198, "step": 19016 }, { "epoch": 0.8045096877908452, "grad_norm": 0.1432885080575943, "learning_rate": 0.001, "loss": 1.7994, "step": 19017 }, { "epoch": 0.8045519925543616, "grad_norm": 0.2005062848329544, "learning_rate": 0.001, "loss": 2.2732, "step": 19018 }, { "epoch": 0.804594297317878, "grad_norm": 0.18627607822418213, "learning_rate": 0.001, "loss": 2.2855, "step": 19019 }, { "epoch": 0.8046366020813943, "grad_norm": 0.17629285156726837, "learning_rate": 0.001, "loss": 2.3181, "step": 19020 }, { "epoch": 0.8046789068449107, "grad_norm": 0.15572401881217957, "learning_rate": 0.001, "loss": 2.2486, "step": 19021 }, { "epoch": 0.8047212116084271, "grad_norm": 0.13063278794288635, "learning_rate": 0.001, "loss": 2.0039, "step": 19022 }, { "epoch": 0.8047635163719434, "grad_norm": 0.17360426485538483, "learning_rate": 0.001, "loss": 1.9815, "step": 19023 }, { "epoch": 0.8048058211354598, "grad_norm": 0.14736105501651764, "learning_rate": 0.001, "loss": 1.7787, "step": 19024 }, { "epoch": 0.8048481258989763, "grad_norm": 0.40037959814071655, "learning_rate": 0.001, "loss": 1.4608, "step": 19025 }, { "epoch": 0.8048904306624926, "grad_norm": 0.13307571411132812, "learning_rate": 0.001, "loss": 2.472, "step": 19026 }, { "epoch": 0.804932735426009, "grad_norm": 0.2580379545688629, "learning_rate": 0.001, "loss": 2.6533, "step": 19027 }, { "epoch": 0.8049750401895254, "grad_norm": 1.3895004987716675, "learning_rate": 0.001, "loss": 2.1698, "step": 19028 }, { "epoch": 0.8050173449530417, "grad_norm": 0.1485358327627182, "learning_rate": 0.001, "loss": 1.8783, "step": 19029 }, { "epoch": 0.8050596497165581, "grad_norm": 33.825557708740234, "learning_rate": 0.001, "loss": 2.6339, "step": 19030 }, { "epoch": 0.8051019544800745, "grad_norm": 0.15291500091552734, "learning_rate": 0.001, "loss": 1.5062, "step": 19031 }, { "epoch": 0.8051442592435908, "grad_norm": 0.19544963538646698, "learning_rate": 0.001, "loss": 2.7773, "step": 19032 }, { "epoch": 0.8051865640071072, "grad_norm": 0.7990001440048218, "learning_rate": 0.001, "loss": 2.6352, "step": 19033 }, { "epoch": 0.8052288687706236, "grad_norm": 0.20977818965911865, "learning_rate": 0.001, "loss": 2.6468, "step": 19034 }, { "epoch": 0.8052711735341399, "grad_norm": 0.3944449722766876, "learning_rate": 0.001, "loss": 2.3777, "step": 19035 }, { "epoch": 0.8053134782976563, "grad_norm": 0.1769852191209793, "learning_rate": 0.001, "loss": 1.7299, "step": 19036 }, { "epoch": 0.8053557830611727, "grad_norm": 0.2292184680700302, "learning_rate": 0.001, "loss": 2.9147, "step": 19037 }, { "epoch": 0.805398087824689, "grad_norm": 0.20135535299777985, "learning_rate": 0.001, "loss": 1.8794, "step": 19038 }, { "epoch": 0.8054403925882054, "grad_norm": 9.884795188903809, "learning_rate": 0.001, "loss": 2.4281, "step": 19039 }, { "epoch": 0.8054826973517218, "grad_norm": 0.22030168771743774, "learning_rate": 0.001, "loss": 2.7934, "step": 19040 }, { "epoch": 0.8055250021152381, "grad_norm": 0.3213954269886017, "learning_rate": 0.001, "loss": 1.9529, "step": 19041 }, { "epoch": 0.8055673068787546, "grad_norm": 0.18723493814468384, "learning_rate": 0.001, "loss": 2.0792, "step": 19042 }, { "epoch": 0.805609611642271, "grad_norm": 0.19658009707927704, "learning_rate": 0.001, "loss": 2.5037, "step": 19043 }, { "epoch": 0.8056519164057873, "grad_norm": 0.18413256108760834, "learning_rate": 0.001, "loss": 2.0197, "step": 19044 }, { "epoch": 0.8056942211693037, "grad_norm": 0.1551973521709442, "learning_rate": 0.001, "loss": 2.6713, "step": 19045 }, { "epoch": 0.8057365259328201, "grad_norm": 0.18275663256645203, "learning_rate": 0.001, "loss": 2.6608, "step": 19046 }, { "epoch": 0.8057788306963364, "grad_norm": 0.1611073613166809, "learning_rate": 0.001, "loss": 2.5312, "step": 19047 }, { "epoch": 0.8058211354598528, "grad_norm": 0.160042405128479, "learning_rate": 0.001, "loss": 1.8463, "step": 19048 }, { "epoch": 0.8058634402233692, "grad_norm": 0.19275878369808197, "learning_rate": 0.001, "loss": 1.818, "step": 19049 }, { "epoch": 0.8059057449868855, "grad_norm": 1.6935782432556152, "learning_rate": 0.001, "loss": 4.1077, "step": 19050 }, { "epoch": 0.8059480497504019, "grad_norm": 0.16775570809841156, "learning_rate": 0.001, "loss": 2.496, "step": 19051 }, { "epoch": 0.8059903545139183, "grad_norm": 0.1673063337802887, "learning_rate": 0.001, "loss": 2.0237, "step": 19052 }, { "epoch": 0.8060326592774346, "grad_norm": 0.17094041407108307, "learning_rate": 0.001, "loss": 1.7422, "step": 19053 }, { "epoch": 0.806074964040951, "grad_norm": 0.18810485303401947, "learning_rate": 0.001, "loss": 2.8559, "step": 19054 }, { "epoch": 0.8061172688044674, "grad_norm": 0.17345726490020752, "learning_rate": 0.001, "loss": 2.183, "step": 19055 }, { "epoch": 0.8061595735679837, "grad_norm": 0.27425530552864075, "learning_rate": 0.001, "loss": 2.9688, "step": 19056 }, { "epoch": 0.8062018783315001, "grad_norm": 16.195755004882812, "learning_rate": 0.001, "loss": 1.9708, "step": 19057 }, { "epoch": 0.8062441830950166, "grad_norm": 0.15767496824264526, "learning_rate": 0.001, "loss": 1.8641, "step": 19058 }, { "epoch": 0.8062864878585329, "grad_norm": 0.16588301956653595, "learning_rate": 0.001, "loss": 2.869, "step": 19059 }, { "epoch": 0.8063287926220493, "grad_norm": 0.15558847784996033, "learning_rate": 0.001, "loss": 1.9133, "step": 19060 }, { "epoch": 0.8063710973855656, "grad_norm": 0.16034120321273804, "learning_rate": 0.001, "loss": 3.0425, "step": 19061 }, { "epoch": 0.806413402149082, "grad_norm": 0.1412864327430725, "learning_rate": 0.001, "loss": 2.0012, "step": 19062 }, { "epoch": 0.8064557069125984, "grad_norm": 0.2045813649892807, "learning_rate": 0.001, "loss": 1.9367, "step": 19063 }, { "epoch": 0.8064980116761147, "grad_norm": 0.20166616141796112, "learning_rate": 0.001, "loss": 1.8073, "step": 19064 }, { "epoch": 0.8065403164396311, "grad_norm": 0.19779857993125916, "learning_rate": 0.001, "loss": 2.8934, "step": 19065 }, { "epoch": 0.8065826212031475, "grad_norm": 0.15480828285217285, "learning_rate": 0.001, "loss": 2.1785, "step": 19066 }, { "epoch": 0.8066249259666638, "grad_norm": 0.5121575593948364, "learning_rate": 0.001, "loss": 2.1465, "step": 19067 }, { "epoch": 0.8066672307301802, "grad_norm": 0.25678855180740356, "learning_rate": 0.001, "loss": 2.5373, "step": 19068 }, { "epoch": 0.8067095354936966, "grad_norm": 0.1567380726337433, "learning_rate": 0.001, "loss": 1.5213, "step": 19069 }, { "epoch": 0.8067518402572129, "grad_norm": 0.15887407958507538, "learning_rate": 0.001, "loss": 2.0361, "step": 19070 }, { "epoch": 0.8067941450207293, "grad_norm": 0.2849603593349457, "learning_rate": 0.001, "loss": 2.3657, "step": 19071 }, { "epoch": 0.8068364497842457, "grad_norm": 0.27255427837371826, "learning_rate": 0.001, "loss": 2.005, "step": 19072 }, { "epoch": 0.806878754547762, "grad_norm": 1.081209659576416, "learning_rate": 0.001, "loss": 2.7503, "step": 19073 }, { "epoch": 0.8069210593112784, "grad_norm": 0.20307116210460663, "learning_rate": 0.001, "loss": 2.4047, "step": 19074 }, { "epoch": 0.8069633640747949, "grad_norm": 0.3475703299045563, "learning_rate": 0.001, "loss": 2.1349, "step": 19075 }, { "epoch": 0.8070056688383112, "grad_norm": 0.18069538474082947, "learning_rate": 0.001, "loss": 2.8262, "step": 19076 }, { "epoch": 0.8070479736018276, "grad_norm": 0.16302266716957092, "learning_rate": 0.001, "loss": 2.1926, "step": 19077 }, { "epoch": 0.807090278365344, "grad_norm": 0.13929545879364014, "learning_rate": 0.001, "loss": 1.7021, "step": 19078 }, { "epoch": 0.8071325831288603, "grad_norm": 2.2254645824432373, "learning_rate": 0.001, "loss": 2.0774, "step": 19079 }, { "epoch": 0.8071748878923767, "grad_norm": 0.12101297825574875, "learning_rate": 0.001, "loss": 2.1299, "step": 19080 }, { "epoch": 0.8072171926558931, "grad_norm": 0.15485353767871857, "learning_rate": 0.001, "loss": 1.941, "step": 19081 }, { "epoch": 0.8072594974194094, "grad_norm": 0.16078881919384003, "learning_rate": 0.001, "loss": 2.2291, "step": 19082 }, { "epoch": 0.8073018021829258, "grad_norm": 1.5918997526168823, "learning_rate": 0.001, "loss": 3.3536, "step": 19083 }, { "epoch": 0.8073441069464422, "grad_norm": 0.16375018656253815, "learning_rate": 0.001, "loss": 2.5549, "step": 19084 }, { "epoch": 0.8073864117099585, "grad_norm": 1.0432242155075073, "learning_rate": 0.001, "loss": 2.6411, "step": 19085 }, { "epoch": 0.8074287164734749, "grad_norm": 0.1408742517232895, "learning_rate": 0.001, "loss": 1.4977, "step": 19086 }, { "epoch": 0.8074710212369913, "grad_norm": 0.13570459187030792, "learning_rate": 0.001, "loss": 3.3056, "step": 19087 }, { "epoch": 0.8075133260005076, "grad_norm": 0.22087857127189636, "learning_rate": 0.001, "loss": 2.2098, "step": 19088 }, { "epoch": 0.807555630764024, "grad_norm": 1.198239803314209, "learning_rate": 0.001, "loss": 3.1157, "step": 19089 }, { "epoch": 0.8075979355275404, "grad_norm": 0.1383177936077118, "learning_rate": 0.001, "loss": 2.3563, "step": 19090 }, { "epoch": 0.8076402402910567, "grad_norm": 0.14231859147548676, "learning_rate": 0.001, "loss": 1.863, "step": 19091 }, { "epoch": 0.8076825450545732, "grad_norm": 0.17325039207935333, "learning_rate": 0.001, "loss": 3.1426, "step": 19092 }, { "epoch": 0.8077248498180896, "grad_norm": 0.14337441325187683, "learning_rate": 0.001, "loss": 1.6843, "step": 19093 }, { "epoch": 0.8077671545816059, "grad_norm": 0.15518362820148468, "learning_rate": 0.001, "loss": 2.3449, "step": 19094 }, { "epoch": 0.8078094593451223, "grad_norm": 0.13496747612953186, "learning_rate": 0.001, "loss": 2.6607, "step": 19095 }, { "epoch": 0.8078517641086387, "grad_norm": 0.17812597751617432, "learning_rate": 0.001, "loss": 2.6483, "step": 19096 }, { "epoch": 0.807894068872155, "grad_norm": 0.24298779666423798, "learning_rate": 0.001, "loss": 1.6009, "step": 19097 }, { "epoch": 0.8079363736356714, "grad_norm": 0.13833343982696533, "learning_rate": 0.001, "loss": 1.7103, "step": 19098 }, { "epoch": 0.8079786783991878, "grad_norm": 0.17393560707569122, "learning_rate": 0.001, "loss": 3.2679, "step": 19099 }, { "epoch": 0.8080209831627041, "grad_norm": 0.1347343772649765, "learning_rate": 0.001, "loss": 1.3848, "step": 19100 }, { "epoch": 0.8080632879262205, "grad_norm": 0.3251754641532898, "learning_rate": 0.001, "loss": 2.6169, "step": 19101 }, { "epoch": 0.8081055926897369, "grad_norm": 0.13301436603069305, "learning_rate": 0.001, "loss": 1.8152, "step": 19102 }, { "epoch": 0.8081478974532532, "grad_norm": 0.35224834084510803, "learning_rate": 0.001, "loss": 1.9909, "step": 19103 }, { "epoch": 0.8081902022167696, "grad_norm": 0.2184954732656479, "learning_rate": 0.001, "loss": 2.3797, "step": 19104 }, { "epoch": 0.8082325069802859, "grad_norm": 0.14901390671730042, "learning_rate": 0.001, "loss": 2.0435, "step": 19105 }, { "epoch": 0.8082748117438023, "grad_norm": 0.1670936793088913, "learning_rate": 0.001, "loss": 1.9192, "step": 19106 }, { "epoch": 0.8083171165073187, "grad_norm": 0.1560143530368805, "learning_rate": 0.001, "loss": 1.6413, "step": 19107 }, { "epoch": 0.808359421270835, "grad_norm": 0.14152587950229645, "learning_rate": 0.001, "loss": 1.7786, "step": 19108 }, { "epoch": 0.8084017260343515, "grad_norm": 0.2246204912662506, "learning_rate": 0.001, "loss": 2.0439, "step": 19109 }, { "epoch": 0.8084440307978679, "grad_norm": 0.17294353246688843, "learning_rate": 0.001, "loss": 2.3991, "step": 19110 }, { "epoch": 0.8084863355613842, "grad_norm": 0.14539849758148193, "learning_rate": 0.001, "loss": 1.9719, "step": 19111 }, { "epoch": 0.8085286403249006, "grad_norm": 0.16276989877223969, "learning_rate": 0.001, "loss": 1.7146, "step": 19112 }, { "epoch": 0.808570945088417, "grad_norm": 9.473787307739258, "learning_rate": 0.001, "loss": 2.1896, "step": 19113 }, { "epoch": 0.8086132498519333, "grad_norm": 0.1611110121011734, "learning_rate": 0.001, "loss": 1.8925, "step": 19114 }, { "epoch": 0.8086555546154497, "grad_norm": 0.14507554471492767, "learning_rate": 0.001, "loss": 1.5477, "step": 19115 }, { "epoch": 0.8086978593789661, "grad_norm": 0.17105506360530853, "learning_rate": 0.001, "loss": 2.5952, "step": 19116 }, { "epoch": 0.8087401641424824, "grad_norm": 0.563596248626709, "learning_rate": 0.001, "loss": 2.5732, "step": 19117 }, { "epoch": 0.8087824689059988, "grad_norm": 0.2284759283065796, "learning_rate": 0.001, "loss": 3.6089, "step": 19118 }, { "epoch": 0.8088247736695152, "grad_norm": 0.2556639611721039, "learning_rate": 0.001, "loss": 1.9265, "step": 19119 }, { "epoch": 0.8088670784330315, "grad_norm": 0.1958867907524109, "learning_rate": 0.001, "loss": 2.395, "step": 19120 }, { "epoch": 0.8089093831965479, "grad_norm": 0.22015972435474396, "learning_rate": 0.001, "loss": 2.0847, "step": 19121 }, { "epoch": 0.8089516879600643, "grad_norm": 0.17518767714500427, "learning_rate": 0.001, "loss": 2.8718, "step": 19122 }, { "epoch": 0.8089939927235806, "grad_norm": 6.914678573608398, "learning_rate": 0.001, "loss": 1.9315, "step": 19123 }, { "epoch": 0.809036297487097, "grad_norm": 1.030540108680725, "learning_rate": 0.001, "loss": 2.9412, "step": 19124 }, { "epoch": 0.8090786022506135, "grad_norm": 28.061410903930664, "learning_rate": 0.001, "loss": 2.1573, "step": 19125 }, { "epoch": 0.8091209070141298, "grad_norm": 0.18755550682544708, "learning_rate": 0.001, "loss": 2.0356, "step": 19126 }, { "epoch": 0.8091632117776462, "grad_norm": 0.15464842319488525, "learning_rate": 0.001, "loss": 1.7849, "step": 19127 }, { "epoch": 0.8092055165411626, "grad_norm": 0.20664550364017487, "learning_rate": 0.001, "loss": 3.0477, "step": 19128 }, { "epoch": 0.8092478213046789, "grad_norm": 0.19141417741775513, "learning_rate": 0.001, "loss": 2.0378, "step": 19129 }, { "epoch": 0.8092901260681953, "grad_norm": 0.7262970805168152, "learning_rate": 0.001, "loss": 2.168, "step": 19130 }, { "epoch": 0.8093324308317117, "grad_norm": 0.18341998755931854, "learning_rate": 0.001, "loss": 2.529, "step": 19131 }, { "epoch": 0.809374735595228, "grad_norm": 0.16619554162025452, "learning_rate": 0.001, "loss": 1.9075, "step": 19132 }, { "epoch": 0.8094170403587444, "grad_norm": 0.16377653181552887, "learning_rate": 0.001, "loss": 2.7905, "step": 19133 }, { "epoch": 0.8094593451222608, "grad_norm": 0.15902559459209442, "learning_rate": 0.001, "loss": 2.0185, "step": 19134 }, { "epoch": 0.8095016498857771, "grad_norm": 0.1559763252735138, "learning_rate": 0.001, "loss": 2.0704, "step": 19135 }, { "epoch": 0.8095439546492935, "grad_norm": 0.1679486483335495, "learning_rate": 0.001, "loss": 2.4996, "step": 19136 }, { "epoch": 0.8095862594128099, "grad_norm": 0.15577644109725952, "learning_rate": 0.001, "loss": 2.4594, "step": 19137 }, { "epoch": 0.8096285641763262, "grad_norm": 0.14891107380390167, "learning_rate": 0.001, "loss": 1.9317, "step": 19138 }, { "epoch": 0.8096708689398426, "grad_norm": 0.20740364491939545, "learning_rate": 0.001, "loss": 1.7758, "step": 19139 }, { "epoch": 0.809713173703359, "grad_norm": 0.17380544543266296, "learning_rate": 0.001, "loss": 1.6315, "step": 19140 }, { "epoch": 0.8097554784668753, "grad_norm": 0.15323974192142487, "learning_rate": 0.001, "loss": 2.2311, "step": 19141 }, { "epoch": 0.8097977832303918, "grad_norm": 1.1222220659255981, "learning_rate": 0.001, "loss": 1.8476, "step": 19142 }, { "epoch": 0.8098400879939082, "grad_norm": 0.17634250223636627, "learning_rate": 0.001, "loss": 2.2164, "step": 19143 }, { "epoch": 0.8098823927574245, "grad_norm": 0.16410218179225922, "learning_rate": 0.001, "loss": 1.9059, "step": 19144 }, { "epoch": 0.8099246975209409, "grad_norm": 0.16236481070518494, "learning_rate": 0.001, "loss": 1.8111, "step": 19145 }, { "epoch": 0.8099670022844573, "grad_norm": 5.717217445373535, "learning_rate": 0.001, "loss": 2.1306, "step": 19146 }, { "epoch": 0.8100093070479736, "grad_norm": 0.1764141023159027, "learning_rate": 0.001, "loss": 1.4532, "step": 19147 }, { "epoch": 0.81005161181149, "grad_norm": 0.18373993039131165, "learning_rate": 0.001, "loss": 1.8148, "step": 19148 }, { "epoch": 0.8100939165750064, "grad_norm": 0.1714664250612259, "learning_rate": 0.001, "loss": 2.0202, "step": 19149 }, { "epoch": 0.8101362213385227, "grad_norm": 0.22828002274036407, "learning_rate": 0.001, "loss": 1.8922, "step": 19150 }, { "epoch": 0.8101785261020391, "grad_norm": 0.37784022092819214, "learning_rate": 0.001, "loss": 3.023, "step": 19151 }, { "epoch": 0.8102208308655554, "grad_norm": 0.17360255122184753, "learning_rate": 0.001, "loss": 2.1345, "step": 19152 }, { "epoch": 0.8102631356290718, "grad_norm": 0.16689980030059814, "learning_rate": 0.001, "loss": 2.1847, "step": 19153 }, { "epoch": 0.8103054403925882, "grad_norm": 0.23073500394821167, "learning_rate": 0.001, "loss": 2.0032, "step": 19154 }, { "epoch": 0.8103477451561045, "grad_norm": 0.1540898084640503, "learning_rate": 0.001, "loss": 1.4885, "step": 19155 }, { "epoch": 0.8103900499196209, "grad_norm": 2.4491209983825684, "learning_rate": 0.001, "loss": 2.0554, "step": 19156 }, { "epoch": 0.8104323546831373, "grad_norm": 0.191018208861351, "learning_rate": 0.001, "loss": 2.0957, "step": 19157 }, { "epoch": 0.8104746594466536, "grad_norm": 4.3804240226745605, "learning_rate": 0.001, "loss": 2.0839, "step": 19158 }, { "epoch": 0.81051696421017, "grad_norm": 0.41173967719078064, "learning_rate": 0.001, "loss": 2.0613, "step": 19159 }, { "epoch": 0.8105592689736865, "grad_norm": 0.13415801525115967, "learning_rate": 0.001, "loss": 1.7669, "step": 19160 }, { "epoch": 0.8106015737372028, "grad_norm": 0.15621332824230194, "learning_rate": 0.001, "loss": 2.5883, "step": 19161 }, { "epoch": 0.8106438785007192, "grad_norm": 2.067545175552368, "learning_rate": 0.001, "loss": 2.3052, "step": 19162 }, { "epoch": 0.8106861832642356, "grad_norm": 0.15664492547512054, "learning_rate": 0.001, "loss": 2.0756, "step": 19163 }, { "epoch": 0.8107284880277519, "grad_norm": 0.32015523314476013, "learning_rate": 0.001, "loss": 1.449, "step": 19164 }, { "epoch": 0.8107707927912683, "grad_norm": 0.18561597168445587, "learning_rate": 0.001, "loss": 3.2036, "step": 19165 }, { "epoch": 0.8108130975547847, "grad_norm": 0.1604049801826477, "learning_rate": 0.001, "loss": 2.1215, "step": 19166 }, { "epoch": 0.810855402318301, "grad_norm": 0.13937008380889893, "learning_rate": 0.001, "loss": 2.1111, "step": 19167 }, { "epoch": 0.8108977070818174, "grad_norm": 0.1511482447385788, "learning_rate": 0.001, "loss": 2.4686, "step": 19168 }, { "epoch": 0.8109400118453338, "grad_norm": 0.4182822108268738, "learning_rate": 0.001, "loss": 2.0505, "step": 19169 }, { "epoch": 0.8109823166088501, "grad_norm": 0.17562200129032135, "learning_rate": 0.001, "loss": 3.0039, "step": 19170 }, { "epoch": 0.8110246213723665, "grad_norm": 0.14364513754844666, "learning_rate": 0.001, "loss": 1.9338, "step": 19171 }, { "epoch": 0.8110669261358829, "grad_norm": 1.0508071184158325, "learning_rate": 0.001, "loss": 2.679, "step": 19172 }, { "epoch": 0.8111092308993992, "grad_norm": 0.14670781791210175, "learning_rate": 0.001, "loss": 1.7561, "step": 19173 }, { "epoch": 0.8111515356629156, "grad_norm": 0.12752504646778107, "learning_rate": 0.001, "loss": 1.1323, "step": 19174 }, { "epoch": 0.8111938404264321, "grad_norm": 0.13774631917476654, "learning_rate": 0.001, "loss": 2.117, "step": 19175 }, { "epoch": 0.8112361451899484, "grad_norm": 0.31936532258987427, "learning_rate": 0.001, "loss": 1.7696, "step": 19176 }, { "epoch": 0.8112784499534648, "grad_norm": 0.3710169196128845, "learning_rate": 0.001, "loss": 3.5819, "step": 19177 }, { "epoch": 0.8113207547169812, "grad_norm": 0.1603461056947708, "learning_rate": 0.001, "loss": 2.0246, "step": 19178 }, { "epoch": 0.8113630594804975, "grad_norm": 0.18489456176757812, "learning_rate": 0.001, "loss": 2.6565, "step": 19179 }, { "epoch": 0.8114053642440139, "grad_norm": 0.16004648804664612, "learning_rate": 0.001, "loss": 1.6997, "step": 19180 }, { "epoch": 0.8114476690075303, "grad_norm": 0.1679944545030594, "learning_rate": 0.001, "loss": 3.4693, "step": 19181 }, { "epoch": 0.8114899737710466, "grad_norm": 0.1983286440372467, "learning_rate": 0.001, "loss": 1.9687, "step": 19182 }, { "epoch": 0.811532278534563, "grad_norm": 0.5275279879570007, "learning_rate": 0.001, "loss": 1.5419, "step": 19183 }, { "epoch": 0.8115745832980794, "grad_norm": 0.15500518679618835, "learning_rate": 0.001, "loss": 2.7367, "step": 19184 }, { "epoch": 0.8116168880615957, "grad_norm": 0.16536766290664673, "learning_rate": 0.001, "loss": 2.1103, "step": 19185 }, { "epoch": 0.8116591928251121, "grad_norm": 0.17966187000274658, "learning_rate": 0.001, "loss": 2.0974, "step": 19186 }, { "epoch": 0.8117014975886285, "grad_norm": 4.907825946807861, "learning_rate": 0.001, "loss": 3.4044, "step": 19187 }, { "epoch": 0.8117438023521448, "grad_norm": 0.18678595125675201, "learning_rate": 0.001, "loss": 2.5199, "step": 19188 }, { "epoch": 0.8117861071156612, "grad_norm": 0.751160740852356, "learning_rate": 0.001, "loss": 2.3407, "step": 19189 }, { "epoch": 0.8118284118791776, "grad_norm": 0.1894712597131729, "learning_rate": 0.001, "loss": 2.066, "step": 19190 }, { "epoch": 0.811870716642694, "grad_norm": 0.174768328666687, "learning_rate": 0.001, "loss": 2.0651, "step": 19191 }, { "epoch": 0.8119130214062104, "grad_norm": 0.17409004271030426, "learning_rate": 0.001, "loss": 2.4093, "step": 19192 }, { "epoch": 0.8119553261697268, "grad_norm": 0.154340922832489, "learning_rate": 0.001, "loss": 2.1752, "step": 19193 }, { "epoch": 0.8119976309332431, "grad_norm": 0.18005342781543732, "learning_rate": 0.001, "loss": 2.8527, "step": 19194 }, { "epoch": 0.8120399356967595, "grad_norm": 0.1886720359325409, "learning_rate": 0.001, "loss": 2.052, "step": 19195 }, { "epoch": 0.8120822404602758, "grad_norm": 0.15437401831150055, "learning_rate": 0.001, "loss": 2.0393, "step": 19196 }, { "epoch": 0.8121245452237922, "grad_norm": 0.8311869502067566, "learning_rate": 0.001, "loss": 2.9646, "step": 19197 }, { "epoch": 0.8121668499873086, "grad_norm": 0.26318734884262085, "learning_rate": 0.001, "loss": 3.0618, "step": 19198 }, { "epoch": 0.8122091547508249, "grad_norm": 0.15267373621463776, "learning_rate": 0.001, "loss": 1.8216, "step": 19199 }, { "epoch": 0.8122514595143413, "grad_norm": 0.20256157219409943, "learning_rate": 0.001, "loss": 2.2829, "step": 19200 }, { "epoch": 0.8122937642778577, "grad_norm": 0.9764918088912964, "learning_rate": 0.001, "loss": 2.902, "step": 19201 }, { "epoch": 0.812336069041374, "grad_norm": 0.5934033393859863, "learning_rate": 0.001, "loss": 2.9391, "step": 19202 }, { "epoch": 0.8123783738048904, "grad_norm": 0.41964349150657654, "learning_rate": 0.001, "loss": 1.6686, "step": 19203 }, { "epoch": 0.8124206785684068, "grad_norm": 0.15301333367824554, "learning_rate": 0.001, "loss": 2.829, "step": 19204 }, { "epoch": 0.8124629833319231, "grad_norm": 0.15774710476398468, "learning_rate": 0.001, "loss": 2.467, "step": 19205 }, { "epoch": 0.8125052880954395, "grad_norm": 0.3927057683467865, "learning_rate": 0.001, "loss": 2.3201, "step": 19206 }, { "epoch": 0.812547592858956, "grad_norm": 4.193830490112305, "learning_rate": 0.001, "loss": 1.9153, "step": 19207 }, { "epoch": 0.8125898976224722, "grad_norm": 0.16809159517288208, "learning_rate": 0.001, "loss": 2.6222, "step": 19208 }, { "epoch": 0.8126322023859887, "grad_norm": 0.36612462997436523, "learning_rate": 0.001, "loss": 1.8079, "step": 19209 }, { "epoch": 0.8126745071495051, "grad_norm": 0.17396114766597748, "learning_rate": 0.001, "loss": 1.7655, "step": 19210 }, { "epoch": 0.8127168119130214, "grad_norm": 0.14558352530002594, "learning_rate": 0.001, "loss": 2.35, "step": 19211 }, { "epoch": 0.8127591166765378, "grad_norm": 0.1535443514585495, "learning_rate": 0.001, "loss": 1.8861, "step": 19212 }, { "epoch": 0.8128014214400542, "grad_norm": 0.14222368597984314, "learning_rate": 0.001, "loss": 1.6644, "step": 19213 }, { "epoch": 0.8128437262035705, "grad_norm": 0.16522639989852905, "learning_rate": 0.001, "loss": 2.5941, "step": 19214 }, { "epoch": 0.8128860309670869, "grad_norm": 1.800579309463501, "learning_rate": 0.001, "loss": 2.2554, "step": 19215 }, { "epoch": 0.8129283357306033, "grad_norm": 4.028626918792725, "learning_rate": 0.001, "loss": 1.5658, "step": 19216 }, { "epoch": 0.8129706404941196, "grad_norm": 0.15518246591091156, "learning_rate": 0.001, "loss": 1.8562, "step": 19217 }, { "epoch": 0.813012945257636, "grad_norm": 1.1874980926513672, "learning_rate": 0.001, "loss": 2.0366, "step": 19218 }, { "epoch": 0.8130552500211524, "grad_norm": 0.16775746643543243, "learning_rate": 0.001, "loss": 1.7891, "step": 19219 }, { "epoch": 0.8130975547846687, "grad_norm": 0.30443984270095825, "learning_rate": 0.001, "loss": 3.444, "step": 19220 }, { "epoch": 0.8131398595481851, "grad_norm": 0.1578860729932785, "learning_rate": 0.001, "loss": 1.8633, "step": 19221 }, { "epoch": 0.8131821643117015, "grad_norm": 17.17110252380371, "learning_rate": 0.001, "loss": 2.2716, "step": 19222 }, { "epoch": 0.8132244690752178, "grad_norm": 0.16925646364688873, "learning_rate": 0.001, "loss": 2.1797, "step": 19223 }, { "epoch": 0.8132667738387342, "grad_norm": 0.46783486008644104, "learning_rate": 0.001, "loss": 1.9686, "step": 19224 }, { "epoch": 0.8133090786022507, "grad_norm": 0.21370871365070343, "learning_rate": 0.001, "loss": 2.3163, "step": 19225 }, { "epoch": 0.813351383365767, "grad_norm": 2.3018229007720947, "learning_rate": 0.001, "loss": 3.1378, "step": 19226 }, { "epoch": 0.8133936881292834, "grad_norm": 0.26849448680877686, "learning_rate": 0.001, "loss": 2.8301, "step": 19227 }, { "epoch": 0.8134359928927998, "grad_norm": 0.36854100227355957, "learning_rate": 0.001, "loss": 2.2498, "step": 19228 }, { "epoch": 0.8134782976563161, "grad_norm": 0.44008371233940125, "learning_rate": 0.001, "loss": 2.2804, "step": 19229 }, { "epoch": 0.8135206024198325, "grad_norm": 0.2091284990310669, "learning_rate": 0.001, "loss": 2.2224, "step": 19230 }, { "epoch": 0.8135629071833489, "grad_norm": 0.2483440637588501, "learning_rate": 0.001, "loss": 2.2146, "step": 19231 }, { "epoch": 0.8136052119468652, "grad_norm": 0.19964353740215302, "learning_rate": 0.001, "loss": 1.861, "step": 19232 }, { "epoch": 0.8136475167103816, "grad_norm": 0.22766467928886414, "learning_rate": 0.001, "loss": 2.3613, "step": 19233 }, { "epoch": 0.813689821473898, "grad_norm": 0.20995555818080902, "learning_rate": 0.001, "loss": 2.4426, "step": 19234 }, { "epoch": 0.8137321262374143, "grad_norm": 0.16860271990299225, "learning_rate": 0.001, "loss": 2.3357, "step": 19235 }, { "epoch": 0.8137744310009307, "grad_norm": 0.18488185107707977, "learning_rate": 0.001, "loss": 2.7789, "step": 19236 }, { "epoch": 0.8138167357644471, "grad_norm": 2.0512919425964355, "learning_rate": 0.001, "loss": 2.3825, "step": 19237 }, { "epoch": 0.8138590405279634, "grad_norm": 0.39424288272857666, "learning_rate": 0.001, "loss": 2.3707, "step": 19238 }, { "epoch": 0.8139013452914798, "grad_norm": 0.18912164866924286, "learning_rate": 0.001, "loss": 2.5157, "step": 19239 }, { "epoch": 0.8139436500549961, "grad_norm": 0.17487472295761108, "learning_rate": 0.001, "loss": 2.0539, "step": 19240 }, { "epoch": 0.8139859548185125, "grad_norm": 0.1670231968164444, "learning_rate": 0.001, "loss": 2.7521, "step": 19241 }, { "epoch": 0.814028259582029, "grad_norm": 0.16060633957386017, "learning_rate": 0.001, "loss": 1.8548, "step": 19242 }, { "epoch": 0.8140705643455453, "grad_norm": 0.1655425727367401, "learning_rate": 0.001, "loss": 2.6851, "step": 19243 }, { "epoch": 0.8141128691090617, "grad_norm": 0.7323225736618042, "learning_rate": 0.001, "loss": 3.2264, "step": 19244 }, { "epoch": 0.8141551738725781, "grad_norm": 0.16756169497966766, "learning_rate": 0.001, "loss": 2.0877, "step": 19245 }, { "epoch": 0.8141974786360944, "grad_norm": 4.373884201049805, "learning_rate": 0.001, "loss": 3.0818, "step": 19246 }, { "epoch": 0.8142397833996108, "grad_norm": 0.15877367556095123, "learning_rate": 0.001, "loss": 2.477, "step": 19247 }, { "epoch": 0.8142820881631272, "grad_norm": 14.3695707321167, "learning_rate": 0.001, "loss": 3.6114, "step": 19248 }, { "epoch": 0.8143243929266435, "grad_norm": 0.35859042406082153, "learning_rate": 0.001, "loss": 2.4061, "step": 19249 }, { "epoch": 0.8143666976901599, "grad_norm": 0.17189420759677887, "learning_rate": 0.001, "loss": 2.6325, "step": 19250 }, { "epoch": 0.8144090024536763, "grad_norm": 0.1583157330751419, "learning_rate": 0.001, "loss": 2.0351, "step": 19251 }, { "epoch": 0.8144513072171926, "grad_norm": 0.1763736605644226, "learning_rate": 0.001, "loss": 1.9754, "step": 19252 }, { "epoch": 0.814493611980709, "grad_norm": 0.16979268193244934, "learning_rate": 0.001, "loss": 2.0023, "step": 19253 }, { "epoch": 0.8145359167442254, "grad_norm": 0.1730976700782776, "learning_rate": 0.001, "loss": 2.8671, "step": 19254 }, { "epoch": 0.8145782215077417, "grad_norm": 0.17058543860912323, "learning_rate": 0.001, "loss": 1.4882, "step": 19255 }, { "epoch": 0.8146205262712581, "grad_norm": 0.29904067516326904, "learning_rate": 0.001, "loss": 2.019, "step": 19256 }, { "epoch": 0.8146628310347745, "grad_norm": 0.6264715194702148, "learning_rate": 0.001, "loss": 2.8114, "step": 19257 }, { "epoch": 0.8147051357982908, "grad_norm": 0.14027170836925507, "learning_rate": 0.001, "loss": 3.2401, "step": 19258 }, { "epoch": 0.8147474405618073, "grad_norm": 0.16307754814624786, "learning_rate": 0.001, "loss": 2.6808, "step": 19259 }, { "epoch": 0.8147897453253237, "grad_norm": 0.9267063736915588, "learning_rate": 0.001, "loss": 2.3898, "step": 19260 }, { "epoch": 0.81483205008884, "grad_norm": 0.1487603634595871, "learning_rate": 0.001, "loss": 2.9254, "step": 19261 }, { "epoch": 0.8148743548523564, "grad_norm": 0.1394791156053543, "learning_rate": 0.001, "loss": 1.7672, "step": 19262 }, { "epoch": 0.8149166596158728, "grad_norm": 45.422454833984375, "learning_rate": 0.001, "loss": 2.5624, "step": 19263 }, { "epoch": 0.8149589643793891, "grad_norm": 0.4591314196586609, "learning_rate": 0.001, "loss": 2.3038, "step": 19264 }, { "epoch": 0.8150012691429055, "grad_norm": 0.13458524644374847, "learning_rate": 0.001, "loss": 2.2434, "step": 19265 }, { "epoch": 0.8150435739064219, "grad_norm": 0.14545901119709015, "learning_rate": 0.001, "loss": 2.3079, "step": 19266 }, { "epoch": 0.8150858786699382, "grad_norm": 0.16260690987110138, "learning_rate": 0.001, "loss": 2.7739, "step": 19267 }, { "epoch": 0.8151281834334546, "grad_norm": 0.15987445414066315, "learning_rate": 0.001, "loss": 1.6845, "step": 19268 }, { "epoch": 0.815170488196971, "grad_norm": 0.16004207730293274, "learning_rate": 0.001, "loss": 2.0792, "step": 19269 }, { "epoch": 0.8152127929604873, "grad_norm": 0.14027880132198334, "learning_rate": 0.001, "loss": 2.14, "step": 19270 }, { "epoch": 0.8152550977240037, "grad_norm": 0.1478835940361023, "learning_rate": 0.001, "loss": 1.4469, "step": 19271 }, { "epoch": 0.8152974024875201, "grad_norm": 0.6844944357872009, "learning_rate": 0.001, "loss": 2.0617, "step": 19272 }, { "epoch": 0.8153397072510364, "grad_norm": 0.24570874869823456, "learning_rate": 0.001, "loss": 2.0208, "step": 19273 }, { "epoch": 0.8153820120145528, "grad_norm": 0.15631866455078125, "learning_rate": 0.001, "loss": 1.826, "step": 19274 }, { "epoch": 0.8154243167780693, "grad_norm": 0.18372394144535065, "learning_rate": 0.001, "loss": 2.138, "step": 19275 }, { "epoch": 0.8154666215415856, "grad_norm": 0.6491008400917053, "learning_rate": 0.001, "loss": 3.212, "step": 19276 }, { "epoch": 0.815508926305102, "grad_norm": 0.13584677875041962, "learning_rate": 0.001, "loss": 2.0065, "step": 19277 }, { "epoch": 0.8155512310686184, "grad_norm": 0.16441628336906433, "learning_rate": 0.001, "loss": 1.7349, "step": 19278 }, { "epoch": 0.8155935358321347, "grad_norm": 0.16459910571575165, "learning_rate": 0.001, "loss": 1.9561, "step": 19279 }, { "epoch": 0.8156358405956511, "grad_norm": 0.8286261558532715, "learning_rate": 0.001, "loss": 1.744, "step": 19280 }, { "epoch": 0.8156781453591675, "grad_norm": 0.18971623480319977, "learning_rate": 0.001, "loss": 1.968, "step": 19281 }, { "epoch": 0.8157204501226838, "grad_norm": 0.15174627304077148, "learning_rate": 0.001, "loss": 1.5037, "step": 19282 }, { "epoch": 0.8157627548862002, "grad_norm": 0.1971951127052307, "learning_rate": 0.001, "loss": 1.9095, "step": 19283 }, { "epoch": 0.8158050596497166, "grad_norm": 0.6228017807006836, "learning_rate": 0.001, "loss": 3.6166, "step": 19284 }, { "epoch": 0.8158473644132329, "grad_norm": 0.20216991007328033, "learning_rate": 0.001, "loss": 2.2737, "step": 19285 }, { "epoch": 0.8158896691767493, "grad_norm": 0.15689660608768463, "learning_rate": 0.001, "loss": 2.5109, "step": 19286 }, { "epoch": 0.8159319739402656, "grad_norm": 0.16045741736888885, "learning_rate": 0.001, "loss": 2.3619, "step": 19287 }, { "epoch": 0.815974278703782, "grad_norm": 0.14327400922775269, "learning_rate": 0.001, "loss": 1.7969, "step": 19288 }, { "epoch": 0.8160165834672984, "grad_norm": 0.16195663809776306, "learning_rate": 0.001, "loss": 1.8645, "step": 19289 }, { "epoch": 0.8160588882308147, "grad_norm": 0.17734597623348236, "learning_rate": 0.001, "loss": 1.8547, "step": 19290 }, { "epoch": 0.8161011929943311, "grad_norm": 0.1504170149564743, "learning_rate": 0.001, "loss": 3.0344, "step": 19291 }, { "epoch": 0.8161434977578476, "grad_norm": 0.3913165032863617, "learning_rate": 0.001, "loss": 2.165, "step": 19292 }, { "epoch": 0.8161858025213639, "grad_norm": 0.14916856586933136, "learning_rate": 0.001, "loss": 2.5323, "step": 19293 }, { "epoch": 0.8162281072848803, "grad_norm": 0.18995776772499084, "learning_rate": 0.001, "loss": 3.1768, "step": 19294 }, { "epoch": 0.8162704120483967, "grad_norm": 0.16553127765655518, "learning_rate": 0.001, "loss": 2.8746, "step": 19295 }, { "epoch": 0.816312716811913, "grad_norm": 0.16702581942081451, "learning_rate": 0.001, "loss": 1.8839, "step": 19296 }, { "epoch": 0.8163550215754294, "grad_norm": 0.1927644908428192, "learning_rate": 0.001, "loss": 2.6619, "step": 19297 }, { "epoch": 0.8163973263389458, "grad_norm": 1.5043902397155762, "learning_rate": 0.001, "loss": 1.8784, "step": 19298 }, { "epoch": 0.8164396311024621, "grad_norm": 0.15827623009681702, "learning_rate": 0.001, "loss": 1.8496, "step": 19299 }, { "epoch": 0.8164819358659785, "grad_norm": 0.15658104419708252, "learning_rate": 0.001, "loss": 2.642, "step": 19300 }, { "epoch": 0.8165242406294949, "grad_norm": 0.2382393330335617, "learning_rate": 0.001, "loss": 2.8102, "step": 19301 }, { "epoch": 0.8165665453930112, "grad_norm": 0.7768169045448303, "learning_rate": 0.001, "loss": 2.6393, "step": 19302 }, { "epoch": 0.8166088501565276, "grad_norm": 0.9647676944732666, "learning_rate": 0.001, "loss": 2.4991, "step": 19303 }, { "epoch": 0.816651154920044, "grad_norm": 0.17201387882232666, "learning_rate": 0.001, "loss": 2.1453, "step": 19304 }, { "epoch": 0.8166934596835603, "grad_norm": 0.18046537041664124, "learning_rate": 0.001, "loss": 1.9127, "step": 19305 }, { "epoch": 0.8167357644470767, "grad_norm": 0.1487366259098053, "learning_rate": 0.001, "loss": 2.5597, "step": 19306 }, { "epoch": 0.8167780692105931, "grad_norm": 0.36740607023239136, "learning_rate": 0.001, "loss": 2.5074, "step": 19307 }, { "epoch": 0.8168203739741094, "grad_norm": 0.2924293875694275, "learning_rate": 0.001, "loss": 3.529, "step": 19308 }, { "epoch": 0.8168626787376259, "grad_norm": 0.14903929829597473, "learning_rate": 0.001, "loss": 3.0469, "step": 19309 }, { "epoch": 0.8169049835011423, "grad_norm": 0.1474577784538269, "learning_rate": 0.001, "loss": 2.6798, "step": 19310 }, { "epoch": 0.8169472882646586, "grad_norm": 0.14111989736557007, "learning_rate": 0.001, "loss": 2.0767, "step": 19311 }, { "epoch": 0.816989593028175, "grad_norm": 0.16078026592731476, "learning_rate": 0.001, "loss": 2.0343, "step": 19312 }, { "epoch": 0.8170318977916914, "grad_norm": 0.14191174507141113, "learning_rate": 0.001, "loss": 1.7657, "step": 19313 }, { "epoch": 0.8170742025552077, "grad_norm": 0.23834367096424103, "learning_rate": 0.001, "loss": 1.9448, "step": 19314 }, { "epoch": 0.8171165073187241, "grad_norm": 0.15897387266159058, "learning_rate": 0.001, "loss": 2.4028, "step": 19315 }, { "epoch": 0.8171588120822405, "grad_norm": 0.14142805337905884, "learning_rate": 0.001, "loss": 2.5824, "step": 19316 }, { "epoch": 0.8172011168457568, "grad_norm": 0.13879472017288208, "learning_rate": 0.001, "loss": 1.7327, "step": 19317 }, { "epoch": 0.8172434216092732, "grad_norm": 0.13679929077625275, "learning_rate": 0.001, "loss": 1.8926, "step": 19318 }, { "epoch": 0.8172857263727896, "grad_norm": 0.13544319570064545, "learning_rate": 0.001, "loss": 3.3204, "step": 19319 }, { "epoch": 0.8173280311363059, "grad_norm": 0.23128291964530945, "learning_rate": 0.001, "loss": 3.2543, "step": 19320 }, { "epoch": 0.8173703358998223, "grad_norm": 0.16951590776443481, "learning_rate": 0.001, "loss": 2.3761, "step": 19321 }, { "epoch": 0.8174126406633387, "grad_norm": 0.13225175440311432, "learning_rate": 0.001, "loss": 3.1764, "step": 19322 }, { "epoch": 0.817454945426855, "grad_norm": 0.22651726007461548, "learning_rate": 0.001, "loss": 2.3727, "step": 19323 }, { "epoch": 0.8174972501903714, "grad_norm": 0.14250196516513824, "learning_rate": 0.001, "loss": 2.555, "step": 19324 }, { "epoch": 0.8175395549538879, "grad_norm": 0.13862445950508118, "learning_rate": 0.001, "loss": 2.1272, "step": 19325 }, { "epoch": 0.8175818597174042, "grad_norm": 0.6835310459136963, "learning_rate": 0.001, "loss": 2.1929, "step": 19326 }, { "epoch": 0.8176241644809206, "grad_norm": 0.49331387877464294, "learning_rate": 0.001, "loss": 1.605, "step": 19327 }, { "epoch": 0.817666469244437, "grad_norm": 0.8337584733963013, "learning_rate": 0.001, "loss": 1.8696, "step": 19328 }, { "epoch": 0.8177087740079533, "grad_norm": 0.13888110220432281, "learning_rate": 0.001, "loss": 1.7783, "step": 19329 }, { "epoch": 0.8177510787714697, "grad_norm": 0.15153059363365173, "learning_rate": 0.001, "loss": 3.0953, "step": 19330 }, { "epoch": 0.817793383534986, "grad_norm": 0.16064806282520294, "learning_rate": 0.001, "loss": 2.2135, "step": 19331 }, { "epoch": 0.8178356882985024, "grad_norm": 0.18726807832717896, "learning_rate": 0.001, "loss": 1.5846, "step": 19332 }, { "epoch": 0.8178779930620188, "grad_norm": 1.7091985940933228, "learning_rate": 0.001, "loss": 1.8989, "step": 19333 }, { "epoch": 0.8179202978255351, "grad_norm": 10.048054695129395, "learning_rate": 0.001, "loss": 1.9446, "step": 19334 }, { "epoch": 0.8179626025890515, "grad_norm": 0.1563667356967926, "learning_rate": 0.001, "loss": 1.8827, "step": 19335 }, { "epoch": 0.8180049073525679, "grad_norm": 0.14625267684459686, "learning_rate": 0.001, "loss": 2.9246, "step": 19336 }, { "epoch": 0.8180472121160842, "grad_norm": 0.14250653982162476, "learning_rate": 0.001, "loss": 2.0407, "step": 19337 }, { "epoch": 0.8180895168796006, "grad_norm": 0.1519334465265274, "learning_rate": 0.001, "loss": 1.6979, "step": 19338 }, { "epoch": 0.818131821643117, "grad_norm": 0.18076607584953308, "learning_rate": 0.001, "loss": 1.4037, "step": 19339 }, { "epoch": 0.8181741264066333, "grad_norm": 0.20953752100467682, "learning_rate": 0.001, "loss": 2.7733, "step": 19340 }, { "epoch": 0.8182164311701497, "grad_norm": 0.17903457581996918, "learning_rate": 0.001, "loss": 1.9328, "step": 19341 }, { "epoch": 0.8182587359336662, "grad_norm": 0.1693604737520218, "learning_rate": 0.001, "loss": 1.6344, "step": 19342 }, { "epoch": 0.8183010406971825, "grad_norm": 0.18389926850795746, "learning_rate": 0.001, "loss": 1.6565, "step": 19343 }, { "epoch": 0.8183433454606989, "grad_norm": 0.15521776676177979, "learning_rate": 0.001, "loss": 2.1015, "step": 19344 }, { "epoch": 0.8183856502242153, "grad_norm": 0.1567741185426712, "learning_rate": 0.001, "loss": 2.3034, "step": 19345 }, { "epoch": 0.8184279549877316, "grad_norm": 0.1486789584159851, "learning_rate": 0.001, "loss": 1.6981, "step": 19346 }, { "epoch": 0.818470259751248, "grad_norm": 0.14541268348693848, "learning_rate": 0.001, "loss": 2.1189, "step": 19347 }, { "epoch": 0.8185125645147644, "grad_norm": 0.1486058086156845, "learning_rate": 0.001, "loss": 1.6406, "step": 19348 }, { "epoch": 0.8185548692782807, "grad_norm": 0.1677182912826538, "learning_rate": 0.001, "loss": 1.6275, "step": 19349 }, { "epoch": 0.8185971740417971, "grad_norm": 0.1867937445640564, "learning_rate": 0.001, "loss": 2.5879, "step": 19350 }, { "epoch": 0.8186394788053135, "grad_norm": 0.18571646511554718, "learning_rate": 0.001, "loss": 2.914, "step": 19351 }, { "epoch": 0.8186817835688298, "grad_norm": 0.17345742881298065, "learning_rate": 0.001, "loss": 2.0374, "step": 19352 }, { "epoch": 0.8187240883323462, "grad_norm": 0.164658322930336, "learning_rate": 0.001, "loss": 1.6735, "step": 19353 }, { "epoch": 0.8187663930958626, "grad_norm": 15.121390342712402, "learning_rate": 0.001, "loss": 2.3059, "step": 19354 }, { "epoch": 0.8188086978593789, "grad_norm": 0.16249455511569977, "learning_rate": 0.001, "loss": 2.387, "step": 19355 }, { "epoch": 0.8188510026228953, "grad_norm": 0.21848152577877045, "learning_rate": 0.001, "loss": 2.0407, "step": 19356 }, { "epoch": 0.8188933073864118, "grad_norm": 0.18375152349472046, "learning_rate": 0.001, "loss": 1.7933, "step": 19357 }, { "epoch": 0.818935612149928, "grad_norm": 0.1748533397912979, "learning_rate": 0.001, "loss": 3.2785, "step": 19358 }, { "epoch": 0.8189779169134445, "grad_norm": 0.18232855200767517, "learning_rate": 0.001, "loss": 2.2611, "step": 19359 }, { "epoch": 0.8190202216769609, "grad_norm": 0.811168909072876, "learning_rate": 0.001, "loss": 1.9481, "step": 19360 }, { "epoch": 0.8190625264404772, "grad_norm": 40.37857437133789, "learning_rate": 0.001, "loss": 1.5613, "step": 19361 }, { "epoch": 0.8191048312039936, "grad_norm": 1.3089685440063477, "learning_rate": 0.001, "loss": 3.6364, "step": 19362 }, { "epoch": 0.81914713596751, "grad_norm": 0.1557542085647583, "learning_rate": 0.001, "loss": 2.8861, "step": 19363 }, { "epoch": 0.8191894407310263, "grad_norm": 0.1572728157043457, "learning_rate": 0.001, "loss": 1.987, "step": 19364 }, { "epoch": 0.8192317454945427, "grad_norm": 0.13320891559123993, "learning_rate": 0.001, "loss": 2.0874, "step": 19365 }, { "epoch": 0.8192740502580591, "grad_norm": 0.18693287670612335, "learning_rate": 0.001, "loss": 1.5792, "step": 19366 }, { "epoch": 0.8193163550215754, "grad_norm": 0.17227143049240112, "learning_rate": 0.001, "loss": 2.8165, "step": 19367 }, { "epoch": 0.8193586597850918, "grad_norm": 0.19532877206802368, "learning_rate": 0.001, "loss": 2.0586, "step": 19368 }, { "epoch": 0.8194009645486082, "grad_norm": 0.18054965138435364, "learning_rate": 0.001, "loss": 2.1502, "step": 19369 }, { "epoch": 0.8194432693121245, "grad_norm": 0.3110874593257904, "learning_rate": 0.001, "loss": 3.9523, "step": 19370 }, { "epoch": 0.8194855740756409, "grad_norm": 0.2044413536787033, "learning_rate": 0.001, "loss": 2.5399, "step": 19371 }, { "epoch": 0.8195278788391573, "grad_norm": 0.21336640417575836, "learning_rate": 0.001, "loss": 2.1525, "step": 19372 }, { "epoch": 0.8195701836026736, "grad_norm": 0.22624215483665466, "learning_rate": 0.001, "loss": 2.2209, "step": 19373 }, { "epoch": 0.81961248836619, "grad_norm": 0.23245839774608612, "learning_rate": 0.001, "loss": 2.8199, "step": 19374 }, { "epoch": 0.8196547931297065, "grad_norm": 0.19043493270874023, "learning_rate": 0.001, "loss": 1.8749, "step": 19375 }, { "epoch": 0.8196970978932228, "grad_norm": 0.16715998947620392, "learning_rate": 0.001, "loss": 3.1058, "step": 19376 }, { "epoch": 0.8197394026567392, "grad_norm": 0.15719690918922424, "learning_rate": 0.001, "loss": 2.2472, "step": 19377 }, { "epoch": 0.8197817074202555, "grad_norm": 0.15933258831501007, "learning_rate": 0.001, "loss": 2.5364, "step": 19378 }, { "epoch": 0.8198240121837719, "grad_norm": 0.29013222455978394, "learning_rate": 0.001, "loss": 3.0306, "step": 19379 }, { "epoch": 0.8198663169472883, "grad_norm": 0.17771287262439728, "learning_rate": 0.001, "loss": 2.3702, "step": 19380 }, { "epoch": 0.8199086217108046, "grad_norm": 0.14083783328533173, "learning_rate": 0.001, "loss": 1.8384, "step": 19381 }, { "epoch": 0.819950926474321, "grad_norm": 0.17068462073802948, "learning_rate": 0.001, "loss": 2.4943, "step": 19382 }, { "epoch": 0.8199932312378374, "grad_norm": 0.1593535989522934, "learning_rate": 0.001, "loss": 2.0397, "step": 19383 }, { "epoch": 0.8200355360013537, "grad_norm": 0.3052305579185486, "learning_rate": 0.001, "loss": 2.3592, "step": 19384 }, { "epoch": 0.8200778407648701, "grad_norm": 0.1488254964351654, "learning_rate": 0.001, "loss": 1.8685, "step": 19385 }, { "epoch": 0.8201201455283865, "grad_norm": 0.18521372973918915, "learning_rate": 0.001, "loss": 2.578, "step": 19386 }, { "epoch": 0.8201624502919028, "grad_norm": 0.1931733936071396, "learning_rate": 0.001, "loss": 1.9068, "step": 19387 }, { "epoch": 0.8202047550554192, "grad_norm": 0.16335810720920563, "learning_rate": 0.001, "loss": 2.0929, "step": 19388 }, { "epoch": 0.8202470598189356, "grad_norm": 0.14352665841579437, "learning_rate": 0.001, "loss": 1.5255, "step": 19389 }, { "epoch": 0.8202893645824519, "grad_norm": 0.22038647532463074, "learning_rate": 0.001, "loss": 3.3152, "step": 19390 }, { "epoch": 0.8203316693459684, "grad_norm": 0.17173390090465546, "learning_rate": 0.001, "loss": 1.8423, "step": 19391 }, { "epoch": 0.8203739741094848, "grad_norm": 0.1474774330854416, "learning_rate": 0.001, "loss": 1.6557, "step": 19392 }, { "epoch": 0.8204162788730011, "grad_norm": 0.1488204449415207, "learning_rate": 0.001, "loss": 1.867, "step": 19393 }, { "epoch": 0.8204585836365175, "grad_norm": 0.26279768347740173, "learning_rate": 0.001, "loss": 2.287, "step": 19394 }, { "epoch": 0.8205008884000339, "grad_norm": 0.3085935413837433, "learning_rate": 0.001, "loss": 2.1505, "step": 19395 }, { "epoch": 0.8205431931635502, "grad_norm": 0.14529921114444733, "learning_rate": 0.001, "loss": 1.7498, "step": 19396 }, { "epoch": 0.8205854979270666, "grad_norm": 0.1698216050863266, "learning_rate": 0.001, "loss": 2.3125, "step": 19397 }, { "epoch": 0.820627802690583, "grad_norm": 0.1525857001543045, "learning_rate": 0.001, "loss": 2.6507, "step": 19398 }, { "epoch": 0.8206701074540993, "grad_norm": 0.48174238204956055, "learning_rate": 0.001, "loss": 1.9215, "step": 19399 }, { "epoch": 0.8207124122176157, "grad_norm": 0.1638476401567459, "learning_rate": 0.001, "loss": 2.4539, "step": 19400 }, { "epoch": 0.8207547169811321, "grad_norm": 0.1637507677078247, "learning_rate": 0.001, "loss": 1.5719, "step": 19401 }, { "epoch": 0.8207970217446484, "grad_norm": 0.1512572169303894, "learning_rate": 0.001, "loss": 2.4486, "step": 19402 }, { "epoch": 0.8208393265081648, "grad_norm": 0.12860354781150818, "learning_rate": 0.001, "loss": 2.5915, "step": 19403 }, { "epoch": 0.8208816312716812, "grad_norm": 0.15637515485286713, "learning_rate": 0.001, "loss": 2.1001, "step": 19404 }, { "epoch": 0.8209239360351975, "grad_norm": 0.13939064741134644, "learning_rate": 0.001, "loss": 2.1013, "step": 19405 }, { "epoch": 0.8209662407987139, "grad_norm": 0.1056860163807869, "learning_rate": 0.001, "loss": 1.4263, "step": 19406 }, { "epoch": 0.8210085455622304, "grad_norm": 0.14277103543281555, "learning_rate": 0.001, "loss": 2.5677, "step": 19407 }, { "epoch": 0.8210508503257467, "grad_norm": 0.12135271728038788, "learning_rate": 0.001, "loss": 2.5086, "step": 19408 }, { "epoch": 0.8210931550892631, "grad_norm": 0.6470211148262024, "learning_rate": 0.001, "loss": 2.8058, "step": 19409 }, { "epoch": 0.8211354598527795, "grad_norm": 0.14677073061466217, "learning_rate": 0.001, "loss": 2.0381, "step": 19410 }, { "epoch": 0.8211777646162958, "grad_norm": 0.14049549400806427, "learning_rate": 0.001, "loss": 2.1267, "step": 19411 }, { "epoch": 0.8212200693798122, "grad_norm": 0.1972610056400299, "learning_rate": 0.001, "loss": 1.4748, "step": 19412 }, { "epoch": 0.8212623741433286, "grad_norm": 0.16732430458068848, "learning_rate": 0.001, "loss": 2.0388, "step": 19413 }, { "epoch": 0.8213046789068449, "grad_norm": 0.17487087845802307, "learning_rate": 0.001, "loss": 1.8259, "step": 19414 }, { "epoch": 0.8213469836703613, "grad_norm": 0.2771112322807312, "learning_rate": 0.001, "loss": 2.8102, "step": 19415 }, { "epoch": 0.8213892884338777, "grad_norm": 0.15633077919483185, "learning_rate": 0.001, "loss": 2.9745, "step": 19416 }, { "epoch": 0.821431593197394, "grad_norm": 0.8696543574333191, "learning_rate": 0.001, "loss": 2.1251, "step": 19417 }, { "epoch": 0.8214738979609104, "grad_norm": 0.7702298164367676, "learning_rate": 0.001, "loss": 2.4299, "step": 19418 }, { "epoch": 0.8215162027244268, "grad_norm": 1.8494880199432373, "learning_rate": 0.001, "loss": 2.1975, "step": 19419 }, { "epoch": 0.8215585074879431, "grad_norm": 0.14353471994400024, "learning_rate": 0.001, "loss": 1.602, "step": 19420 }, { "epoch": 0.8216008122514595, "grad_norm": 0.152914896607399, "learning_rate": 0.001, "loss": 2.5665, "step": 19421 }, { "epoch": 0.8216431170149758, "grad_norm": 0.1569298803806305, "learning_rate": 0.001, "loss": 2.2651, "step": 19422 }, { "epoch": 0.8216854217784922, "grad_norm": 0.14512638747692108, "learning_rate": 0.001, "loss": 1.6607, "step": 19423 }, { "epoch": 0.8217277265420087, "grad_norm": 0.16484789550304413, "learning_rate": 0.001, "loss": 2.1772, "step": 19424 }, { "epoch": 0.821770031305525, "grad_norm": 0.1395651400089264, "learning_rate": 0.001, "loss": 1.821, "step": 19425 }, { "epoch": 0.8218123360690414, "grad_norm": 0.675366997718811, "learning_rate": 0.001, "loss": 2.4906, "step": 19426 }, { "epoch": 0.8218546408325578, "grad_norm": 0.14823488891124725, "learning_rate": 0.001, "loss": 2.0327, "step": 19427 }, { "epoch": 0.8218969455960741, "grad_norm": 0.14115604758262634, "learning_rate": 0.001, "loss": 1.8341, "step": 19428 }, { "epoch": 0.8219392503595905, "grad_norm": 0.14076372981071472, "learning_rate": 0.001, "loss": 2.0194, "step": 19429 }, { "epoch": 0.8219815551231069, "grad_norm": 0.1530357003211975, "learning_rate": 0.001, "loss": 1.8015, "step": 19430 }, { "epoch": 0.8220238598866232, "grad_norm": 0.1735893338918686, "learning_rate": 0.001, "loss": 2.0837, "step": 19431 }, { "epoch": 0.8220661646501396, "grad_norm": 264.96417236328125, "learning_rate": 0.001, "loss": 2.6431, "step": 19432 }, { "epoch": 0.822108469413656, "grad_norm": 0.16807694733142853, "learning_rate": 0.001, "loss": 2.2922, "step": 19433 }, { "epoch": 0.8221507741771723, "grad_norm": 0.1564405858516693, "learning_rate": 0.001, "loss": 1.6603, "step": 19434 }, { "epoch": 0.8221930789406887, "grad_norm": 0.1768580824136734, "learning_rate": 0.001, "loss": 1.8339, "step": 19435 }, { "epoch": 0.8222353837042051, "grad_norm": 0.16592250764369965, "learning_rate": 0.001, "loss": 2.0608, "step": 19436 }, { "epoch": 0.8222776884677214, "grad_norm": 0.8113108277320862, "learning_rate": 0.001, "loss": 2.8101, "step": 19437 }, { "epoch": 0.8223199932312378, "grad_norm": 3.1174583435058594, "learning_rate": 0.001, "loss": 2.0837, "step": 19438 }, { "epoch": 0.8223622979947542, "grad_norm": 0.1804606169462204, "learning_rate": 0.001, "loss": 1.8602, "step": 19439 }, { "epoch": 0.8224046027582705, "grad_norm": 0.2099166363477707, "learning_rate": 0.001, "loss": 2.2021, "step": 19440 }, { "epoch": 0.822446907521787, "grad_norm": 0.2024306058883667, "learning_rate": 0.001, "loss": 2.5865, "step": 19441 }, { "epoch": 0.8224892122853034, "grad_norm": 1.9643110036849976, "learning_rate": 0.001, "loss": 2.6401, "step": 19442 }, { "epoch": 0.8225315170488197, "grad_norm": 0.2750485837459564, "learning_rate": 0.001, "loss": 1.6251, "step": 19443 }, { "epoch": 0.8225738218123361, "grad_norm": 0.26389703154563904, "learning_rate": 0.001, "loss": 1.9309, "step": 19444 }, { "epoch": 0.8226161265758525, "grad_norm": 0.1789424866437912, "learning_rate": 0.001, "loss": 1.6066, "step": 19445 }, { "epoch": 0.8226584313393688, "grad_norm": 0.2300262451171875, "learning_rate": 0.001, "loss": 2.7709, "step": 19446 }, { "epoch": 0.8227007361028852, "grad_norm": 0.25446465611457825, "learning_rate": 0.001, "loss": 2.4887, "step": 19447 }, { "epoch": 0.8227430408664016, "grad_norm": 0.2057051956653595, "learning_rate": 0.001, "loss": 1.8832, "step": 19448 }, { "epoch": 0.8227853456299179, "grad_norm": 0.17963270843029022, "learning_rate": 0.001, "loss": 3.0981, "step": 19449 }, { "epoch": 0.8228276503934343, "grad_norm": 0.17611320316791534, "learning_rate": 0.001, "loss": 2.6242, "step": 19450 }, { "epoch": 0.8228699551569507, "grad_norm": 0.19677342474460602, "learning_rate": 0.001, "loss": 1.7286, "step": 19451 }, { "epoch": 0.822912259920467, "grad_norm": 0.19172005355358124, "learning_rate": 0.001, "loss": 1.6635, "step": 19452 }, { "epoch": 0.8229545646839834, "grad_norm": 0.17036627233028412, "learning_rate": 0.001, "loss": 1.5419, "step": 19453 }, { "epoch": 0.8229968694474998, "grad_norm": 0.1823369711637497, "learning_rate": 0.001, "loss": 2.7447, "step": 19454 }, { "epoch": 0.8230391742110161, "grad_norm": 0.30361148715019226, "learning_rate": 0.001, "loss": 2.7019, "step": 19455 }, { "epoch": 0.8230814789745325, "grad_norm": 0.14085571467876434, "learning_rate": 0.001, "loss": 2.5237, "step": 19456 }, { "epoch": 0.823123783738049, "grad_norm": 0.15281033515930176, "learning_rate": 0.001, "loss": 2.1091, "step": 19457 }, { "epoch": 0.8231660885015653, "grad_norm": 0.14232198894023895, "learning_rate": 0.001, "loss": 2.4139, "step": 19458 }, { "epoch": 0.8232083932650817, "grad_norm": 0.142531618475914, "learning_rate": 0.001, "loss": 2.5104, "step": 19459 }, { "epoch": 0.8232506980285981, "grad_norm": 0.14800485968589783, "learning_rate": 0.001, "loss": 1.6023, "step": 19460 }, { "epoch": 0.8232930027921144, "grad_norm": 0.15559349954128265, "learning_rate": 0.001, "loss": 2.9792, "step": 19461 }, { "epoch": 0.8233353075556308, "grad_norm": 0.1521608531475067, "learning_rate": 0.001, "loss": 2.7687, "step": 19462 }, { "epoch": 0.8233776123191472, "grad_norm": 0.14281997084617615, "learning_rate": 0.001, "loss": 1.9515, "step": 19463 }, { "epoch": 0.8234199170826635, "grad_norm": 0.9176727533340454, "learning_rate": 0.001, "loss": 1.5359, "step": 19464 }, { "epoch": 0.8234622218461799, "grad_norm": 0.14004619419574738, "learning_rate": 0.001, "loss": 2.0011, "step": 19465 }, { "epoch": 0.8235045266096962, "grad_norm": 0.14893396198749542, "learning_rate": 0.001, "loss": 2.1055, "step": 19466 }, { "epoch": 0.8235468313732126, "grad_norm": 0.14513948559761047, "learning_rate": 0.001, "loss": 2.2117, "step": 19467 }, { "epoch": 0.823589136136729, "grad_norm": 0.12800917029380798, "learning_rate": 0.001, "loss": 2.3386, "step": 19468 }, { "epoch": 0.8236314409002453, "grad_norm": 0.15164655447006226, "learning_rate": 0.001, "loss": 1.9978, "step": 19469 }, { "epoch": 0.8236737456637617, "grad_norm": 0.18457886576652527, "learning_rate": 0.001, "loss": 1.6669, "step": 19470 }, { "epoch": 0.8237160504272781, "grad_norm": 0.3100104331970215, "learning_rate": 0.001, "loss": 1.9054, "step": 19471 }, { "epoch": 0.8237583551907944, "grad_norm": 0.16601592302322388, "learning_rate": 0.001, "loss": 2.0019, "step": 19472 }, { "epoch": 0.8238006599543108, "grad_norm": 0.25218597054481506, "learning_rate": 0.001, "loss": 2.5409, "step": 19473 }, { "epoch": 0.8238429647178273, "grad_norm": 0.15414927899837494, "learning_rate": 0.001, "loss": 2.0852, "step": 19474 }, { "epoch": 0.8238852694813436, "grad_norm": 0.1316404789686203, "learning_rate": 0.001, "loss": 1.8446, "step": 19475 }, { "epoch": 0.82392757424486, "grad_norm": 0.19027061760425568, "learning_rate": 0.001, "loss": 2.2132, "step": 19476 }, { "epoch": 0.8239698790083764, "grad_norm": 1.3380327224731445, "learning_rate": 0.001, "loss": 1.9915, "step": 19477 }, { "epoch": 0.8240121837718927, "grad_norm": 0.14829513430595398, "learning_rate": 0.001, "loss": 2.1132, "step": 19478 }, { "epoch": 0.8240544885354091, "grad_norm": 0.18493659794330597, "learning_rate": 0.001, "loss": 2.2199, "step": 19479 }, { "epoch": 0.8240967932989255, "grad_norm": 0.13995139300823212, "learning_rate": 0.001, "loss": 1.894, "step": 19480 }, { "epoch": 0.8241390980624418, "grad_norm": 0.6962743401527405, "learning_rate": 0.001, "loss": 2.9984, "step": 19481 }, { "epoch": 0.8241814028259582, "grad_norm": 0.17796729505062103, "learning_rate": 0.001, "loss": 2.2849, "step": 19482 }, { "epoch": 0.8242237075894746, "grad_norm": 0.5168524980545044, "learning_rate": 0.001, "loss": 3.2221, "step": 19483 }, { "epoch": 0.8242660123529909, "grad_norm": 0.13838616013526917, "learning_rate": 0.001, "loss": 1.861, "step": 19484 }, { "epoch": 0.8243083171165073, "grad_norm": 1.7203575372695923, "learning_rate": 0.001, "loss": 1.9319, "step": 19485 }, { "epoch": 0.8243506218800237, "grad_norm": 0.13858763873577118, "learning_rate": 0.001, "loss": 2.8774, "step": 19486 }, { "epoch": 0.82439292664354, "grad_norm": 0.15123365819454193, "learning_rate": 0.001, "loss": 2.2039, "step": 19487 }, { "epoch": 0.8244352314070564, "grad_norm": 0.46859481930732727, "learning_rate": 0.001, "loss": 2.0706, "step": 19488 }, { "epoch": 0.8244775361705728, "grad_norm": 0.16294020414352417, "learning_rate": 0.001, "loss": 1.4793, "step": 19489 }, { "epoch": 0.8245198409340891, "grad_norm": 0.13173837959766388, "learning_rate": 0.001, "loss": 1.8825, "step": 19490 }, { "epoch": 0.8245621456976056, "grad_norm": 0.1741243302822113, "learning_rate": 0.001, "loss": 2.136, "step": 19491 }, { "epoch": 0.824604450461122, "grad_norm": 0.16755490005016327, "learning_rate": 0.001, "loss": 1.7519, "step": 19492 }, { "epoch": 0.8246467552246383, "grad_norm": 0.17870347201824188, "learning_rate": 0.001, "loss": 2.4803, "step": 19493 }, { "epoch": 0.8246890599881547, "grad_norm": 0.15022897720336914, "learning_rate": 0.001, "loss": 1.5368, "step": 19494 }, { "epoch": 0.8247313647516711, "grad_norm": 2.018383026123047, "learning_rate": 0.001, "loss": 1.3298, "step": 19495 }, { "epoch": 0.8247736695151874, "grad_norm": 0.15447530150413513, "learning_rate": 0.001, "loss": 1.6869, "step": 19496 }, { "epoch": 0.8248159742787038, "grad_norm": 6.437837600708008, "learning_rate": 0.001, "loss": 2.0111, "step": 19497 }, { "epoch": 0.8248582790422202, "grad_norm": 0.14872145652770996, "learning_rate": 0.001, "loss": 2.4922, "step": 19498 }, { "epoch": 0.8249005838057365, "grad_norm": 0.15467225015163422, "learning_rate": 0.001, "loss": 2.5399, "step": 19499 }, { "epoch": 0.8249428885692529, "grad_norm": 0.16929113864898682, "learning_rate": 0.001, "loss": 1.8642, "step": 19500 }, { "epoch": 0.8249851933327693, "grad_norm": 0.14469696581363678, "learning_rate": 0.001, "loss": 2.8897, "step": 19501 }, { "epoch": 0.8250274980962856, "grad_norm": 0.1643838882446289, "learning_rate": 0.001, "loss": 2.0871, "step": 19502 }, { "epoch": 0.825069802859802, "grad_norm": 0.14725084602832794, "learning_rate": 0.001, "loss": 1.7524, "step": 19503 }, { "epoch": 0.8251121076233184, "grad_norm": 0.14372789859771729, "learning_rate": 0.001, "loss": 3.4166, "step": 19504 }, { "epoch": 0.8251544123868347, "grad_norm": 0.155779629945755, "learning_rate": 0.001, "loss": 1.9682, "step": 19505 }, { "epoch": 0.8251967171503511, "grad_norm": 1.8167226314544678, "learning_rate": 0.001, "loss": 2.1802, "step": 19506 }, { "epoch": 0.8252390219138676, "grad_norm": 0.4720889925956726, "learning_rate": 0.001, "loss": 1.7518, "step": 19507 }, { "epoch": 0.8252813266773839, "grad_norm": 1.381333351135254, "learning_rate": 0.001, "loss": 2.3623, "step": 19508 }, { "epoch": 0.8253236314409003, "grad_norm": 0.3292122781276703, "learning_rate": 0.001, "loss": 1.4812, "step": 19509 }, { "epoch": 0.8253659362044167, "grad_norm": 0.16913935542106628, "learning_rate": 0.001, "loss": 1.9194, "step": 19510 }, { "epoch": 0.825408240967933, "grad_norm": 0.20442567765712738, "learning_rate": 0.001, "loss": 2.7706, "step": 19511 }, { "epoch": 0.8254505457314494, "grad_norm": 0.12844796478748322, "learning_rate": 0.001, "loss": 1.7295, "step": 19512 }, { "epoch": 0.8254928504949657, "grad_norm": 0.6568970084190369, "learning_rate": 0.001, "loss": 2.2554, "step": 19513 }, { "epoch": 0.8255351552584821, "grad_norm": 0.3706272542476654, "learning_rate": 0.001, "loss": 1.5584, "step": 19514 }, { "epoch": 0.8255774600219985, "grad_norm": 18.31789207458496, "learning_rate": 0.001, "loss": 2.2803, "step": 19515 }, { "epoch": 0.8256197647855148, "grad_norm": 0.15570005774497986, "learning_rate": 0.001, "loss": 2.3987, "step": 19516 }, { "epoch": 0.8256620695490312, "grad_norm": 0.1890856921672821, "learning_rate": 0.001, "loss": 2.9565, "step": 19517 }, { "epoch": 0.8257043743125476, "grad_norm": 0.17735014855861664, "learning_rate": 0.001, "loss": 1.7442, "step": 19518 }, { "epoch": 0.8257466790760639, "grad_norm": 0.5632053017616272, "learning_rate": 0.001, "loss": 3.1095, "step": 19519 }, { "epoch": 0.8257889838395803, "grad_norm": 0.1494271159172058, "learning_rate": 0.001, "loss": 1.583, "step": 19520 }, { "epoch": 0.8258312886030967, "grad_norm": 3.3606419563293457, "learning_rate": 0.001, "loss": 2.7092, "step": 19521 }, { "epoch": 0.825873593366613, "grad_norm": 0.162941113114357, "learning_rate": 0.001, "loss": 2.5138, "step": 19522 }, { "epoch": 0.8259158981301294, "grad_norm": 0.18008768558502197, "learning_rate": 0.001, "loss": 1.5397, "step": 19523 }, { "epoch": 0.8259582028936459, "grad_norm": 0.2005215436220169, "learning_rate": 0.001, "loss": 2.3149, "step": 19524 }, { "epoch": 0.8260005076571622, "grad_norm": 21.058948516845703, "learning_rate": 0.001, "loss": 1.64, "step": 19525 }, { "epoch": 0.8260428124206786, "grad_norm": 0.18747086822986603, "learning_rate": 0.001, "loss": 1.8543, "step": 19526 }, { "epoch": 0.826085117184195, "grad_norm": 0.16638407111167908, "learning_rate": 0.001, "loss": 3.0271, "step": 19527 }, { "epoch": 0.8261274219477113, "grad_norm": 0.26135191321372986, "learning_rate": 0.001, "loss": 1.9732, "step": 19528 }, { "epoch": 0.8261697267112277, "grad_norm": 0.906550943851471, "learning_rate": 0.001, "loss": 1.9329, "step": 19529 }, { "epoch": 0.8262120314747441, "grad_norm": 0.19989506900310516, "learning_rate": 0.001, "loss": 2.3301, "step": 19530 }, { "epoch": 0.8262543362382604, "grad_norm": 0.1638246327638626, "learning_rate": 0.001, "loss": 2.0817, "step": 19531 }, { "epoch": 0.8262966410017768, "grad_norm": 0.17549832165241241, "learning_rate": 0.001, "loss": 3.5144, "step": 19532 }, { "epoch": 0.8263389457652932, "grad_norm": 0.18301671743392944, "learning_rate": 0.001, "loss": 2.1419, "step": 19533 }, { "epoch": 0.8263812505288095, "grad_norm": 0.17851994931697845, "learning_rate": 0.001, "loss": 2.0738, "step": 19534 }, { "epoch": 0.8264235552923259, "grad_norm": 0.18530461192131042, "learning_rate": 0.001, "loss": 1.9832, "step": 19535 }, { "epoch": 0.8264658600558423, "grad_norm": 1.8142375946044922, "learning_rate": 0.001, "loss": 2.8591, "step": 19536 }, { "epoch": 0.8265081648193586, "grad_norm": 0.15847790241241455, "learning_rate": 0.001, "loss": 1.9201, "step": 19537 }, { "epoch": 0.826550469582875, "grad_norm": 0.8735809922218323, "learning_rate": 0.001, "loss": 3.1536, "step": 19538 }, { "epoch": 0.8265927743463914, "grad_norm": 0.18533921241760254, "learning_rate": 0.001, "loss": 2.4057, "step": 19539 }, { "epoch": 0.8266350791099077, "grad_norm": 2.2356624603271484, "learning_rate": 0.001, "loss": 2.8036, "step": 19540 }, { "epoch": 0.8266773838734242, "grad_norm": 0.1672806292772293, "learning_rate": 0.001, "loss": 2.093, "step": 19541 }, { "epoch": 0.8267196886369406, "grad_norm": 0.26861119270324707, "learning_rate": 0.001, "loss": 2.5095, "step": 19542 }, { "epoch": 0.8267619934004569, "grad_norm": 0.1656004935503006, "learning_rate": 0.001, "loss": 2.269, "step": 19543 }, { "epoch": 0.8268042981639733, "grad_norm": 0.1717766374349594, "learning_rate": 0.001, "loss": 1.9956, "step": 19544 }, { "epoch": 0.8268466029274897, "grad_norm": 0.21453844010829926, "learning_rate": 0.001, "loss": 3.3933, "step": 19545 }, { "epoch": 0.826888907691006, "grad_norm": 0.15061205625534058, "learning_rate": 0.001, "loss": 2.3823, "step": 19546 }, { "epoch": 0.8269312124545224, "grad_norm": 0.20553554594516754, "learning_rate": 0.001, "loss": 2.4435, "step": 19547 }, { "epoch": 0.8269735172180388, "grad_norm": 0.41001060605049133, "learning_rate": 0.001, "loss": 2.516, "step": 19548 }, { "epoch": 0.8270158219815551, "grad_norm": 0.16718702018260956, "learning_rate": 0.001, "loss": 2.3812, "step": 19549 }, { "epoch": 0.8270581267450715, "grad_norm": 0.13911297917366028, "learning_rate": 0.001, "loss": 1.7068, "step": 19550 }, { "epoch": 0.8271004315085879, "grad_norm": 4.676766872406006, "learning_rate": 0.001, "loss": 3.0646, "step": 19551 }, { "epoch": 0.8271427362721042, "grad_norm": 0.1728333830833435, "learning_rate": 0.001, "loss": 1.8783, "step": 19552 }, { "epoch": 0.8271850410356206, "grad_norm": 0.1679517775774002, "learning_rate": 0.001, "loss": 1.7856, "step": 19553 }, { "epoch": 0.827227345799137, "grad_norm": 0.1749308705329895, "learning_rate": 0.001, "loss": 1.9916, "step": 19554 }, { "epoch": 0.8272696505626533, "grad_norm": 0.6144496202468872, "learning_rate": 0.001, "loss": 2.504, "step": 19555 }, { "epoch": 0.8273119553261697, "grad_norm": 0.398067444562912, "learning_rate": 0.001, "loss": 2.041, "step": 19556 }, { "epoch": 0.827354260089686, "grad_norm": 0.15144911408424377, "learning_rate": 0.001, "loss": 2.1569, "step": 19557 }, { "epoch": 0.8273965648532025, "grad_norm": 0.1478147804737091, "learning_rate": 0.001, "loss": 2.3869, "step": 19558 }, { "epoch": 0.8274388696167189, "grad_norm": 0.17387191951274872, "learning_rate": 0.001, "loss": 3.1704, "step": 19559 }, { "epoch": 0.8274811743802352, "grad_norm": 0.7300922870635986, "learning_rate": 0.001, "loss": 2.1563, "step": 19560 }, { "epoch": 0.8275234791437516, "grad_norm": 0.17235367000102997, "learning_rate": 0.001, "loss": 1.8931, "step": 19561 }, { "epoch": 0.827565783907268, "grad_norm": 0.21332013607025146, "learning_rate": 0.001, "loss": 2.7753, "step": 19562 }, { "epoch": 0.8276080886707843, "grad_norm": 0.18801867961883545, "learning_rate": 0.001, "loss": 1.9607, "step": 19563 }, { "epoch": 0.8276503934343007, "grad_norm": 0.15900994837284088, "learning_rate": 0.001, "loss": 2.6224, "step": 19564 }, { "epoch": 0.8276926981978171, "grad_norm": 0.1822887808084488, "learning_rate": 0.001, "loss": 2.2022, "step": 19565 }, { "epoch": 0.8277350029613334, "grad_norm": 0.1728331744670868, "learning_rate": 0.001, "loss": 1.7164, "step": 19566 }, { "epoch": 0.8277773077248498, "grad_norm": 0.18861548602581024, "learning_rate": 0.001, "loss": 2.6136, "step": 19567 }, { "epoch": 0.8278196124883662, "grad_norm": 4.526989459991455, "learning_rate": 0.001, "loss": 2.0578, "step": 19568 }, { "epoch": 0.8278619172518825, "grad_norm": 1.0483940839767456, "learning_rate": 0.001, "loss": 2.2614, "step": 19569 }, { "epoch": 0.8279042220153989, "grad_norm": 6.750152587890625, "learning_rate": 0.001, "loss": 1.6585, "step": 19570 }, { "epoch": 0.8279465267789153, "grad_norm": 0.15433065593242645, "learning_rate": 0.001, "loss": 1.8853, "step": 19571 }, { "epoch": 0.8279888315424316, "grad_norm": 7.699106693267822, "learning_rate": 0.001, "loss": 1.9598, "step": 19572 }, { "epoch": 0.828031136305948, "grad_norm": 0.1841927170753479, "learning_rate": 0.001, "loss": 1.8607, "step": 19573 }, { "epoch": 0.8280734410694645, "grad_norm": 0.16091950237751007, "learning_rate": 0.001, "loss": 2.6873, "step": 19574 }, { "epoch": 0.8281157458329808, "grad_norm": 0.188730388879776, "learning_rate": 0.001, "loss": 2.4136, "step": 19575 }, { "epoch": 0.8281580505964972, "grad_norm": 0.18793466687202454, "learning_rate": 0.001, "loss": 2.9059, "step": 19576 }, { "epoch": 0.8282003553600136, "grad_norm": 10.438246726989746, "learning_rate": 0.001, "loss": 2.4116, "step": 19577 }, { "epoch": 0.8282426601235299, "grad_norm": 1.2651994228363037, "learning_rate": 0.001, "loss": 2.6492, "step": 19578 }, { "epoch": 0.8282849648870463, "grad_norm": 0.23723463714122772, "learning_rate": 0.001, "loss": 1.6084, "step": 19579 }, { "epoch": 0.8283272696505627, "grad_norm": 0.18241967260837555, "learning_rate": 0.001, "loss": 2.2461, "step": 19580 }, { "epoch": 0.828369574414079, "grad_norm": 0.1818530410528183, "learning_rate": 0.001, "loss": 2.7537, "step": 19581 }, { "epoch": 0.8284118791775954, "grad_norm": 0.20046694576740265, "learning_rate": 0.001, "loss": 1.8172, "step": 19582 }, { "epoch": 0.8284541839411118, "grad_norm": 0.20764821767807007, "learning_rate": 0.001, "loss": 1.5514, "step": 19583 }, { "epoch": 0.8284964887046281, "grad_norm": 0.19038403034210205, "learning_rate": 0.001, "loss": 2.4937, "step": 19584 }, { "epoch": 0.8285387934681445, "grad_norm": 13.164509773254395, "learning_rate": 0.001, "loss": 2.2863, "step": 19585 }, { "epoch": 0.8285810982316609, "grad_norm": 0.18057294189929962, "learning_rate": 0.001, "loss": 2.6837, "step": 19586 }, { "epoch": 0.8286234029951772, "grad_norm": 0.20905058085918427, "learning_rate": 0.001, "loss": 2.1163, "step": 19587 }, { "epoch": 0.8286657077586936, "grad_norm": 0.23860158026218414, "learning_rate": 0.001, "loss": 2.5112, "step": 19588 }, { "epoch": 0.82870801252221, "grad_norm": 0.1781463325023651, "learning_rate": 0.001, "loss": 3.0306, "step": 19589 }, { "epoch": 0.8287503172857263, "grad_norm": 0.7263326644897461, "learning_rate": 0.001, "loss": 1.8875, "step": 19590 }, { "epoch": 0.8287926220492428, "grad_norm": 0.8033844232559204, "learning_rate": 0.001, "loss": 2.5529, "step": 19591 }, { "epoch": 0.8288349268127592, "grad_norm": 0.18857400119304657, "learning_rate": 0.001, "loss": 2.7359, "step": 19592 }, { "epoch": 0.8288772315762755, "grad_norm": 0.16333748400211334, "learning_rate": 0.001, "loss": 2.0609, "step": 19593 }, { "epoch": 0.8289195363397919, "grad_norm": 0.19050641357898712, "learning_rate": 0.001, "loss": 1.7926, "step": 19594 }, { "epoch": 0.8289618411033083, "grad_norm": 0.1565246731042862, "learning_rate": 0.001, "loss": 2.0922, "step": 19595 }, { "epoch": 0.8290041458668246, "grad_norm": 0.13431625068187714, "learning_rate": 0.001, "loss": 1.9739, "step": 19596 }, { "epoch": 0.829046450630341, "grad_norm": 4.049526214599609, "learning_rate": 0.001, "loss": 3.013, "step": 19597 }, { "epoch": 0.8290887553938574, "grad_norm": 0.15808945894241333, "learning_rate": 0.001, "loss": 2.2172, "step": 19598 }, { "epoch": 0.8291310601573737, "grad_norm": 0.15318995714187622, "learning_rate": 0.001, "loss": 2.4165, "step": 19599 }, { "epoch": 0.8291733649208901, "grad_norm": 0.16532470285892487, "learning_rate": 0.001, "loss": 1.7708, "step": 19600 }, { "epoch": 0.8292156696844064, "grad_norm": 0.21278615295886993, "learning_rate": 0.001, "loss": 1.9634, "step": 19601 }, { "epoch": 0.8292579744479228, "grad_norm": 0.16973280906677246, "learning_rate": 0.001, "loss": 2.0971, "step": 19602 }, { "epoch": 0.8293002792114392, "grad_norm": 0.191205233335495, "learning_rate": 0.001, "loss": 2.2638, "step": 19603 }, { "epoch": 0.8293425839749555, "grad_norm": 0.18113046884536743, "learning_rate": 0.001, "loss": 1.898, "step": 19604 }, { "epoch": 0.8293848887384719, "grad_norm": 7.267598628997803, "learning_rate": 0.001, "loss": 1.8977, "step": 19605 }, { "epoch": 0.8294271935019883, "grad_norm": 0.16045303642749786, "learning_rate": 0.001, "loss": 1.5686, "step": 19606 }, { "epoch": 0.8294694982655046, "grad_norm": 0.15106256306171417, "learning_rate": 0.001, "loss": 2.2489, "step": 19607 }, { "epoch": 0.829511803029021, "grad_norm": 0.16756461560726166, "learning_rate": 0.001, "loss": 1.7174, "step": 19608 }, { "epoch": 0.8295541077925375, "grad_norm": 0.6214344501495361, "learning_rate": 0.001, "loss": 2.2515, "step": 19609 }, { "epoch": 0.8295964125560538, "grad_norm": 0.1721925288438797, "learning_rate": 0.001, "loss": 1.7405, "step": 19610 }, { "epoch": 0.8296387173195702, "grad_norm": 0.1743050515651703, "learning_rate": 0.001, "loss": 1.6597, "step": 19611 }, { "epoch": 0.8296810220830866, "grad_norm": 0.5375502109527588, "learning_rate": 0.001, "loss": 2.6013, "step": 19612 }, { "epoch": 0.8297233268466029, "grad_norm": 2.799589157104492, "learning_rate": 0.001, "loss": 1.9554, "step": 19613 }, { "epoch": 0.8297656316101193, "grad_norm": 0.17461428046226501, "learning_rate": 0.001, "loss": 1.6489, "step": 19614 }, { "epoch": 0.8298079363736357, "grad_norm": 0.16564109921455383, "learning_rate": 0.001, "loss": 1.7323, "step": 19615 }, { "epoch": 0.829850241137152, "grad_norm": 0.2872896194458008, "learning_rate": 0.001, "loss": 2.0232, "step": 19616 }, { "epoch": 0.8298925459006684, "grad_norm": 0.13868144154548645, "learning_rate": 0.001, "loss": 1.6235, "step": 19617 }, { "epoch": 0.8299348506641848, "grad_norm": 0.17989665269851685, "learning_rate": 0.001, "loss": 1.8112, "step": 19618 }, { "epoch": 0.8299771554277011, "grad_norm": 0.42621657252311707, "learning_rate": 0.001, "loss": 2.8575, "step": 19619 }, { "epoch": 0.8300194601912175, "grad_norm": 0.24013011157512665, "learning_rate": 0.001, "loss": 2.0607, "step": 19620 }, { "epoch": 0.8300617649547339, "grad_norm": 0.22159551084041595, "learning_rate": 0.001, "loss": 2.8096, "step": 19621 }, { "epoch": 0.8301040697182502, "grad_norm": 0.5154104232788086, "learning_rate": 0.001, "loss": 2.2271, "step": 19622 }, { "epoch": 0.8301463744817666, "grad_norm": 0.9761685729026794, "learning_rate": 0.001, "loss": 2.3908, "step": 19623 }, { "epoch": 0.8301886792452831, "grad_norm": 0.36297187209129333, "learning_rate": 0.001, "loss": 3.5581, "step": 19624 }, { "epoch": 0.8302309840087994, "grad_norm": 0.15795831382274628, "learning_rate": 0.001, "loss": 1.8869, "step": 19625 }, { "epoch": 0.8302732887723158, "grad_norm": 0.19538545608520508, "learning_rate": 0.001, "loss": 2.2421, "step": 19626 }, { "epoch": 0.8303155935358322, "grad_norm": 0.1804639995098114, "learning_rate": 0.001, "loss": 2.4326, "step": 19627 }, { "epoch": 0.8303578982993485, "grad_norm": 0.14908581972122192, "learning_rate": 0.001, "loss": 2.9382, "step": 19628 }, { "epoch": 0.8304002030628649, "grad_norm": 0.21959789097309113, "learning_rate": 0.001, "loss": 2.003, "step": 19629 }, { "epoch": 0.8304425078263813, "grad_norm": 0.5916885733604431, "learning_rate": 0.001, "loss": 3.7824, "step": 19630 }, { "epoch": 0.8304848125898976, "grad_norm": 0.1725091189146042, "learning_rate": 0.001, "loss": 1.8455, "step": 19631 }, { "epoch": 0.830527117353414, "grad_norm": 1.093875765800476, "learning_rate": 0.001, "loss": 1.9038, "step": 19632 }, { "epoch": 0.8305694221169304, "grad_norm": 0.1586592197418213, "learning_rate": 0.001, "loss": 1.7105, "step": 19633 }, { "epoch": 0.8306117268804467, "grad_norm": 0.1464533805847168, "learning_rate": 0.001, "loss": 1.5658, "step": 19634 }, { "epoch": 0.8306540316439631, "grad_norm": 0.2901553213596344, "learning_rate": 0.001, "loss": 1.9834, "step": 19635 }, { "epoch": 0.8306963364074795, "grad_norm": 0.2217240035533905, "learning_rate": 0.001, "loss": 2.5072, "step": 19636 }, { "epoch": 0.8307386411709958, "grad_norm": 0.24927441775798798, "learning_rate": 0.001, "loss": 1.8134, "step": 19637 }, { "epoch": 0.8307809459345122, "grad_norm": 0.1451728343963623, "learning_rate": 0.001, "loss": 2.3247, "step": 19638 }, { "epoch": 0.8308232506980286, "grad_norm": 0.1595974713563919, "learning_rate": 0.001, "loss": 1.5477, "step": 19639 }, { "epoch": 0.830865555461545, "grad_norm": 0.4610111117362976, "learning_rate": 0.001, "loss": 1.5437, "step": 19640 }, { "epoch": 0.8309078602250614, "grad_norm": 0.1525607854127884, "learning_rate": 0.001, "loss": 2.1338, "step": 19641 }, { "epoch": 0.8309501649885778, "grad_norm": 0.16522721946239471, "learning_rate": 0.001, "loss": 3.4636, "step": 19642 }, { "epoch": 0.8309924697520941, "grad_norm": 0.4826166033744812, "learning_rate": 0.001, "loss": 1.8839, "step": 19643 }, { "epoch": 0.8310347745156105, "grad_norm": 0.15211009979248047, "learning_rate": 0.001, "loss": 1.7515, "step": 19644 }, { "epoch": 0.8310770792791269, "grad_norm": 0.13759112358093262, "learning_rate": 0.001, "loss": 1.6183, "step": 19645 }, { "epoch": 0.8311193840426432, "grad_norm": 0.1878431886434555, "learning_rate": 0.001, "loss": 1.7729, "step": 19646 }, { "epoch": 0.8311616888061596, "grad_norm": 0.5903341174125671, "learning_rate": 0.001, "loss": 2.8315, "step": 19647 }, { "epoch": 0.8312039935696759, "grad_norm": 0.16062727570533752, "learning_rate": 0.001, "loss": 2.2591, "step": 19648 }, { "epoch": 0.8312462983331923, "grad_norm": 0.1513572633266449, "learning_rate": 0.001, "loss": 1.9523, "step": 19649 }, { "epoch": 0.8312886030967087, "grad_norm": 3.5382885932922363, "learning_rate": 0.001, "loss": 1.8145, "step": 19650 }, { "epoch": 0.831330907860225, "grad_norm": 0.6378043293952942, "learning_rate": 0.001, "loss": 2.7972, "step": 19651 }, { "epoch": 0.8313732126237414, "grad_norm": 0.5603189468383789, "learning_rate": 0.001, "loss": 1.9918, "step": 19652 }, { "epoch": 0.8314155173872578, "grad_norm": 0.7151978611946106, "learning_rate": 0.001, "loss": 1.8135, "step": 19653 }, { "epoch": 0.8314578221507741, "grad_norm": 0.14867694675922394, "learning_rate": 0.001, "loss": 1.9327, "step": 19654 }, { "epoch": 0.8315001269142905, "grad_norm": 0.15285193920135498, "learning_rate": 0.001, "loss": 1.8189, "step": 19655 }, { "epoch": 0.831542431677807, "grad_norm": 0.26532498002052307, "learning_rate": 0.001, "loss": 2.4956, "step": 19656 }, { "epoch": 0.8315847364413232, "grad_norm": 0.18189120292663574, "learning_rate": 0.001, "loss": 2.8454, "step": 19657 }, { "epoch": 0.8316270412048397, "grad_norm": 0.335979700088501, "learning_rate": 0.001, "loss": 1.7169, "step": 19658 }, { "epoch": 0.8316693459683561, "grad_norm": 1.2381075620651245, "learning_rate": 0.001, "loss": 1.5355, "step": 19659 }, { "epoch": 0.8317116507318724, "grad_norm": 0.18235164880752563, "learning_rate": 0.001, "loss": 2.2521, "step": 19660 }, { "epoch": 0.8317539554953888, "grad_norm": 0.5001907348632812, "learning_rate": 0.001, "loss": 2.8203, "step": 19661 }, { "epoch": 0.8317962602589052, "grad_norm": 0.13656170666217804, "learning_rate": 0.001, "loss": 2.2657, "step": 19662 }, { "epoch": 0.8318385650224215, "grad_norm": 0.1643732637166977, "learning_rate": 0.001, "loss": 1.7424, "step": 19663 }, { "epoch": 0.8318808697859379, "grad_norm": 0.15448181331157684, "learning_rate": 0.001, "loss": 1.4355, "step": 19664 }, { "epoch": 0.8319231745494543, "grad_norm": 2.5105960369110107, "learning_rate": 0.001, "loss": 1.913, "step": 19665 }, { "epoch": 0.8319654793129706, "grad_norm": 0.16451245546340942, "learning_rate": 0.001, "loss": 2.2253, "step": 19666 }, { "epoch": 0.832007784076487, "grad_norm": 0.15725699067115784, "learning_rate": 0.001, "loss": 1.9037, "step": 19667 }, { "epoch": 0.8320500888400034, "grad_norm": 0.32337695360183716, "learning_rate": 0.001, "loss": 2.0358, "step": 19668 }, { "epoch": 0.8320923936035197, "grad_norm": 0.21804969012737274, "learning_rate": 0.001, "loss": 1.8434, "step": 19669 }, { "epoch": 0.8321346983670361, "grad_norm": 5.778602123260498, "learning_rate": 0.001, "loss": 1.8731, "step": 19670 }, { "epoch": 0.8321770031305525, "grad_norm": 0.21234005689620972, "learning_rate": 0.001, "loss": 1.9701, "step": 19671 }, { "epoch": 0.8322193078940688, "grad_norm": 2.040515661239624, "learning_rate": 0.001, "loss": 3.1024, "step": 19672 }, { "epoch": 0.8322616126575852, "grad_norm": 0.14439794421195984, "learning_rate": 0.001, "loss": 1.4982, "step": 19673 }, { "epoch": 0.8323039174211017, "grad_norm": 0.14553527534008026, "learning_rate": 0.001, "loss": 2.5106, "step": 19674 }, { "epoch": 0.832346222184618, "grad_norm": 0.1754305213689804, "learning_rate": 0.001, "loss": 2.5787, "step": 19675 }, { "epoch": 0.8323885269481344, "grad_norm": 0.13530834019184113, "learning_rate": 0.001, "loss": 1.6652, "step": 19676 }, { "epoch": 0.8324308317116508, "grad_norm": 16.148717880249023, "learning_rate": 0.001, "loss": 1.9761, "step": 19677 }, { "epoch": 0.8324731364751671, "grad_norm": 0.16768808662891388, "learning_rate": 0.001, "loss": 2.5138, "step": 19678 }, { "epoch": 0.8325154412386835, "grad_norm": 0.16565167903900146, "learning_rate": 0.001, "loss": 1.8295, "step": 19679 }, { "epoch": 0.8325577460021999, "grad_norm": 0.24499014019966125, "learning_rate": 0.001, "loss": 2.4988, "step": 19680 }, { "epoch": 0.8326000507657162, "grad_norm": 0.1661049723625183, "learning_rate": 0.001, "loss": 2.2289, "step": 19681 }, { "epoch": 0.8326423555292326, "grad_norm": 0.16528615355491638, "learning_rate": 0.001, "loss": 1.5157, "step": 19682 }, { "epoch": 0.832684660292749, "grad_norm": 0.1665087193250656, "learning_rate": 0.001, "loss": 2.2805, "step": 19683 }, { "epoch": 0.8327269650562653, "grad_norm": 0.1810268610715866, "learning_rate": 0.001, "loss": 2.3202, "step": 19684 }, { "epoch": 0.8327692698197817, "grad_norm": 0.15981633961200714, "learning_rate": 0.001, "loss": 1.511, "step": 19685 }, { "epoch": 0.8328115745832981, "grad_norm": 0.43922242522239685, "learning_rate": 0.001, "loss": 3.167, "step": 19686 }, { "epoch": 0.8328538793468144, "grad_norm": 0.16174915432929993, "learning_rate": 0.001, "loss": 1.8022, "step": 19687 }, { "epoch": 0.8328961841103308, "grad_norm": 0.12966187298297882, "learning_rate": 0.001, "loss": 2.7875, "step": 19688 }, { "epoch": 0.8329384888738472, "grad_norm": 0.17948752641677856, "learning_rate": 0.001, "loss": 1.7173, "step": 19689 }, { "epoch": 0.8329807936373635, "grad_norm": 0.17646683752536774, "learning_rate": 0.001, "loss": 1.9492, "step": 19690 }, { "epoch": 0.83302309840088, "grad_norm": 0.17098137736320496, "learning_rate": 0.001, "loss": 1.5657, "step": 19691 }, { "epoch": 0.8330654031643963, "grad_norm": 7.42794942855835, "learning_rate": 0.001, "loss": 1.7042, "step": 19692 }, { "epoch": 0.8331077079279127, "grad_norm": 0.15731237828731537, "learning_rate": 0.001, "loss": 2.4516, "step": 19693 }, { "epoch": 0.8331500126914291, "grad_norm": 0.16267119348049164, "learning_rate": 0.001, "loss": 1.8393, "step": 19694 }, { "epoch": 0.8331923174549454, "grad_norm": 0.1380946934223175, "learning_rate": 0.001, "loss": 2.3719, "step": 19695 }, { "epoch": 0.8332346222184618, "grad_norm": 0.1593260020017624, "learning_rate": 0.001, "loss": 1.9913, "step": 19696 }, { "epoch": 0.8332769269819782, "grad_norm": 0.19638261198997498, "learning_rate": 0.001, "loss": 2.1259, "step": 19697 }, { "epoch": 0.8333192317454945, "grad_norm": 0.1739358901977539, "learning_rate": 0.001, "loss": 2.602, "step": 19698 }, { "epoch": 0.8333615365090109, "grad_norm": 0.28092655539512634, "learning_rate": 0.001, "loss": 1.8167, "step": 19699 }, { "epoch": 0.8334038412725273, "grad_norm": 0.15453416109085083, "learning_rate": 0.001, "loss": 1.8884, "step": 19700 }, { "epoch": 0.8334461460360436, "grad_norm": 0.2648554742336273, "learning_rate": 0.001, "loss": 3.4063, "step": 19701 }, { "epoch": 0.83348845079956, "grad_norm": 0.15070085227489471, "learning_rate": 0.001, "loss": 1.9261, "step": 19702 }, { "epoch": 0.8335307555630764, "grad_norm": 0.15136472880840302, "learning_rate": 0.001, "loss": 1.8056, "step": 19703 }, { "epoch": 0.8335730603265927, "grad_norm": 0.17444024980068207, "learning_rate": 0.001, "loss": 2.3862, "step": 19704 }, { "epoch": 0.8336153650901091, "grad_norm": 0.20091457664966583, "learning_rate": 0.001, "loss": 2.6777, "step": 19705 }, { "epoch": 0.8336576698536255, "grad_norm": 0.17483146488666534, "learning_rate": 0.001, "loss": 3.9762, "step": 19706 }, { "epoch": 0.8336999746171418, "grad_norm": 0.14179664850234985, "learning_rate": 0.001, "loss": 1.7762, "step": 19707 }, { "epoch": 0.8337422793806583, "grad_norm": 0.14632867276668549, "learning_rate": 0.001, "loss": 2.4196, "step": 19708 }, { "epoch": 0.8337845841441747, "grad_norm": 0.1618465930223465, "learning_rate": 0.001, "loss": 2.0228, "step": 19709 }, { "epoch": 0.833826888907691, "grad_norm": 0.17605137825012207, "learning_rate": 0.001, "loss": 2.255, "step": 19710 }, { "epoch": 0.8338691936712074, "grad_norm": 0.1627848893404007, "learning_rate": 0.001, "loss": 2.5271, "step": 19711 }, { "epoch": 0.8339114984347238, "grad_norm": 0.15712319314479828, "learning_rate": 0.001, "loss": 1.6956, "step": 19712 }, { "epoch": 0.8339538031982401, "grad_norm": 0.2377215325832367, "learning_rate": 0.001, "loss": 2.1427, "step": 19713 }, { "epoch": 0.8339961079617565, "grad_norm": 0.14248542487621307, "learning_rate": 0.001, "loss": 2.9125, "step": 19714 }, { "epoch": 0.8340384127252729, "grad_norm": 0.1505517214536667, "learning_rate": 0.001, "loss": 1.4674, "step": 19715 }, { "epoch": 0.8340807174887892, "grad_norm": 0.11878804862499237, "learning_rate": 0.001, "loss": 1.7173, "step": 19716 }, { "epoch": 0.8341230222523056, "grad_norm": 0.1470642238855362, "learning_rate": 0.001, "loss": 1.3979, "step": 19717 }, { "epoch": 0.834165327015822, "grad_norm": 0.14857931435108185, "learning_rate": 0.001, "loss": 2.3536, "step": 19718 }, { "epoch": 0.8342076317793383, "grad_norm": 0.14904291927814484, "learning_rate": 0.001, "loss": 1.7355, "step": 19719 }, { "epoch": 0.8342499365428547, "grad_norm": 4.184340476989746, "learning_rate": 0.001, "loss": 2.0502, "step": 19720 }, { "epoch": 0.8342922413063711, "grad_norm": 0.16382192075252533, "learning_rate": 0.001, "loss": 3.3113, "step": 19721 }, { "epoch": 0.8343345460698874, "grad_norm": 2.076079845428467, "learning_rate": 0.001, "loss": 2.152, "step": 19722 }, { "epoch": 0.8343768508334038, "grad_norm": 0.13855180144309998, "learning_rate": 0.001, "loss": 1.7025, "step": 19723 }, { "epoch": 0.8344191555969203, "grad_norm": 0.1491064429283142, "learning_rate": 0.001, "loss": 1.7431, "step": 19724 }, { "epoch": 0.8344614603604366, "grad_norm": 0.3308009207248688, "learning_rate": 0.001, "loss": 1.3676, "step": 19725 }, { "epoch": 0.834503765123953, "grad_norm": 1.1953833103179932, "learning_rate": 0.001, "loss": 2.9253, "step": 19726 }, { "epoch": 0.8345460698874694, "grad_norm": 0.11848998814821243, "learning_rate": 0.001, "loss": 1.6943, "step": 19727 }, { "epoch": 0.8345883746509857, "grad_norm": 1.0493426322937012, "learning_rate": 0.001, "loss": 1.9968, "step": 19728 }, { "epoch": 0.8346306794145021, "grad_norm": 0.13267287611961365, "learning_rate": 0.001, "loss": 1.927, "step": 19729 }, { "epoch": 0.8346729841780185, "grad_norm": 0.1327977031469345, "learning_rate": 0.001, "loss": 2.9165, "step": 19730 }, { "epoch": 0.8347152889415348, "grad_norm": 0.16689974069595337, "learning_rate": 0.001, "loss": 2.0789, "step": 19731 }, { "epoch": 0.8347575937050512, "grad_norm": 0.17344988882541656, "learning_rate": 0.001, "loss": 2.5676, "step": 19732 }, { "epoch": 0.8347998984685676, "grad_norm": 0.14860732853412628, "learning_rate": 0.001, "loss": 1.6259, "step": 19733 }, { "epoch": 0.8348422032320839, "grad_norm": 0.15001067519187927, "learning_rate": 0.001, "loss": 1.5225, "step": 19734 }, { "epoch": 0.8348845079956003, "grad_norm": 0.16379481554031372, "learning_rate": 0.001, "loss": 3.1107, "step": 19735 }, { "epoch": 0.8349268127591166, "grad_norm": 0.16210788488388062, "learning_rate": 0.001, "loss": 1.9486, "step": 19736 }, { "epoch": 0.834969117522633, "grad_norm": 0.17542105913162231, "learning_rate": 0.001, "loss": 1.36, "step": 19737 }, { "epoch": 0.8350114222861494, "grad_norm": 0.168275848031044, "learning_rate": 0.001, "loss": 2.1156, "step": 19738 }, { "epoch": 0.8350537270496657, "grad_norm": 0.153310164809227, "learning_rate": 0.001, "loss": 2.1324, "step": 19739 }, { "epoch": 0.8350960318131821, "grad_norm": 1.9082057476043701, "learning_rate": 0.001, "loss": 2.4097, "step": 19740 }, { "epoch": 0.8351383365766986, "grad_norm": 0.1443844437599182, "learning_rate": 0.001, "loss": 2.4301, "step": 19741 }, { "epoch": 0.8351806413402149, "grad_norm": 0.14315207302570343, "learning_rate": 0.001, "loss": 1.3867, "step": 19742 }, { "epoch": 0.8352229461037313, "grad_norm": 0.15107427537441254, "learning_rate": 0.001, "loss": 3.8243, "step": 19743 }, { "epoch": 0.8352652508672477, "grad_norm": 0.1486080139875412, "learning_rate": 0.001, "loss": 1.3748, "step": 19744 }, { "epoch": 0.835307555630764, "grad_norm": 0.13850051164627075, "learning_rate": 0.001, "loss": 2.7205, "step": 19745 }, { "epoch": 0.8353498603942804, "grad_norm": 0.1894737184047699, "learning_rate": 0.001, "loss": 2.0814, "step": 19746 }, { "epoch": 0.8353921651577968, "grad_norm": 3.34781551361084, "learning_rate": 0.001, "loss": 1.7776, "step": 19747 }, { "epoch": 0.8354344699213131, "grad_norm": 0.14483730494976044, "learning_rate": 0.001, "loss": 2.1921, "step": 19748 }, { "epoch": 0.8354767746848295, "grad_norm": 0.15554209053516388, "learning_rate": 0.001, "loss": 2.1626, "step": 19749 }, { "epoch": 0.8355190794483459, "grad_norm": 0.2667386531829834, "learning_rate": 0.001, "loss": 2.3544, "step": 19750 }, { "epoch": 0.8355613842118622, "grad_norm": 0.17081429064273834, "learning_rate": 0.001, "loss": 3.0333, "step": 19751 }, { "epoch": 0.8356036889753786, "grad_norm": 0.18492451310157776, "learning_rate": 0.001, "loss": 2.3743, "step": 19752 }, { "epoch": 0.835645993738895, "grad_norm": 1.0934829711914062, "learning_rate": 0.001, "loss": 2.1734, "step": 19753 }, { "epoch": 0.8356882985024113, "grad_norm": 0.2422739565372467, "learning_rate": 0.001, "loss": 1.8901, "step": 19754 }, { "epoch": 0.8357306032659277, "grad_norm": 0.39902499318122864, "learning_rate": 0.001, "loss": 2.4519, "step": 19755 }, { "epoch": 0.8357729080294442, "grad_norm": 18.87704849243164, "learning_rate": 0.001, "loss": 4.1573, "step": 19756 }, { "epoch": 0.8358152127929604, "grad_norm": 0.1568903774023056, "learning_rate": 0.001, "loss": 1.8906, "step": 19757 }, { "epoch": 0.8358575175564769, "grad_norm": 0.2181655913591385, "learning_rate": 0.001, "loss": 2.7009, "step": 19758 }, { "epoch": 0.8358998223199933, "grad_norm": 0.14595481753349304, "learning_rate": 0.001, "loss": 1.4677, "step": 19759 }, { "epoch": 0.8359421270835096, "grad_norm": 0.18573489785194397, "learning_rate": 0.001, "loss": 1.9539, "step": 19760 }, { "epoch": 0.835984431847026, "grad_norm": 0.19582517445087433, "learning_rate": 0.001, "loss": 1.9018, "step": 19761 }, { "epoch": 0.8360267366105424, "grad_norm": 0.18559232354164124, "learning_rate": 0.001, "loss": 2.0573, "step": 19762 }, { "epoch": 0.8360690413740587, "grad_norm": 0.2434457689523697, "learning_rate": 0.001, "loss": 2.4941, "step": 19763 }, { "epoch": 0.8361113461375751, "grad_norm": 0.24671140313148499, "learning_rate": 0.001, "loss": 2.2932, "step": 19764 }, { "epoch": 0.8361536509010915, "grad_norm": 55.95083999633789, "learning_rate": 0.001, "loss": 1.4447, "step": 19765 }, { "epoch": 0.8361959556646078, "grad_norm": 0.18943527340888977, "learning_rate": 0.001, "loss": 1.8799, "step": 19766 }, { "epoch": 0.8362382604281242, "grad_norm": 0.248708114027977, "learning_rate": 0.001, "loss": 2.1987, "step": 19767 }, { "epoch": 0.8362805651916406, "grad_norm": 0.2755674719810486, "learning_rate": 0.001, "loss": 2.5924, "step": 19768 }, { "epoch": 0.8363228699551569, "grad_norm": 0.8433470726013184, "learning_rate": 0.001, "loss": 2.656, "step": 19769 }, { "epoch": 0.8363651747186733, "grad_norm": 0.25172102451324463, "learning_rate": 0.001, "loss": 2.0833, "step": 19770 }, { "epoch": 0.8364074794821897, "grad_norm": 0.20284956693649292, "learning_rate": 0.001, "loss": 1.7729, "step": 19771 }, { "epoch": 0.836449784245706, "grad_norm": 0.16717220842838287, "learning_rate": 0.001, "loss": 1.6458, "step": 19772 }, { "epoch": 0.8364920890092225, "grad_norm": 0.18188956379890442, "learning_rate": 0.001, "loss": 2.6662, "step": 19773 }, { "epoch": 0.8365343937727389, "grad_norm": 0.19936378300189972, "learning_rate": 0.001, "loss": 2.1813, "step": 19774 }, { "epoch": 0.8365766985362552, "grad_norm": 0.24121548235416412, "learning_rate": 0.001, "loss": 2.8356, "step": 19775 }, { "epoch": 0.8366190032997716, "grad_norm": 2.9213600158691406, "learning_rate": 0.001, "loss": 2.4832, "step": 19776 }, { "epoch": 0.836661308063288, "grad_norm": 0.16870221495628357, "learning_rate": 0.001, "loss": 2.8586, "step": 19777 }, { "epoch": 0.8367036128268043, "grad_norm": 0.1627093255519867, "learning_rate": 0.001, "loss": 1.8701, "step": 19778 }, { "epoch": 0.8367459175903207, "grad_norm": 0.18144871294498444, "learning_rate": 0.001, "loss": 2.6356, "step": 19779 }, { "epoch": 0.8367882223538371, "grad_norm": 0.20875905454158783, "learning_rate": 0.001, "loss": 2.1233, "step": 19780 }, { "epoch": 0.8368305271173534, "grad_norm": 0.17586883902549744, "learning_rate": 0.001, "loss": 2.2282, "step": 19781 }, { "epoch": 0.8368728318808698, "grad_norm": 0.17279373109340668, "learning_rate": 0.001, "loss": 1.6294, "step": 19782 }, { "epoch": 0.8369151366443861, "grad_norm": 0.19814880192279816, "learning_rate": 0.001, "loss": 1.8659, "step": 19783 }, { "epoch": 0.8369574414079025, "grad_norm": 0.3138713538646698, "learning_rate": 0.001, "loss": 2.3541, "step": 19784 }, { "epoch": 0.8369997461714189, "grad_norm": 0.15233896672725677, "learning_rate": 0.001, "loss": 2.0767, "step": 19785 }, { "epoch": 0.8370420509349352, "grad_norm": 9.646958351135254, "learning_rate": 0.001, "loss": 2.3433, "step": 19786 }, { "epoch": 0.8370843556984516, "grad_norm": 0.3921230733394623, "learning_rate": 0.001, "loss": 1.7355, "step": 19787 }, { "epoch": 0.837126660461968, "grad_norm": 0.17926689982414246, "learning_rate": 0.001, "loss": 2.008, "step": 19788 }, { "epoch": 0.8371689652254843, "grad_norm": 0.17443296313285828, "learning_rate": 0.001, "loss": 2.0443, "step": 19789 }, { "epoch": 0.8372112699890008, "grad_norm": 0.1694057285785675, "learning_rate": 0.001, "loss": 1.7924, "step": 19790 }, { "epoch": 0.8372535747525172, "grad_norm": 0.16491025686264038, "learning_rate": 0.001, "loss": 2.1166, "step": 19791 }, { "epoch": 0.8372958795160335, "grad_norm": 0.14807593822479248, "learning_rate": 0.001, "loss": 1.6911, "step": 19792 }, { "epoch": 0.8373381842795499, "grad_norm": 0.19752711057662964, "learning_rate": 0.001, "loss": 2.2279, "step": 19793 }, { "epoch": 0.8373804890430663, "grad_norm": 0.1440856158733368, "learning_rate": 0.001, "loss": 2.3304, "step": 19794 }, { "epoch": 0.8374227938065826, "grad_norm": 0.2171926349401474, "learning_rate": 0.001, "loss": 2.6035, "step": 19795 }, { "epoch": 0.837465098570099, "grad_norm": 0.1901613473892212, "learning_rate": 0.001, "loss": 2.6214, "step": 19796 }, { "epoch": 0.8375074033336154, "grad_norm": 0.15385551750659943, "learning_rate": 0.001, "loss": 1.7298, "step": 19797 }, { "epoch": 0.8375497080971317, "grad_norm": 1.251185655593872, "learning_rate": 0.001, "loss": 2.613, "step": 19798 }, { "epoch": 0.8375920128606481, "grad_norm": 0.14876984059810638, "learning_rate": 0.001, "loss": 2.2236, "step": 19799 }, { "epoch": 0.8376343176241645, "grad_norm": 0.19195450842380524, "learning_rate": 0.001, "loss": 2.2847, "step": 19800 }, { "epoch": 0.8376766223876808, "grad_norm": 0.14749985933303833, "learning_rate": 0.001, "loss": 1.8272, "step": 19801 }, { "epoch": 0.8377189271511972, "grad_norm": 0.18800973892211914, "learning_rate": 0.001, "loss": 1.738, "step": 19802 }, { "epoch": 0.8377612319147136, "grad_norm": 27.659873962402344, "learning_rate": 0.001, "loss": 2.2451, "step": 19803 }, { "epoch": 0.8378035366782299, "grad_norm": 0.16692310571670532, "learning_rate": 0.001, "loss": 2.4913, "step": 19804 }, { "epoch": 0.8378458414417463, "grad_norm": 0.2250969558954239, "learning_rate": 0.001, "loss": 2.685, "step": 19805 }, { "epoch": 0.8378881462052628, "grad_norm": 0.13324400782585144, "learning_rate": 0.001, "loss": 2.6045, "step": 19806 }, { "epoch": 0.837930450968779, "grad_norm": 2.1039130687713623, "learning_rate": 0.001, "loss": 2.5165, "step": 19807 }, { "epoch": 0.8379727557322955, "grad_norm": 0.14113382995128632, "learning_rate": 0.001, "loss": 1.6369, "step": 19808 }, { "epoch": 0.8380150604958119, "grad_norm": 0.15121187269687653, "learning_rate": 0.001, "loss": 1.6652, "step": 19809 }, { "epoch": 0.8380573652593282, "grad_norm": 0.15335388481616974, "learning_rate": 0.001, "loss": 2.2542, "step": 19810 }, { "epoch": 0.8380996700228446, "grad_norm": 3.9381353855133057, "learning_rate": 0.001, "loss": 2.8601, "step": 19811 }, { "epoch": 0.838141974786361, "grad_norm": 0.7249288558959961, "learning_rate": 0.001, "loss": 2.4747, "step": 19812 }, { "epoch": 0.8381842795498773, "grad_norm": 6.57703971862793, "learning_rate": 0.001, "loss": 1.7587, "step": 19813 }, { "epoch": 0.8382265843133937, "grad_norm": 0.7364828586578369, "learning_rate": 0.001, "loss": 3.9219, "step": 19814 }, { "epoch": 0.8382688890769101, "grad_norm": 0.18288707733154297, "learning_rate": 0.001, "loss": 2.7903, "step": 19815 }, { "epoch": 0.8383111938404264, "grad_norm": 0.1727524846792221, "learning_rate": 0.001, "loss": 1.9508, "step": 19816 }, { "epoch": 0.8383534986039428, "grad_norm": 0.1617797166109085, "learning_rate": 0.001, "loss": 3.1475, "step": 19817 }, { "epoch": 0.8383958033674592, "grad_norm": 0.16539444029331207, "learning_rate": 0.001, "loss": 2.1155, "step": 19818 }, { "epoch": 0.8384381081309755, "grad_norm": 4.960668563842773, "learning_rate": 0.001, "loss": 2.3467, "step": 19819 }, { "epoch": 0.8384804128944919, "grad_norm": 0.20643018186092377, "learning_rate": 0.001, "loss": 2.0452, "step": 19820 }, { "epoch": 0.8385227176580083, "grad_norm": 2.9467222690582275, "learning_rate": 0.001, "loss": 2.9591, "step": 19821 }, { "epoch": 0.8385650224215246, "grad_norm": 0.13099977374076843, "learning_rate": 0.001, "loss": 1.5804, "step": 19822 }, { "epoch": 0.838607327185041, "grad_norm": 0.15258918702602386, "learning_rate": 0.001, "loss": 2.4391, "step": 19823 }, { "epoch": 0.8386496319485575, "grad_norm": 0.18933935463428497, "learning_rate": 0.001, "loss": 2.2122, "step": 19824 }, { "epoch": 0.8386919367120738, "grad_norm": 0.14285174012184143, "learning_rate": 0.001, "loss": 2.3817, "step": 19825 }, { "epoch": 0.8387342414755902, "grad_norm": 1.277612328529358, "learning_rate": 0.001, "loss": 2.2287, "step": 19826 }, { "epoch": 0.8387765462391065, "grad_norm": 0.12771695852279663, "learning_rate": 0.001, "loss": 1.9689, "step": 19827 }, { "epoch": 0.8388188510026229, "grad_norm": 0.16007982194423676, "learning_rate": 0.001, "loss": 2.2604, "step": 19828 }, { "epoch": 0.8388611557661393, "grad_norm": 0.625321090221405, "learning_rate": 0.001, "loss": 1.6921, "step": 19829 }, { "epoch": 0.8389034605296556, "grad_norm": 0.44362181425094604, "learning_rate": 0.001, "loss": 2.2605, "step": 19830 }, { "epoch": 0.838945765293172, "grad_norm": 0.16147875785827637, "learning_rate": 0.001, "loss": 1.9878, "step": 19831 }, { "epoch": 0.8389880700566884, "grad_norm": 0.37815141677856445, "learning_rate": 0.001, "loss": 2.8435, "step": 19832 }, { "epoch": 0.8390303748202047, "grad_norm": 0.15180058777332306, "learning_rate": 0.001, "loss": 1.7832, "step": 19833 }, { "epoch": 0.8390726795837211, "grad_norm": 0.16070739924907684, "learning_rate": 0.001, "loss": 2.4271, "step": 19834 }, { "epoch": 0.8391149843472375, "grad_norm": 2.378019332885742, "learning_rate": 0.001, "loss": 2.1971, "step": 19835 }, { "epoch": 0.8391572891107538, "grad_norm": 0.13618861138820648, "learning_rate": 0.001, "loss": 2.482, "step": 19836 }, { "epoch": 0.8391995938742702, "grad_norm": 0.1882878690958023, "learning_rate": 0.001, "loss": 2.0153, "step": 19837 }, { "epoch": 0.8392418986377866, "grad_norm": 0.24582351744174957, "learning_rate": 0.001, "loss": 2.3494, "step": 19838 }, { "epoch": 0.8392842034013029, "grad_norm": 0.22917570173740387, "learning_rate": 0.001, "loss": 2.0407, "step": 19839 }, { "epoch": 0.8393265081648194, "grad_norm": 0.15284200012683868, "learning_rate": 0.001, "loss": 2.142, "step": 19840 }, { "epoch": 0.8393688129283358, "grad_norm": 0.15016065537929535, "learning_rate": 0.001, "loss": 1.9708, "step": 19841 }, { "epoch": 0.8394111176918521, "grad_norm": 0.17199917137622833, "learning_rate": 0.001, "loss": 2.0482, "step": 19842 }, { "epoch": 0.8394534224553685, "grad_norm": 2.0709526538848877, "learning_rate": 0.001, "loss": 2.1923, "step": 19843 }, { "epoch": 0.8394957272188849, "grad_norm": 0.14755693078041077, "learning_rate": 0.001, "loss": 2.07, "step": 19844 }, { "epoch": 0.8395380319824012, "grad_norm": 0.45920902490615845, "learning_rate": 0.001, "loss": 3.1264, "step": 19845 }, { "epoch": 0.8395803367459176, "grad_norm": 0.1509585976600647, "learning_rate": 0.001, "loss": 1.4513, "step": 19846 }, { "epoch": 0.839622641509434, "grad_norm": 0.15342389047145844, "learning_rate": 0.001, "loss": 2.4979, "step": 19847 }, { "epoch": 0.8396649462729503, "grad_norm": 0.16700981557369232, "learning_rate": 0.001, "loss": 1.6099, "step": 19848 }, { "epoch": 0.8397072510364667, "grad_norm": 0.200181245803833, "learning_rate": 0.001, "loss": 1.9247, "step": 19849 }, { "epoch": 0.8397495557999831, "grad_norm": 0.13869711756706238, "learning_rate": 0.001, "loss": 1.6663, "step": 19850 }, { "epoch": 0.8397918605634994, "grad_norm": 0.19390735030174255, "learning_rate": 0.001, "loss": 2.6041, "step": 19851 }, { "epoch": 0.8398341653270158, "grad_norm": 0.15957386791706085, "learning_rate": 0.001, "loss": 1.6804, "step": 19852 }, { "epoch": 0.8398764700905322, "grad_norm": 0.1435195654630661, "learning_rate": 0.001, "loss": 2.222, "step": 19853 }, { "epoch": 0.8399187748540485, "grad_norm": 0.4569486379623413, "learning_rate": 0.001, "loss": 1.7374, "step": 19854 }, { "epoch": 0.8399610796175649, "grad_norm": 0.6527416706085205, "learning_rate": 0.001, "loss": 1.8554, "step": 19855 }, { "epoch": 0.8400033843810814, "grad_norm": 0.36150479316711426, "learning_rate": 0.001, "loss": 2.1568, "step": 19856 }, { "epoch": 0.8400456891445977, "grad_norm": 0.16183467209339142, "learning_rate": 0.001, "loss": 2.5408, "step": 19857 }, { "epoch": 0.8400879939081141, "grad_norm": 0.14693614840507507, "learning_rate": 0.001, "loss": 1.7957, "step": 19858 }, { "epoch": 0.8401302986716305, "grad_norm": 0.16061703860759735, "learning_rate": 0.001, "loss": 1.9237, "step": 19859 }, { "epoch": 0.8401726034351468, "grad_norm": 0.1884782463312149, "learning_rate": 0.001, "loss": 1.936, "step": 19860 }, { "epoch": 0.8402149081986632, "grad_norm": 0.15317420661449432, "learning_rate": 0.001, "loss": 1.857, "step": 19861 }, { "epoch": 0.8402572129621796, "grad_norm": 0.17697061598300934, "learning_rate": 0.001, "loss": 2.4018, "step": 19862 }, { "epoch": 0.8402995177256959, "grad_norm": 0.14646954834461212, "learning_rate": 0.001, "loss": 1.6583, "step": 19863 }, { "epoch": 0.8403418224892123, "grad_norm": 0.1281052678823471, "learning_rate": 0.001, "loss": 3.3, "step": 19864 }, { "epoch": 0.8403841272527287, "grad_norm": 0.12583167850971222, "learning_rate": 0.001, "loss": 2.1985, "step": 19865 }, { "epoch": 0.840426432016245, "grad_norm": 0.16362233459949493, "learning_rate": 0.001, "loss": 1.9579, "step": 19866 }, { "epoch": 0.8404687367797614, "grad_norm": 14.114072799682617, "learning_rate": 0.001, "loss": 2.0031, "step": 19867 }, { "epoch": 0.8405110415432778, "grad_norm": 1.4896159172058105, "learning_rate": 0.001, "loss": 2.8004, "step": 19868 }, { "epoch": 0.8405533463067941, "grad_norm": 0.15967680513858795, "learning_rate": 0.001, "loss": 2.0756, "step": 19869 }, { "epoch": 0.8405956510703105, "grad_norm": 0.1979799121618271, "learning_rate": 0.001, "loss": 2.3233, "step": 19870 }, { "epoch": 0.8406379558338269, "grad_norm": 0.3304239809513092, "learning_rate": 0.001, "loss": 2.0972, "step": 19871 }, { "epoch": 0.8406802605973432, "grad_norm": 0.13525773584842682, "learning_rate": 0.001, "loss": 1.4994, "step": 19872 }, { "epoch": 0.8407225653608597, "grad_norm": 1.1187829971313477, "learning_rate": 0.001, "loss": 2.6048, "step": 19873 }, { "epoch": 0.840764870124376, "grad_norm": 1.9175190925598145, "learning_rate": 0.001, "loss": 2.1303, "step": 19874 }, { "epoch": 0.8408071748878924, "grad_norm": 0.17562279105186462, "learning_rate": 0.001, "loss": 2.3723, "step": 19875 }, { "epoch": 0.8408494796514088, "grad_norm": 0.14081628620624542, "learning_rate": 0.001, "loss": 2.3064, "step": 19876 }, { "epoch": 0.8408917844149251, "grad_norm": 0.4894697368144989, "learning_rate": 0.001, "loss": 2.8948, "step": 19877 }, { "epoch": 0.8409340891784415, "grad_norm": 0.19866247475147247, "learning_rate": 0.001, "loss": 2.9252, "step": 19878 }, { "epoch": 0.8409763939419579, "grad_norm": 0.4068894684314728, "learning_rate": 0.001, "loss": 2.2374, "step": 19879 }, { "epoch": 0.8410186987054742, "grad_norm": 0.1639883816242218, "learning_rate": 0.001, "loss": 2.0532, "step": 19880 }, { "epoch": 0.8410610034689906, "grad_norm": 0.3562491536140442, "learning_rate": 0.001, "loss": 4.0336, "step": 19881 }, { "epoch": 0.841103308232507, "grad_norm": 0.13724088668823242, "learning_rate": 0.001, "loss": 1.8362, "step": 19882 }, { "epoch": 0.8411456129960233, "grad_norm": 0.13581812381744385, "learning_rate": 0.001, "loss": 1.3958, "step": 19883 }, { "epoch": 0.8411879177595397, "grad_norm": 0.17569448053836823, "learning_rate": 0.001, "loss": 2.8263, "step": 19884 }, { "epoch": 0.8412302225230561, "grad_norm": 0.14061707258224487, "learning_rate": 0.001, "loss": 2.2112, "step": 19885 }, { "epoch": 0.8412725272865724, "grad_norm": 0.9066682457923889, "learning_rate": 0.001, "loss": 2.0064, "step": 19886 }, { "epoch": 0.8413148320500888, "grad_norm": 0.13263536989688873, "learning_rate": 0.001, "loss": 2.3227, "step": 19887 }, { "epoch": 0.8413571368136052, "grad_norm": 0.1647445261478424, "learning_rate": 0.001, "loss": 3.9107, "step": 19888 }, { "epoch": 0.8413994415771215, "grad_norm": 0.3302064538002014, "learning_rate": 0.001, "loss": 2.5003, "step": 19889 }, { "epoch": 0.841441746340638, "grad_norm": 0.14488548040390015, "learning_rate": 0.001, "loss": 2.0212, "step": 19890 }, { "epoch": 0.8414840511041544, "grad_norm": 0.1621946543455124, "learning_rate": 0.001, "loss": 2.5805, "step": 19891 }, { "epoch": 0.8415263558676707, "grad_norm": 0.13542355597019196, "learning_rate": 0.001, "loss": 1.8591, "step": 19892 }, { "epoch": 0.8415686606311871, "grad_norm": 0.13882328569889069, "learning_rate": 0.001, "loss": 1.6565, "step": 19893 }, { "epoch": 0.8416109653947035, "grad_norm": 0.18400336802005768, "learning_rate": 0.001, "loss": 2.3863, "step": 19894 }, { "epoch": 0.8416532701582198, "grad_norm": 0.14745768904685974, "learning_rate": 0.001, "loss": 1.6539, "step": 19895 }, { "epoch": 0.8416955749217362, "grad_norm": 0.5481185913085938, "learning_rate": 0.001, "loss": 2.4827, "step": 19896 }, { "epoch": 0.8417378796852526, "grad_norm": 0.18567869067192078, "learning_rate": 0.001, "loss": 1.7485, "step": 19897 }, { "epoch": 0.8417801844487689, "grad_norm": 40.28692626953125, "learning_rate": 0.001, "loss": 2.2203, "step": 19898 }, { "epoch": 0.8418224892122853, "grad_norm": 0.17718671262264252, "learning_rate": 0.001, "loss": 2.2116, "step": 19899 }, { "epoch": 0.8418647939758017, "grad_norm": 0.20770712196826935, "learning_rate": 0.001, "loss": 1.6705, "step": 19900 }, { "epoch": 0.841907098739318, "grad_norm": 0.33446425199508667, "learning_rate": 0.001, "loss": 2.2231, "step": 19901 }, { "epoch": 0.8419494035028344, "grad_norm": 0.15271897614002228, "learning_rate": 0.001, "loss": 2.0796, "step": 19902 }, { "epoch": 0.8419917082663508, "grad_norm": 0.3776164650917053, "learning_rate": 0.001, "loss": 2.7789, "step": 19903 }, { "epoch": 0.8420340130298671, "grad_norm": 0.18110236525535583, "learning_rate": 0.001, "loss": 3.1009, "step": 19904 }, { "epoch": 0.8420763177933835, "grad_norm": 0.14142876863479614, "learning_rate": 0.001, "loss": 1.5932, "step": 19905 }, { "epoch": 0.8421186225569, "grad_norm": 0.1594587117433548, "learning_rate": 0.001, "loss": 2.9, "step": 19906 }, { "epoch": 0.8421609273204163, "grad_norm": 0.17610591650009155, "learning_rate": 0.001, "loss": 2.0221, "step": 19907 }, { "epoch": 0.8422032320839327, "grad_norm": 0.1743817925453186, "learning_rate": 0.001, "loss": 2.0761, "step": 19908 }, { "epoch": 0.8422455368474491, "grad_norm": 0.3120170831680298, "learning_rate": 0.001, "loss": 3.1248, "step": 19909 }, { "epoch": 0.8422878416109654, "grad_norm": 0.14950038492679596, "learning_rate": 0.001, "loss": 3.4022, "step": 19910 }, { "epoch": 0.8423301463744818, "grad_norm": 0.17454718053340912, "learning_rate": 0.001, "loss": 1.8271, "step": 19911 }, { "epoch": 0.8423724511379982, "grad_norm": 0.18811197578907013, "learning_rate": 0.001, "loss": 2.814, "step": 19912 }, { "epoch": 0.8424147559015145, "grad_norm": 0.13598325848579407, "learning_rate": 0.001, "loss": 2.043, "step": 19913 }, { "epoch": 0.8424570606650309, "grad_norm": 0.14503377676010132, "learning_rate": 0.001, "loss": 1.8979, "step": 19914 }, { "epoch": 0.8424993654285473, "grad_norm": 0.16746678948402405, "learning_rate": 0.001, "loss": 1.9977, "step": 19915 }, { "epoch": 0.8425416701920636, "grad_norm": 8.220918655395508, "learning_rate": 0.001, "loss": 2.3796, "step": 19916 }, { "epoch": 0.84258397495558, "grad_norm": 0.20891021192073822, "learning_rate": 0.001, "loss": 2.4267, "step": 19917 }, { "epoch": 0.8426262797190963, "grad_norm": 0.15309026837348938, "learning_rate": 0.001, "loss": 2.0111, "step": 19918 }, { "epoch": 0.8426685844826127, "grad_norm": 1.157206654548645, "learning_rate": 0.001, "loss": 3.1127, "step": 19919 }, { "epoch": 0.8427108892461291, "grad_norm": 0.1353883147239685, "learning_rate": 0.001, "loss": 1.5835, "step": 19920 }, { "epoch": 0.8427531940096454, "grad_norm": 0.16961653530597687, "learning_rate": 0.001, "loss": 1.4123, "step": 19921 }, { "epoch": 0.8427954987731618, "grad_norm": 0.15629060566425323, "learning_rate": 0.001, "loss": 1.8995, "step": 19922 }, { "epoch": 0.8428378035366783, "grad_norm": 14.496636390686035, "learning_rate": 0.001, "loss": 1.9417, "step": 19923 }, { "epoch": 0.8428801083001946, "grad_norm": 0.16082890331745148, "learning_rate": 0.001, "loss": 1.7762, "step": 19924 }, { "epoch": 0.842922413063711, "grad_norm": 0.4194250702857971, "learning_rate": 0.001, "loss": 1.7252, "step": 19925 }, { "epoch": 0.8429647178272274, "grad_norm": 0.25590166449546814, "learning_rate": 0.001, "loss": 2.1403, "step": 19926 }, { "epoch": 0.8430070225907437, "grad_norm": 0.6089497804641724, "learning_rate": 0.001, "loss": 2.328, "step": 19927 }, { "epoch": 0.8430493273542601, "grad_norm": 0.17226269841194153, "learning_rate": 0.001, "loss": 1.7442, "step": 19928 }, { "epoch": 0.8430916321177765, "grad_norm": 12.525866508483887, "learning_rate": 0.001, "loss": 2.9423, "step": 19929 }, { "epoch": 0.8431339368812928, "grad_norm": 0.15503224730491638, "learning_rate": 0.001, "loss": 2.119, "step": 19930 }, { "epoch": 0.8431762416448092, "grad_norm": 0.5280601978302002, "learning_rate": 0.001, "loss": 2.2592, "step": 19931 }, { "epoch": 0.8432185464083256, "grad_norm": 0.19108590483665466, "learning_rate": 0.001, "loss": 1.7601, "step": 19932 }, { "epoch": 0.8432608511718419, "grad_norm": 0.2428058534860611, "learning_rate": 0.001, "loss": 3.0192, "step": 19933 }, { "epoch": 0.8433031559353583, "grad_norm": 0.16223500669002533, "learning_rate": 0.001, "loss": 2.3037, "step": 19934 }, { "epoch": 0.8433454606988747, "grad_norm": 5.290875434875488, "learning_rate": 0.001, "loss": 2.3735, "step": 19935 }, { "epoch": 0.843387765462391, "grad_norm": 0.139143168926239, "learning_rate": 0.001, "loss": 1.7609, "step": 19936 }, { "epoch": 0.8434300702259074, "grad_norm": 0.2556076645851135, "learning_rate": 0.001, "loss": 2.5694, "step": 19937 }, { "epoch": 0.8434723749894238, "grad_norm": 0.17417319118976593, "learning_rate": 0.001, "loss": 2.2479, "step": 19938 }, { "epoch": 0.8435146797529401, "grad_norm": 0.15048249065876007, "learning_rate": 0.001, "loss": 2.2647, "step": 19939 }, { "epoch": 0.8435569845164566, "grad_norm": 0.32785776257514954, "learning_rate": 0.001, "loss": 2.3088, "step": 19940 }, { "epoch": 0.843599289279973, "grad_norm": 0.16994161903858185, "learning_rate": 0.001, "loss": 2.9571, "step": 19941 }, { "epoch": 0.8436415940434893, "grad_norm": 0.16292929649353027, "learning_rate": 0.001, "loss": 2.268, "step": 19942 }, { "epoch": 0.8436838988070057, "grad_norm": 0.1685224175453186, "learning_rate": 0.001, "loss": 3.1586, "step": 19943 }, { "epoch": 0.8437262035705221, "grad_norm": 0.2571621239185333, "learning_rate": 0.001, "loss": 2.2422, "step": 19944 }, { "epoch": 0.8437685083340384, "grad_norm": 0.1383311152458191, "learning_rate": 0.001, "loss": 1.6768, "step": 19945 }, { "epoch": 0.8438108130975548, "grad_norm": 0.1316850483417511, "learning_rate": 0.001, "loss": 1.3501, "step": 19946 }, { "epoch": 0.8438531178610712, "grad_norm": 0.1473502814769745, "learning_rate": 0.001, "loss": 2.4567, "step": 19947 }, { "epoch": 0.8438954226245875, "grad_norm": 0.2394011914730072, "learning_rate": 0.001, "loss": 1.5157, "step": 19948 }, { "epoch": 0.8439377273881039, "grad_norm": 0.21005521714687347, "learning_rate": 0.001, "loss": 3.4788, "step": 19949 }, { "epoch": 0.8439800321516203, "grad_norm": 0.22676526010036469, "learning_rate": 0.001, "loss": 1.5542, "step": 19950 }, { "epoch": 0.8440223369151366, "grad_norm": 0.19313664734363556, "learning_rate": 0.001, "loss": 2.9371, "step": 19951 }, { "epoch": 0.844064641678653, "grad_norm": 0.18418525159358978, "learning_rate": 0.001, "loss": 3.3624, "step": 19952 }, { "epoch": 0.8441069464421694, "grad_norm": 0.15955480933189392, "learning_rate": 0.001, "loss": 2.4359, "step": 19953 }, { "epoch": 0.8441492512056857, "grad_norm": 0.15745678544044495, "learning_rate": 0.001, "loss": 2.4083, "step": 19954 }, { "epoch": 0.8441915559692021, "grad_norm": 0.1493915468454361, "learning_rate": 0.001, "loss": 2.0375, "step": 19955 }, { "epoch": 0.8442338607327186, "grad_norm": 0.13950100541114807, "learning_rate": 0.001, "loss": 2.7912, "step": 19956 }, { "epoch": 0.8442761654962349, "grad_norm": 0.47593954205513, "learning_rate": 0.001, "loss": 1.4144, "step": 19957 }, { "epoch": 0.8443184702597513, "grad_norm": 0.17434100806713104, "learning_rate": 0.001, "loss": 2.265, "step": 19958 }, { "epoch": 0.8443607750232677, "grad_norm": 0.12682586908340454, "learning_rate": 0.001, "loss": 2.5613, "step": 19959 }, { "epoch": 0.844403079786784, "grad_norm": 1.8817963600158691, "learning_rate": 0.001, "loss": 1.676, "step": 19960 }, { "epoch": 0.8444453845503004, "grad_norm": 0.19121527671813965, "learning_rate": 0.001, "loss": 1.512, "step": 19961 }, { "epoch": 0.8444876893138167, "grad_norm": 0.1444321870803833, "learning_rate": 0.001, "loss": 1.92, "step": 19962 }, { "epoch": 0.8445299940773331, "grad_norm": 0.16925615072250366, "learning_rate": 0.001, "loss": 1.6687, "step": 19963 }, { "epoch": 0.8445722988408495, "grad_norm": 4.560879707336426, "learning_rate": 0.001, "loss": 2.2629, "step": 19964 }, { "epoch": 0.8446146036043658, "grad_norm": 0.13227243721485138, "learning_rate": 0.001, "loss": 2.5459, "step": 19965 }, { "epoch": 0.8446569083678822, "grad_norm": 0.15780355036258698, "learning_rate": 0.001, "loss": 2.7421, "step": 19966 }, { "epoch": 0.8446992131313986, "grad_norm": 0.1389780193567276, "learning_rate": 0.001, "loss": 2.5234, "step": 19967 }, { "epoch": 0.8447415178949149, "grad_norm": 0.15538786351680756, "learning_rate": 0.001, "loss": 2.0989, "step": 19968 }, { "epoch": 0.8447838226584313, "grad_norm": 9.396347999572754, "learning_rate": 0.001, "loss": 1.7852, "step": 19969 }, { "epoch": 0.8448261274219477, "grad_norm": 0.2267782837152481, "learning_rate": 0.001, "loss": 1.8979, "step": 19970 }, { "epoch": 0.844868432185464, "grad_norm": 0.1871020346879959, "learning_rate": 0.001, "loss": 3.3206, "step": 19971 }, { "epoch": 0.8449107369489804, "grad_norm": 0.17954017221927643, "learning_rate": 0.001, "loss": 1.9435, "step": 19972 }, { "epoch": 0.8449530417124969, "grad_norm": 0.146229088306427, "learning_rate": 0.001, "loss": 1.5802, "step": 19973 }, { "epoch": 0.8449953464760132, "grad_norm": 0.1864139437675476, "learning_rate": 0.001, "loss": 2.0258, "step": 19974 }, { "epoch": 0.8450376512395296, "grad_norm": 0.1569264978170395, "learning_rate": 0.001, "loss": 1.9869, "step": 19975 }, { "epoch": 0.845079956003046, "grad_norm": 0.15957815945148468, "learning_rate": 0.001, "loss": 2.4842, "step": 19976 }, { "epoch": 0.8451222607665623, "grad_norm": 0.16529959440231323, "learning_rate": 0.001, "loss": 1.9408, "step": 19977 }, { "epoch": 0.8451645655300787, "grad_norm": 0.1697777956724167, "learning_rate": 0.001, "loss": 2.1509, "step": 19978 }, { "epoch": 0.8452068702935951, "grad_norm": 1.7855709791183472, "learning_rate": 0.001, "loss": 3.2382, "step": 19979 }, { "epoch": 0.8452491750571114, "grad_norm": 0.17001429200172424, "learning_rate": 0.001, "loss": 2.8689, "step": 19980 }, { "epoch": 0.8452914798206278, "grad_norm": 0.17882667481899261, "learning_rate": 0.001, "loss": 1.466, "step": 19981 }, { "epoch": 0.8453337845841442, "grad_norm": 3.9639627933502197, "learning_rate": 0.001, "loss": 1.8731, "step": 19982 }, { "epoch": 0.8453760893476605, "grad_norm": 0.14277885854244232, "learning_rate": 0.001, "loss": 1.8745, "step": 19983 }, { "epoch": 0.8454183941111769, "grad_norm": 0.17287899553775787, "learning_rate": 0.001, "loss": 2.4062, "step": 19984 }, { "epoch": 0.8454606988746933, "grad_norm": 0.1661319136619568, "learning_rate": 0.001, "loss": 1.7102, "step": 19985 }, { "epoch": 0.8455030036382096, "grad_norm": 0.1925491839647293, "learning_rate": 0.001, "loss": 2.2246, "step": 19986 }, { "epoch": 0.845545308401726, "grad_norm": 0.15277276933193207, "learning_rate": 0.001, "loss": 2.5972, "step": 19987 }, { "epoch": 0.8455876131652424, "grad_norm": 0.1781436949968338, "learning_rate": 0.001, "loss": 3.143, "step": 19988 }, { "epoch": 0.8456299179287587, "grad_norm": 0.15217041969299316, "learning_rate": 0.001, "loss": 2.5483, "step": 19989 }, { "epoch": 0.8456722226922752, "grad_norm": 0.15171130001544952, "learning_rate": 0.001, "loss": 1.8737, "step": 19990 }, { "epoch": 0.8457145274557916, "grad_norm": 0.15747137367725372, "learning_rate": 0.001, "loss": 1.7848, "step": 19991 }, { "epoch": 0.8457568322193079, "grad_norm": 0.22473227977752686, "learning_rate": 0.001, "loss": 2.192, "step": 19992 }, { "epoch": 0.8457991369828243, "grad_norm": 0.19137416779994965, "learning_rate": 0.001, "loss": 4.3493, "step": 19993 }, { "epoch": 0.8458414417463407, "grad_norm": 0.17920900881290436, "learning_rate": 0.001, "loss": 2.6782, "step": 19994 }, { "epoch": 0.845883746509857, "grad_norm": 0.16247224807739258, "learning_rate": 0.001, "loss": 2.0712, "step": 19995 }, { "epoch": 0.8459260512733734, "grad_norm": 2.3473522663116455, "learning_rate": 0.001, "loss": 2.1859, "step": 19996 }, { "epoch": 0.8459683560368898, "grad_norm": 0.1629837155342102, "learning_rate": 0.001, "loss": 2.4058, "step": 19997 }, { "epoch": 0.8460106608004061, "grad_norm": 0.1541110724210739, "learning_rate": 0.001, "loss": 2.6553, "step": 19998 }, { "epoch": 0.8460529655639225, "grad_norm": 0.12468776851892471, "learning_rate": 0.001, "loss": 2.0798, "step": 19999 }, { "epoch": 0.8460952703274389, "grad_norm": 0.13102620840072632, "learning_rate": 0.001, "loss": 3.1871, "step": 20000 }, { "epoch": 0.8461375750909552, "grad_norm": 0.16048361361026764, "learning_rate": 0.001, "loss": 1.6277, "step": 20001 }, { "epoch": 0.8461798798544716, "grad_norm": 0.12173058837652206, "learning_rate": 0.001, "loss": 1.9958, "step": 20002 }, { "epoch": 0.846222184617988, "grad_norm": 0.1663282960653305, "learning_rate": 0.001, "loss": 1.9365, "step": 20003 }, { "epoch": 0.8462644893815043, "grad_norm": 0.2031364142894745, "learning_rate": 0.001, "loss": 2.5175, "step": 20004 }, { "epoch": 0.8463067941450207, "grad_norm": 0.17239537835121155, "learning_rate": 0.001, "loss": 1.9938, "step": 20005 }, { "epoch": 0.8463490989085372, "grad_norm": 0.14862409234046936, "learning_rate": 0.001, "loss": 1.9449, "step": 20006 }, { "epoch": 0.8463914036720535, "grad_norm": 0.18297027051448822, "learning_rate": 0.001, "loss": 2.2749, "step": 20007 }, { "epoch": 0.8464337084355699, "grad_norm": 0.16779938340187073, "learning_rate": 0.001, "loss": 1.6555, "step": 20008 }, { "epoch": 0.8464760131990862, "grad_norm": 0.21444658935070038, "learning_rate": 0.001, "loss": 2.8099, "step": 20009 }, { "epoch": 0.8465183179626026, "grad_norm": 0.13690084218978882, "learning_rate": 0.001, "loss": 3.1176, "step": 20010 }, { "epoch": 0.846560622726119, "grad_norm": 0.235006645321846, "learning_rate": 0.001, "loss": 2.1184, "step": 20011 }, { "epoch": 0.8466029274896353, "grad_norm": 0.1450556516647339, "learning_rate": 0.001, "loss": 1.8434, "step": 20012 }, { "epoch": 0.8466452322531517, "grad_norm": 0.14639632403850555, "learning_rate": 0.001, "loss": 1.8493, "step": 20013 }, { "epoch": 0.8466875370166681, "grad_norm": 0.12522344291210175, "learning_rate": 0.001, "loss": 1.5215, "step": 20014 }, { "epoch": 0.8467298417801844, "grad_norm": 0.19824448227882385, "learning_rate": 0.001, "loss": 3.5594, "step": 20015 }, { "epoch": 0.8467721465437008, "grad_norm": 0.13851766288280487, "learning_rate": 0.001, "loss": 2.2443, "step": 20016 }, { "epoch": 0.8468144513072172, "grad_norm": 0.13753733038902283, "learning_rate": 0.001, "loss": 2.3559, "step": 20017 }, { "epoch": 0.8468567560707335, "grad_norm": 0.15897375345230103, "learning_rate": 0.001, "loss": 1.8587, "step": 20018 }, { "epoch": 0.8468990608342499, "grad_norm": 0.13712716102600098, "learning_rate": 0.001, "loss": 2.0603, "step": 20019 }, { "epoch": 0.8469413655977663, "grad_norm": 0.1391967236995697, "learning_rate": 0.001, "loss": 2.0169, "step": 20020 }, { "epoch": 0.8469836703612826, "grad_norm": 0.1359555423259735, "learning_rate": 0.001, "loss": 2.2155, "step": 20021 }, { "epoch": 0.847025975124799, "grad_norm": 0.7110766768455505, "learning_rate": 0.001, "loss": 3.0314, "step": 20022 }, { "epoch": 0.8470682798883155, "grad_norm": 0.1405889242887497, "learning_rate": 0.001, "loss": 2.1806, "step": 20023 }, { "epoch": 0.8471105846518318, "grad_norm": 0.14044038951396942, "learning_rate": 0.001, "loss": 1.9486, "step": 20024 }, { "epoch": 0.8471528894153482, "grad_norm": 0.122348852455616, "learning_rate": 0.001, "loss": 1.5374, "step": 20025 }, { "epoch": 0.8471951941788646, "grad_norm": 0.14094999432563782, "learning_rate": 0.001, "loss": 1.7621, "step": 20026 }, { "epoch": 0.8472374989423809, "grad_norm": 0.6236584782600403, "learning_rate": 0.001, "loss": 1.964, "step": 20027 }, { "epoch": 0.8472798037058973, "grad_norm": 0.16254521906375885, "learning_rate": 0.001, "loss": 2.0317, "step": 20028 }, { "epoch": 0.8473221084694137, "grad_norm": 0.16386757791042328, "learning_rate": 0.001, "loss": 2.4891, "step": 20029 }, { "epoch": 0.84736441323293, "grad_norm": 0.26580187678337097, "learning_rate": 0.001, "loss": 1.803, "step": 20030 }, { "epoch": 0.8474067179964464, "grad_norm": 0.13852697610855103, "learning_rate": 0.001, "loss": 1.9764, "step": 20031 }, { "epoch": 0.8474490227599628, "grad_norm": 0.13823279738426208, "learning_rate": 0.001, "loss": 1.6498, "step": 20032 }, { "epoch": 0.8474913275234791, "grad_norm": 0.15287473797798157, "learning_rate": 0.001, "loss": 2.5265, "step": 20033 }, { "epoch": 0.8475336322869955, "grad_norm": 0.661291241645813, "learning_rate": 0.001, "loss": 2.3638, "step": 20034 }, { "epoch": 0.8475759370505119, "grad_norm": 0.16113339364528656, "learning_rate": 0.001, "loss": 2.0843, "step": 20035 }, { "epoch": 0.8476182418140282, "grad_norm": 0.1465645283460617, "learning_rate": 0.001, "loss": 2.0648, "step": 20036 }, { "epoch": 0.8476605465775446, "grad_norm": 1.3468478918075562, "learning_rate": 0.001, "loss": 2.1217, "step": 20037 }, { "epoch": 0.847702851341061, "grad_norm": 1.280560851097107, "learning_rate": 0.001, "loss": 2.0666, "step": 20038 }, { "epoch": 0.8477451561045773, "grad_norm": 0.17766667902469635, "learning_rate": 0.001, "loss": 2.116, "step": 20039 }, { "epoch": 0.8477874608680938, "grad_norm": 0.20155932009220123, "learning_rate": 0.001, "loss": 1.9426, "step": 20040 }, { "epoch": 0.8478297656316102, "grad_norm": 0.2062946856021881, "learning_rate": 0.001, "loss": 2.7091, "step": 20041 }, { "epoch": 0.8478720703951265, "grad_norm": 0.1285688877105713, "learning_rate": 0.001, "loss": 2.7954, "step": 20042 }, { "epoch": 0.8479143751586429, "grad_norm": 2.014345407485962, "learning_rate": 0.001, "loss": 2.2796, "step": 20043 }, { "epoch": 0.8479566799221593, "grad_norm": 6.951234817504883, "learning_rate": 0.001, "loss": 3.4023, "step": 20044 }, { "epoch": 0.8479989846856756, "grad_norm": 0.16423997282981873, "learning_rate": 0.001, "loss": 2.1239, "step": 20045 }, { "epoch": 0.848041289449192, "grad_norm": 0.14751717448234558, "learning_rate": 0.001, "loss": 2.3457, "step": 20046 }, { "epoch": 0.8480835942127084, "grad_norm": 0.1324821263551712, "learning_rate": 0.001, "loss": 1.9863, "step": 20047 }, { "epoch": 0.8481258989762247, "grad_norm": 0.2402341365814209, "learning_rate": 0.001, "loss": 1.8448, "step": 20048 }, { "epoch": 0.8481682037397411, "grad_norm": 7.083425045013428, "learning_rate": 0.001, "loss": 2.5456, "step": 20049 }, { "epoch": 0.8482105085032575, "grad_norm": 0.1683296114206314, "learning_rate": 0.001, "loss": 2.432, "step": 20050 }, { "epoch": 0.8482528132667738, "grad_norm": 0.18871085345745087, "learning_rate": 0.001, "loss": 1.9026, "step": 20051 }, { "epoch": 0.8482951180302902, "grad_norm": 0.13885819911956787, "learning_rate": 0.001, "loss": 1.7946, "step": 20052 }, { "epoch": 0.8483374227938065, "grad_norm": 0.13285697996616364, "learning_rate": 0.001, "loss": 1.395, "step": 20053 }, { "epoch": 0.8483797275573229, "grad_norm": 0.1700686663389206, "learning_rate": 0.001, "loss": 2.1973, "step": 20054 }, { "epoch": 0.8484220323208393, "grad_norm": 0.20374974608421326, "learning_rate": 0.001, "loss": 2.6799, "step": 20055 }, { "epoch": 0.8484643370843556, "grad_norm": 0.2053978443145752, "learning_rate": 0.001, "loss": 1.7101, "step": 20056 }, { "epoch": 0.8485066418478721, "grad_norm": 0.16068226099014282, "learning_rate": 0.001, "loss": 2.1583, "step": 20057 }, { "epoch": 0.8485489466113885, "grad_norm": 0.1553301364183426, "learning_rate": 0.001, "loss": 2.122, "step": 20058 }, { "epoch": 0.8485912513749048, "grad_norm": 0.16317340731620789, "learning_rate": 0.001, "loss": 1.9723, "step": 20059 }, { "epoch": 0.8486335561384212, "grad_norm": 0.6233834028244019, "learning_rate": 0.001, "loss": 1.8776, "step": 20060 }, { "epoch": 0.8486758609019376, "grad_norm": 0.1380307525396347, "learning_rate": 0.001, "loss": 2.1375, "step": 20061 }, { "epoch": 0.8487181656654539, "grad_norm": 0.17783686518669128, "learning_rate": 0.001, "loss": 2.2367, "step": 20062 }, { "epoch": 0.8487604704289703, "grad_norm": 0.14636710286140442, "learning_rate": 0.001, "loss": 1.6968, "step": 20063 }, { "epoch": 0.8488027751924867, "grad_norm": 0.2252545803785324, "learning_rate": 0.001, "loss": 2.1908, "step": 20064 }, { "epoch": 0.848845079956003, "grad_norm": 0.129207044839859, "learning_rate": 0.001, "loss": 2.0196, "step": 20065 }, { "epoch": 0.8488873847195194, "grad_norm": 471.605712890625, "learning_rate": 0.001, "loss": 2.1384, "step": 20066 }, { "epoch": 0.8489296894830358, "grad_norm": 0.18888062238693237, "learning_rate": 0.001, "loss": 2.9906, "step": 20067 }, { "epoch": 0.8489719942465521, "grad_norm": 0.16716241836547852, "learning_rate": 0.001, "loss": 2.5418, "step": 20068 }, { "epoch": 0.8490142990100685, "grad_norm": 0.23846466839313507, "learning_rate": 0.001, "loss": 1.9297, "step": 20069 }, { "epoch": 0.8490566037735849, "grad_norm": 0.2950774133205414, "learning_rate": 0.001, "loss": 2.0279, "step": 20070 }, { "epoch": 0.8490989085371012, "grad_norm": 0.18450148403644562, "learning_rate": 0.001, "loss": 1.901, "step": 20071 }, { "epoch": 0.8491412133006176, "grad_norm": 0.15849687159061432, "learning_rate": 0.001, "loss": 1.8239, "step": 20072 }, { "epoch": 0.8491835180641341, "grad_norm": 0.17029528319835663, "learning_rate": 0.001, "loss": 1.5521, "step": 20073 }, { "epoch": 0.8492258228276504, "grad_norm": 0.1589442491531372, "learning_rate": 0.001, "loss": 1.79, "step": 20074 }, { "epoch": 0.8492681275911668, "grad_norm": 0.19395272433757782, "learning_rate": 0.001, "loss": 2.1332, "step": 20075 }, { "epoch": 0.8493104323546832, "grad_norm": 0.1506047248840332, "learning_rate": 0.001, "loss": 2.3834, "step": 20076 }, { "epoch": 0.8493527371181995, "grad_norm": 0.1427820324897766, "learning_rate": 0.001, "loss": 1.5925, "step": 20077 }, { "epoch": 0.8493950418817159, "grad_norm": 0.14985975623130798, "learning_rate": 0.001, "loss": 2.5144, "step": 20078 }, { "epoch": 0.8494373466452323, "grad_norm": 10.775849342346191, "learning_rate": 0.001, "loss": 1.8041, "step": 20079 }, { "epoch": 0.8494796514087486, "grad_norm": 0.16326069831848145, "learning_rate": 0.001, "loss": 1.98, "step": 20080 }, { "epoch": 0.849521956172265, "grad_norm": 0.6944580674171448, "learning_rate": 0.001, "loss": 3.5848, "step": 20081 }, { "epoch": 0.8495642609357814, "grad_norm": 0.17368917167186737, "learning_rate": 0.001, "loss": 2.6593, "step": 20082 }, { "epoch": 0.8496065656992977, "grad_norm": 0.17858272790908813, "learning_rate": 0.001, "loss": 2.2117, "step": 20083 }, { "epoch": 0.8496488704628141, "grad_norm": 0.2750988304615021, "learning_rate": 0.001, "loss": 2.6538, "step": 20084 }, { "epoch": 0.8496911752263305, "grad_norm": 0.4688890874385834, "learning_rate": 0.001, "loss": 2.3822, "step": 20085 }, { "epoch": 0.8497334799898468, "grad_norm": 0.43606501817703247, "learning_rate": 0.001, "loss": 3.1274, "step": 20086 }, { "epoch": 0.8497757847533632, "grad_norm": 0.1552165150642395, "learning_rate": 0.001, "loss": 1.8759, "step": 20087 }, { "epoch": 0.8498180895168796, "grad_norm": 0.14785443246364594, "learning_rate": 0.001, "loss": 2.4866, "step": 20088 }, { "epoch": 0.849860394280396, "grad_norm": 0.14098820090293884, "learning_rate": 0.001, "loss": 1.97, "step": 20089 }, { "epoch": 0.8499026990439124, "grad_norm": 0.14183367788791656, "learning_rate": 0.001, "loss": 1.7328, "step": 20090 }, { "epoch": 0.8499450038074288, "grad_norm": 0.2905732989311218, "learning_rate": 0.001, "loss": 2.5444, "step": 20091 }, { "epoch": 0.8499873085709451, "grad_norm": 6.777652263641357, "learning_rate": 0.001, "loss": 1.8801, "step": 20092 }, { "epoch": 0.8500296133344615, "grad_norm": 0.1453995257616043, "learning_rate": 0.001, "loss": 1.6809, "step": 20093 }, { "epoch": 0.8500719180979779, "grad_norm": 0.18101413547992706, "learning_rate": 0.001, "loss": 2.2516, "step": 20094 }, { "epoch": 0.8501142228614942, "grad_norm": 7.015632152557373, "learning_rate": 0.001, "loss": 1.9355, "step": 20095 }, { "epoch": 0.8501565276250106, "grad_norm": 0.21599209308624268, "learning_rate": 0.001, "loss": 1.6428, "step": 20096 }, { "epoch": 0.8501988323885269, "grad_norm": 0.19514302909374237, "learning_rate": 0.001, "loss": 1.8462, "step": 20097 }, { "epoch": 0.8502411371520433, "grad_norm": 0.3018536865711212, "learning_rate": 0.001, "loss": 2.9103, "step": 20098 }, { "epoch": 0.8502834419155597, "grad_norm": 0.220729798078537, "learning_rate": 0.001, "loss": 2.7488, "step": 20099 }, { "epoch": 0.850325746679076, "grad_norm": 0.9878958463668823, "learning_rate": 0.001, "loss": 1.8072, "step": 20100 }, { "epoch": 0.8503680514425924, "grad_norm": 0.19620686769485474, "learning_rate": 0.001, "loss": 1.6413, "step": 20101 }, { "epoch": 0.8504103562061088, "grad_norm": 0.4310097098350525, "learning_rate": 0.001, "loss": 1.7159, "step": 20102 }, { "epoch": 0.8504526609696251, "grad_norm": 0.352130264043808, "learning_rate": 0.001, "loss": 2.5859, "step": 20103 }, { "epoch": 0.8504949657331415, "grad_norm": 0.3044179081916809, "learning_rate": 0.001, "loss": 1.5859, "step": 20104 }, { "epoch": 0.850537270496658, "grad_norm": 0.2029111385345459, "learning_rate": 0.001, "loss": 2.3172, "step": 20105 }, { "epoch": 0.8505795752601742, "grad_norm": 1.908150315284729, "learning_rate": 0.001, "loss": 2.2062, "step": 20106 }, { "epoch": 0.8506218800236907, "grad_norm": 0.14281626045703888, "learning_rate": 0.001, "loss": 1.5915, "step": 20107 }, { "epoch": 0.8506641847872071, "grad_norm": 0.16866661608219147, "learning_rate": 0.001, "loss": 2.6013, "step": 20108 }, { "epoch": 0.8507064895507234, "grad_norm": 0.20509420335292816, "learning_rate": 0.001, "loss": 2.5722, "step": 20109 }, { "epoch": 0.8507487943142398, "grad_norm": 0.17332904040813446, "learning_rate": 0.001, "loss": 1.7543, "step": 20110 }, { "epoch": 0.8507910990777562, "grad_norm": 0.1855224370956421, "learning_rate": 0.001, "loss": 2.1487, "step": 20111 }, { "epoch": 0.8508334038412725, "grad_norm": 0.17093543708324432, "learning_rate": 0.001, "loss": 2.3077, "step": 20112 }, { "epoch": 0.8508757086047889, "grad_norm": 10.589107513427734, "learning_rate": 0.001, "loss": 2.2467, "step": 20113 }, { "epoch": 0.8509180133683053, "grad_norm": 0.18264487385749817, "learning_rate": 0.001, "loss": 2.2559, "step": 20114 }, { "epoch": 0.8509603181318216, "grad_norm": 0.19633848965168, "learning_rate": 0.001, "loss": 2.1265, "step": 20115 }, { "epoch": 0.851002622895338, "grad_norm": 0.1672111302614212, "learning_rate": 0.001, "loss": 2.9774, "step": 20116 }, { "epoch": 0.8510449276588544, "grad_norm": 0.25148797035217285, "learning_rate": 0.001, "loss": 2.0833, "step": 20117 }, { "epoch": 0.8510872324223707, "grad_norm": 0.22718261182308197, "learning_rate": 0.001, "loss": 3.6184, "step": 20118 }, { "epoch": 0.8511295371858871, "grad_norm": 3.537106990814209, "learning_rate": 0.001, "loss": 2.5408, "step": 20119 }, { "epoch": 0.8511718419494035, "grad_norm": 0.16952507197856903, "learning_rate": 0.001, "loss": 2.31, "step": 20120 }, { "epoch": 0.8512141467129198, "grad_norm": 0.23125673830509186, "learning_rate": 0.001, "loss": 2.2545, "step": 20121 }, { "epoch": 0.8512564514764362, "grad_norm": 0.19513604044914246, "learning_rate": 0.001, "loss": 3.295, "step": 20122 }, { "epoch": 0.8512987562399527, "grad_norm": 0.20079001784324646, "learning_rate": 0.001, "loss": 1.5811, "step": 20123 }, { "epoch": 0.851341061003469, "grad_norm": 0.22401869297027588, "learning_rate": 0.001, "loss": 1.9916, "step": 20124 }, { "epoch": 0.8513833657669854, "grad_norm": 0.19173742830753326, "learning_rate": 0.001, "loss": 2.5162, "step": 20125 }, { "epoch": 0.8514256705305018, "grad_norm": 5.623570919036865, "learning_rate": 0.001, "loss": 1.7293, "step": 20126 }, { "epoch": 0.8514679752940181, "grad_norm": 0.2150885909795761, "learning_rate": 0.001, "loss": 2.5409, "step": 20127 }, { "epoch": 0.8515102800575345, "grad_norm": 0.2799954116344452, "learning_rate": 0.001, "loss": 3.2083, "step": 20128 }, { "epoch": 0.8515525848210509, "grad_norm": 0.19551977515220642, "learning_rate": 0.001, "loss": 2.7637, "step": 20129 }, { "epoch": 0.8515948895845672, "grad_norm": 0.24339886009693146, "learning_rate": 0.001, "loss": 3.3285, "step": 20130 }, { "epoch": 0.8516371943480836, "grad_norm": 0.25382792949676514, "learning_rate": 0.001, "loss": 1.9141, "step": 20131 }, { "epoch": 0.8516794991116, "grad_norm": 0.17884451150894165, "learning_rate": 0.001, "loss": 2.4345, "step": 20132 }, { "epoch": 0.8517218038751163, "grad_norm": 0.9406694769859314, "learning_rate": 0.001, "loss": 2.2004, "step": 20133 }, { "epoch": 0.8517641086386327, "grad_norm": 0.874098002910614, "learning_rate": 0.001, "loss": 2.1619, "step": 20134 }, { "epoch": 0.8518064134021491, "grad_norm": 0.17671692371368408, "learning_rate": 0.001, "loss": 2.7296, "step": 20135 }, { "epoch": 0.8518487181656654, "grad_norm": 0.16355298459529877, "learning_rate": 0.001, "loss": 3.0569, "step": 20136 }, { "epoch": 0.8518910229291818, "grad_norm": 0.24405351281166077, "learning_rate": 0.001, "loss": 1.9078, "step": 20137 }, { "epoch": 0.8519333276926983, "grad_norm": 0.14516574144363403, "learning_rate": 0.001, "loss": 1.629, "step": 20138 }, { "epoch": 0.8519756324562145, "grad_norm": 0.1458577811717987, "learning_rate": 0.001, "loss": 1.9627, "step": 20139 }, { "epoch": 0.852017937219731, "grad_norm": 5.451334476470947, "learning_rate": 0.001, "loss": 2.0621, "step": 20140 }, { "epoch": 0.8520602419832474, "grad_norm": 0.15075600147247314, "learning_rate": 0.001, "loss": 1.9024, "step": 20141 }, { "epoch": 0.8521025467467637, "grad_norm": 0.16598421335220337, "learning_rate": 0.001, "loss": 1.7182, "step": 20142 }, { "epoch": 0.8521448515102801, "grad_norm": 0.14806927740573883, "learning_rate": 0.001, "loss": 1.511, "step": 20143 }, { "epoch": 0.8521871562737964, "grad_norm": 0.8803728818893433, "learning_rate": 0.001, "loss": 2.5187, "step": 20144 }, { "epoch": 0.8522294610373128, "grad_norm": 0.14872333407402039, "learning_rate": 0.001, "loss": 2.2161, "step": 20145 }, { "epoch": 0.8522717658008292, "grad_norm": 0.14645099639892578, "learning_rate": 0.001, "loss": 1.8971, "step": 20146 }, { "epoch": 0.8523140705643455, "grad_norm": 0.16836774349212646, "learning_rate": 0.001, "loss": 2.1939, "step": 20147 }, { "epoch": 0.8523563753278619, "grad_norm": 0.24831850826740265, "learning_rate": 0.001, "loss": 2.191, "step": 20148 }, { "epoch": 0.8523986800913783, "grad_norm": 0.1501324474811554, "learning_rate": 0.001, "loss": 2.3453, "step": 20149 }, { "epoch": 0.8524409848548946, "grad_norm": 0.15773944556713104, "learning_rate": 0.001, "loss": 2.0649, "step": 20150 }, { "epoch": 0.852483289618411, "grad_norm": 0.13998405635356903, "learning_rate": 0.001, "loss": 2.1773, "step": 20151 }, { "epoch": 0.8525255943819274, "grad_norm": 0.13668106496334076, "learning_rate": 0.001, "loss": 3.5096, "step": 20152 }, { "epoch": 0.8525678991454437, "grad_norm": 2.312943935394287, "learning_rate": 0.001, "loss": 2.0723, "step": 20153 }, { "epoch": 0.8526102039089601, "grad_norm": 0.12327218800783157, "learning_rate": 0.001, "loss": 1.5741, "step": 20154 }, { "epoch": 0.8526525086724766, "grad_norm": 0.13412897288799286, "learning_rate": 0.001, "loss": 1.9401, "step": 20155 }, { "epoch": 0.8526948134359928, "grad_norm": 0.8391098976135254, "learning_rate": 0.001, "loss": 2.408, "step": 20156 }, { "epoch": 0.8527371181995093, "grad_norm": 0.14044369757175446, "learning_rate": 0.001, "loss": 2.7366, "step": 20157 }, { "epoch": 0.8527794229630257, "grad_norm": 0.15021243691444397, "learning_rate": 0.001, "loss": 1.9738, "step": 20158 }, { "epoch": 0.852821727726542, "grad_norm": 0.14377515017986298, "learning_rate": 0.001, "loss": 2.2906, "step": 20159 }, { "epoch": 0.8528640324900584, "grad_norm": 0.17275890707969666, "learning_rate": 0.001, "loss": 1.9053, "step": 20160 }, { "epoch": 0.8529063372535748, "grad_norm": 0.6215050220489502, "learning_rate": 0.001, "loss": 2.247, "step": 20161 }, { "epoch": 0.8529486420170911, "grad_norm": 0.16696125268936157, "learning_rate": 0.001, "loss": 2.4084, "step": 20162 }, { "epoch": 0.8529909467806075, "grad_norm": 1.0884296894073486, "learning_rate": 0.001, "loss": 2.9437, "step": 20163 }, { "epoch": 0.8530332515441239, "grad_norm": 0.1563209444284439, "learning_rate": 0.001, "loss": 1.662, "step": 20164 }, { "epoch": 0.8530755563076402, "grad_norm": 0.1429872363805771, "learning_rate": 0.001, "loss": 2.4649, "step": 20165 }, { "epoch": 0.8531178610711566, "grad_norm": 2.498642921447754, "learning_rate": 0.001, "loss": 3.1658, "step": 20166 }, { "epoch": 0.853160165834673, "grad_norm": 0.20113472640514374, "learning_rate": 0.001, "loss": 2.6068, "step": 20167 }, { "epoch": 0.8532024705981893, "grad_norm": 0.14465253055095673, "learning_rate": 0.001, "loss": 2.3038, "step": 20168 }, { "epoch": 0.8532447753617057, "grad_norm": 0.16391976177692413, "learning_rate": 0.001, "loss": 2.1344, "step": 20169 }, { "epoch": 0.8532870801252221, "grad_norm": 0.16122137010097504, "learning_rate": 0.001, "loss": 3.441, "step": 20170 }, { "epoch": 0.8533293848887384, "grad_norm": 0.7779273390769958, "learning_rate": 0.001, "loss": 1.4662, "step": 20171 }, { "epoch": 0.8533716896522549, "grad_norm": 24.723020553588867, "learning_rate": 0.001, "loss": 1.8251, "step": 20172 }, { "epoch": 0.8534139944157713, "grad_norm": 0.2329087257385254, "learning_rate": 0.001, "loss": 2.7639, "step": 20173 }, { "epoch": 0.8534562991792876, "grad_norm": 0.14637401700019836, "learning_rate": 0.001, "loss": 1.5176, "step": 20174 }, { "epoch": 0.853498603942804, "grad_norm": 0.16772529482841492, "learning_rate": 0.001, "loss": 2.0491, "step": 20175 }, { "epoch": 0.8535409087063204, "grad_norm": 0.22313785552978516, "learning_rate": 0.001, "loss": 2.9042, "step": 20176 }, { "epoch": 0.8535832134698367, "grad_norm": 0.16870731115341187, "learning_rate": 0.001, "loss": 2.4619, "step": 20177 }, { "epoch": 0.8536255182333531, "grad_norm": 0.1313748061656952, "learning_rate": 0.001, "loss": 1.8931, "step": 20178 }, { "epoch": 0.8536678229968695, "grad_norm": 0.1411811113357544, "learning_rate": 0.001, "loss": 1.7761, "step": 20179 }, { "epoch": 0.8537101277603858, "grad_norm": 0.17591390013694763, "learning_rate": 0.001, "loss": 2.2475, "step": 20180 }, { "epoch": 0.8537524325239022, "grad_norm": 0.12935340404510498, "learning_rate": 0.001, "loss": 2.8795, "step": 20181 }, { "epoch": 0.8537947372874186, "grad_norm": 0.16716565191745758, "learning_rate": 0.001, "loss": 1.745, "step": 20182 }, { "epoch": 0.8538370420509349, "grad_norm": 0.4727073907852173, "learning_rate": 0.001, "loss": 1.8856, "step": 20183 }, { "epoch": 0.8538793468144513, "grad_norm": 0.13519716262817383, "learning_rate": 0.001, "loss": 1.5485, "step": 20184 }, { "epoch": 0.8539216515779677, "grad_norm": 0.14166143536567688, "learning_rate": 0.001, "loss": 1.9562, "step": 20185 }, { "epoch": 0.853963956341484, "grad_norm": 0.6362976431846619, "learning_rate": 0.001, "loss": 1.7597, "step": 20186 }, { "epoch": 0.8540062611050004, "grad_norm": 0.1704770028591156, "learning_rate": 0.001, "loss": 2.1233, "step": 20187 }, { "epoch": 0.8540485658685167, "grad_norm": 0.1493573784828186, "learning_rate": 0.001, "loss": 1.9775, "step": 20188 }, { "epoch": 0.8540908706320332, "grad_norm": 0.14581403136253357, "learning_rate": 0.001, "loss": 1.6185, "step": 20189 }, { "epoch": 0.8541331753955496, "grad_norm": 4.783538341522217, "learning_rate": 0.001, "loss": 2.7544, "step": 20190 }, { "epoch": 0.8541754801590659, "grad_norm": 1.13120436668396, "learning_rate": 0.001, "loss": 2.2622, "step": 20191 }, { "epoch": 0.8542177849225823, "grad_norm": 0.18143315613269806, "learning_rate": 0.001, "loss": 2.1182, "step": 20192 }, { "epoch": 0.8542600896860987, "grad_norm": 0.37129276990890503, "learning_rate": 0.001, "loss": 2.6587, "step": 20193 }, { "epoch": 0.854302394449615, "grad_norm": 2.5433599948883057, "learning_rate": 0.001, "loss": 2.0521, "step": 20194 }, { "epoch": 0.8543446992131314, "grad_norm": 0.16446053981781006, "learning_rate": 0.001, "loss": 1.7504, "step": 20195 }, { "epoch": 0.8543870039766478, "grad_norm": 0.16676220297813416, "learning_rate": 0.001, "loss": 1.6936, "step": 20196 }, { "epoch": 0.8544293087401641, "grad_norm": 0.15930965542793274, "learning_rate": 0.001, "loss": 1.8836, "step": 20197 }, { "epoch": 0.8544716135036805, "grad_norm": 0.16597270965576172, "learning_rate": 0.001, "loss": 1.7821, "step": 20198 }, { "epoch": 0.8545139182671969, "grad_norm": 0.18009643256664276, "learning_rate": 0.001, "loss": 2.971, "step": 20199 }, { "epoch": 0.8545562230307132, "grad_norm": 0.6921260952949524, "learning_rate": 0.001, "loss": 2.1276, "step": 20200 }, { "epoch": 0.8545985277942296, "grad_norm": 0.1574309915304184, "learning_rate": 0.001, "loss": 2.1452, "step": 20201 }, { "epoch": 0.854640832557746, "grad_norm": 0.16404499113559723, "learning_rate": 0.001, "loss": 2.189, "step": 20202 }, { "epoch": 0.8546831373212623, "grad_norm": 0.13803629577159882, "learning_rate": 0.001, "loss": 1.8653, "step": 20203 }, { "epoch": 0.8547254420847787, "grad_norm": 0.21450106799602509, "learning_rate": 0.001, "loss": 2.7563, "step": 20204 }, { "epoch": 0.8547677468482952, "grad_norm": 0.1293824017047882, "learning_rate": 0.001, "loss": 1.6771, "step": 20205 }, { "epoch": 0.8548100516118115, "grad_norm": 0.14951960742473602, "learning_rate": 0.001, "loss": 2.6877, "step": 20206 }, { "epoch": 0.8548523563753279, "grad_norm": 1.9558665752410889, "learning_rate": 0.001, "loss": 1.7355, "step": 20207 }, { "epoch": 0.8548946611388443, "grad_norm": 0.16537365317344666, "learning_rate": 0.001, "loss": 1.9014, "step": 20208 }, { "epoch": 0.8549369659023606, "grad_norm": 0.15604209899902344, "learning_rate": 0.001, "loss": 2.6344, "step": 20209 }, { "epoch": 0.854979270665877, "grad_norm": 0.18542635440826416, "learning_rate": 0.001, "loss": 2.0202, "step": 20210 }, { "epoch": 0.8550215754293934, "grad_norm": 0.4348926246166229, "learning_rate": 0.001, "loss": 2.0383, "step": 20211 }, { "epoch": 0.8550638801929097, "grad_norm": 0.24259275197982788, "learning_rate": 0.001, "loss": 2.2676, "step": 20212 }, { "epoch": 0.8551061849564261, "grad_norm": 0.1625998318195343, "learning_rate": 0.001, "loss": 1.8147, "step": 20213 }, { "epoch": 0.8551484897199425, "grad_norm": 0.1525462120771408, "learning_rate": 0.001, "loss": 2.0379, "step": 20214 }, { "epoch": 0.8551907944834588, "grad_norm": 0.1710134744644165, "learning_rate": 0.001, "loss": 2.2464, "step": 20215 }, { "epoch": 0.8552330992469752, "grad_norm": 0.16427335143089294, "learning_rate": 0.001, "loss": 1.8928, "step": 20216 }, { "epoch": 0.8552754040104916, "grad_norm": 0.1514868587255478, "learning_rate": 0.001, "loss": 2.0247, "step": 20217 }, { "epoch": 0.8553177087740079, "grad_norm": 0.16171973943710327, "learning_rate": 0.001, "loss": 1.8759, "step": 20218 }, { "epoch": 0.8553600135375243, "grad_norm": 1.2077202796936035, "learning_rate": 0.001, "loss": 2.7401, "step": 20219 }, { "epoch": 0.8554023183010407, "grad_norm": 0.2370886206626892, "learning_rate": 0.001, "loss": 3.1154, "step": 20220 }, { "epoch": 0.855444623064557, "grad_norm": 0.43571701645851135, "learning_rate": 0.001, "loss": 1.787, "step": 20221 }, { "epoch": 0.8554869278280735, "grad_norm": 0.358680784702301, "learning_rate": 0.001, "loss": 2.0282, "step": 20222 }, { "epoch": 0.8555292325915899, "grad_norm": 0.18976160883903503, "learning_rate": 0.001, "loss": 2.3883, "step": 20223 }, { "epoch": 0.8555715373551062, "grad_norm": 0.1478756070137024, "learning_rate": 0.001, "loss": 1.8335, "step": 20224 }, { "epoch": 0.8556138421186226, "grad_norm": 0.31914398074150085, "learning_rate": 0.001, "loss": 2.1218, "step": 20225 }, { "epoch": 0.855656146882139, "grad_norm": 0.16509748995304108, "learning_rate": 0.001, "loss": 2.9447, "step": 20226 }, { "epoch": 0.8556984516456553, "grad_norm": 0.20428349077701569, "learning_rate": 0.001, "loss": 1.7563, "step": 20227 }, { "epoch": 0.8557407564091717, "grad_norm": 0.15375010669231415, "learning_rate": 0.001, "loss": 2.957, "step": 20228 }, { "epoch": 0.8557830611726881, "grad_norm": 0.1550106555223465, "learning_rate": 0.001, "loss": 2.0625, "step": 20229 }, { "epoch": 0.8558253659362044, "grad_norm": 0.7794061303138733, "learning_rate": 0.001, "loss": 2.9718, "step": 20230 }, { "epoch": 0.8558676706997208, "grad_norm": 0.1645902544260025, "learning_rate": 0.001, "loss": 1.8263, "step": 20231 }, { "epoch": 0.8559099754632372, "grad_norm": 0.14967839419841766, "learning_rate": 0.001, "loss": 1.828, "step": 20232 }, { "epoch": 0.8559522802267535, "grad_norm": 0.3559803366661072, "learning_rate": 0.001, "loss": 2.069, "step": 20233 }, { "epoch": 0.8559945849902699, "grad_norm": 0.1499088853597641, "learning_rate": 0.001, "loss": 1.9467, "step": 20234 }, { "epoch": 0.8560368897537862, "grad_norm": 0.15247589349746704, "learning_rate": 0.001, "loss": 2.4844, "step": 20235 }, { "epoch": 0.8560791945173026, "grad_norm": 0.16640542447566986, "learning_rate": 0.001, "loss": 2.222, "step": 20236 }, { "epoch": 0.856121499280819, "grad_norm": 0.18216025829315186, "learning_rate": 0.001, "loss": 2.7879, "step": 20237 }, { "epoch": 0.8561638040443353, "grad_norm": 0.2965571880340576, "learning_rate": 0.001, "loss": 2.3667, "step": 20238 }, { "epoch": 0.8562061088078518, "grad_norm": 193.41860961914062, "learning_rate": 0.001, "loss": 1.4623, "step": 20239 }, { "epoch": 0.8562484135713682, "grad_norm": 0.18015336990356445, "learning_rate": 0.001, "loss": 1.3319, "step": 20240 }, { "epoch": 0.8562907183348845, "grad_norm": 0.1855952888727188, "learning_rate": 0.001, "loss": 2.0279, "step": 20241 }, { "epoch": 0.8563330230984009, "grad_norm": 0.14489391446113586, "learning_rate": 0.001, "loss": 1.9138, "step": 20242 }, { "epoch": 0.8563753278619173, "grad_norm": 0.19585612416267395, "learning_rate": 0.001, "loss": 1.921, "step": 20243 }, { "epoch": 0.8564176326254336, "grad_norm": 0.1694945991039276, "learning_rate": 0.001, "loss": 1.8727, "step": 20244 }, { "epoch": 0.85645993738895, "grad_norm": 0.2455274611711502, "learning_rate": 0.001, "loss": 2.0229, "step": 20245 }, { "epoch": 0.8565022421524664, "grad_norm": 0.5265862941741943, "learning_rate": 0.001, "loss": 2.5353, "step": 20246 }, { "epoch": 0.8565445469159827, "grad_norm": 0.7586323022842407, "learning_rate": 0.001, "loss": 2.2412, "step": 20247 }, { "epoch": 0.8565868516794991, "grad_norm": 0.1906781643629074, "learning_rate": 0.001, "loss": 2.3801, "step": 20248 }, { "epoch": 0.8566291564430155, "grad_norm": 0.18281270563602448, "learning_rate": 0.001, "loss": 1.8586, "step": 20249 }, { "epoch": 0.8566714612065318, "grad_norm": 0.2404775768518448, "learning_rate": 0.001, "loss": 3.2444, "step": 20250 }, { "epoch": 0.8567137659700482, "grad_norm": 0.1609775424003601, "learning_rate": 0.001, "loss": 2.8216, "step": 20251 }, { "epoch": 0.8567560707335646, "grad_norm": 2.4240500926971436, "learning_rate": 0.001, "loss": 1.9752, "step": 20252 }, { "epoch": 0.8567983754970809, "grad_norm": 0.2600942552089691, "learning_rate": 0.001, "loss": 2.9503, "step": 20253 }, { "epoch": 0.8568406802605973, "grad_norm": 0.19915421307086945, "learning_rate": 0.001, "loss": 2.0628, "step": 20254 }, { "epoch": 0.8568829850241138, "grad_norm": 0.16194456815719604, "learning_rate": 0.001, "loss": 2.0255, "step": 20255 }, { "epoch": 0.85692528978763, "grad_norm": 0.15854515135288239, "learning_rate": 0.001, "loss": 2.5253, "step": 20256 }, { "epoch": 0.8569675945511465, "grad_norm": 0.13806360960006714, "learning_rate": 0.001, "loss": 1.7646, "step": 20257 }, { "epoch": 0.8570098993146629, "grad_norm": 0.47766992449760437, "learning_rate": 0.001, "loss": 2.2224, "step": 20258 }, { "epoch": 0.8570522040781792, "grad_norm": 0.168570414185524, "learning_rate": 0.001, "loss": 3.5105, "step": 20259 }, { "epoch": 0.8570945088416956, "grad_norm": 0.1976046860218048, "learning_rate": 0.001, "loss": 2.3813, "step": 20260 }, { "epoch": 0.857136813605212, "grad_norm": 0.21734581887722015, "learning_rate": 0.001, "loss": 2.301, "step": 20261 }, { "epoch": 0.8571791183687283, "grad_norm": 0.1577748954296112, "learning_rate": 0.001, "loss": 1.7362, "step": 20262 }, { "epoch": 0.8572214231322447, "grad_norm": 0.1552111953496933, "learning_rate": 0.001, "loss": 2.7548, "step": 20263 }, { "epoch": 0.8572637278957611, "grad_norm": 0.15418484807014465, "learning_rate": 0.001, "loss": 2.4548, "step": 20264 }, { "epoch": 0.8573060326592774, "grad_norm": 0.2172391414642334, "learning_rate": 0.001, "loss": 2.6717, "step": 20265 }, { "epoch": 0.8573483374227938, "grad_norm": 0.15989260375499725, "learning_rate": 0.001, "loss": 1.8661, "step": 20266 }, { "epoch": 0.8573906421863102, "grad_norm": 2.102097988128662, "learning_rate": 0.001, "loss": 1.6953, "step": 20267 }, { "epoch": 0.8574329469498265, "grad_norm": 0.13255327939987183, "learning_rate": 0.001, "loss": 2.1407, "step": 20268 }, { "epoch": 0.8574752517133429, "grad_norm": 0.1770564168691635, "learning_rate": 0.001, "loss": 2.2512, "step": 20269 }, { "epoch": 0.8575175564768593, "grad_norm": 0.14766398072242737, "learning_rate": 0.001, "loss": 1.7971, "step": 20270 }, { "epoch": 0.8575598612403756, "grad_norm": 0.22131910920143127, "learning_rate": 0.001, "loss": 2.2181, "step": 20271 }, { "epoch": 0.857602166003892, "grad_norm": 0.3644495904445648, "learning_rate": 0.001, "loss": 2.25, "step": 20272 }, { "epoch": 0.8576444707674085, "grad_norm": 0.14953762292861938, "learning_rate": 0.001, "loss": 2.2675, "step": 20273 }, { "epoch": 0.8576867755309248, "grad_norm": 0.16283674538135529, "learning_rate": 0.001, "loss": 1.8331, "step": 20274 }, { "epoch": 0.8577290802944412, "grad_norm": 0.1207035556435585, "learning_rate": 0.001, "loss": 2.1584, "step": 20275 }, { "epoch": 0.8577713850579576, "grad_norm": 0.12849994003772736, "learning_rate": 0.001, "loss": 2.2343, "step": 20276 }, { "epoch": 0.8578136898214739, "grad_norm": 2.081145763397217, "learning_rate": 0.001, "loss": 1.8637, "step": 20277 }, { "epoch": 0.8578559945849903, "grad_norm": 0.1410299390554428, "learning_rate": 0.001, "loss": 2.8566, "step": 20278 }, { "epoch": 0.8578982993485066, "grad_norm": 0.13738563656806946, "learning_rate": 0.001, "loss": 2.2659, "step": 20279 }, { "epoch": 0.857940604112023, "grad_norm": 0.13959841430187225, "learning_rate": 0.001, "loss": 2.9711, "step": 20280 }, { "epoch": 0.8579829088755394, "grad_norm": 0.1706565022468567, "learning_rate": 0.001, "loss": 2.1167, "step": 20281 }, { "epoch": 0.8580252136390557, "grad_norm": 0.14428362250328064, "learning_rate": 0.001, "loss": 2.3054, "step": 20282 }, { "epoch": 0.8580675184025721, "grad_norm": 0.1867976188659668, "learning_rate": 0.001, "loss": 1.8198, "step": 20283 }, { "epoch": 0.8581098231660885, "grad_norm": 0.1552973836660385, "learning_rate": 0.001, "loss": 2.5998, "step": 20284 }, { "epoch": 0.8581521279296048, "grad_norm": 0.1912197321653366, "learning_rate": 0.001, "loss": 1.9071, "step": 20285 }, { "epoch": 0.8581944326931212, "grad_norm": 0.18686088919639587, "learning_rate": 0.001, "loss": 1.5572, "step": 20286 }, { "epoch": 0.8582367374566376, "grad_norm": 0.21159033477306366, "learning_rate": 0.001, "loss": 2.1987, "step": 20287 }, { "epoch": 0.8582790422201539, "grad_norm": 0.16880102455615997, "learning_rate": 0.001, "loss": 2.0401, "step": 20288 }, { "epoch": 0.8583213469836704, "grad_norm": 0.16665174067020416, "learning_rate": 0.001, "loss": 1.763, "step": 20289 }, { "epoch": 0.8583636517471868, "grad_norm": 0.5530775189399719, "learning_rate": 0.001, "loss": 2.763, "step": 20290 }, { "epoch": 0.8584059565107031, "grad_norm": 0.1329568773508072, "learning_rate": 0.001, "loss": 1.8301, "step": 20291 }, { "epoch": 0.8584482612742195, "grad_norm": 22.826807022094727, "learning_rate": 0.001, "loss": 2.5194, "step": 20292 }, { "epoch": 0.8584905660377359, "grad_norm": 1.763818621635437, "learning_rate": 0.001, "loss": 3.4182, "step": 20293 }, { "epoch": 0.8585328708012522, "grad_norm": 0.1884799599647522, "learning_rate": 0.001, "loss": 1.8589, "step": 20294 }, { "epoch": 0.8585751755647686, "grad_norm": 0.1700194627046585, "learning_rate": 0.001, "loss": 2.0548, "step": 20295 }, { "epoch": 0.858617480328285, "grad_norm": 0.17256033420562744, "learning_rate": 0.001, "loss": 2.0633, "step": 20296 }, { "epoch": 0.8586597850918013, "grad_norm": 0.1553211808204651, "learning_rate": 0.001, "loss": 2.8999, "step": 20297 }, { "epoch": 0.8587020898553177, "grad_norm": 0.17085745930671692, "learning_rate": 0.001, "loss": 2.191, "step": 20298 }, { "epoch": 0.8587443946188341, "grad_norm": 0.18587633967399597, "learning_rate": 0.001, "loss": 2.1725, "step": 20299 }, { "epoch": 0.8587866993823504, "grad_norm": 0.1755942404270172, "learning_rate": 0.001, "loss": 2.9481, "step": 20300 }, { "epoch": 0.8588290041458668, "grad_norm": 0.6229716539382935, "learning_rate": 0.001, "loss": 4.1505, "step": 20301 }, { "epoch": 0.8588713089093832, "grad_norm": 0.14304950833320618, "learning_rate": 0.001, "loss": 1.9846, "step": 20302 }, { "epoch": 0.8589136136728995, "grad_norm": 0.1368221938610077, "learning_rate": 0.001, "loss": 2.0029, "step": 20303 }, { "epoch": 0.8589559184364159, "grad_norm": 0.1675972193479538, "learning_rate": 0.001, "loss": 1.9319, "step": 20304 }, { "epoch": 0.8589982231999324, "grad_norm": 0.1587626338005066, "learning_rate": 0.001, "loss": 1.855, "step": 20305 }, { "epoch": 0.8590405279634487, "grad_norm": 0.3222064673900604, "learning_rate": 0.001, "loss": 3.0261, "step": 20306 }, { "epoch": 0.8590828327269651, "grad_norm": 0.13862450420856476, "learning_rate": 0.001, "loss": 2.2649, "step": 20307 }, { "epoch": 0.8591251374904815, "grad_norm": 0.20226208865642548, "learning_rate": 0.001, "loss": 2.0139, "step": 20308 }, { "epoch": 0.8591674422539978, "grad_norm": 0.16037043929100037, "learning_rate": 0.001, "loss": 2.5581, "step": 20309 }, { "epoch": 0.8592097470175142, "grad_norm": 0.17345164716243744, "learning_rate": 0.001, "loss": 1.9058, "step": 20310 }, { "epoch": 0.8592520517810306, "grad_norm": 0.4555630385875702, "learning_rate": 0.001, "loss": 3.2089, "step": 20311 }, { "epoch": 0.8592943565445469, "grad_norm": 0.6636331677436829, "learning_rate": 0.001, "loss": 1.7678, "step": 20312 }, { "epoch": 0.8593366613080633, "grad_norm": 1.500398874282837, "learning_rate": 0.001, "loss": 1.8767, "step": 20313 }, { "epoch": 0.8593789660715797, "grad_norm": 0.11064822226762772, "learning_rate": 0.001, "loss": 2.8582, "step": 20314 }, { "epoch": 0.859421270835096, "grad_norm": 0.2151585817337036, "learning_rate": 0.001, "loss": 1.8232, "step": 20315 }, { "epoch": 0.8594635755986124, "grad_norm": 0.14713852107524872, "learning_rate": 0.001, "loss": 2.7334, "step": 20316 }, { "epoch": 0.8595058803621288, "grad_norm": 0.13766881823539734, "learning_rate": 0.001, "loss": 1.6997, "step": 20317 }, { "epoch": 0.8595481851256451, "grad_norm": 0.18348439037799835, "learning_rate": 0.001, "loss": 2.0182, "step": 20318 }, { "epoch": 0.8595904898891615, "grad_norm": 0.12168294191360474, "learning_rate": 0.001, "loss": 3.2898, "step": 20319 }, { "epoch": 0.859632794652678, "grad_norm": 0.12945544719696045, "learning_rate": 0.001, "loss": 1.9306, "step": 20320 }, { "epoch": 0.8596750994161942, "grad_norm": 0.13580989837646484, "learning_rate": 0.001, "loss": 1.7863, "step": 20321 }, { "epoch": 0.8597174041797107, "grad_norm": 0.1241777166724205, "learning_rate": 0.001, "loss": 2.0355, "step": 20322 }, { "epoch": 0.859759708943227, "grad_norm": 0.15077315270900726, "learning_rate": 0.001, "loss": 2.9516, "step": 20323 }, { "epoch": 0.8598020137067434, "grad_norm": 0.1805119663476944, "learning_rate": 0.001, "loss": 2.0972, "step": 20324 }, { "epoch": 0.8598443184702598, "grad_norm": 0.24641673266887665, "learning_rate": 0.001, "loss": 2.3205, "step": 20325 }, { "epoch": 0.8598866232337761, "grad_norm": 0.15835225582122803, "learning_rate": 0.001, "loss": 2.644, "step": 20326 }, { "epoch": 0.8599289279972925, "grad_norm": 0.1202114075422287, "learning_rate": 0.001, "loss": 1.4397, "step": 20327 }, { "epoch": 0.8599712327608089, "grad_norm": 0.13486960530281067, "learning_rate": 0.001, "loss": 1.5115, "step": 20328 }, { "epoch": 0.8600135375243252, "grad_norm": 0.14606483280658722, "learning_rate": 0.001, "loss": 1.9028, "step": 20329 }, { "epoch": 0.8600558422878416, "grad_norm": 0.14410161972045898, "learning_rate": 0.001, "loss": 2.147, "step": 20330 }, { "epoch": 0.860098147051358, "grad_norm": 0.14581367373466492, "learning_rate": 0.001, "loss": 2.5193, "step": 20331 }, { "epoch": 0.8601404518148743, "grad_norm": 1.6199474334716797, "learning_rate": 0.001, "loss": 1.8664, "step": 20332 }, { "epoch": 0.8601827565783907, "grad_norm": 0.14424443244934082, "learning_rate": 0.001, "loss": 2.7133, "step": 20333 }, { "epoch": 0.8602250613419071, "grad_norm": 2.242705821990967, "learning_rate": 0.001, "loss": 1.5209, "step": 20334 }, { "epoch": 0.8602673661054234, "grad_norm": 0.2191903442144394, "learning_rate": 0.001, "loss": 1.4768, "step": 20335 }, { "epoch": 0.8603096708689398, "grad_norm": 0.1648568958044052, "learning_rate": 0.001, "loss": 1.5056, "step": 20336 }, { "epoch": 0.8603519756324562, "grad_norm": 0.16343936324119568, "learning_rate": 0.001, "loss": 1.8221, "step": 20337 }, { "epoch": 0.8603942803959725, "grad_norm": 0.20765286684036255, "learning_rate": 0.001, "loss": 1.8847, "step": 20338 }, { "epoch": 0.860436585159489, "grad_norm": 0.16252531111240387, "learning_rate": 0.001, "loss": 1.9047, "step": 20339 }, { "epoch": 0.8604788899230054, "grad_norm": 0.4384823739528656, "learning_rate": 0.001, "loss": 2.4298, "step": 20340 }, { "epoch": 0.8605211946865217, "grad_norm": 0.1340947151184082, "learning_rate": 0.001, "loss": 2.4211, "step": 20341 }, { "epoch": 0.8605634994500381, "grad_norm": 4.556620121002197, "learning_rate": 0.001, "loss": 1.8013, "step": 20342 }, { "epoch": 0.8606058042135545, "grad_norm": 0.2241327315568924, "learning_rate": 0.001, "loss": 2.6166, "step": 20343 }, { "epoch": 0.8606481089770708, "grad_norm": 0.18115611374378204, "learning_rate": 0.001, "loss": 2.5444, "step": 20344 }, { "epoch": 0.8606904137405872, "grad_norm": 0.15894751250743866, "learning_rate": 0.001, "loss": 2.2565, "step": 20345 }, { "epoch": 0.8607327185041036, "grad_norm": 0.9839511513710022, "learning_rate": 0.001, "loss": 1.7355, "step": 20346 }, { "epoch": 0.8607750232676199, "grad_norm": 0.14979402720928192, "learning_rate": 0.001, "loss": 2.9431, "step": 20347 }, { "epoch": 0.8608173280311363, "grad_norm": 0.22843679785728455, "learning_rate": 0.001, "loss": 2.2349, "step": 20348 }, { "epoch": 0.8608596327946527, "grad_norm": 0.26081418991088867, "learning_rate": 0.001, "loss": 2.255, "step": 20349 }, { "epoch": 0.860901937558169, "grad_norm": 0.20167173445224762, "learning_rate": 0.001, "loss": 3.1905, "step": 20350 }, { "epoch": 0.8609442423216854, "grad_norm": 0.22882631421089172, "learning_rate": 0.001, "loss": 2.6694, "step": 20351 }, { "epoch": 0.8609865470852018, "grad_norm": 0.23098324239253998, "learning_rate": 0.001, "loss": 1.9321, "step": 20352 }, { "epoch": 0.8610288518487181, "grad_norm": 0.18798838555812836, "learning_rate": 0.001, "loss": 2.3873, "step": 20353 }, { "epoch": 0.8610711566122345, "grad_norm": 0.20491915941238403, "learning_rate": 0.001, "loss": 1.764, "step": 20354 }, { "epoch": 0.861113461375751, "grad_norm": 0.2087901085615158, "learning_rate": 0.001, "loss": 2.1135, "step": 20355 }, { "epoch": 0.8611557661392673, "grad_norm": 0.2142881155014038, "learning_rate": 0.001, "loss": 2.0897, "step": 20356 }, { "epoch": 0.8611980709027837, "grad_norm": 2.1609714031219482, "learning_rate": 0.001, "loss": 4.1002, "step": 20357 }, { "epoch": 0.8612403756663001, "grad_norm": 0.21619194746017456, "learning_rate": 0.001, "loss": 2.0694, "step": 20358 }, { "epoch": 0.8612826804298164, "grad_norm": 738.5827026367188, "learning_rate": 0.001, "loss": 3.4232, "step": 20359 }, { "epoch": 0.8613249851933328, "grad_norm": 0.20825865864753723, "learning_rate": 0.001, "loss": 2.4266, "step": 20360 }, { "epoch": 0.8613672899568492, "grad_norm": 0.41342079639434814, "learning_rate": 0.001, "loss": 3.1381, "step": 20361 }, { "epoch": 0.8614095947203655, "grad_norm": 0.2036539614200592, "learning_rate": 0.001, "loss": 2.0657, "step": 20362 }, { "epoch": 0.8614518994838819, "grad_norm": 0.9940122365951538, "learning_rate": 0.001, "loss": 2.2586, "step": 20363 }, { "epoch": 0.8614942042473983, "grad_norm": 0.19654519855976105, "learning_rate": 0.001, "loss": 1.582, "step": 20364 }, { "epoch": 0.8615365090109146, "grad_norm": 0.1715071052312851, "learning_rate": 0.001, "loss": 1.2585, "step": 20365 }, { "epoch": 0.861578813774431, "grad_norm": 0.14859987795352936, "learning_rate": 0.001, "loss": 2.478, "step": 20366 }, { "epoch": 0.8616211185379474, "grad_norm": 0.18359853327274323, "learning_rate": 0.001, "loss": 1.8721, "step": 20367 }, { "epoch": 0.8616634233014637, "grad_norm": 0.24993368983268738, "learning_rate": 0.001, "loss": 2.4343, "step": 20368 }, { "epoch": 0.8617057280649801, "grad_norm": 0.2071733921766281, "learning_rate": 0.001, "loss": 2.7097, "step": 20369 }, { "epoch": 0.8617480328284964, "grad_norm": 2.571146249771118, "learning_rate": 0.001, "loss": 2.0202, "step": 20370 }, { "epoch": 0.8617903375920128, "grad_norm": 0.5075177550315857, "learning_rate": 0.001, "loss": 2.1632, "step": 20371 }, { "epoch": 0.8618326423555293, "grad_norm": 0.18654043972492218, "learning_rate": 0.001, "loss": 2.4338, "step": 20372 }, { "epoch": 0.8618749471190456, "grad_norm": 0.2202577441930771, "learning_rate": 0.001, "loss": 3.0875, "step": 20373 }, { "epoch": 0.861917251882562, "grad_norm": 3.0814690589904785, "learning_rate": 0.001, "loss": 2.5025, "step": 20374 }, { "epoch": 0.8619595566460784, "grad_norm": 1.5214474201202393, "learning_rate": 0.001, "loss": 3.0009, "step": 20375 }, { "epoch": 0.8620018614095947, "grad_norm": 0.14105013012886047, "learning_rate": 0.001, "loss": 1.2828, "step": 20376 }, { "epoch": 0.8620441661731111, "grad_norm": 0.16624034941196442, "learning_rate": 0.001, "loss": 2.2575, "step": 20377 }, { "epoch": 0.8620864709366275, "grad_norm": 0.14870575070381165, "learning_rate": 0.001, "loss": 2.1555, "step": 20378 }, { "epoch": 0.8621287757001438, "grad_norm": 0.17573565244674683, "learning_rate": 0.001, "loss": 1.6783, "step": 20379 }, { "epoch": 0.8621710804636602, "grad_norm": 0.3848056197166443, "learning_rate": 0.001, "loss": 2.9054, "step": 20380 }, { "epoch": 0.8622133852271766, "grad_norm": 4.15639591217041, "learning_rate": 0.001, "loss": 3.0232, "step": 20381 }, { "epoch": 0.8622556899906929, "grad_norm": 0.9076377749443054, "learning_rate": 0.001, "loss": 2.6062, "step": 20382 }, { "epoch": 0.8622979947542093, "grad_norm": 0.19111178815364838, "learning_rate": 0.001, "loss": 2.8008, "step": 20383 }, { "epoch": 0.8623402995177257, "grad_norm": 0.15568110346794128, "learning_rate": 0.001, "loss": 1.6041, "step": 20384 }, { "epoch": 0.862382604281242, "grad_norm": 0.15372473001480103, "learning_rate": 0.001, "loss": 2.1381, "step": 20385 }, { "epoch": 0.8624249090447584, "grad_norm": 6.081607341766357, "learning_rate": 0.001, "loss": 2.3537, "step": 20386 }, { "epoch": 0.8624672138082748, "grad_norm": 0.15494304895401, "learning_rate": 0.001, "loss": 2.4234, "step": 20387 }, { "epoch": 0.8625095185717911, "grad_norm": 0.2465604841709137, "learning_rate": 0.001, "loss": 2.8873, "step": 20388 }, { "epoch": 0.8625518233353076, "grad_norm": 0.16855676472187042, "learning_rate": 0.001, "loss": 2.7802, "step": 20389 }, { "epoch": 0.862594128098824, "grad_norm": 0.1656714677810669, "learning_rate": 0.001, "loss": 2.3807, "step": 20390 }, { "epoch": 0.8626364328623403, "grad_norm": 0.38702452182769775, "learning_rate": 0.001, "loss": 2.1454, "step": 20391 }, { "epoch": 0.8626787376258567, "grad_norm": 0.17900040745735168, "learning_rate": 0.001, "loss": 3.0575, "step": 20392 }, { "epoch": 0.8627210423893731, "grad_norm": 0.17749975621700287, "learning_rate": 0.001, "loss": 1.8404, "step": 20393 }, { "epoch": 0.8627633471528894, "grad_norm": 0.17908912897109985, "learning_rate": 0.001, "loss": 1.9364, "step": 20394 }, { "epoch": 0.8628056519164058, "grad_norm": 0.7642483115196228, "learning_rate": 0.001, "loss": 3.3047, "step": 20395 }, { "epoch": 0.8628479566799222, "grad_norm": 0.2033914178609848, "learning_rate": 0.001, "loss": 2.6474, "step": 20396 }, { "epoch": 0.8628902614434385, "grad_norm": 0.7764859199523926, "learning_rate": 0.001, "loss": 2.9559, "step": 20397 }, { "epoch": 0.8629325662069549, "grad_norm": 0.1633528769016266, "learning_rate": 0.001, "loss": 2.0213, "step": 20398 }, { "epoch": 0.8629748709704713, "grad_norm": 0.16931723058223724, "learning_rate": 0.001, "loss": 1.7263, "step": 20399 }, { "epoch": 0.8630171757339876, "grad_norm": 0.1585397720336914, "learning_rate": 0.001, "loss": 2.8618, "step": 20400 }, { "epoch": 0.863059480497504, "grad_norm": 0.1430475264787674, "learning_rate": 0.001, "loss": 1.8755, "step": 20401 }, { "epoch": 0.8631017852610204, "grad_norm": 0.12753239274024963, "learning_rate": 0.001, "loss": 2.3516, "step": 20402 }, { "epoch": 0.8631440900245367, "grad_norm": 0.1464456021785736, "learning_rate": 0.001, "loss": 2.6126, "step": 20403 }, { "epoch": 0.8631863947880531, "grad_norm": 0.20379814505577087, "learning_rate": 0.001, "loss": 1.7753, "step": 20404 }, { "epoch": 0.8632286995515696, "grad_norm": 0.15957339107990265, "learning_rate": 0.001, "loss": 1.6083, "step": 20405 }, { "epoch": 0.8632710043150859, "grad_norm": 0.2491006702184677, "learning_rate": 0.001, "loss": 2.6279, "step": 20406 }, { "epoch": 0.8633133090786023, "grad_norm": 0.1599300354719162, "learning_rate": 0.001, "loss": 2.2997, "step": 20407 }, { "epoch": 0.8633556138421187, "grad_norm": 0.14536763727664948, "learning_rate": 0.001, "loss": 2.2268, "step": 20408 }, { "epoch": 0.863397918605635, "grad_norm": 0.12874636054039001, "learning_rate": 0.001, "loss": 2.0432, "step": 20409 }, { "epoch": 0.8634402233691514, "grad_norm": 0.1630754917860031, "learning_rate": 0.001, "loss": 1.519, "step": 20410 }, { "epoch": 0.8634825281326678, "grad_norm": 0.18456068634986877, "learning_rate": 0.001, "loss": 2.3714, "step": 20411 }, { "epoch": 0.8635248328961841, "grad_norm": 0.3489452004432678, "learning_rate": 0.001, "loss": 2.1257, "step": 20412 }, { "epoch": 0.8635671376597005, "grad_norm": 0.13253667950630188, "learning_rate": 0.001, "loss": 1.619, "step": 20413 }, { "epoch": 0.8636094424232168, "grad_norm": 0.14252302050590515, "learning_rate": 0.001, "loss": 1.8472, "step": 20414 }, { "epoch": 0.8636517471867332, "grad_norm": 0.3455297350883484, "learning_rate": 0.001, "loss": 1.9293, "step": 20415 }, { "epoch": 0.8636940519502496, "grad_norm": 0.14407595992088318, "learning_rate": 0.001, "loss": 2.3692, "step": 20416 }, { "epoch": 0.8637363567137659, "grad_norm": 0.1891782134771347, "learning_rate": 0.001, "loss": 2.3615, "step": 20417 }, { "epoch": 0.8637786614772823, "grad_norm": 0.17591020464897156, "learning_rate": 0.001, "loss": 2.9479, "step": 20418 }, { "epoch": 0.8638209662407987, "grad_norm": 2.7036447525024414, "learning_rate": 0.001, "loss": 1.6538, "step": 20419 }, { "epoch": 0.863863271004315, "grad_norm": 0.14303186535835266, "learning_rate": 0.001, "loss": 2.2801, "step": 20420 }, { "epoch": 0.8639055757678314, "grad_norm": 0.15167652070522308, "learning_rate": 0.001, "loss": 2.1755, "step": 20421 }, { "epoch": 0.8639478805313479, "grad_norm": 0.1477757692337036, "learning_rate": 0.001, "loss": 1.8386, "step": 20422 }, { "epoch": 0.8639901852948642, "grad_norm": 0.1253604143857956, "learning_rate": 0.001, "loss": 1.2549, "step": 20423 }, { "epoch": 0.8640324900583806, "grad_norm": 0.13061819970607758, "learning_rate": 0.001, "loss": 1.4896, "step": 20424 }, { "epoch": 0.864074794821897, "grad_norm": 0.21047991514205933, "learning_rate": 0.001, "loss": 2.2363, "step": 20425 }, { "epoch": 0.8641170995854133, "grad_norm": 0.1477625072002411, "learning_rate": 0.001, "loss": 1.5357, "step": 20426 }, { "epoch": 0.8641594043489297, "grad_norm": 0.19324536621570587, "learning_rate": 0.001, "loss": 1.9138, "step": 20427 }, { "epoch": 0.8642017091124461, "grad_norm": 0.508693516254425, "learning_rate": 0.001, "loss": 2.217, "step": 20428 }, { "epoch": 0.8642440138759624, "grad_norm": 0.15548713505268097, "learning_rate": 0.001, "loss": 1.7976, "step": 20429 }, { "epoch": 0.8642863186394788, "grad_norm": 0.18489326536655426, "learning_rate": 0.001, "loss": 3.005, "step": 20430 }, { "epoch": 0.8643286234029952, "grad_norm": 0.1529025137424469, "learning_rate": 0.001, "loss": 2.1427, "step": 20431 }, { "epoch": 0.8643709281665115, "grad_norm": 0.1769225001335144, "learning_rate": 0.001, "loss": 1.596, "step": 20432 }, { "epoch": 0.8644132329300279, "grad_norm": 0.12776906788349152, "learning_rate": 0.001, "loss": 1.8726, "step": 20433 }, { "epoch": 0.8644555376935443, "grad_norm": 0.1688249409198761, "learning_rate": 0.001, "loss": 1.9496, "step": 20434 }, { "epoch": 0.8644978424570606, "grad_norm": 0.6416119337081909, "learning_rate": 0.001, "loss": 2.3831, "step": 20435 }, { "epoch": 0.864540147220577, "grad_norm": 0.14832055568695068, "learning_rate": 0.001, "loss": 1.8445, "step": 20436 }, { "epoch": 0.8645824519840934, "grad_norm": 0.25075727701187134, "learning_rate": 0.001, "loss": 2.4437, "step": 20437 }, { "epoch": 0.8646247567476097, "grad_norm": 0.15989474952220917, "learning_rate": 0.001, "loss": 1.8512, "step": 20438 }, { "epoch": 0.8646670615111262, "grad_norm": 0.1592397540807724, "learning_rate": 0.001, "loss": 2.8129, "step": 20439 }, { "epoch": 0.8647093662746426, "grad_norm": 0.14796599745750427, "learning_rate": 0.001, "loss": 1.5769, "step": 20440 }, { "epoch": 0.8647516710381589, "grad_norm": 0.1716962605714798, "learning_rate": 0.001, "loss": 2.3114, "step": 20441 }, { "epoch": 0.8647939758016753, "grad_norm": 0.4446561336517334, "learning_rate": 0.001, "loss": 1.3337, "step": 20442 }, { "epoch": 0.8648362805651917, "grad_norm": 0.15819323062896729, "learning_rate": 0.001, "loss": 2.7212, "step": 20443 }, { "epoch": 0.864878585328708, "grad_norm": 1.2492092847824097, "learning_rate": 0.001, "loss": 2.112, "step": 20444 }, { "epoch": 0.8649208900922244, "grad_norm": 0.1683206856250763, "learning_rate": 0.001, "loss": 2.6859, "step": 20445 }, { "epoch": 0.8649631948557408, "grad_norm": 0.18615072965621948, "learning_rate": 0.001, "loss": 2.0135, "step": 20446 }, { "epoch": 0.8650054996192571, "grad_norm": 0.16364848613739014, "learning_rate": 0.001, "loss": 2.1575, "step": 20447 }, { "epoch": 0.8650478043827735, "grad_norm": 0.1338125467300415, "learning_rate": 0.001, "loss": 1.5287, "step": 20448 }, { "epoch": 0.8650901091462899, "grad_norm": 0.14159221947193146, "learning_rate": 0.001, "loss": 2.4444, "step": 20449 }, { "epoch": 0.8651324139098062, "grad_norm": 0.15110106766223907, "learning_rate": 0.001, "loss": 2.3197, "step": 20450 }, { "epoch": 0.8651747186733226, "grad_norm": 0.13722842931747437, "learning_rate": 0.001, "loss": 1.7881, "step": 20451 }, { "epoch": 0.865217023436839, "grad_norm": 0.13986808061599731, "learning_rate": 0.001, "loss": 1.699, "step": 20452 }, { "epoch": 0.8652593282003553, "grad_norm": 0.19141452014446259, "learning_rate": 0.001, "loss": 1.6697, "step": 20453 }, { "epoch": 0.8653016329638717, "grad_norm": 0.15517202019691467, "learning_rate": 0.001, "loss": 3.0925, "step": 20454 }, { "epoch": 0.8653439377273882, "grad_norm": 0.13340319693088531, "learning_rate": 0.001, "loss": 1.3833, "step": 20455 }, { "epoch": 0.8653862424909045, "grad_norm": 0.14518952369689941, "learning_rate": 0.001, "loss": 1.6842, "step": 20456 }, { "epoch": 0.8654285472544209, "grad_norm": 0.15267087519168854, "learning_rate": 0.001, "loss": 1.7472, "step": 20457 }, { "epoch": 0.8654708520179372, "grad_norm": 0.16295918822288513, "learning_rate": 0.001, "loss": 1.6337, "step": 20458 }, { "epoch": 0.8655131567814536, "grad_norm": 0.3994349241256714, "learning_rate": 0.001, "loss": 2.1025, "step": 20459 }, { "epoch": 0.86555546154497, "grad_norm": 0.14686226844787598, "learning_rate": 0.001, "loss": 2.2015, "step": 20460 }, { "epoch": 0.8655977663084863, "grad_norm": 2.716871500015259, "learning_rate": 0.001, "loss": 3.038, "step": 20461 }, { "epoch": 0.8656400710720027, "grad_norm": 0.152231365442276, "learning_rate": 0.001, "loss": 2.2926, "step": 20462 }, { "epoch": 0.8656823758355191, "grad_norm": 0.16370278596878052, "learning_rate": 0.001, "loss": 2.3014, "step": 20463 }, { "epoch": 0.8657246805990354, "grad_norm": 0.18813279271125793, "learning_rate": 0.001, "loss": 3.1802, "step": 20464 }, { "epoch": 0.8657669853625518, "grad_norm": 2.1478288173675537, "learning_rate": 0.001, "loss": 2.8626, "step": 20465 }, { "epoch": 0.8658092901260682, "grad_norm": 0.6545858979225159, "learning_rate": 0.001, "loss": 1.976, "step": 20466 }, { "epoch": 0.8658515948895845, "grad_norm": 0.163694828748703, "learning_rate": 0.001, "loss": 1.7941, "step": 20467 }, { "epoch": 0.8658938996531009, "grad_norm": 0.15311507880687714, "learning_rate": 0.001, "loss": 1.8408, "step": 20468 }, { "epoch": 0.8659362044166173, "grad_norm": 0.13591715693473816, "learning_rate": 0.001, "loss": 1.8756, "step": 20469 }, { "epoch": 0.8659785091801336, "grad_norm": 0.12307523190975189, "learning_rate": 0.001, "loss": 2.4576, "step": 20470 }, { "epoch": 0.86602081394365, "grad_norm": 0.23300588130950928, "learning_rate": 0.001, "loss": 1.5551, "step": 20471 }, { "epoch": 0.8660631187071665, "grad_norm": 0.30247703194618225, "learning_rate": 0.001, "loss": 1.9095, "step": 20472 }, { "epoch": 0.8661054234706828, "grad_norm": 0.12208034843206406, "learning_rate": 0.001, "loss": 2.1441, "step": 20473 }, { "epoch": 0.8661477282341992, "grad_norm": 0.427654504776001, "learning_rate": 0.001, "loss": 3.1424, "step": 20474 }, { "epoch": 0.8661900329977156, "grad_norm": 0.15135295689105988, "learning_rate": 0.001, "loss": 1.4906, "step": 20475 }, { "epoch": 0.8662323377612319, "grad_norm": 0.15214575827121735, "learning_rate": 0.001, "loss": 2.8835, "step": 20476 }, { "epoch": 0.8662746425247483, "grad_norm": 0.3086780309677124, "learning_rate": 0.001, "loss": 3.6189, "step": 20477 }, { "epoch": 0.8663169472882647, "grad_norm": 0.2229657918214798, "learning_rate": 0.001, "loss": 2.7436, "step": 20478 }, { "epoch": 0.866359252051781, "grad_norm": 0.21993772685527802, "learning_rate": 0.001, "loss": 2.4626, "step": 20479 }, { "epoch": 0.8664015568152974, "grad_norm": 7.104289531707764, "learning_rate": 0.001, "loss": 2.2925, "step": 20480 }, { "epoch": 0.8664438615788138, "grad_norm": 0.18431110680103302, "learning_rate": 0.001, "loss": 2.1564, "step": 20481 }, { "epoch": 0.8664861663423301, "grad_norm": 0.17499950528144836, "learning_rate": 0.001, "loss": 2.6352, "step": 20482 }, { "epoch": 0.8665284711058465, "grad_norm": 0.17781849205493927, "learning_rate": 0.001, "loss": 2.3366, "step": 20483 }, { "epoch": 0.8665707758693629, "grad_norm": 0.1824718415737152, "learning_rate": 0.001, "loss": 1.9301, "step": 20484 }, { "epoch": 0.8666130806328792, "grad_norm": 0.14647133648395538, "learning_rate": 0.001, "loss": 2.0635, "step": 20485 }, { "epoch": 0.8666553853963956, "grad_norm": 0.17452256381511688, "learning_rate": 0.001, "loss": 2.4909, "step": 20486 }, { "epoch": 0.866697690159912, "grad_norm": 0.15065862238407135, "learning_rate": 0.001, "loss": 2.3384, "step": 20487 }, { "epoch": 0.8667399949234283, "grad_norm": 0.19424915313720703, "learning_rate": 0.001, "loss": 2.3333, "step": 20488 }, { "epoch": 0.8667822996869448, "grad_norm": 0.15558700263500214, "learning_rate": 0.001, "loss": 2.7007, "step": 20489 }, { "epoch": 0.8668246044504612, "grad_norm": 0.14827044308185577, "learning_rate": 0.001, "loss": 1.6913, "step": 20490 }, { "epoch": 0.8668669092139775, "grad_norm": 0.48052114248275757, "learning_rate": 0.001, "loss": 2.0873, "step": 20491 }, { "epoch": 0.8669092139774939, "grad_norm": 0.1724882572889328, "learning_rate": 0.001, "loss": 2.2251, "step": 20492 }, { "epoch": 0.8669515187410103, "grad_norm": 0.2082466185092926, "learning_rate": 0.001, "loss": 2.881, "step": 20493 }, { "epoch": 0.8669938235045266, "grad_norm": 0.14403583109378815, "learning_rate": 0.001, "loss": 1.8366, "step": 20494 }, { "epoch": 0.867036128268043, "grad_norm": 0.1699734479188919, "learning_rate": 0.001, "loss": 3.2508, "step": 20495 }, { "epoch": 0.8670784330315594, "grad_norm": 0.1627493053674698, "learning_rate": 0.001, "loss": 1.6992, "step": 20496 }, { "epoch": 0.8671207377950757, "grad_norm": 0.1512979418039322, "learning_rate": 0.001, "loss": 1.8145, "step": 20497 }, { "epoch": 0.8671630425585921, "grad_norm": 0.1480405628681183, "learning_rate": 0.001, "loss": 2.0845, "step": 20498 }, { "epoch": 0.8672053473221085, "grad_norm": 0.12783098220825195, "learning_rate": 0.001, "loss": 3.377, "step": 20499 }, { "epoch": 0.8672476520856248, "grad_norm": 0.13607032597064972, "learning_rate": 0.001, "loss": 1.555, "step": 20500 }, { "epoch": 0.8672899568491412, "grad_norm": 0.1391061395406723, "learning_rate": 0.001, "loss": 1.4297, "step": 20501 }, { "epoch": 0.8673322616126576, "grad_norm": 0.12696748971939087, "learning_rate": 0.001, "loss": 1.5243, "step": 20502 }, { "epoch": 0.8673745663761739, "grad_norm": 0.15007275342941284, "learning_rate": 0.001, "loss": 2.754, "step": 20503 }, { "epoch": 0.8674168711396903, "grad_norm": 0.12282463908195496, "learning_rate": 0.001, "loss": 2.6723, "step": 20504 }, { "epoch": 0.8674591759032066, "grad_norm": 0.14588630199432373, "learning_rate": 0.001, "loss": 2.4033, "step": 20505 }, { "epoch": 0.8675014806667231, "grad_norm": 0.518017053604126, "learning_rate": 0.001, "loss": 2.2406, "step": 20506 }, { "epoch": 0.8675437854302395, "grad_norm": 0.14224489033222198, "learning_rate": 0.001, "loss": 1.4533, "step": 20507 }, { "epoch": 0.8675860901937558, "grad_norm": 0.13003142178058624, "learning_rate": 0.001, "loss": 2.5315, "step": 20508 }, { "epoch": 0.8676283949572722, "grad_norm": 0.14314842224121094, "learning_rate": 0.001, "loss": 3.1466, "step": 20509 }, { "epoch": 0.8676706997207886, "grad_norm": 0.18065118789672852, "learning_rate": 0.001, "loss": 2.1668, "step": 20510 }, { "epoch": 0.8677130044843049, "grad_norm": 0.16202561557292938, "learning_rate": 0.001, "loss": 1.7793, "step": 20511 }, { "epoch": 0.8677553092478213, "grad_norm": 0.13975529372692108, "learning_rate": 0.001, "loss": 2.85, "step": 20512 }, { "epoch": 0.8677976140113377, "grad_norm": 0.15059174597263336, "learning_rate": 0.001, "loss": 1.3228, "step": 20513 }, { "epoch": 0.867839918774854, "grad_norm": 0.15195417404174805, "learning_rate": 0.001, "loss": 1.7705, "step": 20514 }, { "epoch": 0.8678822235383704, "grad_norm": 0.159585103392601, "learning_rate": 0.001, "loss": 2.5388, "step": 20515 }, { "epoch": 0.8679245283018868, "grad_norm": 0.14191685616970062, "learning_rate": 0.001, "loss": 2.1223, "step": 20516 }, { "epoch": 0.8679668330654031, "grad_norm": 0.18546472489833832, "learning_rate": 0.001, "loss": 2.4321, "step": 20517 }, { "epoch": 0.8680091378289195, "grad_norm": 0.13588781654834747, "learning_rate": 0.001, "loss": 2.0784, "step": 20518 }, { "epoch": 0.8680514425924359, "grad_norm": 15.1656494140625, "learning_rate": 0.001, "loss": 1.8606, "step": 20519 }, { "epoch": 0.8680937473559522, "grad_norm": 0.1273522973060608, "learning_rate": 0.001, "loss": 1.9861, "step": 20520 }, { "epoch": 0.8681360521194686, "grad_norm": 0.3258301317691803, "learning_rate": 0.001, "loss": 2.4265, "step": 20521 }, { "epoch": 0.8681783568829851, "grad_norm": 0.1691277176141739, "learning_rate": 0.001, "loss": 1.8709, "step": 20522 }, { "epoch": 0.8682206616465014, "grad_norm": 0.1776563823223114, "learning_rate": 0.001, "loss": 1.9714, "step": 20523 }, { "epoch": 0.8682629664100178, "grad_norm": 0.19368451833724976, "learning_rate": 0.001, "loss": 3.038, "step": 20524 }, { "epoch": 0.8683052711735342, "grad_norm": 0.15642546117305756, "learning_rate": 0.001, "loss": 2.7047, "step": 20525 }, { "epoch": 0.8683475759370505, "grad_norm": 0.23724526166915894, "learning_rate": 0.001, "loss": 2.3636, "step": 20526 }, { "epoch": 0.8683898807005669, "grad_norm": 0.14943532645702362, "learning_rate": 0.001, "loss": 1.8734, "step": 20527 }, { "epoch": 0.8684321854640833, "grad_norm": 0.15623928606510162, "learning_rate": 0.001, "loss": 2.4747, "step": 20528 }, { "epoch": 0.8684744902275996, "grad_norm": 0.24184876680374146, "learning_rate": 0.001, "loss": 1.9066, "step": 20529 }, { "epoch": 0.868516794991116, "grad_norm": 0.523157000541687, "learning_rate": 0.001, "loss": 2.8953, "step": 20530 }, { "epoch": 0.8685590997546324, "grad_norm": 0.1399133801460266, "learning_rate": 0.001, "loss": 2.914, "step": 20531 }, { "epoch": 0.8686014045181487, "grad_norm": 0.3917766809463501, "learning_rate": 0.001, "loss": 1.447, "step": 20532 }, { "epoch": 0.8686437092816651, "grad_norm": 0.15621374547481537, "learning_rate": 0.001, "loss": 2.3168, "step": 20533 }, { "epoch": 0.8686860140451815, "grad_norm": 0.12716899812221527, "learning_rate": 0.001, "loss": 1.4502, "step": 20534 }, { "epoch": 0.8687283188086978, "grad_norm": 0.14455370604991913, "learning_rate": 0.001, "loss": 2.2383, "step": 20535 }, { "epoch": 0.8687706235722142, "grad_norm": 0.14087846875190735, "learning_rate": 0.001, "loss": 2.0278, "step": 20536 }, { "epoch": 0.8688129283357306, "grad_norm": 0.17462292313575745, "learning_rate": 0.001, "loss": 1.7217, "step": 20537 }, { "epoch": 0.868855233099247, "grad_norm": 0.16990859806537628, "learning_rate": 0.001, "loss": 3.8208, "step": 20538 }, { "epoch": 0.8688975378627634, "grad_norm": 0.131483256816864, "learning_rate": 0.001, "loss": 1.4108, "step": 20539 }, { "epoch": 0.8689398426262798, "grad_norm": 0.13738611340522766, "learning_rate": 0.001, "loss": 2.644, "step": 20540 }, { "epoch": 0.8689821473897961, "grad_norm": 0.2787913978099823, "learning_rate": 0.001, "loss": 1.9129, "step": 20541 }, { "epoch": 0.8690244521533125, "grad_norm": 0.8234505653381348, "learning_rate": 0.001, "loss": 1.6344, "step": 20542 }, { "epoch": 0.8690667569168289, "grad_norm": 0.15260133147239685, "learning_rate": 0.001, "loss": 1.8331, "step": 20543 }, { "epoch": 0.8691090616803452, "grad_norm": 0.12822067737579346, "learning_rate": 0.001, "loss": 2.4685, "step": 20544 }, { "epoch": 0.8691513664438616, "grad_norm": 0.14594188332557678, "learning_rate": 0.001, "loss": 1.5119, "step": 20545 }, { "epoch": 0.869193671207378, "grad_norm": 0.14818280935287476, "learning_rate": 0.001, "loss": 2.4656, "step": 20546 }, { "epoch": 0.8692359759708943, "grad_norm": 0.26366791129112244, "learning_rate": 0.001, "loss": 2.9585, "step": 20547 }, { "epoch": 0.8692782807344107, "grad_norm": 0.17102564871311188, "learning_rate": 0.001, "loss": 2.0877, "step": 20548 }, { "epoch": 0.869320585497927, "grad_norm": 0.1857893019914627, "learning_rate": 0.001, "loss": 2.5178, "step": 20549 }, { "epoch": 0.8693628902614434, "grad_norm": 0.26888343691825867, "learning_rate": 0.001, "loss": 1.9708, "step": 20550 }, { "epoch": 0.8694051950249598, "grad_norm": 0.4817790687084198, "learning_rate": 0.001, "loss": 1.3568, "step": 20551 }, { "epoch": 0.8694474997884761, "grad_norm": 0.6389137506484985, "learning_rate": 0.001, "loss": 1.9148, "step": 20552 }, { "epoch": 0.8694898045519925, "grad_norm": 0.17424023151397705, "learning_rate": 0.001, "loss": 1.8895, "step": 20553 }, { "epoch": 0.869532109315509, "grad_norm": 0.15143568813800812, "learning_rate": 0.001, "loss": 1.5483, "step": 20554 }, { "epoch": 0.8695744140790252, "grad_norm": 0.13719114661216736, "learning_rate": 0.001, "loss": 1.8091, "step": 20555 }, { "epoch": 0.8696167188425417, "grad_norm": 0.1803659200668335, "learning_rate": 0.001, "loss": 2.2232, "step": 20556 }, { "epoch": 0.8696590236060581, "grad_norm": 1.9404886960983276, "learning_rate": 0.001, "loss": 1.9271, "step": 20557 }, { "epoch": 0.8697013283695744, "grad_norm": 0.4086678624153137, "learning_rate": 0.001, "loss": 2.0375, "step": 20558 }, { "epoch": 0.8697436331330908, "grad_norm": 0.1896570473909378, "learning_rate": 0.001, "loss": 2.4307, "step": 20559 }, { "epoch": 0.8697859378966072, "grad_norm": 0.1653144508600235, "learning_rate": 0.001, "loss": 2.1597, "step": 20560 }, { "epoch": 0.8698282426601235, "grad_norm": 0.38588979840278625, "learning_rate": 0.001, "loss": 1.9474, "step": 20561 }, { "epoch": 0.8698705474236399, "grad_norm": 0.20403960347175598, "learning_rate": 0.001, "loss": 2.1097, "step": 20562 }, { "epoch": 0.8699128521871563, "grad_norm": 69.25297546386719, "learning_rate": 0.001, "loss": 2.0653, "step": 20563 }, { "epoch": 0.8699551569506726, "grad_norm": 7.444055557250977, "learning_rate": 0.001, "loss": 1.743, "step": 20564 }, { "epoch": 0.869997461714189, "grad_norm": 0.1590234786272049, "learning_rate": 0.001, "loss": 1.8023, "step": 20565 }, { "epoch": 0.8700397664777054, "grad_norm": 0.3205440640449524, "learning_rate": 0.001, "loss": 1.9231, "step": 20566 }, { "epoch": 0.8700820712412217, "grad_norm": 0.19606617093086243, "learning_rate": 0.001, "loss": 2.6873, "step": 20567 }, { "epoch": 0.8701243760047381, "grad_norm": 0.44805920124053955, "learning_rate": 0.001, "loss": 2.4899, "step": 20568 }, { "epoch": 0.8701666807682545, "grad_norm": 0.22577597200870514, "learning_rate": 0.001, "loss": 2.2703, "step": 20569 }, { "epoch": 0.8702089855317708, "grad_norm": 0.168071910738945, "learning_rate": 0.001, "loss": 1.7944, "step": 20570 }, { "epoch": 0.8702512902952872, "grad_norm": 0.17601118981838226, "learning_rate": 0.001, "loss": 1.8357, "step": 20571 }, { "epoch": 0.8702935950588037, "grad_norm": 0.1971052885055542, "learning_rate": 0.001, "loss": 1.9707, "step": 20572 }, { "epoch": 0.87033589982232, "grad_norm": 0.16236917674541473, "learning_rate": 0.001, "loss": 1.9057, "step": 20573 }, { "epoch": 0.8703782045858364, "grad_norm": 0.22024036943912506, "learning_rate": 0.001, "loss": 1.9232, "step": 20574 }, { "epoch": 0.8704205093493528, "grad_norm": 0.18604035675525665, "learning_rate": 0.001, "loss": 2.3201, "step": 20575 }, { "epoch": 0.8704628141128691, "grad_norm": 0.19944395124912262, "learning_rate": 0.001, "loss": 2.2175, "step": 20576 }, { "epoch": 0.8705051188763855, "grad_norm": 6.948441982269287, "learning_rate": 0.001, "loss": 2.322, "step": 20577 }, { "epoch": 0.8705474236399019, "grad_norm": 0.19903208315372467, "learning_rate": 0.001, "loss": 2.2643, "step": 20578 }, { "epoch": 0.8705897284034182, "grad_norm": 0.15716981887817383, "learning_rate": 0.001, "loss": 1.4093, "step": 20579 }, { "epoch": 0.8706320331669346, "grad_norm": 0.13574804365634918, "learning_rate": 0.001, "loss": 2.0119, "step": 20580 }, { "epoch": 0.870674337930451, "grad_norm": 0.15148890018463135, "learning_rate": 0.001, "loss": 2.0046, "step": 20581 }, { "epoch": 0.8707166426939673, "grad_norm": 0.15020988881587982, "learning_rate": 0.001, "loss": 2.022, "step": 20582 }, { "epoch": 0.8707589474574837, "grad_norm": 0.15000440180301666, "learning_rate": 0.001, "loss": 2.3382, "step": 20583 }, { "epoch": 0.8708012522210001, "grad_norm": 0.13530394434928894, "learning_rate": 0.001, "loss": 1.8668, "step": 20584 }, { "epoch": 0.8708435569845164, "grad_norm": 0.17123256623744965, "learning_rate": 0.001, "loss": 2.1002, "step": 20585 }, { "epoch": 0.8708858617480328, "grad_norm": 0.15746386349201202, "learning_rate": 0.001, "loss": 2.6803, "step": 20586 }, { "epoch": 0.8709281665115493, "grad_norm": 3.3827056884765625, "learning_rate": 0.001, "loss": 2.1643, "step": 20587 }, { "epoch": 0.8709704712750655, "grad_norm": 0.16982562839984894, "learning_rate": 0.001, "loss": 2.244, "step": 20588 }, { "epoch": 0.871012776038582, "grad_norm": 2.7574925422668457, "learning_rate": 0.001, "loss": 2.6158, "step": 20589 }, { "epoch": 0.8710550808020984, "grad_norm": 0.18137569725513458, "learning_rate": 0.001, "loss": 1.7065, "step": 20590 }, { "epoch": 0.8710973855656147, "grad_norm": 0.18702997267246246, "learning_rate": 0.001, "loss": 1.7625, "step": 20591 }, { "epoch": 0.8711396903291311, "grad_norm": 0.21095755696296692, "learning_rate": 0.001, "loss": 2.8752, "step": 20592 }, { "epoch": 0.8711819950926475, "grad_norm": 0.410576730966568, "learning_rate": 0.001, "loss": 1.894, "step": 20593 }, { "epoch": 0.8712242998561638, "grad_norm": 0.19124916195869446, "learning_rate": 0.001, "loss": 1.6841, "step": 20594 }, { "epoch": 0.8712666046196802, "grad_norm": 0.1525120586156845, "learning_rate": 0.001, "loss": 2.486, "step": 20595 }, { "epoch": 0.8713089093831965, "grad_norm": 0.2803274393081665, "learning_rate": 0.001, "loss": 2.2124, "step": 20596 }, { "epoch": 0.8713512141467129, "grad_norm": 6.939259052276611, "learning_rate": 0.001, "loss": 1.5995, "step": 20597 }, { "epoch": 0.8713935189102293, "grad_norm": 0.14391854405403137, "learning_rate": 0.001, "loss": 2.7667, "step": 20598 }, { "epoch": 0.8714358236737456, "grad_norm": 0.15434549748897552, "learning_rate": 0.001, "loss": 1.9091, "step": 20599 }, { "epoch": 0.871478128437262, "grad_norm": 0.17144039273262024, "learning_rate": 0.001, "loss": 1.7429, "step": 20600 }, { "epoch": 0.8715204332007784, "grad_norm": 0.15034642815589905, "learning_rate": 0.001, "loss": 1.6829, "step": 20601 }, { "epoch": 0.8715627379642947, "grad_norm": 0.17373819649219513, "learning_rate": 0.001, "loss": 1.9474, "step": 20602 }, { "epoch": 0.8716050427278111, "grad_norm": 0.7324623465538025, "learning_rate": 0.001, "loss": 2.0428, "step": 20603 }, { "epoch": 0.8716473474913276, "grad_norm": 1.5604692697525024, "learning_rate": 0.001, "loss": 3.489, "step": 20604 }, { "epoch": 0.8716896522548438, "grad_norm": 0.2737126350402832, "learning_rate": 0.001, "loss": 2.5385, "step": 20605 }, { "epoch": 0.8717319570183603, "grad_norm": 0.20344385504722595, "learning_rate": 0.001, "loss": 2.0625, "step": 20606 }, { "epoch": 0.8717742617818767, "grad_norm": 0.16309478878974915, "learning_rate": 0.001, "loss": 2.4926, "step": 20607 }, { "epoch": 0.871816566545393, "grad_norm": 0.17930370569229126, "learning_rate": 0.001, "loss": 1.9174, "step": 20608 }, { "epoch": 0.8718588713089094, "grad_norm": 0.25973576307296753, "learning_rate": 0.001, "loss": 2.8351, "step": 20609 }, { "epoch": 0.8719011760724258, "grad_norm": 2.8675005435943604, "learning_rate": 0.001, "loss": 3.2205, "step": 20610 }, { "epoch": 0.8719434808359421, "grad_norm": 0.17134712636470795, "learning_rate": 0.001, "loss": 2.1032, "step": 20611 }, { "epoch": 0.8719857855994585, "grad_norm": 0.14888092875480652, "learning_rate": 0.001, "loss": 2.7499, "step": 20612 }, { "epoch": 0.8720280903629749, "grad_norm": 0.13400807976722717, "learning_rate": 0.001, "loss": 1.7189, "step": 20613 }, { "epoch": 0.8720703951264912, "grad_norm": 0.12388193607330322, "learning_rate": 0.001, "loss": 2.486, "step": 20614 }, { "epoch": 0.8721126998900076, "grad_norm": 0.1444062441587448, "learning_rate": 0.001, "loss": 1.9426, "step": 20615 }, { "epoch": 0.872155004653524, "grad_norm": 0.13056041300296783, "learning_rate": 0.001, "loss": 1.5044, "step": 20616 }, { "epoch": 0.8721973094170403, "grad_norm": 0.1537250131368637, "learning_rate": 0.001, "loss": 2.2731, "step": 20617 }, { "epoch": 0.8722396141805567, "grad_norm": 0.21203458309173584, "learning_rate": 0.001, "loss": 2.0906, "step": 20618 }, { "epoch": 0.8722819189440731, "grad_norm": 0.19286859035491943, "learning_rate": 0.001, "loss": 1.6875, "step": 20619 }, { "epoch": 0.8723242237075894, "grad_norm": 2.2411351203918457, "learning_rate": 0.001, "loss": 2.294, "step": 20620 }, { "epoch": 0.8723665284711059, "grad_norm": 0.12386021763086319, "learning_rate": 0.001, "loss": 1.9246, "step": 20621 }, { "epoch": 0.8724088332346223, "grad_norm": 0.1696896106004715, "learning_rate": 0.001, "loss": 2.8406, "step": 20622 }, { "epoch": 0.8724511379981386, "grad_norm": 0.14747262001037598, "learning_rate": 0.001, "loss": 2.1123, "step": 20623 }, { "epoch": 0.872493442761655, "grad_norm": 0.6882927417755127, "learning_rate": 0.001, "loss": 2.9728, "step": 20624 }, { "epoch": 0.8725357475251714, "grad_norm": 0.13660912215709686, "learning_rate": 0.001, "loss": 1.3383, "step": 20625 }, { "epoch": 0.8725780522886877, "grad_norm": 0.15439388155937195, "learning_rate": 0.001, "loss": 2.3997, "step": 20626 }, { "epoch": 0.8726203570522041, "grad_norm": 0.16791464388370514, "learning_rate": 0.001, "loss": 2.0219, "step": 20627 }, { "epoch": 0.8726626618157205, "grad_norm": 0.17568817734718323, "learning_rate": 0.001, "loss": 2.2818, "step": 20628 }, { "epoch": 0.8727049665792368, "grad_norm": 0.1622392237186432, "learning_rate": 0.001, "loss": 2.0132, "step": 20629 }, { "epoch": 0.8727472713427532, "grad_norm": 0.15402914583683014, "learning_rate": 0.001, "loss": 1.813, "step": 20630 }, { "epoch": 0.8727895761062696, "grad_norm": 0.13786296546459198, "learning_rate": 0.001, "loss": 1.9361, "step": 20631 }, { "epoch": 0.8728318808697859, "grad_norm": 0.19948144257068634, "learning_rate": 0.001, "loss": 2.8847, "step": 20632 }, { "epoch": 0.8728741856333023, "grad_norm": 0.1480330228805542, "learning_rate": 0.001, "loss": 1.8211, "step": 20633 }, { "epoch": 0.8729164903968187, "grad_norm": 0.16000570356845856, "learning_rate": 0.001, "loss": 1.5714, "step": 20634 }, { "epoch": 0.872958795160335, "grad_norm": 1.0622891187667847, "learning_rate": 0.001, "loss": 2.1747, "step": 20635 }, { "epoch": 0.8730010999238514, "grad_norm": 0.192660391330719, "learning_rate": 0.001, "loss": 2.0402, "step": 20636 }, { "epoch": 0.8730434046873679, "grad_norm": 0.17580977082252502, "learning_rate": 0.001, "loss": 1.9806, "step": 20637 }, { "epoch": 0.8730857094508842, "grad_norm": 0.14421142637729645, "learning_rate": 0.001, "loss": 1.8746, "step": 20638 }, { "epoch": 0.8731280142144006, "grad_norm": 0.14612741768360138, "learning_rate": 0.001, "loss": 1.8898, "step": 20639 }, { "epoch": 0.8731703189779169, "grad_norm": 0.4607209861278534, "learning_rate": 0.001, "loss": 1.58, "step": 20640 }, { "epoch": 0.8732126237414333, "grad_norm": 0.13212980329990387, "learning_rate": 0.001, "loss": 2.2389, "step": 20641 }, { "epoch": 0.8732549285049497, "grad_norm": 0.14031372964382172, "learning_rate": 0.001, "loss": 2.0144, "step": 20642 }, { "epoch": 0.873297233268466, "grad_norm": 0.1437501460313797, "learning_rate": 0.001, "loss": 1.7929, "step": 20643 }, { "epoch": 0.8733395380319824, "grad_norm": 0.13045533001422882, "learning_rate": 0.001, "loss": 2.6569, "step": 20644 }, { "epoch": 0.8733818427954988, "grad_norm": 0.16348956525325775, "learning_rate": 0.001, "loss": 2.2738, "step": 20645 }, { "epoch": 0.8734241475590151, "grad_norm": 0.14668866991996765, "learning_rate": 0.001, "loss": 2.4409, "step": 20646 }, { "epoch": 0.8734664523225315, "grad_norm": 0.12461234629154205, "learning_rate": 0.001, "loss": 2.0134, "step": 20647 }, { "epoch": 0.8735087570860479, "grad_norm": 0.16764739155769348, "learning_rate": 0.001, "loss": 1.9933, "step": 20648 }, { "epoch": 0.8735510618495642, "grad_norm": 0.15134289860725403, "learning_rate": 0.001, "loss": 2.7479, "step": 20649 }, { "epoch": 0.8735933666130806, "grad_norm": 0.16396795213222504, "learning_rate": 0.001, "loss": 2.1933, "step": 20650 }, { "epoch": 0.873635671376597, "grad_norm": 0.15628643333911896, "learning_rate": 0.001, "loss": 1.9693, "step": 20651 }, { "epoch": 0.8736779761401133, "grad_norm": 0.15194100141525269, "learning_rate": 0.001, "loss": 2.0662, "step": 20652 }, { "epoch": 0.8737202809036297, "grad_norm": 0.14548543095588684, "learning_rate": 0.001, "loss": 1.4378, "step": 20653 }, { "epoch": 0.8737625856671462, "grad_norm": 0.4701218903064728, "learning_rate": 0.001, "loss": 2.1941, "step": 20654 }, { "epoch": 0.8738048904306625, "grad_norm": 0.2454076111316681, "learning_rate": 0.001, "loss": 1.673, "step": 20655 }, { "epoch": 0.8738471951941789, "grad_norm": 0.1675247997045517, "learning_rate": 0.001, "loss": 1.6599, "step": 20656 }, { "epoch": 0.8738894999576953, "grad_norm": 0.1386534571647644, "learning_rate": 0.001, "loss": 2.4196, "step": 20657 }, { "epoch": 0.8739318047212116, "grad_norm": 0.1279524713754654, "learning_rate": 0.001, "loss": 1.4271, "step": 20658 }, { "epoch": 0.873974109484728, "grad_norm": 1.2931326627731323, "learning_rate": 0.001, "loss": 1.8761, "step": 20659 }, { "epoch": 0.8740164142482444, "grad_norm": 0.18578656017780304, "learning_rate": 0.001, "loss": 1.5095, "step": 20660 }, { "epoch": 0.8740587190117607, "grad_norm": 0.17007368803024292, "learning_rate": 0.001, "loss": 1.7172, "step": 20661 }, { "epoch": 0.8741010237752771, "grad_norm": 0.1408626288175583, "learning_rate": 0.001, "loss": 2.1397, "step": 20662 }, { "epoch": 0.8741433285387935, "grad_norm": 0.1541232317686081, "learning_rate": 0.001, "loss": 2.3023, "step": 20663 }, { "epoch": 0.8741856333023098, "grad_norm": 1.1258147954940796, "learning_rate": 0.001, "loss": 1.9507, "step": 20664 }, { "epoch": 0.8742279380658262, "grad_norm": 0.13171681761741638, "learning_rate": 0.001, "loss": 2.7839, "step": 20665 }, { "epoch": 0.8742702428293426, "grad_norm": 0.13253676891326904, "learning_rate": 0.001, "loss": 2.3643, "step": 20666 }, { "epoch": 0.8743125475928589, "grad_norm": 0.1618097722530365, "learning_rate": 0.001, "loss": 2.0003, "step": 20667 }, { "epoch": 0.8743548523563753, "grad_norm": 0.1612173318862915, "learning_rate": 0.001, "loss": 1.7852, "step": 20668 }, { "epoch": 0.8743971571198917, "grad_norm": 0.20455177128314972, "learning_rate": 0.001, "loss": 1.7746, "step": 20669 }, { "epoch": 0.874439461883408, "grad_norm": 0.2330559939146042, "learning_rate": 0.001, "loss": 2.5461, "step": 20670 }, { "epoch": 0.8744817666469245, "grad_norm": 4.225989818572998, "learning_rate": 0.001, "loss": 2.0473, "step": 20671 }, { "epoch": 0.8745240714104409, "grad_norm": 4.388603210449219, "learning_rate": 0.001, "loss": 2.6031, "step": 20672 }, { "epoch": 0.8745663761739572, "grad_norm": 0.6356297135353088, "learning_rate": 0.001, "loss": 1.7837, "step": 20673 }, { "epoch": 0.8746086809374736, "grad_norm": 0.5951091647148132, "learning_rate": 0.001, "loss": 2.1406, "step": 20674 }, { "epoch": 0.87465098570099, "grad_norm": 0.1605328619480133, "learning_rate": 0.001, "loss": 1.8724, "step": 20675 }, { "epoch": 0.8746932904645063, "grad_norm": 0.22600284218788147, "learning_rate": 0.001, "loss": 1.5535, "step": 20676 }, { "epoch": 0.8747355952280227, "grad_norm": 0.15503069758415222, "learning_rate": 0.001, "loss": 1.7348, "step": 20677 }, { "epoch": 0.8747778999915391, "grad_norm": 0.5110114216804504, "learning_rate": 0.001, "loss": 2.3343, "step": 20678 }, { "epoch": 0.8748202047550554, "grad_norm": 0.16068077087402344, "learning_rate": 0.001, "loss": 2.4127, "step": 20679 }, { "epoch": 0.8748625095185718, "grad_norm": 4.883456230163574, "learning_rate": 0.001, "loss": 2.3072, "step": 20680 }, { "epoch": 0.8749048142820882, "grad_norm": 0.1795644909143448, "learning_rate": 0.001, "loss": 2.4872, "step": 20681 }, { "epoch": 0.8749471190456045, "grad_norm": 0.14965955913066864, "learning_rate": 0.001, "loss": 2.6468, "step": 20682 }, { "epoch": 0.8749894238091209, "grad_norm": 0.17840637266635895, "learning_rate": 0.001, "loss": 1.908, "step": 20683 }, { "epoch": 0.8750317285726372, "grad_norm": 0.18810167908668518, "learning_rate": 0.001, "loss": 1.8151, "step": 20684 }, { "epoch": 0.8750740333361536, "grad_norm": 0.20336748659610748, "learning_rate": 0.001, "loss": 2.9232, "step": 20685 }, { "epoch": 0.87511633809967, "grad_norm": 0.2506280243396759, "learning_rate": 0.001, "loss": 2.7538, "step": 20686 }, { "epoch": 0.8751586428631863, "grad_norm": 0.24045564234256744, "learning_rate": 0.001, "loss": 2.1233, "step": 20687 }, { "epoch": 0.8752009476267028, "grad_norm": 0.1784004122018814, "learning_rate": 0.001, "loss": 2.0315, "step": 20688 }, { "epoch": 0.8752432523902192, "grad_norm": 0.19089365005493164, "learning_rate": 0.001, "loss": 3.481, "step": 20689 }, { "epoch": 0.8752855571537355, "grad_norm": 0.15477164089679718, "learning_rate": 0.001, "loss": 2.0263, "step": 20690 }, { "epoch": 0.8753278619172519, "grad_norm": 0.22737683355808258, "learning_rate": 0.001, "loss": 3.2862, "step": 20691 }, { "epoch": 0.8753701666807683, "grad_norm": 0.142948180437088, "learning_rate": 0.001, "loss": 2.0765, "step": 20692 }, { "epoch": 0.8754124714442846, "grad_norm": 0.2728869616985321, "learning_rate": 0.001, "loss": 1.7859, "step": 20693 }, { "epoch": 0.875454776207801, "grad_norm": 2.2189605236053467, "learning_rate": 0.001, "loss": 2.2061, "step": 20694 }, { "epoch": 0.8754970809713174, "grad_norm": 0.28525206446647644, "learning_rate": 0.001, "loss": 2.3435, "step": 20695 }, { "epoch": 0.8755393857348337, "grad_norm": 2.395468235015869, "learning_rate": 0.001, "loss": 3.1819, "step": 20696 }, { "epoch": 0.8755816904983501, "grad_norm": 0.16036246716976166, "learning_rate": 0.001, "loss": 2.1362, "step": 20697 }, { "epoch": 0.8756239952618665, "grad_norm": 0.15271493792533875, "learning_rate": 0.001, "loss": 2.0569, "step": 20698 }, { "epoch": 0.8756663000253828, "grad_norm": 0.2659335732460022, "learning_rate": 0.001, "loss": 2.4376, "step": 20699 }, { "epoch": 0.8757086047888992, "grad_norm": 0.6031797528266907, "learning_rate": 0.001, "loss": 2.0957, "step": 20700 }, { "epoch": 0.8757509095524156, "grad_norm": 0.16728293895721436, "learning_rate": 0.001, "loss": 2.6738, "step": 20701 }, { "epoch": 0.8757932143159319, "grad_norm": 0.17319273948669434, "learning_rate": 0.001, "loss": 2.0715, "step": 20702 }, { "epoch": 0.8758355190794483, "grad_norm": 0.1556321531534195, "learning_rate": 0.001, "loss": 2.0015, "step": 20703 }, { "epoch": 0.8758778238429648, "grad_norm": 0.15744656324386597, "learning_rate": 0.001, "loss": 1.8593, "step": 20704 }, { "epoch": 0.875920128606481, "grad_norm": 0.13687211275100708, "learning_rate": 0.001, "loss": 1.7606, "step": 20705 }, { "epoch": 0.8759624333699975, "grad_norm": 0.20388025045394897, "learning_rate": 0.001, "loss": 2.3, "step": 20706 }, { "epoch": 0.8760047381335139, "grad_norm": 0.1699550747871399, "learning_rate": 0.001, "loss": 2.1128, "step": 20707 }, { "epoch": 0.8760470428970302, "grad_norm": 0.17213287949562073, "learning_rate": 0.001, "loss": 3.0813, "step": 20708 }, { "epoch": 0.8760893476605466, "grad_norm": 0.14642708003520966, "learning_rate": 0.001, "loss": 1.7773, "step": 20709 }, { "epoch": 0.876131652424063, "grad_norm": 0.24943861365318298, "learning_rate": 0.001, "loss": 1.8321, "step": 20710 }, { "epoch": 0.8761739571875793, "grad_norm": 0.26976755261421204, "learning_rate": 0.001, "loss": 1.9682, "step": 20711 }, { "epoch": 0.8762162619510957, "grad_norm": 0.2460852712392807, "learning_rate": 0.001, "loss": 1.6967, "step": 20712 }, { "epoch": 0.8762585667146121, "grad_norm": 0.13001763820648193, "learning_rate": 0.001, "loss": 1.8121, "step": 20713 }, { "epoch": 0.8763008714781284, "grad_norm": 0.17002592980861664, "learning_rate": 0.001, "loss": 2.4601, "step": 20714 }, { "epoch": 0.8763431762416448, "grad_norm": 0.15308377146720886, "learning_rate": 0.001, "loss": 2.1304, "step": 20715 }, { "epoch": 0.8763854810051612, "grad_norm": 0.19930648803710938, "learning_rate": 0.001, "loss": 2.1169, "step": 20716 }, { "epoch": 0.8764277857686775, "grad_norm": 0.15591351687908173, "learning_rate": 0.001, "loss": 3.023, "step": 20717 }, { "epoch": 0.8764700905321939, "grad_norm": 0.38943207263946533, "learning_rate": 0.001, "loss": 3.6015, "step": 20718 }, { "epoch": 0.8765123952957103, "grad_norm": 0.16425389051437378, "learning_rate": 0.001, "loss": 1.75, "step": 20719 }, { "epoch": 0.8765547000592266, "grad_norm": 0.1728183478116989, "learning_rate": 0.001, "loss": 2.144, "step": 20720 }, { "epoch": 0.876597004822743, "grad_norm": 1.0912002325057983, "learning_rate": 0.001, "loss": 2.133, "step": 20721 }, { "epoch": 0.8766393095862595, "grad_norm": 0.6945961117744446, "learning_rate": 0.001, "loss": 3.1325, "step": 20722 }, { "epoch": 0.8766816143497758, "grad_norm": 0.1368747353553772, "learning_rate": 0.001, "loss": 1.4968, "step": 20723 }, { "epoch": 0.8767239191132922, "grad_norm": 0.1539839655160904, "learning_rate": 0.001, "loss": 3.2301, "step": 20724 }, { "epoch": 0.8767662238768086, "grad_norm": 0.1409660428762436, "learning_rate": 0.001, "loss": 1.2512, "step": 20725 }, { "epoch": 0.8768085286403249, "grad_norm": 0.5920034050941467, "learning_rate": 0.001, "loss": 1.7211, "step": 20726 }, { "epoch": 0.8768508334038413, "grad_norm": 0.7836800217628479, "learning_rate": 0.001, "loss": 2.5221, "step": 20727 }, { "epoch": 0.8768931381673577, "grad_norm": 0.1688200980424881, "learning_rate": 0.001, "loss": 1.8585, "step": 20728 }, { "epoch": 0.876935442930874, "grad_norm": 2.877446174621582, "learning_rate": 0.001, "loss": 1.2657, "step": 20729 }, { "epoch": 0.8769777476943904, "grad_norm": 0.1525486409664154, "learning_rate": 0.001, "loss": 2.808, "step": 20730 }, { "epoch": 0.8770200524579067, "grad_norm": 0.17019006609916687, "learning_rate": 0.001, "loss": 2.4638, "step": 20731 }, { "epoch": 0.8770623572214231, "grad_norm": 5.263099670410156, "learning_rate": 0.001, "loss": 2.6641, "step": 20732 }, { "epoch": 0.8771046619849395, "grad_norm": 0.18145911395549774, "learning_rate": 0.001, "loss": 2.248, "step": 20733 }, { "epoch": 0.8771469667484558, "grad_norm": 0.1718670129776001, "learning_rate": 0.001, "loss": 2.284, "step": 20734 }, { "epoch": 0.8771892715119722, "grad_norm": 0.16717854142189026, "learning_rate": 0.001, "loss": 2.2931, "step": 20735 }, { "epoch": 0.8772315762754886, "grad_norm": 0.17688849568367004, "learning_rate": 0.001, "loss": 2.4102, "step": 20736 }, { "epoch": 0.8772738810390049, "grad_norm": 0.19576434791088104, "learning_rate": 0.001, "loss": 2.9707, "step": 20737 }, { "epoch": 0.8773161858025214, "grad_norm": 0.16361911594867706, "learning_rate": 0.001, "loss": 1.8927, "step": 20738 }, { "epoch": 0.8773584905660378, "grad_norm": 0.20847147703170776, "learning_rate": 0.001, "loss": 2.5862, "step": 20739 }, { "epoch": 0.8774007953295541, "grad_norm": 0.1973280906677246, "learning_rate": 0.001, "loss": 1.5039, "step": 20740 }, { "epoch": 0.8774431000930705, "grad_norm": 0.18212057650089264, "learning_rate": 0.001, "loss": 2.205, "step": 20741 }, { "epoch": 0.8774854048565869, "grad_norm": 0.7270743250846863, "learning_rate": 0.001, "loss": 1.9571, "step": 20742 }, { "epoch": 0.8775277096201032, "grad_norm": 0.18873396515846252, "learning_rate": 0.001, "loss": 1.9786, "step": 20743 }, { "epoch": 0.8775700143836196, "grad_norm": 0.18425945937633514, "learning_rate": 0.001, "loss": 2.1827, "step": 20744 }, { "epoch": 0.877612319147136, "grad_norm": 0.23821745812892914, "learning_rate": 0.001, "loss": 2.3047, "step": 20745 }, { "epoch": 0.8776546239106523, "grad_norm": 0.16767555475234985, "learning_rate": 0.001, "loss": 3.2195, "step": 20746 }, { "epoch": 0.8776969286741687, "grad_norm": 0.2009771168231964, "learning_rate": 0.001, "loss": 2.3288, "step": 20747 }, { "epoch": 0.8777392334376851, "grad_norm": 0.19222243130207062, "learning_rate": 0.001, "loss": 2.1529, "step": 20748 }, { "epoch": 0.8777815382012014, "grad_norm": 0.28680652379989624, "learning_rate": 0.001, "loss": 1.7814, "step": 20749 }, { "epoch": 0.8778238429647178, "grad_norm": 0.18338562548160553, "learning_rate": 0.001, "loss": 3.939, "step": 20750 }, { "epoch": 0.8778661477282342, "grad_norm": 0.9251672029495239, "learning_rate": 0.001, "loss": 1.9422, "step": 20751 }, { "epoch": 0.8779084524917505, "grad_norm": 0.5910675525665283, "learning_rate": 0.001, "loss": 1.9307, "step": 20752 }, { "epoch": 0.877950757255267, "grad_norm": 0.2055887132883072, "learning_rate": 0.001, "loss": 2.2307, "step": 20753 }, { "epoch": 0.8779930620187834, "grad_norm": 0.27294355630874634, "learning_rate": 0.001, "loss": 2.7468, "step": 20754 }, { "epoch": 0.8780353667822997, "grad_norm": 0.14302602410316467, "learning_rate": 0.001, "loss": 1.4932, "step": 20755 }, { "epoch": 0.8780776715458161, "grad_norm": 0.15693305432796478, "learning_rate": 0.001, "loss": 2.1727, "step": 20756 }, { "epoch": 0.8781199763093325, "grad_norm": 2.7029690742492676, "learning_rate": 0.001, "loss": 2.2942, "step": 20757 }, { "epoch": 0.8781622810728488, "grad_norm": 0.16679178178310394, "learning_rate": 0.001, "loss": 2.4154, "step": 20758 }, { "epoch": 0.8782045858363652, "grad_norm": 0.29728999733924866, "learning_rate": 0.001, "loss": 1.5379, "step": 20759 }, { "epoch": 0.8782468905998816, "grad_norm": 0.7710385918617249, "learning_rate": 0.001, "loss": 2.386, "step": 20760 }, { "epoch": 0.8782891953633979, "grad_norm": 0.2641301453113556, "learning_rate": 0.001, "loss": 1.7395, "step": 20761 }, { "epoch": 0.8783315001269143, "grad_norm": 0.15455834567546844, "learning_rate": 0.001, "loss": 2.7225, "step": 20762 }, { "epoch": 0.8783738048904307, "grad_norm": 1.485619068145752, "learning_rate": 0.001, "loss": 2.3048, "step": 20763 }, { "epoch": 0.878416109653947, "grad_norm": 0.675614595413208, "learning_rate": 0.001, "loss": 1.5047, "step": 20764 }, { "epoch": 0.8784584144174634, "grad_norm": 0.18644104897975922, "learning_rate": 0.001, "loss": 1.9652, "step": 20765 }, { "epoch": 0.8785007191809798, "grad_norm": 0.1697007417678833, "learning_rate": 0.001, "loss": 1.9008, "step": 20766 }, { "epoch": 0.8785430239444961, "grad_norm": 0.1929423213005066, "learning_rate": 0.001, "loss": 2.574, "step": 20767 }, { "epoch": 0.8785853287080125, "grad_norm": 0.3777572810649872, "learning_rate": 0.001, "loss": 2.3181, "step": 20768 }, { "epoch": 0.878627633471529, "grad_norm": 0.17042919993400574, "learning_rate": 0.001, "loss": 2.2234, "step": 20769 }, { "epoch": 0.8786699382350452, "grad_norm": 0.19270406663417816, "learning_rate": 0.001, "loss": 1.4735, "step": 20770 }, { "epoch": 0.8787122429985617, "grad_norm": 0.2564689517021179, "learning_rate": 0.001, "loss": 1.9759, "step": 20771 }, { "epoch": 0.8787545477620781, "grad_norm": 4.633014678955078, "learning_rate": 0.001, "loss": 1.7299, "step": 20772 }, { "epoch": 0.8787968525255944, "grad_norm": 0.14340277016162872, "learning_rate": 0.001, "loss": 1.7605, "step": 20773 }, { "epoch": 0.8788391572891108, "grad_norm": 0.1704220324754715, "learning_rate": 0.001, "loss": 2.0632, "step": 20774 }, { "epoch": 0.8788814620526271, "grad_norm": 0.17304351925849915, "learning_rate": 0.001, "loss": 2.2619, "step": 20775 }, { "epoch": 0.8789237668161435, "grad_norm": 0.1750032901763916, "learning_rate": 0.001, "loss": 1.918, "step": 20776 }, { "epoch": 0.8789660715796599, "grad_norm": 4.89594030380249, "learning_rate": 0.001, "loss": 1.3372, "step": 20777 }, { "epoch": 0.8790083763431762, "grad_norm": 0.172307088971138, "learning_rate": 0.001, "loss": 2.11, "step": 20778 }, { "epoch": 0.8790506811066926, "grad_norm": 0.1498982012271881, "learning_rate": 0.001, "loss": 3.3868, "step": 20779 }, { "epoch": 0.879092985870209, "grad_norm": 0.1848546415567398, "learning_rate": 0.001, "loss": 1.8795, "step": 20780 }, { "epoch": 0.8791352906337253, "grad_norm": 2.189716100692749, "learning_rate": 0.001, "loss": 3.0549, "step": 20781 }, { "epoch": 0.8791775953972417, "grad_norm": 0.7227365970611572, "learning_rate": 0.001, "loss": 2.0596, "step": 20782 }, { "epoch": 0.8792199001607581, "grad_norm": 0.1363682597875595, "learning_rate": 0.001, "loss": 1.9833, "step": 20783 }, { "epoch": 0.8792622049242744, "grad_norm": 0.4821024239063263, "learning_rate": 0.001, "loss": 2.1675, "step": 20784 }, { "epoch": 0.8793045096877908, "grad_norm": 0.16702604293823242, "learning_rate": 0.001, "loss": 2.4858, "step": 20785 }, { "epoch": 0.8793468144513072, "grad_norm": 9.462590217590332, "learning_rate": 0.001, "loss": 1.7947, "step": 20786 }, { "epoch": 0.8793891192148235, "grad_norm": 0.14354799687862396, "learning_rate": 0.001, "loss": 2.4982, "step": 20787 }, { "epoch": 0.87943142397834, "grad_norm": 0.17250943183898926, "learning_rate": 0.001, "loss": 2.817, "step": 20788 }, { "epoch": 0.8794737287418564, "grad_norm": 0.13783836364746094, "learning_rate": 0.001, "loss": 2.5655, "step": 20789 }, { "epoch": 0.8795160335053727, "grad_norm": 0.18853969871997833, "learning_rate": 0.001, "loss": 2.2248, "step": 20790 }, { "epoch": 0.8795583382688891, "grad_norm": 0.1598181277513504, "learning_rate": 0.001, "loss": 2.6343, "step": 20791 }, { "epoch": 0.8796006430324055, "grad_norm": 0.16585583984851837, "learning_rate": 0.001, "loss": 1.8722, "step": 20792 }, { "epoch": 0.8796429477959218, "grad_norm": 0.20554226636886597, "learning_rate": 0.001, "loss": 2.6009, "step": 20793 }, { "epoch": 0.8796852525594382, "grad_norm": 2.5491397380828857, "learning_rate": 0.001, "loss": 1.7133, "step": 20794 }, { "epoch": 0.8797275573229546, "grad_norm": 0.1531909704208374, "learning_rate": 0.001, "loss": 2.1605, "step": 20795 }, { "epoch": 0.8797698620864709, "grad_norm": 0.16960613429546356, "learning_rate": 0.001, "loss": 2.6522, "step": 20796 }, { "epoch": 0.8798121668499873, "grad_norm": 0.20041488111019135, "learning_rate": 0.001, "loss": 1.9546, "step": 20797 }, { "epoch": 0.8798544716135037, "grad_norm": 0.20710138976573944, "learning_rate": 0.001, "loss": 2.7599, "step": 20798 }, { "epoch": 0.87989677637702, "grad_norm": 0.153824120759964, "learning_rate": 0.001, "loss": 2.2814, "step": 20799 }, { "epoch": 0.8799390811405364, "grad_norm": 0.1634339690208435, "learning_rate": 0.001, "loss": 2.3355, "step": 20800 }, { "epoch": 0.8799813859040528, "grad_norm": 0.19159682095050812, "learning_rate": 0.001, "loss": 1.9803, "step": 20801 }, { "epoch": 0.8800236906675691, "grad_norm": 0.40518084168434143, "learning_rate": 0.001, "loss": 2.6101, "step": 20802 }, { "epoch": 0.8800659954310855, "grad_norm": 0.19787724316120148, "learning_rate": 0.001, "loss": 2.0322, "step": 20803 }, { "epoch": 0.880108300194602, "grad_norm": 0.18997430801391602, "learning_rate": 0.001, "loss": 1.7237, "step": 20804 }, { "epoch": 0.8801506049581183, "grad_norm": 0.4623604118824005, "learning_rate": 0.001, "loss": 2.3957, "step": 20805 }, { "epoch": 0.8801929097216347, "grad_norm": 0.14696352183818817, "learning_rate": 0.001, "loss": 2.1396, "step": 20806 }, { "epoch": 0.8802352144851511, "grad_norm": 0.2328002154827118, "learning_rate": 0.001, "loss": 2.0078, "step": 20807 }, { "epoch": 0.8802775192486674, "grad_norm": 0.16920128464698792, "learning_rate": 0.001, "loss": 1.444, "step": 20808 }, { "epoch": 0.8803198240121838, "grad_norm": 0.30305415391921997, "learning_rate": 0.001, "loss": 3.162, "step": 20809 }, { "epoch": 0.8803621287757002, "grad_norm": 0.2750433087348938, "learning_rate": 0.001, "loss": 1.8929, "step": 20810 }, { "epoch": 0.8804044335392165, "grad_norm": 0.38231226801872253, "learning_rate": 0.001, "loss": 2.9644, "step": 20811 }, { "epoch": 0.8804467383027329, "grad_norm": 0.14932866394519806, "learning_rate": 0.001, "loss": 2.08, "step": 20812 }, { "epoch": 0.8804890430662493, "grad_norm": 0.17217305302619934, "learning_rate": 0.001, "loss": 2.4867, "step": 20813 }, { "epoch": 0.8805313478297656, "grad_norm": 0.1518954336643219, "learning_rate": 0.001, "loss": 2.6287, "step": 20814 }, { "epoch": 0.880573652593282, "grad_norm": 0.1420203298330307, "learning_rate": 0.001, "loss": 2.0718, "step": 20815 }, { "epoch": 0.8806159573567984, "grad_norm": 0.15746936202049255, "learning_rate": 0.001, "loss": 1.3726, "step": 20816 }, { "epoch": 0.8806582621203147, "grad_norm": 0.20516842603683472, "learning_rate": 0.001, "loss": 2.5636, "step": 20817 }, { "epoch": 0.8807005668838311, "grad_norm": 0.18061251938343048, "learning_rate": 0.001, "loss": 1.9442, "step": 20818 }, { "epoch": 0.8807428716473474, "grad_norm": 0.19963960349559784, "learning_rate": 0.001, "loss": 1.8834, "step": 20819 }, { "epoch": 0.8807851764108638, "grad_norm": 0.2850932776927948, "learning_rate": 0.001, "loss": 2.3297, "step": 20820 }, { "epoch": 0.8808274811743803, "grad_norm": 0.16823464632034302, "learning_rate": 0.001, "loss": 2.6427, "step": 20821 }, { "epoch": 0.8808697859378966, "grad_norm": 0.16549649834632874, "learning_rate": 0.001, "loss": 2.9588, "step": 20822 }, { "epoch": 0.880912090701413, "grad_norm": 0.18909013271331787, "learning_rate": 0.001, "loss": 2.0608, "step": 20823 }, { "epoch": 0.8809543954649294, "grad_norm": 0.16195768117904663, "learning_rate": 0.001, "loss": 2.2387, "step": 20824 }, { "epoch": 0.8809967002284457, "grad_norm": 0.14382191002368927, "learning_rate": 0.001, "loss": 2.3996, "step": 20825 }, { "epoch": 0.8810390049919621, "grad_norm": 0.1621260792016983, "learning_rate": 0.001, "loss": 2.7321, "step": 20826 }, { "epoch": 0.8810813097554785, "grad_norm": 0.13346368074417114, "learning_rate": 0.001, "loss": 3.0278, "step": 20827 }, { "epoch": 0.8811236145189948, "grad_norm": 0.14685030281543732, "learning_rate": 0.001, "loss": 1.7467, "step": 20828 }, { "epoch": 0.8811659192825112, "grad_norm": 0.15578360855579376, "learning_rate": 0.001, "loss": 3.2155, "step": 20829 }, { "epoch": 0.8812082240460276, "grad_norm": 1.1892778873443604, "learning_rate": 0.001, "loss": 2.3981, "step": 20830 }, { "epoch": 0.8812505288095439, "grad_norm": 0.13254669308662415, "learning_rate": 0.001, "loss": 2.8435, "step": 20831 }, { "epoch": 0.8812928335730603, "grad_norm": 0.1270717978477478, "learning_rate": 0.001, "loss": 2.3439, "step": 20832 }, { "epoch": 0.8813351383365767, "grad_norm": 0.1563170850276947, "learning_rate": 0.001, "loss": 1.8762, "step": 20833 }, { "epoch": 0.881377443100093, "grad_norm": 0.8175224661827087, "learning_rate": 0.001, "loss": 2.7527, "step": 20834 }, { "epoch": 0.8814197478636094, "grad_norm": 0.13206033408641815, "learning_rate": 0.001, "loss": 2.0897, "step": 20835 }, { "epoch": 0.8814620526271258, "grad_norm": 0.2126411646604538, "learning_rate": 0.001, "loss": 3.5215, "step": 20836 }, { "epoch": 0.8815043573906421, "grad_norm": 0.12830336391925812, "learning_rate": 0.001, "loss": 1.3334, "step": 20837 }, { "epoch": 0.8815466621541586, "grad_norm": 0.601836621761322, "learning_rate": 0.001, "loss": 1.9787, "step": 20838 }, { "epoch": 0.881588966917675, "grad_norm": 0.13936357200145721, "learning_rate": 0.001, "loss": 2.0659, "step": 20839 }, { "epoch": 0.8816312716811913, "grad_norm": 0.1285770684480667, "learning_rate": 0.001, "loss": 2.1898, "step": 20840 }, { "epoch": 0.8816735764447077, "grad_norm": 0.12381594628095627, "learning_rate": 0.001, "loss": 1.7729, "step": 20841 }, { "epoch": 0.8817158812082241, "grad_norm": 0.12983018159866333, "learning_rate": 0.001, "loss": 2.5275, "step": 20842 }, { "epoch": 0.8817581859717404, "grad_norm": 0.1269802451133728, "learning_rate": 0.001, "loss": 2.2177, "step": 20843 }, { "epoch": 0.8818004907352568, "grad_norm": 0.12933611869812012, "learning_rate": 0.001, "loss": 1.416, "step": 20844 }, { "epoch": 0.8818427954987732, "grad_norm": 0.5921683311462402, "learning_rate": 0.001, "loss": 2.5021, "step": 20845 }, { "epoch": 0.8818851002622895, "grad_norm": 0.15616628527641296, "learning_rate": 0.001, "loss": 1.7597, "step": 20846 }, { "epoch": 0.8819274050258059, "grad_norm": 0.16037492454051971, "learning_rate": 0.001, "loss": 3.31, "step": 20847 }, { "epoch": 0.8819697097893223, "grad_norm": 0.19823972880840302, "learning_rate": 0.001, "loss": 2.1503, "step": 20848 }, { "epoch": 0.8820120145528386, "grad_norm": 0.17855338752269745, "learning_rate": 0.001, "loss": 2.6852, "step": 20849 }, { "epoch": 0.882054319316355, "grad_norm": 0.15427084267139435, "learning_rate": 0.001, "loss": 2.0922, "step": 20850 }, { "epoch": 0.8820966240798714, "grad_norm": 0.14406783878803253, "learning_rate": 0.001, "loss": 2.3578, "step": 20851 }, { "epoch": 0.8821389288433877, "grad_norm": 0.16214242577552795, "learning_rate": 0.001, "loss": 1.3277, "step": 20852 }, { "epoch": 0.8821812336069041, "grad_norm": 0.1684330701828003, "learning_rate": 0.001, "loss": 2.0406, "step": 20853 }, { "epoch": 0.8822235383704206, "grad_norm": 8.746175765991211, "learning_rate": 0.001, "loss": 2.3627, "step": 20854 }, { "epoch": 0.8822658431339369, "grad_norm": 0.17695072293281555, "learning_rate": 0.001, "loss": 1.8508, "step": 20855 }, { "epoch": 0.8823081478974533, "grad_norm": 0.147260844707489, "learning_rate": 0.001, "loss": 1.9841, "step": 20856 }, { "epoch": 0.8823504526609697, "grad_norm": 0.13849042356014252, "learning_rate": 0.001, "loss": 1.6593, "step": 20857 }, { "epoch": 0.882392757424486, "grad_norm": 1.9527419805526733, "learning_rate": 0.001, "loss": 2.0942, "step": 20858 }, { "epoch": 0.8824350621880024, "grad_norm": 0.18281447887420654, "learning_rate": 0.001, "loss": 3.9133, "step": 20859 }, { "epoch": 0.8824773669515188, "grad_norm": 0.8285083174705505, "learning_rate": 0.001, "loss": 2.7313, "step": 20860 }, { "epoch": 0.8825196717150351, "grad_norm": 0.163629949092865, "learning_rate": 0.001, "loss": 1.8258, "step": 20861 }, { "epoch": 0.8825619764785515, "grad_norm": 0.6261329054832458, "learning_rate": 0.001, "loss": 1.9942, "step": 20862 }, { "epoch": 0.8826042812420679, "grad_norm": 0.11821243911981583, "learning_rate": 0.001, "loss": 2.0009, "step": 20863 }, { "epoch": 0.8826465860055842, "grad_norm": 0.17022605240345, "learning_rate": 0.001, "loss": 2.0427, "step": 20864 }, { "epoch": 0.8826888907691006, "grad_norm": 0.28422749042510986, "learning_rate": 0.001, "loss": 2.141, "step": 20865 }, { "epoch": 0.8827311955326169, "grad_norm": 0.1458466500043869, "learning_rate": 0.001, "loss": 1.9478, "step": 20866 }, { "epoch": 0.8827735002961333, "grad_norm": 0.1322123408317566, "learning_rate": 0.001, "loss": 1.4996, "step": 20867 }, { "epoch": 0.8828158050596497, "grad_norm": 0.1259404420852661, "learning_rate": 0.001, "loss": 1.9031, "step": 20868 }, { "epoch": 0.882858109823166, "grad_norm": 0.23596730828285217, "learning_rate": 0.001, "loss": 1.8853, "step": 20869 }, { "epoch": 0.8829004145866824, "grad_norm": 0.15984271466732025, "learning_rate": 0.001, "loss": 3.1109, "step": 20870 }, { "epoch": 0.8829427193501989, "grad_norm": 0.1410568356513977, "learning_rate": 0.001, "loss": 2.5422, "step": 20871 }, { "epoch": 0.8829850241137152, "grad_norm": 0.15058191120624542, "learning_rate": 0.001, "loss": 2.1368, "step": 20872 }, { "epoch": 0.8830273288772316, "grad_norm": 0.1391940861940384, "learning_rate": 0.001, "loss": 2.1887, "step": 20873 }, { "epoch": 0.883069633640748, "grad_norm": 0.1491466760635376, "learning_rate": 0.001, "loss": 1.7719, "step": 20874 }, { "epoch": 0.8831119384042643, "grad_norm": 0.13309025764465332, "learning_rate": 0.001, "loss": 1.9236, "step": 20875 }, { "epoch": 0.8831542431677807, "grad_norm": 0.1632324457168579, "learning_rate": 0.001, "loss": 2.0839, "step": 20876 }, { "epoch": 0.8831965479312971, "grad_norm": 0.26213958859443665, "learning_rate": 0.001, "loss": 2.0513, "step": 20877 }, { "epoch": 0.8832388526948134, "grad_norm": 0.14815469086170197, "learning_rate": 0.001, "loss": 2.0672, "step": 20878 }, { "epoch": 0.8832811574583298, "grad_norm": 0.243429496884346, "learning_rate": 0.001, "loss": 3.2333, "step": 20879 }, { "epoch": 0.8833234622218462, "grad_norm": 0.5001935958862305, "learning_rate": 0.001, "loss": 2.6958, "step": 20880 }, { "epoch": 0.8833657669853625, "grad_norm": 0.14644937217235565, "learning_rate": 0.001, "loss": 1.6308, "step": 20881 }, { "epoch": 0.8834080717488789, "grad_norm": 0.1332141011953354, "learning_rate": 0.001, "loss": 1.7561, "step": 20882 }, { "epoch": 0.8834503765123953, "grad_norm": 0.5288890600204468, "learning_rate": 0.001, "loss": 2.2926, "step": 20883 }, { "epoch": 0.8834926812759116, "grad_norm": 0.14572833478450775, "learning_rate": 0.001, "loss": 2.7297, "step": 20884 }, { "epoch": 0.883534986039428, "grad_norm": 0.16958162188529968, "learning_rate": 0.001, "loss": 2.3938, "step": 20885 }, { "epoch": 0.8835772908029444, "grad_norm": 0.16472996771335602, "learning_rate": 0.001, "loss": 2.0189, "step": 20886 }, { "epoch": 0.8836195955664607, "grad_norm": 0.14302243292331696, "learning_rate": 0.001, "loss": 1.9094, "step": 20887 }, { "epoch": 0.8836619003299772, "grad_norm": 0.14594019949436188, "learning_rate": 0.001, "loss": 1.9324, "step": 20888 }, { "epoch": 0.8837042050934936, "grad_norm": 0.1699601113796234, "learning_rate": 0.001, "loss": 2.5119, "step": 20889 }, { "epoch": 0.8837465098570099, "grad_norm": 0.16255362331867218, "learning_rate": 0.001, "loss": 3.1678, "step": 20890 }, { "epoch": 0.8837888146205263, "grad_norm": 0.2458498328924179, "learning_rate": 0.001, "loss": 1.8441, "step": 20891 }, { "epoch": 0.8838311193840427, "grad_norm": 0.14497283101081848, "learning_rate": 0.001, "loss": 2.5701, "step": 20892 }, { "epoch": 0.883873424147559, "grad_norm": 2.2221720218658447, "learning_rate": 0.001, "loss": 2.7294, "step": 20893 }, { "epoch": 0.8839157289110754, "grad_norm": 0.17696496844291687, "learning_rate": 0.001, "loss": 2.1181, "step": 20894 }, { "epoch": 0.8839580336745918, "grad_norm": 0.1702883541584015, "learning_rate": 0.001, "loss": 2.9156, "step": 20895 }, { "epoch": 0.8840003384381081, "grad_norm": 0.1452932506799698, "learning_rate": 0.001, "loss": 1.5805, "step": 20896 }, { "epoch": 0.8840426432016245, "grad_norm": 0.14172394573688507, "learning_rate": 0.001, "loss": 1.5672, "step": 20897 }, { "epoch": 0.8840849479651409, "grad_norm": 0.1812184453010559, "learning_rate": 0.001, "loss": 2.7763, "step": 20898 }, { "epoch": 0.8841272527286572, "grad_norm": 0.1486140936613083, "learning_rate": 0.001, "loss": 2.2094, "step": 20899 }, { "epoch": 0.8841695574921736, "grad_norm": 0.17032857239246368, "learning_rate": 0.001, "loss": 1.8822, "step": 20900 }, { "epoch": 0.88421186225569, "grad_norm": 0.21682803332805634, "learning_rate": 0.001, "loss": 2.0348, "step": 20901 }, { "epoch": 0.8842541670192063, "grad_norm": 0.1323508471250534, "learning_rate": 0.001, "loss": 1.682, "step": 20902 }, { "epoch": 0.8842964717827227, "grad_norm": 0.3486897647380829, "learning_rate": 0.001, "loss": 2.2666, "step": 20903 }, { "epoch": 0.8843387765462392, "grad_norm": 3.172335386276245, "learning_rate": 0.001, "loss": 2.2502, "step": 20904 }, { "epoch": 0.8843810813097555, "grad_norm": 0.14836838841438293, "learning_rate": 0.001, "loss": 1.4472, "step": 20905 }, { "epoch": 0.8844233860732719, "grad_norm": 0.195405974984169, "learning_rate": 0.001, "loss": 1.8995, "step": 20906 }, { "epoch": 0.8844656908367883, "grad_norm": 0.41760605573654175, "learning_rate": 0.001, "loss": 2.139, "step": 20907 }, { "epoch": 0.8845079956003046, "grad_norm": 0.16416040062904358, "learning_rate": 0.001, "loss": 2.0343, "step": 20908 }, { "epoch": 0.884550300363821, "grad_norm": 0.16729791462421417, "learning_rate": 0.001, "loss": 2.0194, "step": 20909 }, { "epoch": 0.8845926051273373, "grad_norm": 0.15572674572467804, "learning_rate": 0.001, "loss": 1.7202, "step": 20910 }, { "epoch": 0.8846349098908537, "grad_norm": 36.179466247558594, "learning_rate": 0.001, "loss": 1.8561, "step": 20911 }, { "epoch": 0.8846772146543701, "grad_norm": 0.15077073872089386, "learning_rate": 0.001, "loss": 1.6993, "step": 20912 }, { "epoch": 0.8847195194178864, "grad_norm": 0.1646421253681183, "learning_rate": 0.001, "loss": 2.058, "step": 20913 }, { "epoch": 0.8847618241814028, "grad_norm": 0.2249441295862198, "learning_rate": 0.001, "loss": 2.8581, "step": 20914 }, { "epoch": 0.8848041289449192, "grad_norm": 0.19129936397075653, "learning_rate": 0.001, "loss": 2.1256, "step": 20915 }, { "epoch": 0.8848464337084355, "grad_norm": 0.16032202541828156, "learning_rate": 0.001, "loss": 1.7309, "step": 20916 }, { "epoch": 0.8848887384719519, "grad_norm": 0.21041136980056763, "learning_rate": 0.001, "loss": 3.1444, "step": 20917 }, { "epoch": 0.8849310432354683, "grad_norm": 0.1504105031490326, "learning_rate": 0.001, "loss": 1.4867, "step": 20918 }, { "epoch": 0.8849733479989846, "grad_norm": 0.16670987010002136, "learning_rate": 0.001, "loss": 1.6321, "step": 20919 }, { "epoch": 0.885015652762501, "grad_norm": 0.22837378084659576, "learning_rate": 0.001, "loss": 2.2452, "step": 20920 }, { "epoch": 0.8850579575260175, "grad_norm": 0.15080545842647552, "learning_rate": 0.001, "loss": 2.2631, "step": 20921 }, { "epoch": 0.8851002622895338, "grad_norm": 0.16845811903476715, "learning_rate": 0.001, "loss": 1.9818, "step": 20922 }, { "epoch": 0.8851425670530502, "grad_norm": 0.6838306784629822, "learning_rate": 0.001, "loss": 1.6603, "step": 20923 }, { "epoch": 0.8851848718165666, "grad_norm": 0.12884469330310822, "learning_rate": 0.001, "loss": 1.7758, "step": 20924 }, { "epoch": 0.8852271765800829, "grad_norm": 0.7844867706298828, "learning_rate": 0.001, "loss": 2.031, "step": 20925 }, { "epoch": 0.8852694813435993, "grad_norm": 0.1945054531097412, "learning_rate": 0.001, "loss": 3.3753, "step": 20926 }, { "epoch": 0.8853117861071157, "grad_norm": 30.440494537353516, "learning_rate": 0.001, "loss": 2.9578, "step": 20927 }, { "epoch": 0.885354090870632, "grad_norm": 0.13637934625148773, "learning_rate": 0.001, "loss": 1.677, "step": 20928 }, { "epoch": 0.8853963956341484, "grad_norm": 0.7692943811416626, "learning_rate": 0.001, "loss": 2.1675, "step": 20929 }, { "epoch": 0.8854387003976648, "grad_norm": 0.9168503880500793, "learning_rate": 0.001, "loss": 2.1578, "step": 20930 }, { "epoch": 0.8854810051611811, "grad_norm": 0.1996372491121292, "learning_rate": 0.001, "loss": 2.6837, "step": 20931 }, { "epoch": 0.8855233099246975, "grad_norm": 0.15608003735542297, "learning_rate": 0.001, "loss": 3.4255, "step": 20932 }, { "epoch": 0.8855656146882139, "grad_norm": 0.7648287415504456, "learning_rate": 0.001, "loss": 1.6965, "step": 20933 }, { "epoch": 0.8856079194517302, "grad_norm": 8.121953964233398, "learning_rate": 0.001, "loss": 2.2537, "step": 20934 }, { "epoch": 0.8856502242152466, "grad_norm": 2.677286148071289, "learning_rate": 0.001, "loss": 3.2583, "step": 20935 }, { "epoch": 0.885692528978763, "grad_norm": 0.17523032426834106, "learning_rate": 0.001, "loss": 2.1655, "step": 20936 }, { "epoch": 0.8857348337422793, "grad_norm": 0.16115929186344147, "learning_rate": 0.001, "loss": 1.5949, "step": 20937 }, { "epoch": 0.8857771385057958, "grad_norm": 0.18050625920295715, "learning_rate": 0.001, "loss": 2.2334, "step": 20938 }, { "epoch": 0.8858194432693122, "grad_norm": 0.19290222227573395, "learning_rate": 0.001, "loss": 2.4616, "step": 20939 }, { "epoch": 0.8858617480328285, "grad_norm": 0.15666300058364868, "learning_rate": 0.001, "loss": 2.5885, "step": 20940 }, { "epoch": 0.8859040527963449, "grad_norm": 0.21814775466918945, "learning_rate": 0.001, "loss": 1.8548, "step": 20941 }, { "epoch": 0.8859463575598613, "grad_norm": 0.18262353539466858, "learning_rate": 0.001, "loss": 1.8053, "step": 20942 }, { "epoch": 0.8859886623233776, "grad_norm": 0.4183075726032257, "learning_rate": 0.001, "loss": 2.103, "step": 20943 }, { "epoch": 0.886030967086894, "grad_norm": 0.18610745668411255, "learning_rate": 0.001, "loss": 2.7605, "step": 20944 }, { "epoch": 0.8860732718504104, "grad_norm": 10.397679328918457, "learning_rate": 0.001, "loss": 2.124, "step": 20945 }, { "epoch": 0.8861155766139267, "grad_norm": 0.14724650979042053, "learning_rate": 0.001, "loss": 2.377, "step": 20946 }, { "epoch": 0.8861578813774431, "grad_norm": 0.4719243049621582, "learning_rate": 0.001, "loss": 1.979, "step": 20947 }, { "epoch": 0.8862001861409595, "grad_norm": 0.21508583426475525, "learning_rate": 0.001, "loss": 1.8572, "step": 20948 }, { "epoch": 0.8862424909044758, "grad_norm": 0.16444562375545502, "learning_rate": 0.001, "loss": 1.8839, "step": 20949 }, { "epoch": 0.8862847956679922, "grad_norm": 0.22835667431354523, "learning_rate": 0.001, "loss": 2.5681, "step": 20950 }, { "epoch": 0.8863271004315086, "grad_norm": 0.17170853912830353, "learning_rate": 0.001, "loss": 2.0785, "step": 20951 }, { "epoch": 0.8863694051950249, "grad_norm": 0.135117307305336, "learning_rate": 0.001, "loss": 2.4973, "step": 20952 }, { "epoch": 0.8864117099585413, "grad_norm": 1.2881138324737549, "learning_rate": 0.001, "loss": 1.8363, "step": 20953 }, { "epoch": 0.8864540147220576, "grad_norm": 0.1489884853363037, "learning_rate": 0.001, "loss": 1.9252, "step": 20954 }, { "epoch": 0.8864963194855741, "grad_norm": 0.14874590933322906, "learning_rate": 0.001, "loss": 1.4085, "step": 20955 }, { "epoch": 0.8865386242490905, "grad_norm": 0.13370899856090546, "learning_rate": 0.001, "loss": 2.2094, "step": 20956 }, { "epoch": 0.8865809290126068, "grad_norm": 4.428647041320801, "learning_rate": 0.001, "loss": 2.2555, "step": 20957 }, { "epoch": 0.8866232337761232, "grad_norm": 0.12054305523633957, "learning_rate": 0.001, "loss": 2.2724, "step": 20958 }, { "epoch": 0.8866655385396396, "grad_norm": 0.19234895706176758, "learning_rate": 0.001, "loss": 2.7066, "step": 20959 }, { "epoch": 0.8867078433031559, "grad_norm": 0.18497143685817719, "learning_rate": 0.001, "loss": 2.3736, "step": 20960 }, { "epoch": 0.8867501480666723, "grad_norm": 0.2834151089191437, "learning_rate": 0.001, "loss": 2.2192, "step": 20961 }, { "epoch": 0.8867924528301887, "grad_norm": 0.18010100722312927, "learning_rate": 0.001, "loss": 2.4076, "step": 20962 }, { "epoch": 0.886834757593705, "grad_norm": 0.3335997462272644, "learning_rate": 0.001, "loss": 3.2665, "step": 20963 }, { "epoch": 0.8868770623572214, "grad_norm": 0.15331315994262695, "learning_rate": 0.001, "loss": 2.5892, "step": 20964 }, { "epoch": 0.8869193671207378, "grad_norm": 0.16281548142433167, "learning_rate": 0.001, "loss": 2.1734, "step": 20965 }, { "epoch": 0.8869616718842541, "grad_norm": 0.17195676267147064, "learning_rate": 0.001, "loss": 2.339, "step": 20966 }, { "epoch": 0.8870039766477705, "grad_norm": 0.13720469176769257, "learning_rate": 0.001, "loss": 1.9478, "step": 20967 }, { "epoch": 0.8870462814112869, "grad_norm": 0.17597544193267822, "learning_rate": 0.001, "loss": 1.9665, "step": 20968 }, { "epoch": 0.8870885861748032, "grad_norm": 0.1732187271118164, "learning_rate": 0.001, "loss": 1.9538, "step": 20969 }, { "epoch": 0.8871308909383196, "grad_norm": 0.14298303425312042, "learning_rate": 0.001, "loss": 2.8327, "step": 20970 }, { "epoch": 0.8871731957018361, "grad_norm": 0.15061107277870178, "learning_rate": 0.001, "loss": 2.009, "step": 20971 }, { "epoch": 0.8872155004653524, "grad_norm": 0.14526939392089844, "learning_rate": 0.001, "loss": 1.8338, "step": 20972 }, { "epoch": 0.8872578052288688, "grad_norm": 0.1140129417181015, "learning_rate": 0.001, "loss": 1.2921, "step": 20973 }, { "epoch": 0.8873001099923852, "grad_norm": 9.253741264343262, "learning_rate": 0.001, "loss": 2.1857, "step": 20974 }, { "epoch": 0.8873424147559015, "grad_norm": 0.12813687324523926, "learning_rate": 0.001, "loss": 1.8503, "step": 20975 }, { "epoch": 0.8873847195194179, "grad_norm": 0.1430562436580658, "learning_rate": 0.001, "loss": 1.9686, "step": 20976 }, { "epoch": 0.8874270242829343, "grad_norm": 0.1441165953874588, "learning_rate": 0.001, "loss": 2.0366, "step": 20977 }, { "epoch": 0.8874693290464506, "grad_norm": 0.6266450881958008, "learning_rate": 0.001, "loss": 1.4746, "step": 20978 }, { "epoch": 0.887511633809967, "grad_norm": 0.13469688594341278, "learning_rate": 0.001, "loss": 2.3698, "step": 20979 }, { "epoch": 0.8875539385734834, "grad_norm": 0.13266874849796295, "learning_rate": 0.001, "loss": 1.6635, "step": 20980 }, { "epoch": 0.8875962433369997, "grad_norm": 25.529739379882812, "learning_rate": 0.001, "loss": 2.0676, "step": 20981 }, { "epoch": 0.8876385481005161, "grad_norm": 0.2457568198442459, "learning_rate": 0.001, "loss": 3.1092, "step": 20982 }, { "epoch": 0.8876808528640325, "grad_norm": 0.15447290241718292, "learning_rate": 0.001, "loss": 1.8904, "step": 20983 }, { "epoch": 0.8877231576275488, "grad_norm": 0.280048131942749, "learning_rate": 0.001, "loss": 2.263, "step": 20984 }, { "epoch": 0.8877654623910652, "grad_norm": 0.11680661141872406, "learning_rate": 0.001, "loss": 1.9202, "step": 20985 }, { "epoch": 0.8878077671545817, "grad_norm": 16.68817138671875, "learning_rate": 0.001, "loss": 1.7731, "step": 20986 }, { "epoch": 0.887850071918098, "grad_norm": 0.17112363874912262, "learning_rate": 0.001, "loss": 3.0417, "step": 20987 }, { "epoch": 0.8878923766816144, "grad_norm": 0.19305647909641266, "learning_rate": 0.001, "loss": 1.4348, "step": 20988 }, { "epoch": 0.8879346814451308, "grad_norm": 0.162348210811615, "learning_rate": 0.001, "loss": 1.9347, "step": 20989 }, { "epoch": 0.8879769862086471, "grad_norm": 0.14823856949806213, "learning_rate": 0.001, "loss": 1.9564, "step": 20990 }, { "epoch": 0.8880192909721635, "grad_norm": 29.342689514160156, "learning_rate": 0.001, "loss": 2.334, "step": 20991 }, { "epoch": 0.8880615957356799, "grad_norm": 0.7693207859992981, "learning_rate": 0.001, "loss": 1.766, "step": 20992 }, { "epoch": 0.8881039004991962, "grad_norm": 1.0036860704421997, "learning_rate": 0.001, "loss": 2.2152, "step": 20993 }, { "epoch": 0.8881462052627126, "grad_norm": 3.9714372158050537, "learning_rate": 0.001, "loss": 2.1756, "step": 20994 }, { "epoch": 0.888188510026229, "grad_norm": 0.21561984717845917, "learning_rate": 0.001, "loss": 1.7397, "step": 20995 }, { "epoch": 0.8882308147897453, "grad_norm": 0.1669800579547882, "learning_rate": 0.001, "loss": 2.7163, "step": 20996 }, { "epoch": 0.8882731195532617, "grad_norm": 0.19429895281791687, "learning_rate": 0.001, "loss": 2.321, "step": 20997 }, { "epoch": 0.8883154243167781, "grad_norm": 0.21047398447990417, "learning_rate": 0.001, "loss": 1.6531, "step": 20998 }, { "epoch": 0.8883577290802944, "grad_norm": 0.20937636494636536, "learning_rate": 0.001, "loss": 2.1078, "step": 20999 }, { "epoch": 0.8884000338438108, "grad_norm": 0.4300979673862457, "learning_rate": 0.001, "loss": 2.4772, "step": 21000 }, { "epoch": 0.8884423386073271, "grad_norm": 0.18528619408607483, "learning_rate": 0.001, "loss": 1.5542, "step": 21001 }, { "epoch": 0.8884846433708435, "grad_norm": 0.989427387714386, "learning_rate": 0.001, "loss": 1.9901, "step": 21002 }, { "epoch": 0.88852694813436, "grad_norm": 0.19814398884773254, "learning_rate": 0.001, "loss": 2.3798, "step": 21003 }, { "epoch": 0.8885692528978762, "grad_norm": 0.16753339767456055, "learning_rate": 0.001, "loss": 2.8676, "step": 21004 }, { "epoch": 0.8886115576613927, "grad_norm": 0.1605309545993805, "learning_rate": 0.001, "loss": 1.5876, "step": 21005 }, { "epoch": 0.8886538624249091, "grad_norm": 1.0436333417892456, "learning_rate": 0.001, "loss": 3.0352, "step": 21006 }, { "epoch": 0.8886961671884254, "grad_norm": 0.5114812850952148, "learning_rate": 0.001, "loss": 1.9538, "step": 21007 }, { "epoch": 0.8887384719519418, "grad_norm": 0.16753728687763214, "learning_rate": 0.001, "loss": 1.6974, "step": 21008 }, { "epoch": 0.8887807767154582, "grad_norm": 0.17720456421375275, "learning_rate": 0.001, "loss": 2.0584, "step": 21009 }, { "epoch": 0.8888230814789745, "grad_norm": 0.23170483112335205, "learning_rate": 0.001, "loss": 3.2882, "step": 21010 }, { "epoch": 0.8888653862424909, "grad_norm": 0.41039520502090454, "learning_rate": 0.001, "loss": 2.4214, "step": 21011 }, { "epoch": 0.8889076910060073, "grad_norm": 0.1671202927827835, "learning_rate": 0.001, "loss": 1.606, "step": 21012 }, { "epoch": 0.8889499957695236, "grad_norm": 0.2298729419708252, "learning_rate": 0.001, "loss": 2.035, "step": 21013 }, { "epoch": 0.88899230053304, "grad_norm": 0.1773902326822281, "learning_rate": 0.001, "loss": 2.9743, "step": 21014 }, { "epoch": 0.8890346052965564, "grad_norm": 0.15954598784446716, "learning_rate": 0.001, "loss": 1.7877, "step": 21015 }, { "epoch": 0.8890769100600727, "grad_norm": 0.1797361969947815, "learning_rate": 0.001, "loss": 2.4128, "step": 21016 }, { "epoch": 0.8891192148235891, "grad_norm": 0.1873590052127838, "learning_rate": 0.001, "loss": 2.0114, "step": 21017 }, { "epoch": 0.8891615195871055, "grad_norm": 0.20286858081817627, "learning_rate": 0.001, "loss": 1.7583, "step": 21018 }, { "epoch": 0.8892038243506218, "grad_norm": 0.17168866097927094, "learning_rate": 0.001, "loss": 1.9782, "step": 21019 }, { "epoch": 0.8892461291141383, "grad_norm": 0.1769993156194687, "learning_rate": 0.001, "loss": 1.922, "step": 21020 }, { "epoch": 0.8892884338776547, "grad_norm": 0.1596214920282364, "learning_rate": 0.001, "loss": 2.1562, "step": 21021 }, { "epoch": 0.889330738641171, "grad_norm": 1.326114535331726, "learning_rate": 0.001, "loss": 1.683, "step": 21022 }, { "epoch": 0.8893730434046874, "grad_norm": 0.19646044075489044, "learning_rate": 0.001, "loss": 2.3388, "step": 21023 }, { "epoch": 0.8894153481682038, "grad_norm": 0.173504039645195, "learning_rate": 0.001, "loss": 1.6195, "step": 21024 }, { "epoch": 0.8894576529317201, "grad_norm": 0.145315483212471, "learning_rate": 0.001, "loss": 2.3937, "step": 21025 }, { "epoch": 0.8894999576952365, "grad_norm": 0.3077605366706848, "learning_rate": 0.001, "loss": 3.0489, "step": 21026 }, { "epoch": 0.8895422624587529, "grad_norm": 0.15277047455310822, "learning_rate": 0.001, "loss": 1.9625, "step": 21027 }, { "epoch": 0.8895845672222692, "grad_norm": 7.509362697601318, "learning_rate": 0.001, "loss": 3.2375, "step": 21028 }, { "epoch": 0.8896268719857856, "grad_norm": 0.23799103498458862, "learning_rate": 0.001, "loss": 1.8929, "step": 21029 }, { "epoch": 0.889669176749302, "grad_norm": 0.16391988098621368, "learning_rate": 0.001, "loss": 2.3133, "step": 21030 }, { "epoch": 0.8897114815128183, "grad_norm": 0.13483858108520508, "learning_rate": 0.001, "loss": 1.8435, "step": 21031 }, { "epoch": 0.8897537862763347, "grad_norm": 0.1675466150045395, "learning_rate": 0.001, "loss": 2.2034, "step": 21032 }, { "epoch": 0.8897960910398511, "grad_norm": 0.5026352405548096, "learning_rate": 0.001, "loss": 3.6788, "step": 21033 }, { "epoch": 0.8898383958033674, "grad_norm": 0.1782296597957611, "learning_rate": 0.001, "loss": 1.971, "step": 21034 }, { "epoch": 0.8898807005668838, "grad_norm": 0.1635374128818512, "learning_rate": 0.001, "loss": 1.5724, "step": 21035 }, { "epoch": 0.8899230053304003, "grad_norm": 0.17833854258060455, "learning_rate": 0.001, "loss": 2.2641, "step": 21036 }, { "epoch": 0.8899653100939166, "grad_norm": 0.15048396587371826, "learning_rate": 0.001, "loss": 1.8958, "step": 21037 }, { "epoch": 0.890007614857433, "grad_norm": 0.4295186996459961, "learning_rate": 0.001, "loss": 3.0759, "step": 21038 }, { "epoch": 0.8900499196209494, "grad_norm": 0.26509931683540344, "learning_rate": 0.001, "loss": 3.175, "step": 21039 }, { "epoch": 0.8900922243844657, "grad_norm": 0.21517863869667053, "learning_rate": 0.001, "loss": 2.5894, "step": 21040 }, { "epoch": 0.8901345291479821, "grad_norm": 0.15803544223308563, "learning_rate": 0.001, "loss": 1.7448, "step": 21041 }, { "epoch": 0.8901768339114985, "grad_norm": 0.6315363645553589, "learning_rate": 0.001, "loss": 2.1491, "step": 21042 }, { "epoch": 0.8902191386750148, "grad_norm": 0.1378752887248993, "learning_rate": 0.001, "loss": 1.8168, "step": 21043 }, { "epoch": 0.8902614434385312, "grad_norm": 0.16324613988399506, "learning_rate": 0.001, "loss": 3.0616, "step": 21044 }, { "epoch": 0.8903037482020475, "grad_norm": 0.16827210783958435, "learning_rate": 0.001, "loss": 1.9812, "step": 21045 }, { "epoch": 0.8903460529655639, "grad_norm": 0.16511641442775726, "learning_rate": 0.001, "loss": 2.405, "step": 21046 }, { "epoch": 0.8903883577290803, "grad_norm": 0.14627037942409515, "learning_rate": 0.001, "loss": 1.4163, "step": 21047 }, { "epoch": 0.8904306624925966, "grad_norm": 8.328338623046875, "learning_rate": 0.001, "loss": 2.2162, "step": 21048 }, { "epoch": 0.890472967256113, "grad_norm": 0.2969225347042084, "learning_rate": 0.001, "loss": 1.7355, "step": 21049 }, { "epoch": 0.8905152720196294, "grad_norm": 0.17002110183238983, "learning_rate": 0.001, "loss": 1.5175, "step": 21050 }, { "epoch": 0.8905575767831457, "grad_norm": 4.17087459564209, "learning_rate": 0.001, "loss": 1.9892, "step": 21051 }, { "epoch": 0.8905998815466621, "grad_norm": 0.1925853043794632, "learning_rate": 0.001, "loss": 1.745, "step": 21052 }, { "epoch": 0.8906421863101786, "grad_norm": 0.19640986621379852, "learning_rate": 0.001, "loss": 2.2911, "step": 21053 }, { "epoch": 0.8906844910736949, "grad_norm": 0.3781888484954834, "learning_rate": 0.001, "loss": 1.5467, "step": 21054 }, { "epoch": 0.8907267958372113, "grad_norm": 0.20206791162490845, "learning_rate": 0.001, "loss": 1.8796, "step": 21055 }, { "epoch": 0.8907691006007277, "grad_norm": 0.13937760889530182, "learning_rate": 0.001, "loss": 2.0243, "step": 21056 }, { "epoch": 0.890811405364244, "grad_norm": 0.2623637318611145, "learning_rate": 0.001, "loss": 1.7412, "step": 21057 }, { "epoch": 0.8908537101277604, "grad_norm": 0.21680723130702972, "learning_rate": 0.001, "loss": 2.2334, "step": 21058 }, { "epoch": 0.8908960148912768, "grad_norm": 0.18895690143108368, "learning_rate": 0.001, "loss": 1.9865, "step": 21059 }, { "epoch": 0.8909383196547931, "grad_norm": 0.1650429368019104, "learning_rate": 0.001, "loss": 2.9103, "step": 21060 }, { "epoch": 0.8909806244183095, "grad_norm": 0.32935771346092224, "learning_rate": 0.001, "loss": 2.0757, "step": 21061 }, { "epoch": 0.8910229291818259, "grad_norm": 0.14048244059085846, "learning_rate": 0.001, "loss": 1.6505, "step": 21062 }, { "epoch": 0.8910652339453422, "grad_norm": 0.15878313779830933, "learning_rate": 0.001, "loss": 2.1735, "step": 21063 }, { "epoch": 0.8911075387088586, "grad_norm": 0.17732056975364685, "learning_rate": 0.001, "loss": 1.7493, "step": 21064 }, { "epoch": 0.891149843472375, "grad_norm": 0.15534935891628265, "learning_rate": 0.001, "loss": 2.3862, "step": 21065 }, { "epoch": 0.8911921482358913, "grad_norm": 0.5262129902839661, "learning_rate": 0.001, "loss": 1.3208, "step": 21066 }, { "epoch": 0.8912344529994077, "grad_norm": 0.15670378506183624, "learning_rate": 0.001, "loss": 2.1793, "step": 21067 }, { "epoch": 0.8912767577629241, "grad_norm": 1.0509247779846191, "learning_rate": 0.001, "loss": 1.7555, "step": 21068 }, { "epoch": 0.8913190625264404, "grad_norm": 0.16009117662906647, "learning_rate": 0.001, "loss": 1.5403, "step": 21069 }, { "epoch": 0.8913613672899569, "grad_norm": 0.19776473939418793, "learning_rate": 0.001, "loss": 1.93, "step": 21070 }, { "epoch": 0.8914036720534733, "grad_norm": 0.12899529933929443, "learning_rate": 0.001, "loss": 1.7009, "step": 21071 }, { "epoch": 0.8914459768169896, "grad_norm": 0.49440255761146545, "learning_rate": 0.001, "loss": 2.0824, "step": 21072 }, { "epoch": 0.891488281580506, "grad_norm": 0.1522773802280426, "learning_rate": 0.001, "loss": 2.4829, "step": 21073 }, { "epoch": 0.8915305863440224, "grad_norm": 0.16316981613636017, "learning_rate": 0.001, "loss": 1.7877, "step": 21074 }, { "epoch": 0.8915728911075387, "grad_norm": 0.18345019221305847, "learning_rate": 0.001, "loss": 1.7857, "step": 21075 }, { "epoch": 0.8916151958710551, "grad_norm": 0.19031278789043427, "learning_rate": 0.001, "loss": 2.7861, "step": 21076 }, { "epoch": 0.8916575006345715, "grad_norm": 0.14421750605106354, "learning_rate": 0.001, "loss": 2.4813, "step": 21077 }, { "epoch": 0.8916998053980878, "grad_norm": 0.13803726434707642, "learning_rate": 0.001, "loss": 1.7713, "step": 21078 }, { "epoch": 0.8917421101616042, "grad_norm": 0.16265204548835754, "learning_rate": 0.001, "loss": 1.8057, "step": 21079 }, { "epoch": 0.8917844149251206, "grad_norm": 0.17232954502105713, "learning_rate": 0.001, "loss": 1.8699, "step": 21080 }, { "epoch": 0.8918267196886369, "grad_norm": 0.14032989740371704, "learning_rate": 0.001, "loss": 1.632, "step": 21081 }, { "epoch": 0.8918690244521533, "grad_norm": 0.16456128656864166, "learning_rate": 0.001, "loss": 2.1099, "step": 21082 }, { "epoch": 0.8919113292156697, "grad_norm": 0.6810503005981445, "learning_rate": 0.001, "loss": 2.6714, "step": 21083 }, { "epoch": 0.891953633979186, "grad_norm": 0.12685897946357727, "learning_rate": 0.001, "loss": 1.3731, "step": 21084 }, { "epoch": 0.8919959387427024, "grad_norm": 0.17308743298053741, "learning_rate": 0.001, "loss": 2.6058, "step": 21085 }, { "epoch": 0.8920382435062189, "grad_norm": 0.17059041559696198, "learning_rate": 0.001, "loss": 2.7914, "step": 21086 }, { "epoch": 0.8920805482697352, "grad_norm": 0.14783084392547607, "learning_rate": 0.001, "loss": 1.8769, "step": 21087 }, { "epoch": 0.8921228530332516, "grad_norm": 0.1687219738960266, "learning_rate": 0.001, "loss": 2.3727, "step": 21088 }, { "epoch": 0.892165157796768, "grad_norm": 0.1436719000339508, "learning_rate": 0.001, "loss": 1.8052, "step": 21089 }, { "epoch": 0.8922074625602843, "grad_norm": 0.18090786039829254, "learning_rate": 0.001, "loss": 1.6288, "step": 21090 }, { "epoch": 0.8922497673238007, "grad_norm": 0.2179936170578003, "learning_rate": 0.001, "loss": 1.6717, "step": 21091 }, { "epoch": 0.892292072087317, "grad_norm": 0.36641108989715576, "learning_rate": 0.001, "loss": 2.6457, "step": 21092 }, { "epoch": 0.8923343768508334, "grad_norm": 0.13992157578468323, "learning_rate": 0.001, "loss": 3.2825, "step": 21093 }, { "epoch": 0.8923766816143498, "grad_norm": 2.9790940284729004, "learning_rate": 0.001, "loss": 2.9316, "step": 21094 }, { "epoch": 0.8924189863778661, "grad_norm": 0.1301809698343277, "learning_rate": 0.001, "loss": 1.6709, "step": 21095 }, { "epoch": 0.8924612911413825, "grad_norm": 0.13229301571846008, "learning_rate": 0.001, "loss": 1.9206, "step": 21096 }, { "epoch": 0.8925035959048989, "grad_norm": 3.1202783584594727, "learning_rate": 0.001, "loss": 1.9438, "step": 21097 }, { "epoch": 0.8925459006684152, "grad_norm": 0.17747105658054352, "learning_rate": 0.001, "loss": 2.134, "step": 21098 }, { "epoch": 0.8925882054319316, "grad_norm": 0.16291233897209167, "learning_rate": 0.001, "loss": 2.8814, "step": 21099 }, { "epoch": 0.892630510195448, "grad_norm": 0.13646738231182098, "learning_rate": 0.001, "loss": 2.265, "step": 21100 }, { "epoch": 0.8926728149589643, "grad_norm": 0.19404670596122742, "learning_rate": 0.001, "loss": 2.3592, "step": 21101 }, { "epoch": 0.8927151197224807, "grad_norm": 0.5409544110298157, "learning_rate": 0.001, "loss": 2.8194, "step": 21102 }, { "epoch": 0.8927574244859972, "grad_norm": 36.899574279785156, "learning_rate": 0.001, "loss": 1.8905, "step": 21103 }, { "epoch": 0.8927997292495135, "grad_norm": 0.705781877040863, "learning_rate": 0.001, "loss": 2.5204, "step": 21104 }, { "epoch": 0.8928420340130299, "grad_norm": 0.14836551249027252, "learning_rate": 0.001, "loss": 1.9536, "step": 21105 }, { "epoch": 0.8928843387765463, "grad_norm": 0.14388398826122284, "learning_rate": 0.001, "loss": 2.0019, "step": 21106 }, { "epoch": 0.8929266435400626, "grad_norm": 0.15267707407474518, "learning_rate": 0.001, "loss": 1.9647, "step": 21107 }, { "epoch": 0.892968948303579, "grad_norm": 0.8427422046661377, "learning_rate": 0.001, "loss": 2.3751, "step": 21108 }, { "epoch": 0.8930112530670954, "grad_norm": 0.13757242262363434, "learning_rate": 0.001, "loss": 1.9751, "step": 21109 }, { "epoch": 0.8930535578306117, "grad_norm": 4.548515319824219, "learning_rate": 0.001, "loss": 1.8397, "step": 21110 }, { "epoch": 0.8930958625941281, "grad_norm": 0.14274819195270538, "learning_rate": 0.001, "loss": 1.7187, "step": 21111 }, { "epoch": 0.8931381673576445, "grad_norm": 0.15041141211986542, "learning_rate": 0.001, "loss": 2.6026, "step": 21112 }, { "epoch": 0.8931804721211608, "grad_norm": 0.1963375061750412, "learning_rate": 0.001, "loss": 2.2654, "step": 21113 }, { "epoch": 0.8932227768846772, "grad_norm": 0.14106130599975586, "learning_rate": 0.001, "loss": 2.6673, "step": 21114 }, { "epoch": 0.8932650816481936, "grad_norm": 0.15438930690288544, "learning_rate": 0.001, "loss": 1.556, "step": 21115 }, { "epoch": 0.8933073864117099, "grad_norm": 0.18182194232940674, "learning_rate": 0.001, "loss": 2.803, "step": 21116 }, { "epoch": 0.8933496911752263, "grad_norm": 2.966031074523926, "learning_rate": 0.001, "loss": 3.5384, "step": 21117 }, { "epoch": 0.8933919959387427, "grad_norm": 0.16081485152244568, "learning_rate": 0.001, "loss": 2.8794, "step": 21118 }, { "epoch": 0.893434300702259, "grad_norm": 0.15742535889148712, "learning_rate": 0.001, "loss": 1.7857, "step": 21119 }, { "epoch": 0.8934766054657755, "grad_norm": 0.16597464680671692, "learning_rate": 0.001, "loss": 2.4486, "step": 21120 }, { "epoch": 0.8935189102292919, "grad_norm": 0.1744956225156784, "learning_rate": 0.001, "loss": 2.2636, "step": 21121 }, { "epoch": 0.8935612149928082, "grad_norm": 0.1758044958114624, "learning_rate": 0.001, "loss": 2.0722, "step": 21122 }, { "epoch": 0.8936035197563246, "grad_norm": 0.1646287441253662, "learning_rate": 0.001, "loss": 2.4013, "step": 21123 }, { "epoch": 0.893645824519841, "grad_norm": 1.3558330535888672, "learning_rate": 0.001, "loss": 1.6679, "step": 21124 }, { "epoch": 0.8936881292833573, "grad_norm": 0.1626473218202591, "learning_rate": 0.001, "loss": 1.708, "step": 21125 }, { "epoch": 0.8937304340468737, "grad_norm": 0.505549967288971, "learning_rate": 0.001, "loss": 1.9515, "step": 21126 }, { "epoch": 0.8937727388103901, "grad_norm": 0.20900815725326538, "learning_rate": 0.001, "loss": 2.2761, "step": 21127 }, { "epoch": 0.8938150435739064, "grad_norm": 0.1580853909254074, "learning_rate": 0.001, "loss": 2.0052, "step": 21128 }, { "epoch": 0.8938573483374228, "grad_norm": 0.14948229491710663, "learning_rate": 0.001, "loss": 1.5362, "step": 21129 }, { "epoch": 0.8938996531009392, "grad_norm": 0.17575781047344208, "learning_rate": 0.001, "loss": 1.8339, "step": 21130 }, { "epoch": 0.8939419578644555, "grad_norm": 0.16846154630184174, "learning_rate": 0.001, "loss": 3.2954, "step": 21131 }, { "epoch": 0.8939842626279719, "grad_norm": 0.18402104079723358, "learning_rate": 0.001, "loss": 2.1852, "step": 21132 }, { "epoch": 0.8940265673914883, "grad_norm": 0.1554890275001526, "learning_rate": 0.001, "loss": 2.3382, "step": 21133 }, { "epoch": 0.8940688721550046, "grad_norm": 0.13957303762435913, "learning_rate": 0.001, "loss": 2.1363, "step": 21134 }, { "epoch": 0.894111176918521, "grad_norm": 0.16399943828582764, "learning_rate": 0.001, "loss": 1.6929, "step": 21135 }, { "epoch": 0.8941534816820373, "grad_norm": 0.8293852210044861, "learning_rate": 0.001, "loss": 1.8677, "step": 21136 }, { "epoch": 0.8941957864455538, "grad_norm": 0.15984512865543365, "learning_rate": 0.001, "loss": 1.7682, "step": 21137 }, { "epoch": 0.8942380912090702, "grad_norm": 0.3316708505153656, "learning_rate": 0.001, "loss": 2.9118, "step": 21138 }, { "epoch": 0.8942803959725865, "grad_norm": 0.1807178258895874, "learning_rate": 0.001, "loss": 2.949, "step": 21139 }, { "epoch": 0.8943227007361029, "grad_norm": 0.13455355167388916, "learning_rate": 0.001, "loss": 1.867, "step": 21140 }, { "epoch": 0.8943650054996193, "grad_norm": 0.1604076772928238, "learning_rate": 0.001, "loss": 3.1111, "step": 21141 }, { "epoch": 0.8944073102631356, "grad_norm": 0.2634793221950531, "learning_rate": 0.001, "loss": 1.3338, "step": 21142 }, { "epoch": 0.894449615026652, "grad_norm": 0.15633098781108856, "learning_rate": 0.001, "loss": 2.0984, "step": 21143 }, { "epoch": 0.8944919197901684, "grad_norm": 0.16510465741157532, "learning_rate": 0.001, "loss": 1.7809, "step": 21144 }, { "epoch": 0.8945342245536847, "grad_norm": 0.13871534168720245, "learning_rate": 0.001, "loss": 1.702, "step": 21145 }, { "epoch": 0.8945765293172011, "grad_norm": 3.331571340560913, "learning_rate": 0.001, "loss": 3.3754, "step": 21146 }, { "epoch": 0.8946188340807175, "grad_norm": 0.14018763601779938, "learning_rate": 0.001, "loss": 2.0596, "step": 21147 }, { "epoch": 0.8946611388442338, "grad_norm": 0.14845266938209534, "learning_rate": 0.001, "loss": 1.8956, "step": 21148 }, { "epoch": 0.8947034436077502, "grad_norm": 0.18321087956428528, "learning_rate": 0.001, "loss": 1.821, "step": 21149 }, { "epoch": 0.8947457483712666, "grad_norm": 39.97751998901367, "learning_rate": 0.001, "loss": 2.6723, "step": 21150 }, { "epoch": 0.8947880531347829, "grad_norm": 0.7367990612983704, "learning_rate": 0.001, "loss": 2.3226, "step": 21151 }, { "epoch": 0.8948303578982993, "grad_norm": 0.15124103426933289, "learning_rate": 0.001, "loss": 1.99, "step": 21152 }, { "epoch": 0.8948726626618158, "grad_norm": 0.14537779986858368, "learning_rate": 0.001, "loss": 2.4792, "step": 21153 }, { "epoch": 0.894914967425332, "grad_norm": 0.16853304207324982, "learning_rate": 0.001, "loss": 2.233, "step": 21154 }, { "epoch": 0.8949572721888485, "grad_norm": 0.17724493145942688, "learning_rate": 0.001, "loss": 2.6854, "step": 21155 }, { "epoch": 0.8949995769523649, "grad_norm": 0.1677272468805313, "learning_rate": 0.001, "loss": 1.6028, "step": 21156 }, { "epoch": 0.8950418817158812, "grad_norm": 0.17956793308258057, "learning_rate": 0.001, "loss": 1.9077, "step": 21157 }, { "epoch": 0.8950841864793976, "grad_norm": 0.18584755063056946, "learning_rate": 0.001, "loss": 1.6491, "step": 21158 }, { "epoch": 0.895126491242914, "grad_norm": 0.15615415573120117, "learning_rate": 0.001, "loss": 2.2369, "step": 21159 }, { "epoch": 0.8951687960064303, "grad_norm": 0.16031022369861603, "learning_rate": 0.001, "loss": 1.9678, "step": 21160 }, { "epoch": 0.8952111007699467, "grad_norm": 10.240532875061035, "learning_rate": 0.001, "loss": 3.2672, "step": 21161 }, { "epoch": 0.8952534055334631, "grad_norm": 0.1851503998041153, "learning_rate": 0.001, "loss": 2.2191, "step": 21162 }, { "epoch": 0.8952957102969794, "grad_norm": 0.17084771394729614, "learning_rate": 0.001, "loss": 1.9097, "step": 21163 }, { "epoch": 0.8953380150604958, "grad_norm": 0.6595641374588013, "learning_rate": 0.001, "loss": 3.2942, "step": 21164 }, { "epoch": 0.8953803198240122, "grad_norm": 0.12742707133293152, "learning_rate": 0.001, "loss": 2.523, "step": 21165 }, { "epoch": 0.8954226245875285, "grad_norm": 3.2080767154693604, "learning_rate": 0.001, "loss": 2.5101, "step": 21166 }, { "epoch": 0.8954649293510449, "grad_norm": 0.1244649738073349, "learning_rate": 0.001, "loss": 3.2143, "step": 21167 }, { "epoch": 0.8955072341145613, "grad_norm": 0.14987188577651978, "learning_rate": 0.001, "loss": 2.8929, "step": 21168 }, { "epoch": 0.8955495388780776, "grad_norm": 0.20873646438121796, "learning_rate": 0.001, "loss": 1.5905, "step": 21169 }, { "epoch": 0.895591843641594, "grad_norm": 1.1376652717590332, "learning_rate": 0.001, "loss": 1.4977, "step": 21170 }, { "epoch": 0.8956341484051105, "grad_norm": 0.16955767571926117, "learning_rate": 0.001, "loss": 2.2745, "step": 21171 }, { "epoch": 0.8956764531686268, "grad_norm": 0.1822686642408371, "learning_rate": 0.001, "loss": 2.4192, "step": 21172 }, { "epoch": 0.8957187579321432, "grad_norm": 0.1916595995426178, "learning_rate": 0.001, "loss": 1.7201, "step": 21173 }, { "epoch": 0.8957610626956596, "grad_norm": 0.14500486850738525, "learning_rate": 0.001, "loss": 1.6737, "step": 21174 }, { "epoch": 0.8958033674591759, "grad_norm": 0.15613558888435364, "learning_rate": 0.001, "loss": 1.9017, "step": 21175 }, { "epoch": 0.8958456722226923, "grad_norm": 0.149849534034729, "learning_rate": 0.001, "loss": 2.7908, "step": 21176 }, { "epoch": 0.8958879769862087, "grad_norm": 0.1522967666387558, "learning_rate": 0.001, "loss": 3.3811, "step": 21177 }, { "epoch": 0.895930281749725, "grad_norm": 0.20839162170886993, "learning_rate": 0.001, "loss": 1.6453, "step": 21178 }, { "epoch": 0.8959725865132414, "grad_norm": 0.18423724174499512, "learning_rate": 0.001, "loss": 2.3114, "step": 21179 }, { "epoch": 0.8960148912767577, "grad_norm": 0.20367826521396637, "learning_rate": 0.001, "loss": 2.0914, "step": 21180 }, { "epoch": 0.8960571960402741, "grad_norm": 2.9200098514556885, "learning_rate": 0.001, "loss": 1.8609, "step": 21181 }, { "epoch": 0.8960995008037905, "grad_norm": 0.2010602205991745, "learning_rate": 0.001, "loss": 2.1336, "step": 21182 }, { "epoch": 0.8961418055673068, "grad_norm": 0.15395566821098328, "learning_rate": 0.001, "loss": 3.506, "step": 21183 }, { "epoch": 0.8961841103308232, "grad_norm": 0.17142242193222046, "learning_rate": 0.001, "loss": 3.1603, "step": 21184 }, { "epoch": 0.8962264150943396, "grad_norm": 0.36323732137680054, "learning_rate": 0.001, "loss": 2.0423, "step": 21185 }, { "epoch": 0.8962687198578559, "grad_norm": 1.7019706964492798, "learning_rate": 0.001, "loss": 3.0269, "step": 21186 }, { "epoch": 0.8963110246213724, "grad_norm": 2.892049551010132, "learning_rate": 0.001, "loss": 2.2748, "step": 21187 }, { "epoch": 0.8963533293848888, "grad_norm": 0.4944346845149994, "learning_rate": 0.001, "loss": 2.5055, "step": 21188 }, { "epoch": 0.8963956341484051, "grad_norm": 0.19812704622745514, "learning_rate": 0.001, "loss": 1.639, "step": 21189 }, { "epoch": 0.8964379389119215, "grad_norm": 0.32802027463912964, "learning_rate": 0.001, "loss": 2.4047, "step": 21190 }, { "epoch": 0.8964802436754379, "grad_norm": 0.15056705474853516, "learning_rate": 0.001, "loss": 1.724, "step": 21191 }, { "epoch": 0.8965225484389542, "grad_norm": 0.15505051612854004, "learning_rate": 0.001, "loss": 2.1101, "step": 21192 }, { "epoch": 0.8965648532024706, "grad_norm": 0.16855968534946442, "learning_rate": 0.001, "loss": 1.7835, "step": 21193 }, { "epoch": 0.896607157965987, "grad_norm": 0.15163734555244446, "learning_rate": 0.001, "loss": 1.4858, "step": 21194 }, { "epoch": 0.8966494627295033, "grad_norm": 0.21959041059017181, "learning_rate": 0.001, "loss": 2.2257, "step": 21195 }, { "epoch": 0.8966917674930197, "grad_norm": 0.18358977138996124, "learning_rate": 0.001, "loss": 1.8863, "step": 21196 }, { "epoch": 0.8967340722565361, "grad_norm": 0.19511397182941437, "learning_rate": 0.001, "loss": 1.9636, "step": 21197 }, { "epoch": 0.8967763770200524, "grad_norm": 0.19627627730369568, "learning_rate": 0.001, "loss": 2.0717, "step": 21198 }, { "epoch": 0.8968186817835688, "grad_norm": 0.22323928773403168, "learning_rate": 0.001, "loss": 1.7666, "step": 21199 }, { "epoch": 0.8968609865470852, "grad_norm": 0.14983603358268738, "learning_rate": 0.001, "loss": 2.8837, "step": 21200 }, { "epoch": 0.8969032913106015, "grad_norm": 0.13686136901378632, "learning_rate": 0.001, "loss": 2.385, "step": 21201 }, { "epoch": 0.896945596074118, "grad_norm": 0.15880164504051208, "learning_rate": 0.001, "loss": 2.1539, "step": 21202 }, { "epoch": 0.8969879008376344, "grad_norm": 0.13948020339012146, "learning_rate": 0.001, "loss": 1.6086, "step": 21203 }, { "epoch": 0.8970302056011507, "grad_norm": 0.12735982239246368, "learning_rate": 0.001, "loss": 1.8913, "step": 21204 }, { "epoch": 0.8970725103646671, "grad_norm": 0.1532490998506546, "learning_rate": 0.001, "loss": 1.7689, "step": 21205 }, { "epoch": 0.8971148151281835, "grad_norm": 0.20498578250408173, "learning_rate": 0.001, "loss": 1.7279, "step": 21206 }, { "epoch": 0.8971571198916998, "grad_norm": 0.13713718950748444, "learning_rate": 0.001, "loss": 1.7066, "step": 21207 }, { "epoch": 0.8971994246552162, "grad_norm": 0.15375199913978577, "learning_rate": 0.001, "loss": 1.5455, "step": 21208 }, { "epoch": 0.8972417294187326, "grad_norm": 0.12200403213500977, "learning_rate": 0.001, "loss": 1.8769, "step": 21209 }, { "epoch": 0.8972840341822489, "grad_norm": 0.15674884617328644, "learning_rate": 0.001, "loss": 2.4422, "step": 21210 }, { "epoch": 0.8973263389457653, "grad_norm": 0.7319082617759705, "learning_rate": 0.001, "loss": 2.3794, "step": 21211 }, { "epoch": 0.8973686437092817, "grad_norm": 0.14720921218395233, "learning_rate": 0.001, "loss": 1.9387, "step": 21212 }, { "epoch": 0.897410948472798, "grad_norm": 0.2884654402732849, "learning_rate": 0.001, "loss": 3.2008, "step": 21213 }, { "epoch": 0.8974532532363144, "grad_norm": 1.7841389179229736, "learning_rate": 0.001, "loss": 3.6275, "step": 21214 }, { "epoch": 0.8974955579998308, "grad_norm": 0.7872580885887146, "learning_rate": 0.001, "loss": 1.6937, "step": 21215 }, { "epoch": 0.8975378627633471, "grad_norm": 0.16804750263690948, "learning_rate": 0.001, "loss": 2.4865, "step": 21216 }, { "epoch": 0.8975801675268635, "grad_norm": 0.18174222111701965, "learning_rate": 0.001, "loss": 2.2794, "step": 21217 }, { "epoch": 0.89762247229038, "grad_norm": 0.21821044385433197, "learning_rate": 0.001, "loss": 3.6223, "step": 21218 }, { "epoch": 0.8976647770538962, "grad_norm": 0.2736088037490845, "learning_rate": 0.001, "loss": 2.0796, "step": 21219 }, { "epoch": 0.8977070818174127, "grad_norm": 0.15616385638713837, "learning_rate": 0.001, "loss": 2.9314, "step": 21220 }, { "epoch": 0.8977493865809291, "grad_norm": 0.13997872173786163, "learning_rate": 0.001, "loss": 1.6217, "step": 21221 }, { "epoch": 0.8977916913444454, "grad_norm": 0.17286120355129242, "learning_rate": 0.001, "loss": 2.5522, "step": 21222 }, { "epoch": 0.8978339961079618, "grad_norm": 0.16818682849407196, "learning_rate": 0.001, "loss": 2.1953, "step": 21223 }, { "epoch": 0.8978763008714782, "grad_norm": 0.8613235354423523, "learning_rate": 0.001, "loss": 2.311, "step": 21224 }, { "epoch": 0.8979186056349945, "grad_norm": 1.0731678009033203, "learning_rate": 0.001, "loss": 3.8418, "step": 21225 }, { "epoch": 0.8979609103985109, "grad_norm": 0.1638665348291397, "learning_rate": 0.001, "loss": 2.2482, "step": 21226 }, { "epoch": 0.8980032151620272, "grad_norm": 0.3347175419330597, "learning_rate": 0.001, "loss": 1.9502, "step": 21227 }, { "epoch": 0.8980455199255436, "grad_norm": 1.1207183599472046, "learning_rate": 0.001, "loss": 1.5674, "step": 21228 }, { "epoch": 0.89808782468906, "grad_norm": 0.13141308724880219, "learning_rate": 0.001, "loss": 2.1263, "step": 21229 }, { "epoch": 0.8981301294525763, "grad_norm": 0.14101554453372955, "learning_rate": 0.001, "loss": 2.591, "step": 21230 }, { "epoch": 0.8981724342160927, "grad_norm": 0.17062920331954956, "learning_rate": 0.001, "loss": 1.6321, "step": 21231 }, { "epoch": 0.8982147389796091, "grad_norm": 0.18323302268981934, "learning_rate": 0.001, "loss": 2.9428, "step": 21232 }, { "epoch": 0.8982570437431254, "grad_norm": 0.15976440906524658, "learning_rate": 0.001, "loss": 1.8825, "step": 21233 }, { "epoch": 0.8982993485066418, "grad_norm": 0.15031205117702484, "learning_rate": 0.001, "loss": 2.5819, "step": 21234 }, { "epoch": 0.8983416532701582, "grad_norm": 4.328239917755127, "learning_rate": 0.001, "loss": 1.6748, "step": 21235 }, { "epoch": 0.8983839580336745, "grad_norm": 0.17019319534301758, "learning_rate": 0.001, "loss": 3.3208, "step": 21236 }, { "epoch": 0.898426262797191, "grad_norm": 0.21404282748699188, "learning_rate": 0.001, "loss": 1.6199, "step": 21237 }, { "epoch": 0.8984685675607074, "grad_norm": 0.15579356253147125, "learning_rate": 0.001, "loss": 1.7415, "step": 21238 }, { "epoch": 0.8985108723242237, "grad_norm": 0.3757559359073639, "learning_rate": 0.001, "loss": 1.8607, "step": 21239 }, { "epoch": 0.8985531770877401, "grad_norm": 0.16331882774829865, "learning_rate": 0.001, "loss": 2.3862, "step": 21240 }, { "epoch": 0.8985954818512565, "grad_norm": 0.1645757555961609, "learning_rate": 0.001, "loss": 1.8158, "step": 21241 }, { "epoch": 0.8986377866147728, "grad_norm": 0.159241184592247, "learning_rate": 0.001, "loss": 1.7989, "step": 21242 }, { "epoch": 0.8986800913782892, "grad_norm": 0.1543315201997757, "learning_rate": 0.001, "loss": 3.289, "step": 21243 }, { "epoch": 0.8987223961418056, "grad_norm": 0.14359703660011292, "learning_rate": 0.001, "loss": 2.5562, "step": 21244 }, { "epoch": 0.8987647009053219, "grad_norm": 2.587345600128174, "learning_rate": 0.001, "loss": 2.6168, "step": 21245 }, { "epoch": 0.8988070056688383, "grad_norm": 0.17357079684734344, "learning_rate": 0.001, "loss": 1.9072, "step": 21246 }, { "epoch": 0.8988493104323547, "grad_norm": 0.1991180181503296, "learning_rate": 0.001, "loss": 1.8736, "step": 21247 }, { "epoch": 0.898891615195871, "grad_norm": 0.14238642156124115, "learning_rate": 0.001, "loss": 1.1997, "step": 21248 }, { "epoch": 0.8989339199593874, "grad_norm": 0.20077811181545258, "learning_rate": 0.001, "loss": 2.0872, "step": 21249 }, { "epoch": 0.8989762247229038, "grad_norm": 0.7532477378845215, "learning_rate": 0.001, "loss": 2.3801, "step": 21250 }, { "epoch": 0.8990185294864201, "grad_norm": 0.1633615493774414, "learning_rate": 0.001, "loss": 2.2373, "step": 21251 }, { "epoch": 0.8990608342499365, "grad_norm": 0.13562917709350586, "learning_rate": 0.001, "loss": 1.5633, "step": 21252 }, { "epoch": 0.899103139013453, "grad_norm": 0.14501778781414032, "learning_rate": 0.001, "loss": 1.7519, "step": 21253 }, { "epoch": 0.8991454437769693, "grad_norm": 0.14665882289409637, "learning_rate": 0.001, "loss": 1.2838, "step": 21254 }, { "epoch": 0.8991877485404857, "grad_norm": 0.14085936546325684, "learning_rate": 0.001, "loss": 1.6832, "step": 21255 }, { "epoch": 0.8992300533040021, "grad_norm": 0.5537809133529663, "learning_rate": 0.001, "loss": 1.9436, "step": 21256 }, { "epoch": 0.8992723580675184, "grad_norm": 0.14022964239120483, "learning_rate": 0.001, "loss": 2.0391, "step": 21257 }, { "epoch": 0.8993146628310348, "grad_norm": 0.13678425550460815, "learning_rate": 0.001, "loss": 1.2781, "step": 21258 }, { "epoch": 0.8993569675945512, "grad_norm": 0.1590747982263565, "learning_rate": 0.001, "loss": 2.4737, "step": 21259 }, { "epoch": 0.8993992723580675, "grad_norm": 0.31500861048698425, "learning_rate": 0.001, "loss": 2.0949, "step": 21260 }, { "epoch": 0.8994415771215839, "grad_norm": 0.7429576516151428, "learning_rate": 0.001, "loss": 2.6399, "step": 21261 }, { "epoch": 0.8994838818851003, "grad_norm": 0.1560697704553604, "learning_rate": 0.001, "loss": 2.4382, "step": 21262 }, { "epoch": 0.8995261866486166, "grad_norm": 0.1646103411912918, "learning_rate": 0.001, "loss": 1.8626, "step": 21263 }, { "epoch": 0.899568491412133, "grad_norm": 0.4191820025444031, "learning_rate": 0.001, "loss": 2.2337, "step": 21264 }, { "epoch": 0.8996107961756494, "grad_norm": 0.16874629259109497, "learning_rate": 0.001, "loss": 3.4052, "step": 21265 }, { "epoch": 0.8996531009391657, "grad_norm": 0.12579210102558136, "learning_rate": 0.001, "loss": 2.4531, "step": 21266 }, { "epoch": 0.8996954057026821, "grad_norm": 0.14168886840343475, "learning_rate": 0.001, "loss": 2.4614, "step": 21267 }, { "epoch": 0.8997377104661985, "grad_norm": 0.16749995946884155, "learning_rate": 0.001, "loss": 1.3653, "step": 21268 }, { "epoch": 0.8997800152297148, "grad_norm": 0.6988632678985596, "learning_rate": 0.001, "loss": 1.7769, "step": 21269 }, { "epoch": 0.8998223199932313, "grad_norm": 0.22250531613826752, "learning_rate": 0.001, "loss": 1.8593, "step": 21270 }, { "epoch": 0.8998646247567476, "grad_norm": 0.11966440081596375, "learning_rate": 0.001, "loss": 1.4307, "step": 21271 }, { "epoch": 0.899906929520264, "grad_norm": 0.3543547987937927, "learning_rate": 0.001, "loss": 1.6872, "step": 21272 }, { "epoch": 0.8999492342837804, "grad_norm": 0.15212112665176392, "learning_rate": 0.001, "loss": 2.4326, "step": 21273 }, { "epoch": 0.8999915390472967, "grad_norm": 0.14390593767166138, "learning_rate": 0.001, "loss": 1.6194, "step": 21274 }, { "epoch": 0.9000338438108131, "grad_norm": 0.1561872363090515, "learning_rate": 0.001, "loss": 1.9529, "step": 21275 }, { "epoch": 0.9000761485743295, "grad_norm": 0.14127811789512634, "learning_rate": 0.001, "loss": 1.3719, "step": 21276 }, { "epoch": 0.9001184533378458, "grad_norm": 0.13319812715053558, "learning_rate": 0.001, "loss": 1.8501, "step": 21277 }, { "epoch": 0.9001607581013622, "grad_norm": 0.13405123353004456, "learning_rate": 0.001, "loss": 2.0724, "step": 21278 }, { "epoch": 0.9002030628648786, "grad_norm": 0.18458141386508942, "learning_rate": 0.001, "loss": 2.1271, "step": 21279 }, { "epoch": 0.9002453676283949, "grad_norm": 0.1432388573884964, "learning_rate": 0.001, "loss": 1.6285, "step": 21280 }, { "epoch": 0.9002876723919113, "grad_norm": 0.12944474816322327, "learning_rate": 0.001, "loss": 2.7784, "step": 21281 }, { "epoch": 0.9003299771554277, "grad_norm": 0.305156946182251, "learning_rate": 0.001, "loss": 2.5529, "step": 21282 }, { "epoch": 0.900372281918944, "grad_norm": 104.79377746582031, "learning_rate": 0.001, "loss": 2.4094, "step": 21283 }, { "epoch": 0.9004145866824604, "grad_norm": 0.16589900851249695, "learning_rate": 0.001, "loss": 2.0991, "step": 21284 }, { "epoch": 0.9004568914459768, "grad_norm": 0.14582109451293945, "learning_rate": 0.001, "loss": 2.7329, "step": 21285 }, { "epoch": 0.9004991962094931, "grad_norm": 0.17398712038993835, "learning_rate": 0.001, "loss": 1.9659, "step": 21286 }, { "epoch": 0.9005415009730096, "grad_norm": 0.16176342964172363, "learning_rate": 0.001, "loss": 1.6683, "step": 21287 }, { "epoch": 0.900583805736526, "grad_norm": 0.2542744278907776, "learning_rate": 0.001, "loss": 2.9904, "step": 21288 }, { "epoch": 0.9006261105000423, "grad_norm": 0.1428564190864563, "learning_rate": 0.001, "loss": 2.8341, "step": 21289 }, { "epoch": 0.9006684152635587, "grad_norm": 0.16790971159934998, "learning_rate": 0.001, "loss": 1.8499, "step": 21290 }, { "epoch": 0.9007107200270751, "grad_norm": 0.32263800501823425, "learning_rate": 0.001, "loss": 2.4642, "step": 21291 }, { "epoch": 0.9007530247905914, "grad_norm": 0.19915816187858582, "learning_rate": 0.001, "loss": 1.686, "step": 21292 }, { "epoch": 0.9007953295541078, "grad_norm": 0.17057372629642487, "learning_rate": 0.001, "loss": 1.6805, "step": 21293 }, { "epoch": 0.9008376343176242, "grad_norm": 0.22040168941020966, "learning_rate": 0.001, "loss": 2.4221, "step": 21294 }, { "epoch": 0.9008799390811405, "grad_norm": 0.41536402702331543, "learning_rate": 0.001, "loss": 1.8372, "step": 21295 }, { "epoch": 0.9009222438446569, "grad_norm": 2.5897037982940674, "learning_rate": 0.001, "loss": 2.0451, "step": 21296 }, { "epoch": 0.9009645486081733, "grad_norm": 0.16217264533042908, "learning_rate": 0.001, "loss": 2.2002, "step": 21297 }, { "epoch": 0.9010068533716896, "grad_norm": 0.16462668776512146, "learning_rate": 0.001, "loss": 2.3787, "step": 21298 }, { "epoch": 0.901049158135206, "grad_norm": 0.13496233522891998, "learning_rate": 0.001, "loss": 1.757, "step": 21299 }, { "epoch": 0.9010914628987224, "grad_norm": 0.15986299514770508, "learning_rate": 0.001, "loss": 2.5716, "step": 21300 }, { "epoch": 0.9011337676622387, "grad_norm": 2.0072743892669678, "learning_rate": 0.001, "loss": 1.5631, "step": 21301 }, { "epoch": 0.9011760724257551, "grad_norm": 0.15145206451416016, "learning_rate": 0.001, "loss": 1.5825, "step": 21302 }, { "epoch": 0.9012183771892716, "grad_norm": 0.1307021826505661, "learning_rate": 0.001, "loss": 2.7586, "step": 21303 }, { "epoch": 0.9012606819527879, "grad_norm": 1.082275152206421, "learning_rate": 0.001, "loss": 1.5857, "step": 21304 }, { "epoch": 0.9013029867163043, "grad_norm": 0.13324807584285736, "learning_rate": 0.001, "loss": 1.9865, "step": 21305 }, { "epoch": 0.9013452914798207, "grad_norm": 0.18132184445858002, "learning_rate": 0.001, "loss": 2.2404, "step": 21306 }, { "epoch": 0.901387596243337, "grad_norm": 0.15229110419750214, "learning_rate": 0.001, "loss": 1.9643, "step": 21307 }, { "epoch": 0.9014299010068534, "grad_norm": 0.15678609907627106, "learning_rate": 0.001, "loss": 1.4846, "step": 21308 }, { "epoch": 0.9014722057703698, "grad_norm": 0.2569018006324768, "learning_rate": 0.001, "loss": 1.4851, "step": 21309 }, { "epoch": 0.9015145105338861, "grad_norm": 0.2765081822872162, "learning_rate": 0.001, "loss": 2.6487, "step": 21310 }, { "epoch": 0.9015568152974025, "grad_norm": 0.4249289631843567, "learning_rate": 0.001, "loss": 2.114, "step": 21311 }, { "epoch": 0.9015991200609189, "grad_norm": 0.14523530006408691, "learning_rate": 0.001, "loss": 1.4716, "step": 21312 }, { "epoch": 0.9016414248244352, "grad_norm": 0.14755897223949432, "learning_rate": 0.001, "loss": 1.7763, "step": 21313 }, { "epoch": 0.9016837295879516, "grad_norm": 6.201207637786865, "learning_rate": 0.001, "loss": 1.6094, "step": 21314 }, { "epoch": 0.9017260343514679, "grad_norm": 0.15470610558986664, "learning_rate": 0.001, "loss": 2.9894, "step": 21315 }, { "epoch": 0.9017683391149843, "grad_norm": 0.2583865821361542, "learning_rate": 0.001, "loss": 2.2243, "step": 21316 }, { "epoch": 0.9018106438785007, "grad_norm": 0.1813354194164276, "learning_rate": 0.001, "loss": 1.8183, "step": 21317 }, { "epoch": 0.901852948642017, "grad_norm": 0.1495773047208786, "learning_rate": 0.001, "loss": 2.1804, "step": 21318 }, { "epoch": 0.9018952534055334, "grad_norm": 0.15756896138191223, "learning_rate": 0.001, "loss": 1.8869, "step": 21319 }, { "epoch": 0.9019375581690499, "grad_norm": 0.44224387407302856, "learning_rate": 0.001, "loss": 2.3278, "step": 21320 }, { "epoch": 0.9019798629325662, "grad_norm": 0.23249466717243195, "learning_rate": 0.001, "loss": 1.8345, "step": 21321 }, { "epoch": 0.9020221676960826, "grad_norm": 0.17175745964050293, "learning_rate": 0.001, "loss": 1.687, "step": 21322 }, { "epoch": 0.902064472459599, "grad_norm": 0.15828604996204376, "learning_rate": 0.001, "loss": 2.0996, "step": 21323 }, { "epoch": 0.9021067772231153, "grad_norm": 0.1508798450231552, "learning_rate": 0.001, "loss": 2.1381, "step": 21324 }, { "epoch": 0.9021490819866317, "grad_norm": 0.1373659372329712, "learning_rate": 0.001, "loss": 2.6336, "step": 21325 }, { "epoch": 0.9021913867501481, "grad_norm": 0.3546662926673889, "learning_rate": 0.001, "loss": 2.1206, "step": 21326 }, { "epoch": 0.9022336915136644, "grad_norm": 0.17910721898078918, "learning_rate": 0.001, "loss": 3.109, "step": 21327 }, { "epoch": 0.9022759962771808, "grad_norm": 2.0273354053497314, "learning_rate": 0.001, "loss": 1.6428, "step": 21328 }, { "epoch": 0.9023183010406972, "grad_norm": 0.17860253155231476, "learning_rate": 0.001, "loss": 2.9683, "step": 21329 }, { "epoch": 0.9023606058042135, "grad_norm": 0.16391193866729736, "learning_rate": 0.001, "loss": 1.66, "step": 21330 }, { "epoch": 0.9024029105677299, "grad_norm": 0.41829827427864075, "learning_rate": 0.001, "loss": 2.3392, "step": 21331 }, { "epoch": 0.9024452153312463, "grad_norm": 0.15510700643062592, "learning_rate": 0.001, "loss": 2.1811, "step": 21332 }, { "epoch": 0.9024875200947626, "grad_norm": 0.16545583307743073, "learning_rate": 0.001, "loss": 2.1031, "step": 21333 }, { "epoch": 0.902529824858279, "grad_norm": 1.293203592300415, "learning_rate": 0.001, "loss": 1.5144, "step": 21334 }, { "epoch": 0.9025721296217954, "grad_norm": 0.14911191165447235, "learning_rate": 0.001, "loss": 2.7838, "step": 21335 }, { "epoch": 0.9026144343853117, "grad_norm": 0.2668335437774658, "learning_rate": 0.001, "loss": 3.2185, "step": 21336 }, { "epoch": 0.9026567391488282, "grad_norm": 0.2777593433856964, "learning_rate": 0.001, "loss": 2.5853, "step": 21337 }, { "epoch": 0.9026990439123446, "grad_norm": 0.6844047904014587, "learning_rate": 0.001, "loss": 1.6514, "step": 21338 }, { "epoch": 0.9027413486758609, "grad_norm": 0.1647360622882843, "learning_rate": 0.001, "loss": 2.2108, "step": 21339 }, { "epoch": 0.9027836534393773, "grad_norm": 0.15791663527488708, "learning_rate": 0.001, "loss": 1.6112, "step": 21340 }, { "epoch": 0.9028259582028937, "grad_norm": 0.1473037302494049, "learning_rate": 0.001, "loss": 2.266, "step": 21341 }, { "epoch": 0.90286826296641, "grad_norm": 0.15689495205879211, "learning_rate": 0.001, "loss": 1.765, "step": 21342 }, { "epoch": 0.9029105677299264, "grad_norm": 0.18425515294075012, "learning_rate": 0.001, "loss": 1.8901, "step": 21343 }, { "epoch": 0.9029528724934428, "grad_norm": 0.2768228054046631, "learning_rate": 0.001, "loss": 1.9768, "step": 21344 }, { "epoch": 0.9029951772569591, "grad_norm": 0.15262466669082642, "learning_rate": 0.001, "loss": 2.0877, "step": 21345 }, { "epoch": 0.9030374820204755, "grad_norm": 2.121443748474121, "learning_rate": 0.001, "loss": 2.5901, "step": 21346 }, { "epoch": 0.9030797867839919, "grad_norm": 0.14500996470451355, "learning_rate": 0.001, "loss": 2.0742, "step": 21347 }, { "epoch": 0.9031220915475082, "grad_norm": 0.19610632956027985, "learning_rate": 0.001, "loss": 2.2057, "step": 21348 }, { "epoch": 0.9031643963110246, "grad_norm": 0.17215634882450104, "learning_rate": 0.001, "loss": 3.0487, "step": 21349 }, { "epoch": 0.903206701074541, "grad_norm": 0.20982448756694794, "learning_rate": 0.001, "loss": 2.9082, "step": 21350 }, { "epoch": 0.9032490058380573, "grad_norm": 4.3952484130859375, "learning_rate": 0.001, "loss": 1.7265, "step": 21351 }, { "epoch": 0.9032913106015737, "grad_norm": 0.21057921648025513, "learning_rate": 0.001, "loss": 1.9388, "step": 21352 }, { "epoch": 0.9033336153650902, "grad_norm": 0.17475013434886932, "learning_rate": 0.001, "loss": 2.6006, "step": 21353 }, { "epoch": 0.9033759201286065, "grad_norm": 4.874578475952148, "learning_rate": 0.001, "loss": 2.2432, "step": 21354 }, { "epoch": 0.9034182248921229, "grad_norm": 0.16315825283527374, "learning_rate": 0.001, "loss": 1.6954, "step": 21355 }, { "epoch": 0.9034605296556393, "grad_norm": 0.1910039633512497, "learning_rate": 0.001, "loss": 2.0015, "step": 21356 }, { "epoch": 0.9035028344191556, "grad_norm": 0.14265620708465576, "learning_rate": 0.001, "loss": 2.2112, "step": 21357 }, { "epoch": 0.903545139182672, "grad_norm": 0.4426799714565277, "learning_rate": 0.001, "loss": 2.4595, "step": 21358 }, { "epoch": 0.9035874439461884, "grad_norm": 0.13497935235500336, "learning_rate": 0.001, "loss": 2.0469, "step": 21359 }, { "epoch": 0.9036297487097047, "grad_norm": 0.2634698152542114, "learning_rate": 0.001, "loss": 1.2785, "step": 21360 }, { "epoch": 0.9036720534732211, "grad_norm": 7.950343608856201, "learning_rate": 0.001, "loss": 3.839, "step": 21361 }, { "epoch": 0.9037143582367374, "grad_norm": 0.30899831652641296, "learning_rate": 0.001, "loss": 2.0495, "step": 21362 }, { "epoch": 0.9037566630002538, "grad_norm": 0.5557863116264343, "learning_rate": 0.001, "loss": 2.8, "step": 21363 }, { "epoch": 0.9037989677637702, "grad_norm": 0.14372897148132324, "learning_rate": 0.001, "loss": 1.3838, "step": 21364 }, { "epoch": 0.9038412725272865, "grad_norm": 0.17638888955116272, "learning_rate": 0.001, "loss": 1.7204, "step": 21365 }, { "epoch": 0.9038835772908029, "grad_norm": 0.5824902653694153, "learning_rate": 0.001, "loss": 2.1462, "step": 21366 }, { "epoch": 0.9039258820543193, "grad_norm": 0.18031518161296844, "learning_rate": 0.001, "loss": 1.7274, "step": 21367 }, { "epoch": 0.9039681868178356, "grad_norm": 0.1800137311220169, "learning_rate": 0.001, "loss": 2.7204, "step": 21368 }, { "epoch": 0.904010491581352, "grad_norm": 1.138705849647522, "learning_rate": 0.001, "loss": 1.8414, "step": 21369 }, { "epoch": 0.9040527963448685, "grad_norm": 0.164835125207901, "learning_rate": 0.001, "loss": 2.7179, "step": 21370 }, { "epoch": 0.9040951011083848, "grad_norm": 0.15999598801136017, "learning_rate": 0.001, "loss": 2.2956, "step": 21371 }, { "epoch": 0.9041374058719012, "grad_norm": 0.17977473139762878, "learning_rate": 0.001, "loss": 3.2465, "step": 21372 }, { "epoch": 0.9041797106354176, "grad_norm": 0.15177109837532043, "learning_rate": 0.001, "loss": 1.9558, "step": 21373 }, { "epoch": 0.9042220153989339, "grad_norm": 0.9045629501342773, "learning_rate": 0.001, "loss": 2.9101, "step": 21374 }, { "epoch": 0.9042643201624503, "grad_norm": 0.15818485617637634, "learning_rate": 0.001, "loss": 2.2249, "step": 21375 }, { "epoch": 0.9043066249259667, "grad_norm": 0.16541606187820435, "learning_rate": 0.001, "loss": 2.3738, "step": 21376 }, { "epoch": 0.904348929689483, "grad_norm": 0.9918728470802307, "learning_rate": 0.001, "loss": 2.5462, "step": 21377 }, { "epoch": 0.9043912344529994, "grad_norm": 0.12892168760299683, "learning_rate": 0.001, "loss": 3.0338, "step": 21378 }, { "epoch": 0.9044335392165158, "grad_norm": 0.1821994036436081, "learning_rate": 0.001, "loss": 2.3973, "step": 21379 }, { "epoch": 0.9044758439800321, "grad_norm": 0.3445868492126465, "learning_rate": 0.001, "loss": 2.9188, "step": 21380 }, { "epoch": 0.9045181487435485, "grad_norm": 0.14604288339614868, "learning_rate": 0.001, "loss": 2.1574, "step": 21381 }, { "epoch": 0.9045604535070649, "grad_norm": 0.14212435483932495, "learning_rate": 0.001, "loss": 2.1932, "step": 21382 }, { "epoch": 0.9046027582705812, "grad_norm": 0.12347246706485748, "learning_rate": 0.001, "loss": 1.7564, "step": 21383 }, { "epoch": 0.9046450630340976, "grad_norm": 0.3492525517940521, "learning_rate": 0.001, "loss": 1.7874, "step": 21384 }, { "epoch": 0.904687367797614, "grad_norm": 0.1499442309141159, "learning_rate": 0.001, "loss": 2.3026, "step": 21385 }, { "epoch": 0.9047296725611303, "grad_norm": 0.15123094618320465, "learning_rate": 0.001, "loss": 2.6157, "step": 21386 }, { "epoch": 0.9047719773246468, "grad_norm": 0.18827593326568604, "learning_rate": 0.001, "loss": 2.5425, "step": 21387 }, { "epoch": 0.9048142820881632, "grad_norm": 0.16215500235557556, "learning_rate": 0.001, "loss": 2.1401, "step": 21388 }, { "epoch": 0.9048565868516795, "grad_norm": 0.15039432048797607, "learning_rate": 0.001, "loss": 2.3611, "step": 21389 }, { "epoch": 0.9048988916151959, "grad_norm": 0.183966264128685, "learning_rate": 0.001, "loss": 2.4818, "step": 21390 }, { "epoch": 0.9049411963787123, "grad_norm": 0.1891811490058899, "learning_rate": 0.001, "loss": 3.7626, "step": 21391 }, { "epoch": 0.9049835011422286, "grad_norm": 0.18254221975803375, "learning_rate": 0.001, "loss": 1.7773, "step": 21392 }, { "epoch": 0.905025805905745, "grad_norm": 0.14208267629146576, "learning_rate": 0.001, "loss": 1.8959, "step": 21393 }, { "epoch": 0.9050681106692614, "grad_norm": 0.3248007297515869, "learning_rate": 0.001, "loss": 2.2422, "step": 21394 }, { "epoch": 0.9051104154327777, "grad_norm": 0.15163081884384155, "learning_rate": 0.001, "loss": 2.1537, "step": 21395 }, { "epoch": 0.9051527201962941, "grad_norm": 0.16903750598430634, "learning_rate": 0.001, "loss": 1.838, "step": 21396 }, { "epoch": 0.9051950249598105, "grad_norm": 0.13609051704406738, "learning_rate": 0.001, "loss": 1.3032, "step": 21397 }, { "epoch": 0.9052373297233268, "grad_norm": 0.21412265300750732, "learning_rate": 0.001, "loss": 2.4583, "step": 21398 }, { "epoch": 0.9052796344868432, "grad_norm": 0.6192092895507812, "learning_rate": 0.001, "loss": 3.1105, "step": 21399 }, { "epoch": 0.9053219392503596, "grad_norm": 1.6677172183990479, "learning_rate": 0.001, "loss": 1.8853, "step": 21400 }, { "epoch": 0.9053642440138759, "grad_norm": 0.5495797395706177, "learning_rate": 0.001, "loss": 2.3331, "step": 21401 }, { "epoch": 0.9054065487773924, "grad_norm": 0.5030525922775269, "learning_rate": 0.001, "loss": 1.9881, "step": 21402 }, { "epoch": 0.9054488535409088, "grad_norm": 0.14512759447097778, "learning_rate": 0.001, "loss": 2.2818, "step": 21403 }, { "epoch": 0.9054911583044251, "grad_norm": 0.12264164537191391, "learning_rate": 0.001, "loss": 1.9594, "step": 21404 }, { "epoch": 0.9055334630679415, "grad_norm": 0.14045067131519318, "learning_rate": 0.001, "loss": 1.8512, "step": 21405 }, { "epoch": 0.9055757678314578, "grad_norm": 0.4061392843723297, "learning_rate": 0.001, "loss": 2.3092, "step": 21406 }, { "epoch": 0.9056180725949742, "grad_norm": 1.0429646968841553, "learning_rate": 0.001, "loss": 1.7809, "step": 21407 }, { "epoch": 0.9056603773584906, "grad_norm": 0.14219164848327637, "learning_rate": 0.001, "loss": 1.641, "step": 21408 }, { "epoch": 0.9057026821220069, "grad_norm": 0.14022192358970642, "learning_rate": 0.001, "loss": 2.4478, "step": 21409 }, { "epoch": 0.9057449868855233, "grad_norm": 0.20621517300605774, "learning_rate": 0.001, "loss": 2.3145, "step": 21410 }, { "epoch": 0.9057872916490397, "grad_norm": 0.1814170777797699, "learning_rate": 0.001, "loss": 1.846, "step": 21411 }, { "epoch": 0.905829596412556, "grad_norm": 0.17007854580879211, "learning_rate": 0.001, "loss": 2.2049, "step": 21412 }, { "epoch": 0.9058719011760724, "grad_norm": 0.16254855692386627, "learning_rate": 0.001, "loss": 2.3874, "step": 21413 }, { "epoch": 0.9059142059395888, "grad_norm": 0.1488046646118164, "learning_rate": 0.001, "loss": 2.5976, "step": 21414 }, { "epoch": 0.9059565107031051, "grad_norm": 0.18739381432533264, "learning_rate": 0.001, "loss": 2.8319, "step": 21415 }, { "epoch": 0.9059988154666215, "grad_norm": 0.3353780210018158, "learning_rate": 0.001, "loss": 2.6556, "step": 21416 }, { "epoch": 0.9060411202301379, "grad_norm": 0.9559656381607056, "learning_rate": 0.001, "loss": 2.9665, "step": 21417 }, { "epoch": 0.9060834249936542, "grad_norm": 0.15689292550086975, "learning_rate": 0.001, "loss": 2.0337, "step": 21418 }, { "epoch": 0.9061257297571707, "grad_norm": 0.13871730864048004, "learning_rate": 0.001, "loss": 1.9963, "step": 21419 }, { "epoch": 0.9061680345206871, "grad_norm": 0.16745619475841522, "learning_rate": 0.001, "loss": 1.7658, "step": 21420 }, { "epoch": 0.9062103392842034, "grad_norm": 0.14893504977226257, "learning_rate": 0.001, "loss": 1.7183, "step": 21421 }, { "epoch": 0.9062526440477198, "grad_norm": 0.18129420280456543, "learning_rate": 0.001, "loss": 1.9251, "step": 21422 }, { "epoch": 0.9062949488112362, "grad_norm": 0.22704291343688965, "learning_rate": 0.001, "loss": 1.8624, "step": 21423 }, { "epoch": 0.9063372535747525, "grad_norm": 0.16960926353931427, "learning_rate": 0.001, "loss": 1.7591, "step": 21424 }, { "epoch": 0.9063795583382689, "grad_norm": 0.1893257051706314, "learning_rate": 0.001, "loss": 1.8019, "step": 21425 }, { "epoch": 0.9064218631017853, "grad_norm": 8.908495903015137, "learning_rate": 0.001, "loss": 1.3735, "step": 21426 }, { "epoch": 0.9064641678653016, "grad_norm": 0.16186918318271637, "learning_rate": 0.001, "loss": 1.9056, "step": 21427 }, { "epoch": 0.906506472628818, "grad_norm": 0.16607151925563812, "learning_rate": 0.001, "loss": 2.8435, "step": 21428 }, { "epoch": 0.9065487773923344, "grad_norm": 0.34751367568969727, "learning_rate": 0.001, "loss": 1.9356, "step": 21429 }, { "epoch": 0.9065910821558507, "grad_norm": 0.3750505745410919, "learning_rate": 0.001, "loss": 2.2931, "step": 21430 }, { "epoch": 0.9066333869193671, "grad_norm": 0.1678369790315628, "learning_rate": 0.001, "loss": 1.6514, "step": 21431 }, { "epoch": 0.9066756916828835, "grad_norm": 0.5519915223121643, "learning_rate": 0.001, "loss": 2.7002, "step": 21432 }, { "epoch": 0.9067179964463998, "grad_norm": 0.1600515991449356, "learning_rate": 0.001, "loss": 1.8045, "step": 21433 }, { "epoch": 0.9067603012099162, "grad_norm": 0.15844613313674927, "learning_rate": 0.001, "loss": 1.8964, "step": 21434 }, { "epoch": 0.9068026059734327, "grad_norm": 0.1344721019268036, "learning_rate": 0.001, "loss": 1.9177, "step": 21435 }, { "epoch": 0.906844910736949, "grad_norm": 0.16520515084266663, "learning_rate": 0.001, "loss": 2.6828, "step": 21436 }, { "epoch": 0.9068872155004654, "grad_norm": 0.15085281431674957, "learning_rate": 0.001, "loss": 1.7172, "step": 21437 }, { "epoch": 0.9069295202639818, "grad_norm": 0.23470507562160492, "learning_rate": 0.001, "loss": 2.8068, "step": 21438 }, { "epoch": 0.9069718250274981, "grad_norm": 0.15734681487083435, "learning_rate": 0.001, "loss": 2.1509, "step": 21439 }, { "epoch": 0.9070141297910145, "grad_norm": 0.19993236660957336, "learning_rate": 0.001, "loss": 1.9934, "step": 21440 }, { "epoch": 0.9070564345545309, "grad_norm": 0.1488742232322693, "learning_rate": 0.001, "loss": 2.1611, "step": 21441 }, { "epoch": 0.9070987393180472, "grad_norm": 0.15275177359580994, "learning_rate": 0.001, "loss": 1.9241, "step": 21442 }, { "epoch": 0.9071410440815636, "grad_norm": 0.1733865737915039, "learning_rate": 0.001, "loss": 1.6173, "step": 21443 }, { "epoch": 0.90718334884508, "grad_norm": 43.84885025024414, "learning_rate": 0.001, "loss": 3.4629, "step": 21444 }, { "epoch": 0.9072256536085963, "grad_norm": 0.14967910945415497, "learning_rate": 0.001, "loss": 1.882, "step": 21445 }, { "epoch": 0.9072679583721127, "grad_norm": 0.1916898787021637, "learning_rate": 0.001, "loss": 1.8736, "step": 21446 }, { "epoch": 0.9073102631356291, "grad_norm": 3.939188003540039, "learning_rate": 0.001, "loss": 3.2875, "step": 21447 }, { "epoch": 0.9073525678991454, "grad_norm": 0.16006742417812347, "learning_rate": 0.001, "loss": 1.9604, "step": 21448 }, { "epoch": 0.9073948726626618, "grad_norm": 0.20729239284992218, "learning_rate": 0.001, "loss": 2.4355, "step": 21449 }, { "epoch": 0.9074371774261782, "grad_norm": 0.15743091702461243, "learning_rate": 0.001, "loss": 2.3285, "step": 21450 }, { "epoch": 0.9074794821896945, "grad_norm": 0.15352989733219147, "learning_rate": 0.001, "loss": 2.2617, "step": 21451 }, { "epoch": 0.907521786953211, "grad_norm": 1.4606817960739136, "learning_rate": 0.001, "loss": 2.2048, "step": 21452 }, { "epoch": 0.9075640917167273, "grad_norm": 0.14188838005065918, "learning_rate": 0.001, "loss": 1.9296, "step": 21453 }, { "epoch": 0.9076063964802437, "grad_norm": 0.1336817890405655, "learning_rate": 0.001, "loss": 1.9424, "step": 21454 }, { "epoch": 0.9076487012437601, "grad_norm": 1.9917988777160645, "learning_rate": 0.001, "loss": 1.8911, "step": 21455 }, { "epoch": 0.9076910060072764, "grad_norm": 0.7746376395225525, "learning_rate": 0.001, "loss": 3.08, "step": 21456 }, { "epoch": 0.9077333107707928, "grad_norm": 0.18907096982002258, "learning_rate": 0.001, "loss": 1.9718, "step": 21457 }, { "epoch": 0.9077756155343092, "grad_norm": 0.1984204649925232, "learning_rate": 0.001, "loss": 1.9067, "step": 21458 }, { "epoch": 0.9078179202978255, "grad_norm": 0.13996772468090057, "learning_rate": 0.001, "loss": 4.0638, "step": 21459 }, { "epoch": 0.9078602250613419, "grad_norm": 0.16142001748085022, "learning_rate": 0.001, "loss": 2.3478, "step": 21460 }, { "epoch": 0.9079025298248583, "grad_norm": 0.17676426470279694, "learning_rate": 0.001, "loss": 4.1074, "step": 21461 }, { "epoch": 0.9079448345883746, "grad_norm": 0.1353548765182495, "learning_rate": 0.001, "loss": 2.4199, "step": 21462 }, { "epoch": 0.907987139351891, "grad_norm": 0.18205760419368744, "learning_rate": 0.001, "loss": 2.2659, "step": 21463 }, { "epoch": 0.9080294441154074, "grad_norm": 0.16253677010536194, "learning_rate": 0.001, "loss": 2.387, "step": 21464 }, { "epoch": 0.9080717488789237, "grad_norm": 0.18944591283798218, "learning_rate": 0.001, "loss": 2.3769, "step": 21465 }, { "epoch": 0.9081140536424401, "grad_norm": 0.1672883927822113, "learning_rate": 0.001, "loss": 2.4218, "step": 21466 }, { "epoch": 0.9081563584059565, "grad_norm": 0.1499961018562317, "learning_rate": 0.001, "loss": 2.0867, "step": 21467 }, { "epoch": 0.9081986631694728, "grad_norm": 0.2781917452812195, "learning_rate": 0.001, "loss": 2.1938, "step": 21468 }, { "epoch": 0.9082409679329893, "grad_norm": 0.2037796825170517, "learning_rate": 0.001, "loss": 3.4381, "step": 21469 }, { "epoch": 0.9082832726965057, "grad_norm": 0.2747834622859955, "learning_rate": 0.001, "loss": 2.2192, "step": 21470 }, { "epoch": 0.908325577460022, "grad_norm": 0.128767728805542, "learning_rate": 0.001, "loss": 1.6764, "step": 21471 }, { "epoch": 0.9083678822235384, "grad_norm": 0.13520826399326324, "learning_rate": 0.001, "loss": 1.7189, "step": 21472 }, { "epoch": 0.9084101869870548, "grad_norm": 0.16434721648693085, "learning_rate": 0.001, "loss": 1.1854, "step": 21473 }, { "epoch": 0.9084524917505711, "grad_norm": 0.7008240818977356, "learning_rate": 0.001, "loss": 1.8192, "step": 21474 }, { "epoch": 0.9084947965140875, "grad_norm": 0.1205538809299469, "learning_rate": 0.001, "loss": 2.7022, "step": 21475 }, { "epoch": 0.9085371012776039, "grad_norm": 0.17018336057662964, "learning_rate": 0.001, "loss": 2.3906, "step": 21476 }, { "epoch": 0.9085794060411202, "grad_norm": 0.2808079421520233, "learning_rate": 0.001, "loss": 3.2353, "step": 21477 }, { "epoch": 0.9086217108046366, "grad_norm": 0.18852543830871582, "learning_rate": 0.001, "loss": 2.035, "step": 21478 }, { "epoch": 0.908664015568153, "grad_norm": 0.12925562262535095, "learning_rate": 0.001, "loss": 1.7236, "step": 21479 }, { "epoch": 0.9087063203316693, "grad_norm": 4.139763832092285, "learning_rate": 0.001, "loss": 1.6331, "step": 21480 }, { "epoch": 0.9087486250951857, "grad_norm": 0.15899895131587982, "learning_rate": 0.001, "loss": 1.7789, "step": 21481 }, { "epoch": 0.9087909298587021, "grad_norm": 0.1730024665594101, "learning_rate": 0.001, "loss": 3.3566, "step": 21482 }, { "epoch": 0.9088332346222184, "grad_norm": 0.1567884385585785, "learning_rate": 0.001, "loss": 1.9054, "step": 21483 }, { "epoch": 0.9088755393857348, "grad_norm": 2.438006639480591, "learning_rate": 0.001, "loss": 2.6868, "step": 21484 }, { "epoch": 0.9089178441492513, "grad_norm": 0.5900127291679382, "learning_rate": 0.001, "loss": 2.5526, "step": 21485 }, { "epoch": 0.9089601489127676, "grad_norm": 0.7061132788658142, "learning_rate": 0.001, "loss": 2.5784, "step": 21486 }, { "epoch": 0.909002453676284, "grad_norm": 0.7392309308052063, "learning_rate": 0.001, "loss": 2.1671, "step": 21487 }, { "epoch": 0.9090447584398004, "grad_norm": 0.18610547482967377, "learning_rate": 0.001, "loss": 2.4229, "step": 21488 }, { "epoch": 0.9090870632033167, "grad_norm": 0.15791387856006622, "learning_rate": 0.001, "loss": 2.6113, "step": 21489 }, { "epoch": 0.9091293679668331, "grad_norm": 13.427140235900879, "learning_rate": 0.001, "loss": 1.7686, "step": 21490 }, { "epoch": 0.9091716727303495, "grad_norm": 0.1820589154958725, "learning_rate": 0.001, "loss": 2.2658, "step": 21491 }, { "epoch": 0.9092139774938658, "grad_norm": 0.5405210256576538, "learning_rate": 0.001, "loss": 2.6741, "step": 21492 }, { "epoch": 0.9092562822573822, "grad_norm": 0.9230998754501343, "learning_rate": 0.001, "loss": 2.0778, "step": 21493 }, { "epoch": 0.9092985870208986, "grad_norm": 0.1825629621744156, "learning_rate": 0.001, "loss": 2.0666, "step": 21494 }, { "epoch": 0.9093408917844149, "grad_norm": 0.21903732419013977, "learning_rate": 0.001, "loss": 1.5503, "step": 21495 }, { "epoch": 0.9093831965479313, "grad_norm": 0.30625858902931213, "learning_rate": 0.001, "loss": 1.7506, "step": 21496 }, { "epoch": 0.9094255013114476, "grad_norm": 0.23304063081741333, "learning_rate": 0.001, "loss": 2.6598, "step": 21497 }, { "epoch": 0.909467806074964, "grad_norm": 0.18999595940113068, "learning_rate": 0.001, "loss": 1.8506, "step": 21498 }, { "epoch": 0.9095101108384804, "grad_norm": 3.7173240184783936, "learning_rate": 0.001, "loss": 2.9739, "step": 21499 }, { "epoch": 0.9095524156019967, "grad_norm": 0.33216527104377747, "learning_rate": 0.001, "loss": 1.2953, "step": 21500 }, { "epoch": 0.9095947203655131, "grad_norm": 0.1976582258939743, "learning_rate": 0.001, "loss": 2.2576, "step": 21501 }, { "epoch": 0.9096370251290296, "grad_norm": 5.519730091094971, "learning_rate": 0.001, "loss": 1.8796, "step": 21502 }, { "epoch": 0.9096793298925459, "grad_norm": 0.31326529383659363, "learning_rate": 0.001, "loss": 2.1128, "step": 21503 }, { "epoch": 0.9097216346560623, "grad_norm": 0.15805281698703766, "learning_rate": 0.001, "loss": 1.3282, "step": 21504 }, { "epoch": 0.9097639394195787, "grad_norm": 1.21946120262146, "learning_rate": 0.001, "loss": 2.203, "step": 21505 }, { "epoch": 0.909806244183095, "grad_norm": 0.13598865270614624, "learning_rate": 0.001, "loss": 1.8011, "step": 21506 }, { "epoch": 0.9098485489466114, "grad_norm": 0.30996546149253845, "learning_rate": 0.001, "loss": 1.5201, "step": 21507 }, { "epoch": 0.9098908537101278, "grad_norm": 0.21078462898731232, "learning_rate": 0.001, "loss": 2.2036, "step": 21508 }, { "epoch": 0.9099331584736441, "grad_norm": 0.1793532818555832, "learning_rate": 0.001, "loss": 2.754, "step": 21509 }, { "epoch": 0.9099754632371605, "grad_norm": 0.16735830903053284, "learning_rate": 0.001, "loss": 3.22, "step": 21510 }, { "epoch": 0.9100177680006769, "grad_norm": 0.15920020639896393, "learning_rate": 0.001, "loss": 2.8717, "step": 21511 }, { "epoch": 0.9100600727641932, "grad_norm": 0.17090164124965668, "learning_rate": 0.001, "loss": 1.8264, "step": 21512 }, { "epoch": 0.9101023775277096, "grad_norm": 0.1805136799812317, "learning_rate": 0.001, "loss": 1.7801, "step": 21513 }, { "epoch": 0.910144682291226, "grad_norm": 0.20294389128684998, "learning_rate": 0.001, "loss": 2.2071, "step": 21514 }, { "epoch": 0.9101869870547423, "grad_norm": 0.18520846962928772, "learning_rate": 0.001, "loss": 1.3472, "step": 21515 }, { "epoch": 0.9102292918182587, "grad_norm": 0.8986330032348633, "learning_rate": 0.001, "loss": 2.8275, "step": 21516 }, { "epoch": 0.9102715965817751, "grad_norm": 0.16281363368034363, "learning_rate": 0.001, "loss": 3.0088, "step": 21517 }, { "epoch": 0.9103139013452914, "grad_norm": 0.13993607461452484, "learning_rate": 0.001, "loss": 2.1769, "step": 21518 }, { "epoch": 0.9103562061088079, "grad_norm": 0.13132378458976746, "learning_rate": 0.001, "loss": 1.4656, "step": 21519 }, { "epoch": 0.9103985108723243, "grad_norm": 0.1520097702741623, "learning_rate": 0.001, "loss": 1.8638, "step": 21520 }, { "epoch": 0.9104408156358406, "grad_norm": 1.1580663919448853, "learning_rate": 0.001, "loss": 2.0435, "step": 21521 }, { "epoch": 0.910483120399357, "grad_norm": 0.1821383833885193, "learning_rate": 0.001, "loss": 2.5283, "step": 21522 }, { "epoch": 0.9105254251628734, "grad_norm": 0.1639668494462967, "learning_rate": 0.001, "loss": 2.0908, "step": 21523 }, { "epoch": 0.9105677299263897, "grad_norm": 0.18007436394691467, "learning_rate": 0.001, "loss": 1.8416, "step": 21524 }, { "epoch": 0.9106100346899061, "grad_norm": 0.13596655428409576, "learning_rate": 0.001, "loss": 1.352, "step": 21525 }, { "epoch": 0.9106523394534225, "grad_norm": 0.19696587324142456, "learning_rate": 0.001, "loss": 1.9196, "step": 21526 }, { "epoch": 0.9106946442169388, "grad_norm": 0.15187956392765045, "learning_rate": 0.001, "loss": 2.5806, "step": 21527 }, { "epoch": 0.9107369489804552, "grad_norm": 0.15384723246097565, "learning_rate": 0.001, "loss": 3.9416, "step": 21528 }, { "epoch": 0.9107792537439716, "grad_norm": 0.21854868531227112, "learning_rate": 0.001, "loss": 2.478, "step": 21529 }, { "epoch": 0.9108215585074879, "grad_norm": 0.1801101565361023, "learning_rate": 0.001, "loss": 2.2489, "step": 21530 }, { "epoch": 0.9108638632710043, "grad_norm": 0.19910669326782227, "learning_rate": 0.001, "loss": 1.7515, "step": 21531 }, { "epoch": 0.9109061680345207, "grad_norm": 0.18891854584217072, "learning_rate": 0.001, "loss": 1.8261, "step": 21532 }, { "epoch": 0.910948472798037, "grad_norm": 0.1557842195034027, "learning_rate": 0.001, "loss": 2.175, "step": 21533 }, { "epoch": 0.9109907775615534, "grad_norm": 0.15806511044502258, "learning_rate": 0.001, "loss": 1.7377, "step": 21534 }, { "epoch": 0.9110330823250699, "grad_norm": 5.196726322174072, "learning_rate": 0.001, "loss": 2.0587, "step": 21535 }, { "epoch": 0.9110753870885862, "grad_norm": 0.15925204753875732, "learning_rate": 0.001, "loss": 3.1414, "step": 21536 }, { "epoch": 0.9111176918521026, "grad_norm": 0.16708320379257202, "learning_rate": 0.001, "loss": 2.3862, "step": 21537 }, { "epoch": 0.911159996615619, "grad_norm": 0.14083898067474365, "learning_rate": 0.001, "loss": 1.5379, "step": 21538 }, { "epoch": 0.9112023013791353, "grad_norm": 2.581650733947754, "learning_rate": 0.001, "loss": 2.6506, "step": 21539 }, { "epoch": 0.9112446061426517, "grad_norm": 0.15130378305912018, "learning_rate": 0.001, "loss": 2.7111, "step": 21540 }, { "epoch": 0.911286910906168, "grad_norm": 0.172215536236763, "learning_rate": 0.001, "loss": 1.4651, "step": 21541 }, { "epoch": 0.9113292156696844, "grad_norm": 0.2063896358013153, "learning_rate": 0.001, "loss": 1.5084, "step": 21542 }, { "epoch": 0.9113715204332008, "grad_norm": 0.48414063453674316, "learning_rate": 0.001, "loss": 3.9561, "step": 21543 }, { "epoch": 0.9114138251967171, "grad_norm": 0.31004536151885986, "learning_rate": 0.001, "loss": 2.4255, "step": 21544 }, { "epoch": 0.9114561299602335, "grad_norm": 0.17649905383586884, "learning_rate": 0.001, "loss": 1.8264, "step": 21545 }, { "epoch": 0.9114984347237499, "grad_norm": 0.4742259085178375, "learning_rate": 0.001, "loss": 3.0758, "step": 21546 }, { "epoch": 0.9115407394872662, "grad_norm": 0.21102559566497803, "learning_rate": 0.001, "loss": 2.8004, "step": 21547 }, { "epoch": 0.9115830442507826, "grad_norm": 0.47817152738571167, "learning_rate": 0.001, "loss": 2.0812, "step": 21548 }, { "epoch": 0.911625349014299, "grad_norm": 0.156982883810997, "learning_rate": 0.001, "loss": 2.3945, "step": 21549 }, { "epoch": 0.9116676537778153, "grad_norm": 0.14067421853542328, "learning_rate": 0.001, "loss": 2.0697, "step": 21550 }, { "epoch": 0.9117099585413317, "grad_norm": 0.17669621109962463, "learning_rate": 0.001, "loss": 2.3237, "step": 21551 }, { "epoch": 0.9117522633048482, "grad_norm": 0.16611798107624054, "learning_rate": 0.001, "loss": 1.5483, "step": 21552 }, { "epoch": 0.9117945680683645, "grad_norm": 0.2230168581008911, "learning_rate": 0.001, "loss": 2.6282, "step": 21553 }, { "epoch": 0.9118368728318809, "grad_norm": 4.087730884552002, "learning_rate": 0.001, "loss": 1.8573, "step": 21554 }, { "epoch": 0.9118791775953973, "grad_norm": 0.1587943285703659, "learning_rate": 0.001, "loss": 3.0096, "step": 21555 }, { "epoch": 0.9119214823589136, "grad_norm": 0.13796477019786835, "learning_rate": 0.001, "loss": 2.5867, "step": 21556 }, { "epoch": 0.91196378712243, "grad_norm": 0.17402009665966034, "learning_rate": 0.001, "loss": 2.2494, "step": 21557 }, { "epoch": 0.9120060918859464, "grad_norm": 0.20480291545391083, "learning_rate": 0.001, "loss": 2.7589, "step": 21558 }, { "epoch": 0.9120483966494627, "grad_norm": 0.45675426721572876, "learning_rate": 0.001, "loss": 2.9796, "step": 21559 }, { "epoch": 0.9120907014129791, "grad_norm": 0.26858261227607727, "learning_rate": 0.001, "loss": 2.7131, "step": 21560 }, { "epoch": 0.9121330061764955, "grad_norm": 0.1663147658109665, "learning_rate": 0.001, "loss": 1.9666, "step": 21561 }, { "epoch": 0.9121753109400118, "grad_norm": 0.16249950230121613, "learning_rate": 0.001, "loss": 2.6662, "step": 21562 }, { "epoch": 0.9122176157035282, "grad_norm": 0.22289776802062988, "learning_rate": 0.001, "loss": 2.3533, "step": 21563 }, { "epoch": 0.9122599204670446, "grad_norm": 0.20994803309440613, "learning_rate": 0.001, "loss": 1.978, "step": 21564 }, { "epoch": 0.9123022252305609, "grad_norm": 0.4022495448589325, "learning_rate": 0.001, "loss": 2.1195, "step": 21565 }, { "epoch": 0.9123445299940773, "grad_norm": 0.17992646992206573, "learning_rate": 0.001, "loss": 1.934, "step": 21566 }, { "epoch": 0.9123868347575937, "grad_norm": 9.34231948852539, "learning_rate": 0.001, "loss": 2.3568, "step": 21567 }, { "epoch": 0.91242913952111, "grad_norm": 0.19124965369701385, "learning_rate": 0.001, "loss": 1.8253, "step": 21568 }, { "epoch": 0.9124714442846265, "grad_norm": 0.1695529967546463, "learning_rate": 0.001, "loss": 2.892, "step": 21569 }, { "epoch": 0.9125137490481429, "grad_norm": 0.1486518681049347, "learning_rate": 0.001, "loss": 1.868, "step": 21570 }, { "epoch": 0.9125560538116592, "grad_norm": 20.817100524902344, "learning_rate": 0.001, "loss": 1.8457, "step": 21571 }, { "epoch": 0.9125983585751756, "grad_norm": 12.602542877197266, "learning_rate": 0.001, "loss": 2.1331, "step": 21572 }, { "epoch": 0.912640663338692, "grad_norm": 0.16852043569087982, "learning_rate": 0.001, "loss": 2.6505, "step": 21573 }, { "epoch": 0.9126829681022083, "grad_norm": 0.17694880068302155, "learning_rate": 0.001, "loss": 1.5316, "step": 21574 }, { "epoch": 0.9127252728657247, "grad_norm": 0.19821469485759735, "learning_rate": 0.001, "loss": 2.0534, "step": 21575 }, { "epoch": 0.9127675776292411, "grad_norm": 0.21617907285690308, "learning_rate": 0.001, "loss": 2.5649, "step": 21576 }, { "epoch": 0.9128098823927574, "grad_norm": 0.17548352479934692, "learning_rate": 0.001, "loss": 1.9663, "step": 21577 }, { "epoch": 0.9128521871562738, "grad_norm": 0.22286243736743927, "learning_rate": 0.001, "loss": 1.97, "step": 21578 }, { "epoch": 0.9128944919197902, "grad_norm": 0.19249877333641052, "learning_rate": 0.001, "loss": 2.8943, "step": 21579 }, { "epoch": 0.9129367966833065, "grad_norm": 0.14703737199306488, "learning_rate": 0.001, "loss": 2.2766, "step": 21580 }, { "epoch": 0.9129791014468229, "grad_norm": 0.17037394642829895, "learning_rate": 0.001, "loss": 1.5034, "step": 21581 }, { "epoch": 0.9130214062103393, "grad_norm": 0.18207064270973206, "learning_rate": 0.001, "loss": 2.1486, "step": 21582 }, { "epoch": 0.9130637109738556, "grad_norm": 0.1920434832572937, "learning_rate": 0.001, "loss": 1.6474, "step": 21583 }, { "epoch": 0.913106015737372, "grad_norm": 1.184415578842163, "learning_rate": 0.001, "loss": 3.1735, "step": 21584 }, { "epoch": 0.9131483205008885, "grad_norm": 0.15448032319545746, "learning_rate": 0.001, "loss": 1.736, "step": 21585 }, { "epoch": 0.9131906252644048, "grad_norm": 0.18459028005599976, "learning_rate": 0.001, "loss": 2.0065, "step": 21586 }, { "epoch": 0.9132329300279212, "grad_norm": 0.14690928161144257, "learning_rate": 0.001, "loss": 2.3362, "step": 21587 }, { "epoch": 0.9132752347914375, "grad_norm": 0.15525342524051666, "learning_rate": 0.001, "loss": 1.691, "step": 21588 }, { "epoch": 0.9133175395549539, "grad_norm": 0.15081803500652313, "learning_rate": 0.001, "loss": 1.3852, "step": 21589 }, { "epoch": 0.9133598443184703, "grad_norm": 0.14801929891109467, "learning_rate": 0.001, "loss": 2.3841, "step": 21590 }, { "epoch": 0.9134021490819866, "grad_norm": 0.1725027710199356, "learning_rate": 0.001, "loss": 2.52, "step": 21591 }, { "epoch": 0.913444453845503, "grad_norm": 0.11509247869253159, "learning_rate": 0.001, "loss": 1.6571, "step": 21592 }, { "epoch": 0.9134867586090194, "grad_norm": 0.14290519058704376, "learning_rate": 0.001, "loss": 1.8487, "step": 21593 }, { "epoch": 0.9135290633725357, "grad_norm": 0.21351279318332672, "learning_rate": 0.001, "loss": 1.9015, "step": 21594 }, { "epoch": 0.9135713681360521, "grad_norm": 0.391020268201828, "learning_rate": 0.001, "loss": 2.0749, "step": 21595 }, { "epoch": 0.9136136728995685, "grad_norm": 0.14413462579250336, "learning_rate": 0.001, "loss": 1.8242, "step": 21596 }, { "epoch": 0.9136559776630848, "grad_norm": 0.18715713918209076, "learning_rate": 0.001, "loss": 3.9564, "step": 21597 }, { "epoch": 0.9136982824266012, "grad_norm": 0.17989511787891388, "learning_rate": 0.001, "loss": 2.6794, "step": 21598 }, { "epoch": 0.9137405871901176, "grad_norm": 0.1721455156803131, "learning_rate": 0.001, "loss": 1.6095, "step": 21599 }, { "epoch": 0.9137828919536339, "grad_norm": 1.7878568172454834, "learning_rate": 0.001, "loss": 2.6891, "step": 21600 }, { "epoch": 0.9138251967171503, "grad_norm": 0.14471499621868134, "learning_rate": 0.001, "loss": 1.3701, "step": 21601 }, { "epoch": 0.9138675014806668, "grad_norm": 2.2740018367767334, "learning_rate": 0.001, "loss": 2.1687, "step": 21602 }, { "epoch": 0.913909806244183, "grad_norm": 0.13715597987174988, "learning_rate": 0.001, "loss": 1.995, "step": 21603 }, { "epoch": 0.9139521110076995, "grad_norm": 0.13513702154159546, "learning_rate": 0.001, "loss": 1.735, "step": 21604 }, { "epoch": 0.9139944157712159, "grad_norm": 0.1583508551120758, "learning_rate": 0.001, "loss": 1.746, "step": 21605 }, { "epoch": 0.9140367205347322, "grad_norm": 0.1774708777666092, "learning_rate": 0.001, "loss": 2.3999, "step": 21606 }, { "epoch": 0.9140790252982486, "grad_norm": 0.15882831811904907, "learning_rate": 0.001, "loss": 1.6509, "step": 21607 }, { "epoch": 0.914121330061765, "grad_norm": 0.1779770851135254, "learning_rate": 0.001, "loss": 2.1135, "step": 21608 }, { "epoch": 0.9141636348252813, "grad_norm": 0.33798548579216003, "learning_rate": 0.001, "loss": 2.7735, "step": 21609 }, { "epoch": 0.9142059395887977, "grad_norm": 0.31764113903045654, "learning_rate": 0.001, "loss": 2.0155, "step": 21610 }, { "epoch": 0.9142482443523141, "grad_norm": 0.17167538404464722, "learning_rate": 0.001, "loss": 1.7689, "step": 21611 }, { "epoch": 0.9142905491158304, "grad_norm": 0.16581086814403534, "learning_rate": 0.001, "loss": 1.7205, "step": 21612 }, { "epoch": 0.9143328538793468, "grad_norm": 0.16308166086673737, "learning_rate": 0.001, "loss": 2.3171, "step": 21613 }, { "epoch": 0.9143751586428632, "grad_norm": 0.1509043574333191, "learning_rate": 0.001, "loss": 1.4007, "step": 21614 }, { "epoch": 0.9144174634063795, "grad_norm": 0.18043819069862366, "learning_rate": 0.001, "loss": 3.3869, "step": 21615 }, { "epoch": 0.9144597681698959, "grad_norm": 0.19052763283252716, "learning_rate": 0.001, "loss": 2.2411, "step": 21616 }, { "epoch": 0.9145020729334123, "grad_norm": 0.13170795142650604, "learning_rate": 0.001, "loss": 2.1545, "step": 21617 }, { "epoch": 0.9145443776969286, "grad_norm": 0.16418077051639557, "learning_rate": 0.001, "loss": 2.2063, "step": 21618 }, { "epoch": 0.914586682460445, "grad_norm": 0.5688857436180115, "learning_rate": 0.001, "loss": 2.1972, "step": 21619 }, { "epoch": 0.9146289872239615, "grad_norm": 0.1831405758857727, "learning_rate": 0.001, "loss": 1.6587, "step": 21620 }, { "epoch": 0.9146712919874778, "grad_norm": 0.4554640054702759, "learning_rate": 0.001, "loss": 1.7407, "step": 21621 }, { "epoch": 0.9147135967509942, "grad_norm": 0.17047862708568573, "learning_rate": 0.001, "loss": 1.6915, "step": 21622 }, { "epoch": 0.9147559015145106, "grad_norm": 0.7939593195915222, "learning_rate": 0.001, "loss": 1.8853, "step": 21623 }, { "epoch": 0.9147982062780269, "grad_norm": 0.17119646072387695, "learning_rate": 0.001, "loss": 3.3695, "step": 21624 }, { "epoch": 0.9148405110415433, "grad_norm": 0.17291773855686188, "learning_rate": 0.001, "loss": 1.8255, "step": 21625 }, { "epoch": 0.9148828158050597, "grad_norm": 0.49092814326286316, "learning_rate": 0.001, "loss": 2.3737, "step": 21626 }, { "epoch": 0.914925120568576, "grad_norm": 0.13538378477096558, "learning_rate": 0.001, "loss": 1.5565, "step": 21627 }, { "epoch": 0.9149674253320924, "grad_norm": 0.14585568010807037, "learning_rate": 0.001, "loss": 2.0289, "step": 21628 }, { "epoch": 0.9150097300956088, "grad_norm": 1.05545175075531, "learning_rate": 0.001, "loss": 2.3307, "step": 21629 }, { "epoch": 0.9150520348591251, "grad_norm": 0.13337115943431854, "learning_rate": 0.001, "loss": 2.3943, "step": 21630 }, { "epoch": 0.9150943396226415, "grad_norm": 0.13960030674934387, "learning_rate": 0.001, "loss": 2.0699, "step": 21631 }, { "epoch": 0.9151366443861578, "grad_norm": 0.1596209853887558, "learning_rate": 0.001, "loss": 2.9616, "step": 21632 }, { "epoch": 0.9151789491496742, "grad_norm": 1.2385811805725098, "learning_rate": 0.001, "loss": 2.1809, "step": 21633 }, { "epoch": 0.9152212539131906, "grad_norm": 0.15184301137924194, "learning_rate": 0.001, "loss": 1.4528, "step": 21634 }, { "epoch": 0.915263558676707, "grad_norm": 3.3898375034332275, "learning_rate": 0.001, "loss": 1.8417, "step": 21635 }, { "epoch": 0.9153058634402234, "grad_norm": 0.1527806520462036, "learning_rate": 0.001, "loss": 2.1561, "step": 21636 }, { "epoch": 0.9153481682037398, "grad_norm": 0.22297519445419312, "learning_rate": 0.001, "loss": 2.2794, "step": 21637 }, { "epoch": 0.9153904729672561, "grad_norm": 0.21347364783287048, "learning_rate": 0.001, "loss": 3.186, "step": 21638 }, { "epoch": 0.9154327777307725, "grad_norm": 0.15860065817832947, "learning_rate": 0.001, "loss": 1.7825, "step": 21639 }, { "epoch": 0.9154750824942889, "grad_norm": 0.4748792052268982, "learning_rate": 0.001, "loss": 2.4484, "step": 21640 }, { "epoch": 0.9155173872578052, "grad_norm": 0.15251575410366058, "learning_rate": 0.001, "loss": 1.5772, "step": 21641 }, { "epoch": 0.9155596920213216, "grad_norm": 0.21372830867767334, "learning_rate": 0.001, "loss": 2.8094, "step": 21642 }, { "epoch": 0.915601996784838, "grad_norm": 0.19164182245731354, "learning_rate": 0.001, "loss": 3.3052, "step": 21643 }, { "epoch": 0.9156443015483543, "grad_norm": 0.15785560011863708, "learning_rate": 0.001, "loss": 2.3681, "step": 21644 }, { "epoch": 0.9156866063118707, "grad_norm": 1.266538143157959, "learning_rate": 0.001, "loss": 2.0366, "step": 21645 }, { "epoch": 0.9157289110753871, "grad_norm": 0.16436876356601715, "learning_rate": 0.001, "loss": 2.3029, "step": 21646 }, { "epoch": 0.9157712158389034, "grad_norm": 0.1448490023612976, "learning_rate": 0.001, "loss": 2.3137, "step": 21647 }, { "epoch": 0.9158135206024198, "grad_norm": 0.1606566458940506, "learning_rate": 0.001, "loss": 1.5938, "step": 21648 }, { "epoch": 0.9158558253659362, "grad_norm": 0.6070122122764587, "learning_rate": 0.001, "loss": 3.4232, "step": 21649 }, { "epoch": 0.9158981301294525, "grad_norm": 0.15284164249897003, "learning_rate": 0.001, "loss": 2.2872, "step": 21650 }, { "epoch": 0.915940434892969, "grad_norm": 0.18675555288791656, "learning_rate": 0.001, "loss": 3.7336, "step": 21651 }, { "epoch": 0.9159827396564854, "grad_norm": 0.20005974173545837, "learning_rate": 0.001, "loss": 2.4288, "step": 21652 }, { "epoch": 0.9160250444200017, "grad_norm": 0.17080628871917725, "learning_rate": 0.001, "loss": 2.0765, "step": 21653 }, { "epoch": 0.9160673491835181, "grad_norm": 0.17919304966926575, "learning_rate": 0.001, "loss": 1.6258, "step": 21654 }, { "epoch": 0.9161096539470345, "grad_norm": 4.160139560699463, "learning_rate": 0.001, "loss": 3.0095, "step": 21655 }, { "epoch": 0.9161519587105508, "grad_norm": 0.12385336309671402, "learning_rate": 0.001, "loss": 1.9264, "step": 21656 }, { "epoch": 0.9161942634740672, "grad_norm": 0.19847314059734344, "learning_rate": 0.001, "loss": 2.0386, "step": 21657 }, { "epoch": 0.9162365682375836, "grad_norm": 0.14693698287010193, "learning_rate": 0.001, "loss": 1.6431, "step": 21658 }, { "epoch": 0.9162788730010999, "grad_norm": 0.1676744669675827, "learning_rate": 0.001, "loss": 1.7444, "step": 21659 }, { "epoch": 0.9163211777646163, "grad_norm": 0.17183883488178253, "learning_rate": 0.001, "loss": 2.122, "step": 21660 }, { "epoch": 0.9163634825281327, "grad_norm": 0.16480883955955505, "learning_rate": 0.001, "loss": 2.9668, "step": 21661 }, { "epoch": 0.916405787291649, "grad_norm": 0.16469353437423706, "learning_rate": 0.001, "loss": 2.4785, "step": 21662 }, { "epoch": 0.9164480920551654, "grad_norm": 0.1656375229358673, "learning_rate": 0.001, "loss": 1.7279, "step": 21663 }, { "epoch": 0.9164903968186818, "grad_norm": 0.15687058866024017, "learning_rate": 0.001, "loss": 2.5898, "step": 21664 }, { "epoch": 0.9165327015821981, "grad_norm": 0.585669994354248, "learning_rate": 0.001, "loss": 2.0657, "step": 21665 }, { "epoch": 0.9165750063457145, "grad_norm": 0.5769844651222229, "learning_rate": 0.001, "loss": 2.0928, "step": 21666 }, { "epoch": 0.916617311109231, "grad_norm": 0.18087390065193176, "learning_rate": 0.001, "loss": 1.8316, "step": 21667 }, { "epoch": 0.9166596158727472, "grad_norm": 0.15963704884052277, "learning_rate": 0.001, "loss": 1.6256, "step": 21668 }, { "epoch": 0.9167019206362637, "grad_norm": 0.15890270471572876, "learning_rate": 0.001, "loss": 1.5167, "step": 21669 }, { "epoch": 0.9167442253997801, "grad_norm": 0.15813934803009033, "learning_rate": 0.001, "loss": 2.0896, "step": 21670 }, { "epoch": 0.9167865301632964, "grad_norm": 0.17641639709472656, "learning_rate": 0.001, "loss": 1.7598, "step": 21671 }, { "epoch": 0.9168288349268128, "grad_norm": 0.14289072155952454, "learning_rate": 0.001, "loss": 2.1995, "step": 21672 }, { "epoch": 0.9168711396903292, "grad_norm": 0.14103195071220398, "learning_rate": 0.001, "loss": 1.6827, "step": 21673 }, { "epoch": 0.9169134444538455, "grad_norm": 1.1286795139312744, "learning_rate": 0.001, "loss": 1.7365, "step": 21674 }, { "epoch": 0.9169557492173619, "grad_norm": 0.1436692327260971, "learning_rate": 0.001, "loss": 1.5473, "step": 21675 }, { "epoch": 0.9169980539808782, "grad_norm": 0.17740869522094727, "learning_rate": 0.001, "loss": 2.0802, "step": 21676 }, { "epoch": 0.9170403587443946, "grad_norm": 0.21841758489608765, "learning_rate": 0.001, "loss": 2.4257, "step": 21677 }, { "epoch": 0.917082663507911, "grad_norm": 0.1688189059495926, "learning_rate": 0.001, "loss": 1.6848, "step": 21678 }, { "epoch": 0.9171249682714273, "grad_norm": 7.481250762939453, "learning_rate": 0.001, "loss": 1.8009, "step": 21679 }, { "epoch": 0.9171672730349437, "grad_norm": 0.16650600731372833, "learning_rate": 0.001, "loss": 1.9332, "step": 21680 }, { "epoch": 0.9172095777984601, "grad_norm": 0.16894063353538513, "learning_rate": 0.001, "loss": 1.8266, "step": 21681 }, { "epoch": 0.9172518825619764, "grad_norm": 1.4320636987686157, "learning_rate": 0.001, "loss": 1.7303, "step": 21682 }, { "epoch": 0.9172941873254928, "grad_norm": 0.15776890516281128, "learning_rate": 0.001, "loss": 1.483, "step": 21683 }, { "epoch": 0.9173364920890092, "grad_norm": 0.5583562254905701, "learning_rate": 0.001, "loss": 2.6289, "step": 21684 }, { "epoch": 0.9173787968525255, "grad_norm": 2.3220925331115723, "learning_rate": 0.001, "loss": 2.3595, "step": 21685 }, { "epoch": 0.917421101616042, "grad_norm": 0.19249920547008514, "learning_rate": 0.001, "loss": 2.1336, "step": 21686 }, { "epoch": 0.9174634063795584, "grad_norm": 0.24140247702598572, "learning_rate": 0.001, "loss": 2.4654, "step": 21687 }, { "epoch": 0.9175057111430747, "grad_norm": 0.17343805730342865, "learning_rate": 0.001, "loss": 2.3035, "step": 21688 }, { "epoch": 0.9175480159065911, "grad_norm": 0.19953952729701996, "learning_rate": 0.001, "loss": 1.9234, "step": 21689 }, { "epoch": 0.9175903206701075, "grad_norm": 1.9272069931030273, "learning_rate": 0.001, "loss": 3.4397, "step": 21690 }, { "epoch": 0.9176326254336238, "grad_norm": 0.1924961805343628, "learning_rate": 0.001, "loss": 2.4553, "step": 21691 }, { "epoch": 0.9176749301971402, "grad_norm": 0.20997212827205658, "learning_rate": 0.001, "loss": 1.7757, "step": 21692 }, { "epoch": 0.9177172349606566, "grad_norm": 0.18580669164657593, "learning_rate": 0.001, "loss": 2.1987, "step": 21693 }, { "epoch": 0.9177595397241729, "grad_norm": 0.2456483542919159, "learning_rate": 0.001, "loss": 2.1412, "step": 21694 }, { "epoch": 0.9178018444876893, "grad_norm": 0.2759229242801666, "learning_rate": 0.001, "loss": 2.3739, "step": 21695 }, { "epoch": 0.9178441492512057, "grad_norm": 0.19626349210739136, "learning_rate": 0.001, "loss": 2.4, "step": 21696 }, { "epoch": 0.917886454014722, "grad_norm": 0.19402183592319489, "learning_rate": 0.001, "loss": 3.3583, "step": 21697 }, { "epoch": 0.9179287587782384, "grad_norm": 0.21589398384094238, "learning_rate": 0.001, "loss": 1.6545, "step": 21698 }, { "epoch": 0.9179710635417548, "grad_norm": 0.19767926633358002, "learning_rate": 0.001, "loss": 2.963, "step": 21699 }, { "epoch": 0.9180133683052711, "grad_norm": 0.6116325855255127, "learning_rate": 0.001, "loss": 2.4778, "step": 21700 }, { "epoch": 0.9180556730687875, "grad_norm": 0.18952438235282898, "learning_rate": 0.001, "loss": 2.9357, "step": 21701 }, { "epoch": 0.918097977832304, "grad_norm": 0.20334750413894653, "learning_rate": 0.001, "loss": 1.9754, "step": 21702 }, { "epoch": 0.9181402825958203, "grad_norm": 0.17652149498462677, "learning_rate": 0.001, "loss": 3.0083, "step": 21703 }, { "epoch": 0.9181825873593367, "grad_norm": 0.1732001155614853, "learning_rate": 0.001, "loss": 2.601, "step": 21704 }, { "epoch": 0.9182248921228531, "grad_norm": 0.16499556601047516, "learning_rate": 0.001, "loss": 2.3997, "step": 21705 }, { "epoch": 0.9182671968863694, "grad_norm": 0.1443309634923935, "learning_rate": 0.001, "loss": 3.3834, "step": 21706 }, { "epoch": 0.9183095016498858, "grad_norm": 0.14484232664108276, "learning_rate": 0.001, "loss": 1.6655, "step": 21707 }, { "epoch": 0.9183518064134022, "grad_norm": 0.13555298745632172, "learning_rate": 0.001, "loss": 1.6735, "step": 21708 }, { "epoch": 0.9183941111769185, "grad_norm": 0.2232842892408371, "learning_rate": 0.001, "loss": 2.4252, "step": 21709 }, { "epoch": 0.9184364159404349, "grad_norm": 0.175230473279953, "learning_rate": 0.001, "loss": 2.0342, "step": 21710 }, { "epoch": 0.9184787207039513, "grad_norm": 0.1450617015361786, "learning_rate": 0.001, "loss": 1.7858, "step": 21711 }, { "epoch": 0.9185210254674676, "grad_norm": 0.37246161699295044, "learning_rate": 0.001, "loss": 3.1194, "step": 21712 }, { "epoch": 0.918563330230984, "grad_norm": 0.1520744115114212, "learning_rate": 0.001, "loss": 2.9826, "step": 21713 }, { "epoch": 0.9186056349945004, "grad_norm": 0.17187443375587463, "learning_rate": 0.001, "loss": 2.6739, "step": 21714 }, { "epoch": 0.9186479397580167, "grad_norm": 0.17397736012935638, "learning_rate": 0.001, "loss": 2.6349, "step": 21715 }, { "epoch": 0.9186902445215331, "grad_norm": 2.4483251571655273, "learning_rate": 0.001, "loss": 2.8474, "step": 21716 }, { "epoch": 0.9187325492850495, "grad_norm": 0.9830498695373535, "learning_rate": 0.001, "loss": 1.3181, "step": 21717 }, { "epoch": 0.9187748540485658, "grad_norm": 0.1355532854795456, "learning_rate": 0.001, "loss": 2.4041, "step": 21718 }, { "epoch": 0.9188171588120823, "grad_norm": 0.161952406167984, "learning_rate": 0.001, "loss": 2.9089, "step": 21719 }, { "epoch": 0.9188594635755987, "grad_norm": 0.1725846230983734, "learning_rate": 0.001, "loss": 2.2824, "step": 21720 }, { "epoch": 0.918901768339115, "grad_norm": 0.21761982142925262, "learning_rate": 0.001, "loss": 1.7065, "step": 21721 }, { "epoch": 0.9189440731026314, "grad_norm": 0.16416600346565247, "learning_rate": 0.001, "loss": 1.8811, "step": 21722 }, { "epoch": 0.9189863778661477, "grad_norm": 0.1734263002872467, "learning_rate": 0.001, "loss": 1.956, "step": 21723 }, { "epoch": 0.9190286826296641, "grad_norm": 0.3407423198223114, "learning_rate": 0.001, "loss": 1.9373, "step": 21724 }, { "epoch": 0.9190709873931805, "grad_norm": 0.1345268189907074, "learning_rate": 0.001, "loss": 1.7216, "step": 21725 }, { "epoch": 0.9191132921566968, "grad_norm": 0.1486416757106781, "learning_rate": 0.001, "loss": 1.7895, "step": 21726 }, { "epoch": 0.9191555969202132, "grad_norm": 0.14210502803325653, "learning_rate": 0.001, "loss": 2.0866, "step": 21727 }, { "epoch": 0.9191979016837296, "grad_norm": 0.13867826759815216, "learning_rate": 0.001, "loss": 2.6647, "step": 21728 }, { "epoch": 0.9192402064472459, "grad_norm": 0.4167260527610779, "learning_rate": 0.001, "loss": 2.3402, "step": 21729 }, { "epoch": 0.9192825112107623, "grad_norm": 0.16781596839427948, "learning_rate": 0.001, "loss": 1.7369, "step": 21730 }, { "epoch": 0.9193248159742787, "grad_norm": 0.16833274066448212, "learning_rate": 0.001, "loss": 2.136, "step": 21731 }, { "epoch": 0.919367120737795, "grad_norm": 15.674321174621582, "learning_rate": 0.001, "loss": 2.771, "step": 21732 }, { "epoch": 0.9194094255013114, "grad_norm": 1.287104606628418, "learning_rate": 0.001, "loss": 1.7155, "step": 21733 }, { "epoch": 0.9194517302648278, "grad_norm": 0.14855779707431793, "learning_rate": 0.001, "loss": 2.1788, "step": 21734 }, { "epoch": 0.9194940350283441, "grad_norm": 0.18779130280017853, "learning_rate": 0.001, "loss": 1.9621, "step": 21735 }, { "epoch": 0.9195363397918606, "grad_norm": 0.18050996959209442, "learning_rate": 0.001, "loss": 1.7367, "step": 21736 }, { "epoch": 0.919578644555377, "grad_norm": 0.2634378671646118, "learning_rate": 0.001, "loss": 2.6046, "step": 21737 }, { "epoch": 0.9196209493188933, "grad_norm": 0.2787913978099823, "learning_rate": 0.001, "loss": 3.1114, "step": 21738 }, { "epoch": 0.9196632540824097, "grad_norm": 0.15546195209026337, "learning_rate": 0.001, "loss": 2.7819, "step": 21739 }, { "epoch": 0.9197055588459261, "grad_norm": 0.21307052671909332, "learning_rate": 0.001, "loss": 2.5438, "step": 21740 }, { "epoch": 0.9197478636094424, "grad_norm": 0.18763384222984314, "learning_rate": 0.001, "loss": 1.7654, "step": 21741 }, { "epoch": 0.9197901683729588, "grad_norm": 0.12560519576072693, "learning_rate": 0.001, "loss": 2.6024, "step": 21742 }, { "epoch": 0.9198324731364752, "grad_norm": 0.18310007452964783, "learning_rate": 0.001, "loss": 2.0988, "step": 21743 }, { "epoch": 0.9198747778999915, "grad_norm": 0.2050357460975647, "learning_rate": 0.001, "loss": 2.5167, "step": 21744 }, { "epoch": 0.9199170826635079, "grad_norm": 0.21944154798984528, "learning_rate": 0.001, "loss": 2.9099, "step": 21745 }, { "epoch": 0.9199593874270243, "grad_norm": 3.412935495376587, "learning_rate": 0.001, "loss": 2.1277, "step": 21746 }, { "epoch": 0.9200016921905406, "grad_norm": 0.21068432927131653, "learning_rate": 0.001, "loss": 1.6681, "step": 21747 }, { "epoch": 0.920043996954057, "grad_norm": 0.18886512517929077, "learning_rate": 0.001, "loss": 2.294, "step": 21748 }, { "epoch": 0.9200863017175734, "grad_norm": 1.677681803703308, "learning_rate": 0.001, "loss": 2.0359, "step": 21749 }, { "epoch": 0.9201286064810897, "grad_norm": 0.194906085729599, "learning_rate": 0.001, "loss": 2.1362, "step": 21750 }, { "epoch": 0.9201709112446061, "grad_norm": 0.15261149406433105, "learning_rate": 0.001, "loss": 2.8701, "step": 21751 }, { "epoch": 0.9202132160081226, "grad_norm": 0.16249825060367584, "learning_rate": 0.001, "loss": 2.4157, "step": 21752 }, { "epoch": 0.9202555207716389, "grad_norm": 0.20041526854038239, "learning_rate": 0.001, "loss": 2.5838, "step": 21753 }, { "epoch": 0.9202978255351553, "grad_norm": 0.2039480060338974, "learning_rate": 0.001, "loss": 1.1561, "step": 21754 }, { "epoch": 0.9203401302986717, "grad_norm": 0.1664251685142517, "learning_rate": 0.001, "loss": 2.5506, "step": 21755 }, { "epoch": 0.920382435062188, "grad_norm": 0.19452784955501556, "learning_rate": 0.001, "loss": 2.1064, "step": 21756 }, { "epoch": 0.9204247398257044, "grad_norm": 0.19989818334579468, "learning_rate": 0.001, "loss": 1.9846, "step": 21757 }, { "epoch": 0.9204670445892208, "grad_norm": 0.18792906403541565, "learning_rate": 0.001, "loss": 1.8072, "step": 21758 }, { "epoch": 0.9205093493527371, "grad_norm": 0.4300324618816376, "learning_rate": 0.001, "loss": 3.106, "step": 21759 }, { "epoch": 0.9205516541162535, "grad_norm": 0.20472699403762817, "learning_rate": 0.001, "loss": 2.8519, "step": 21760 }, { "epoch": 0.9205939588797699, "grad_norm": 0.16424056887626648, "learning_rate": 0.001, "loss": 2.8775, "step": 21761 }, { "epoch": 0.9206362636432862, "grad_norm": 0.2106097787618637, "learning_rate": 0.001, "loss": 2.7263, "step": 21762 }, { "epoch": 0.9206785684068026, "grad_norm": 0.18007276952266693, "learning_rate": 0.001, "loss": 2.1168, "step": 21763 }, { "epoch": 0.920720873170319, "grad_norm": 0.8814851641654968, "learning_rate": 0.001, "loss": 2.6329, "step": 21764 }, { "epoch": 0.9207631779338353, "grad_norm": 0.16528195142745972, "learning_rate": 0.001, "loss": 2.3614, "step": 21765 }, { "epoch": 0.9208054826973517, "grad_norm": 0.14733877778053284, "learning_rate": 0.001, "loss": 1.9466, "step": 21766 }, { "epoch": 0.920847787460868, "grad_norm": 0.15155373513698578, "learning_rate": 0.001, "loss": 2.0437, "step": 21767 }, { "epoch": 0.9208900922243844, "grad_norm": 0.7689833045005798, "learning_rate": 0.001, "loss": 1.5882, "step": 21768 }, { "epoch": 0.9209323969879009, "grad_norm": 0.35222962498664856, "learning_rate": 0.001, "loss": 2.6627, "step": 21769 }, { "epoch": 0.9209747017514172, "grad_norm": 0.22019173204898834, "learning_rate": 0.001, "loss": 2.2336, "step": 21770 }, { "epoch": 0.9210170065149336, "grad_norm": 1.362524151802063, "learning_rate": 0.001, "loss": 1.5802, "step": 21771 }, { "epoch": 0.92105931127845, "grad_norm": 0.18947310745716095, "learning_rate": 0.001, "loss": 2.1529, "step": 21772 }, { "epoch": 0.9211016160419663, "grad_norm": 0.5228827595710754, "learning_rate": 0.001, "loss": 2.2288, "step": 21773 }, { "epoch": 0.9211439208054827, "grad_norm": 0.19477491080760956, "learning_rate": 0.001, "loss": 1.7683, "step": 21774 }, { "epoch": 0.9211862255689991, "grad_norm": 0.1812824010848999, "learning_rate": 0.001, "loss": 1.9203, "step": 21775 }, { "epoch": 0.9212285303325154, "grad_norm": 0.5380221009254456, "learning_rate": 0.001, "loss": 1.5879, "step": 21776 }, { "epoch": 0.9212708350960318, "grad_norm": 2.430056095123291, "learning_rate": 0.001, "loss": 1.8626, "step": 21777 }, { "epoch": 0.9213131398595482, "grad_norm": 0.14518165588378906, "learning_rate": 0.001, "loss": 1.956, "step": 21778 }, { "epoch": 0.9213554446230645, "grad_norm": 0.16930201649665833, "learning_rate": 0.001, "loss": 2.0088, "step": 21779 }, { "epoch": 0.9213977493865809, "grad_norm": 0.16156643629074097, "learning_rate": 0.001, "loss": 2.6447, "step": 21780 }, { "epoch": 0.9214400541500973, "grad_norm": 0.2412625253200531, "learning_rate": 0.001, "loss": 1.5386, "step": 21781 }, { "epoch": 0.9214823589136136, "grad_norm": 0.16392214596271515, "learning_rate": 0.001, "loss": 1.8491, "step": 21782 }, { "epoch": 0.92152466367713, "grad_norm": 0.3166408836841583, "learning_rate": 0.001, "loss": 1.8965, "step": 21783 }, { "epoch": 0.9215669684406465, "grad_norm": 0.6187849044799805, "learning_rate": 0.001, "loss": 3.2798, "step": 21784 }, { "epoch": 0.9216092732041627, "grad_norm": 0.16876527667045593, "learning_rate": 0.001, "loss": 3.2222, "step": 21785 }, { "epoch": 0.9216515779676792, "grad_norm": 0.21848821640014648, "learning_rate": 0.001, "loss": 2.4569, "step": 21786 }, { "epoch": 0.9216938827311956, "grad_norm": 0.2898956537246704, "learning_rate": 0.001, "loss": 1.9074, "step": 21787 }, { "epoch": 0.9217361874947119, "grad_norm": 0.9921806454658508, "learning_rate": 0.001, "loss": 2.6345, "step": 21788 }, { "epoch": 0.9217784922582283, "grad_norm": 0.2962776720523834, "learning_rate": 0.001, "loss": 1.9957, "step": 21789 }, { "epoch": 0.9218207970217447, "grad_norm": 4.324014663696289, "learning_rate": 0.001, "loss": 2.4474, "step": 21790 }, { "epoch": 0.921863101785261, "grad_norm": 2.0869667530059814, "learning_rate": 0.001, "loss": 2.0607, "step": 21791 }, { "epoch": 0.9219054065487774, "grad_norm": 0.1384233683347702, "learning_rate": 0.001, "loss": 2.6952, "step": 21792 }, { "epoch": 0.9219477113122938, "grad_norm": 0.2545308470726013, "learning_rate": 0.001, "loss": 1.5397, "step": 21793 }, { "epoch": 0.9219900160758101, "grad_norm": 0.1824929267168045, "learning_rate": 0.001, "loss": 2.8564, "step": 21794 }, { "epoch": 0.9220323208393265, "grad_norm": 0.6214986443519592, "learning_rate": 0.001, "loss": 1.8838, "step": 21795 }, { "epoch": 0.9220746256028429, "grad_norm": 0.1345427930355072, "learning_rate": 0.001, "loss": 1.5997, "step": 21796 }, { "epoch": 0.9221169303663592, "grad_norm": 0.3405480980873108, "learning_rate": 0.001, "loss": 3.5338, "step": 21797 }, { "epoch": 0.9221592351298756, "grad_norm": 1.3706905841827393, "learning_rate": 0.001, "loss": 1.5665, "step": 21798 }, { "epoch": 0.922201539893392, "grad_norm": 3.3803353309631348, "learning_rate": 0.001, "loss": 2.0037, "step": 21799 }, { "epoch": 0.9222438446569083, "grad_norm": 2.6381101608276367, "learning_rate": 0.001, "loss": 1.8247, "step": 21800 }, { "epoch": 0.9222861494204248, "grad_norm": 0.15839798748493195, "learning_rate": 0.001, "loss": 2.7898, "step": 21801 }, { "epoch": 0.9223284541839412, "grad_norm": 0.3356753885746002, "learning_rate": 0.001, "loss": 2.5539, "step": 21802 }, { "epoch": 0.9223707589474575, "grad_norm": 0.1914248913526535, "learning_rate": 0.001, "loss": 1.6862, "step": 21803 }, { "epoch": 0.9224130637109739, "grad_norm": 10.729948997497559, "learning_rate": 0.001, "loss": 1.7948, "step": 21804 }, { "epoch": 0.9224553684744903, "grad_norm": 0.5791533589363098, "learning_rate": 0.001, "loss": 2.1725, "step": 21805 }, { "epoch": 0.9224976732380066, "grad_norm": 0.20189444720745087, "learning_rate": 0.001, "loss": 1.7816, "step": 21806 }, { "epoch": 0.922539978001523, "grad_norm": 5.010307788848877, "learning_rate": 0.001, "loss": 3.1041, "step": 21807 }, { "epoch": 0.9225822827650394, "grad_norm": 0.21783508360385895, "learning_rate": 0.001, "loss": 2.4775, "step": 21808 }, { "epoch": 0.9226245875285557, "grad_norm": 0.20718340575695038, "learning_rate": 0.001, "loss": 2.3186, "step": 21809 }, { "epoch": 0.9226668922920721, "grad_norm": 0.18991310894489288, "learning_rate": 0.001, "loss": 1.8765, "step": 21810 }, { "epoch": 0.9227091970555885, "grad_norm": 0.1602325290441513, "learning_rate": 0.001, "loss": 1.8977, "step": 21811 }, { "epoch": 0.9227515018191048, "grad_norm": 0.40473005175590515, "learning_rate": 0.001, "loss": 2.3219, "step": 21812 }, { "epoch": 0.9227938065826212, "grad_norm": 0.46014153957366943, "learning_rate": 0.001, "loss": 3.8687, "step": 21813 }, { "epoch": 0.9228361113461375, "grad_norm": 53.86265182495117, "learning_rate": 0.001, "loss": 3.7695, "step": 21814 }, { "epoch": 0.9228784161096539, "grad_norm": 0.18623895943164825, "learning_rate": 0.001, "loss": 2.0448, "step": 21815 }, { "epoch": 0.9229207208731703, "grad_norm": 0.33342859148979187, "learning_rate": 0.001, "loss": 2.0159, "step": 21816 }, { "epoch": 0.9229630256366866, "grad_norm": 17.7737979888916, "learning_rate": 0.001, "loss": 3.0125, "step": 21817 }, { "epoch": 0.923005330400203, "grad_norm": 0.12280112504959106, "learning_rate": 0.001, "loss": 1.5704, "step": 21818 }, { "epoch": 0.9230476351637195, "grad_norm": 0.21494708955287933, "learning_rate": 0.001, "loss": 1.6715, "step": 21819 }, { "epoch": 0.9230899399272358, "grad_norm": 0.2017885446548462, "learning_rate": 0.001, "loss": 2.2747, "step": 21820 }, { "epoch": 0.9231322446907522, "grad_norm": 0.2331605851650238, "learning_rate": 0.001, "loss": 1.9135, "step": 21821 }, { "epoch": 0.9231745494542686, "grad_norm": 0.16537924110889435, "learning_rate": 0.001, "loss": 2.3864, "step": 21822 }, { "epoch": 0.9232168542177849, "grad_norm": 0.1348440796136856, "learning_rate": 0.001, "loss": 1.8148, "step": 21823 }, { "epoch": 0.9232591589813013, "grad_norm": 0.24906490743160248, "learning_rate": 0.001, "loss": 2.2961, "step": 21824 }, { "epoch": 0.9233014637448177, "grad_norm": 0.16530346870422363, "learning_rate": 0.001, "loss": 2.0339, "step": 21825 }, { "epoch": 0.923343768508334, "grad_norm": 0.13655491173267365, "learning_rate": 0.001, "loss": 2.1709, "step": 21826 }, { "epoch": 0.9233860732718504, "grad_norm": 0.13853634893894196, "learning_rate": 0.001, "loss": 1.7936, "step": 21827 }, { "epoch": 0.9234283780353668, "grad_norm": 0.14643678069114685, "learning_rate": 0.001, "loss": 2.1668, "step": 21828 }, { "epoch": 0.9234706827988831, "grad_norm": 0.2975621521472931, "learning_rate": 0.001, "loss": 1.9856, "step": 21829 }, { "epoch": 0.9235129875623995, "grad_norm": 0.1982540637254715, "learning_rate": 0.001, "loss": 2.4848, "step": 21830 }, { "epoch": 0.9235552923259159, "grad_norm": 0.22689512372016907, "learning_rate": 0.001, "loss": 2.57, "step": 21831 }, { "epoch": 0.9235975970894322, "grad_norm": 0.1401916742324829, "learning_rate": 0.001, "loss": 1.6943, "step": 21832 }, { "epoch": 0.9236399018529486, "grad_norm": 2.501429796218872, "learning_rate": 0.001, "loss": 1.5686, "step": 21833 }, { "epoch": 0.923682206616465, "grad_norm": 0.15667013823986053, "learning_rate": 0.001, "loss": 1.5747, "step": 21834 }, { "epoch": 0.9237245113799814, "grad_norm": 0.19675031304359436, "learning_rate": 0.001, "loss": 2.1214, "step": 21835 }, { "epoch": 0.9237668161434978, "grad_norm": 0.28486713767051697, "learning_rate": 0.001, "loss": 2.2525, "step": 21836 }, { "epoch": 0.9238091209070142, "grad_norm": 0.28747811913490295, "learning_rate": 0.001, "loss": 2.6731, "step": 21837 }, { "epoch": 0.9238514256705305, "grad_norm": 0.16760526597499847, "learning_rate": 0.001, "loss": 2.2244, "step": 21838 }, { "epoch": 0.9238937304340469, "grad_norm": 0.23037217557430267, "learning_rate": 0.001, "loss": 2.328, "step": 21839 }, { "epoch": 0.9239360351975633, "grad_norm": 0.13840559124946594, "learning_rate": 0.001, "loss": 1.8101, "step": 21840 }, { "epoch": 0.9239783399610796, "grad_norm": 0.25135916471481323, "learning_rate": 0.001, "loss": 2.3498, "step": 21841 }, { "epoch": 0.924020644724596, "grad_norm": 55.877220153808594, "learning_rate": 0.001, "loss": 2.1315, "step": 21842 }, { "epoch": 0.9240629494881124, "grad_norm": 0.12954889237880707, "learning_rate": 0.001, "loss": 2.0259, "step": 21843 }, { "epoch": 0.9241052542516287, "grad_norm": 3.64554762840271, "learning_rate": 0.001, "loss": 2.1561, "step": 21844 }, { "epoch": 0.9241475590151451, "grad_norm": 0.13647957146167755, "learning_rate": 0.001, "loss": 2.0474, "step": 21845 }, { "epoch": 0.9241898637786615, "grad_norm": 0.1622430980205536, "learning_rate": 0.001, "loss": 2.051, "step": 21846 }, { "epoch": 0.9242321685421778, "grad_norm": 0.17634640634059906, "learning_rate": 0.001, "loss": 2.8697, "step": 21847 }, { "epoch": 0.9242744733056942, "grad_norm": 0.14419732987880707, "learning_rate": 0.001, "loss": 1.8025, "step": 21848 }, { "epoch": 0.9243167780692106, "grad_norm": 37.351749420166016, "learning_rate": 0.001, "loss": 1.999, "step": 21849 }, { "epoch": 0.9243590828327269, "grad_norm": 0.20993314683437347, "learning_rate": 0.001, "loss": 2.2057, "step": 21850 }, { "epoch": 0.9244013875962434, "grad_norm": 0.29707229137420654, "learning_rate": 0.001, "loss": 3.4083, "step": 21851 }, { "epoch": 0.9244436923597598, "grad_norm": 0.2973187565803528, "learning_rate": 0.001, "loss": 2.5179, "step": 21852 }, { "epoch": 0.9244859971232761, "grad_norm": 0.26736098527908325, "learning_rate": 0.001, "loss": 1.8084, "step": 21853 }, { "epoch": 0.9245283018867925, "grad_norm": 0.22918181121349335, "learning_rate": 0.001, "loss": 2.0938, "step": 21854 }, { "epoch": 0.9245706066503089, "grad_norm": 0.2083725780248642, "learning_rate": 0.001, "loss": 3.048, "step": 21855 }, { "epoch": 0.9246129114138252, "grad_norm": 1.0710909366607666, "learning_rate": 0.001, "loss": 1.9642, "step": 21856 }, { "epoch": 0.9246552161773416, "grad_norm": 0.6909781694412231, "learning_rate": 0.001, "loss": 3.8566, "step": 21857 }, { "epoch": 0.9246975209408579, "grad_norm": 0.310916543006897, "learning_rate": 0.001, "loss": 2.163, "step": 21858 }, { "epoch": 0.9247398257043743, "grad_norm": 0.15251141786575317, "learning_rate": 0.001, "loss": 2.3132, "step": 21859 }, { "epoch": 0.9247821304678907, "grad_norm": 0.19609764218330383, "learning_rate": 0.001, "loss": 1.6755, "step": 21860 }, { "epoch": 0.924824435231407, "grad_norm": 0.7422865033149719, "learning_rate": 0.001, "loss": 2.823, "step": 21861 }, { "epoch": 0.9248667399949234, "grad_norm": 0.14667384326457977, "learning_rate": 0.001, "loss": 2.1124, "step": 21862 }, { "epoch": 0.9249090447584398, "grad_norm": 0.16594403982162476, "learning_rate": 0.001, "loss": 2.0591, "step": 21863 }, { "epoch": 0.9249513495219561, "grad_norm": 0.18323829770088196, "learning_rate": 0.001, "loss": 1.6222, "step": 21864 }, { "epoch": 0.9249936542854725, "grad_norm": 0.22297947108745575, "learning_rate": 0.001, "loss": 2.2291, "step": 21865 }, { "epoch": 0.9250359590489889, "grad_norm": 0.16403332352638245, "learning_rate": 0.001, "loss": 2.3493, "step": 21866 }, { "epoch": 0.9250782638125052, "grad_norm": 0.23182706534862518, "learning_rate": 0.001, "loss": 2.0805, "step": 21867 }, { "epoch": 0.9251205685760217, "grad_norm": 0.22759734094142914, "learning_rate": 0.001, "loss": 2.4908, "step": 21868 }, { "epoch": 0.9251628733395381, "grad_norm": 0.6803989410400391, "learning_rate": 0.001, "loss": 2.7571, "step": 21869 }, { "epoch": 0.9252051781030544, "grad_norm": 1.2668521404266357, "learning_rate": 0.001, "loss": 1.9701, "step": 21870 }, { "epoch": 0.9252474828665708, "grad_norm": 0.1656140238046646, "learning_rate": 0.001, "loss": 1.9307, "step": 21871 }, { "epoch": 0.9252897876300872, "grad_norm": 0.14242412149906158, "learning_rate": 0.001, "loss": 2.5352, "step": 21872 }, { "epoch": 0.9253320923936035, "grad_norm": 0.1434088498353958, "learning_rate": 0.001, "loss": 2.284, "step": 21873 }, { "epoch": 0.9253743971571199, "grad_norm": 0.21865014731884003, "learning_rate": 0.001, "loss": 2.3557, "step": 21874 }, { "epoch": 0.9254167019206363, "grad_norm": 0.1559746265411377, "learning_rate": 0.001, "loss": 1.9937, "step": 21875 }, { "epoch": 0.9254590066841526, "grad_norm": 2.3426716327667236, "learning_rate": 0.001, "loss": 2.8022, "step": 21876 }, { "epoch": 0.925501311447669, "grad_norm": 0.18600401282310486, "learning_rate": 0.001, "loss": 2.062, "step": 21877 }, { "epoch": 0.9255436162111854, "grad_norm": 0.37630805373191833, "learning_rate": 0.001, "loss": 2.308, "step": 21878 }, { "epoch": 0.9255859209747017, "grad_norm": 0.26190125942230225, "learning_rate": 0.001, "loss": 1.5722, "step": 21879 }, { "epoch": 0.9256282257382181, "grad_norm": 0.17115432024002075, "learning_rate": 0.001, "loss": 2.0632, "step": 21880 }, { "epoch": 0.9256705305017345, "grad_norm": 0.3577999472618103, "learning_rate": 0.001, "loss": 2.8232, "step": 21881 }, { "epoch": 0.9257128352652508, "grad_norm": 0.15650738775730133, "learning_rate": 0.001, "loss": 2.1344, "step": 21882 }, { "epoch": 0.9257551400287672, "grad_norm": 1.1321587562561035, "learning_rate": 0.001, "loss": 3.678, "step": 21883 }, { "epoch": 0.9257974447922837, "grad_norm": 0.2203882336616516, "learning_rate": 0.001, "loss": 2.5869, "step": 21884 }, { "epoch": 0.9258397495558, "grad_norm": 0.20148371160030365, "learning_rate": 0.001, "loss": 1.5426, "step": 21885 }, { "epoch": 0.9258820543193164, "grad_norm": 0.18005190789699554, "learning_rate": 0.001, "loss": 1.9097, "step": 21886 }, { "epoch": 0.9259243590828328, "grad_norm": 0.9154829382896423, "learning_rate": 0.001, "loss": 2.5761, "step": 21887 }, { "epoch": 0.9259666638463491, "grad_norm": 0.16887728869915009, "learning_rate": 0.001, "loss": 2.6111, "step": 21888 }, { "epoch": 0.9260089686098655, "grad_norm": 0.16773365437984467, "learning_rate": 0.001, "loss": 1.4804, "step": 21889 }, { "epoch": 0.9260512733733819, "grad_norm": 0.672725260257721, "learning_rate": 0.001, "loss": 2.6119, "step": 21890 }, { "epoch": 0.9260935781368982, "grad_norm": 2.3344016075134277, "learning_rate": 0.001, "loss": 1.5694, "step": 21891 }, { "epoch": 0.9261358829004146, "grad_norm": 0.21035782992839813, "learning_rate": 0.001, "loss": 3.0887, "step": 21892 }, { "epoch": 0.926178187663931, "grad_norm": 0.1845501810312271, "learning_rate": 0.001, "loss": 2.0434, "step": 21893 }, { "epoch": 0.9262204924274473, "grad_norm": 0.20723649859428406, "learning_rate": 0.001, "loss": 1.8821, "step": 21894 }, { "epoch": 0.9262627971909637, "grad_norm": 0.4533455967903137, "learning_rate": 0.001, "loss": 2.3809, "step": 21895 }, { "epoch": 0.9263051019544801, "grad_norm": 1.0757017135620117, "learning_rate": 0.001, "loss": 2.2222, "step": 21896 }, { "epoch": 0.9263474067179964, "grad_norm": 0.2541113495826721, "learning_rate": 0.001, "loss": 2.3574, "step": 21897 }, { "epoch": 0.9263897114815128, "grad_norm": 1.4928815364837646, "learning_rate": 0.001, "loss": 2.738, "step": 21898 }, { "epoch": 0.9264320162450292, "grad_norm": 0.17061060667037964, "learning_rate": 0.001, "loss": 1.8219, "step": 21899 }, { "epoch": 0.9264743210085455, "grad_norm": 2.1404104232788086, "learning_rate": 0.001, "loss": 3.1614, "step": 21900 }, { "epoch": 0.926516625772062, "grad_norm": 0.4184578061103821, "learning_rate": 0.001, "loss": 2.5873, "step": 21901 }, { "epoch": 0.9265589305355783, "grad_norm": 0.758017361164093, "learning_rate": 0.001, "loss": 2.5198, "step": 21902 }, { "epoch": 0.9266012352990947, "grad_norm": 0.2236809879541397, "learning_rate": 0.001, "loss": 2.0093, "step": 21903 }, { "epoch": 0.9266435400626111, "grad_norm": 0.1709446907043457, "learning_rate": 0.001, "loss": 2.2584, "step": 21904 }, { "epoch": 0.9266858448261274, "grad_norm": 0.18377362191677094, "learning_rate": 0.001, "loss": 1.7896, "step": 21905 }, { "epoch": 0.9267281495896438, "grad_norm": 0.17706288397312164, "learning_rate": 0.001, "loss": 2.1536, "step": 21906 }, { "epoch": 0.9267704543531602, "grad_norm": 3.2292115688323975, "learning_rate": 0.001, "loss": 2.3149, "step": 21907 }, { "epoch": 0.9268127591166765, "grad_norm": 0.1633622944355011, "learning_rate": 0.001, "loss": 2.2869, "step": 21908 }, { "epoch": 0.9268550638801929, "grad_norm": 0.18171913921833038, "learning_rate": 0.001, "loss": 1.4094, "step": 21909 }, { "epoch": 0.9268973686437093, "grad_norm": 0.20554277300834656, "learning_rate": 0.001, "loss": 1.8559, "step": 21910 }, { "epoch": 0.9269396734072256, "grad_norm": 0.4027025103569031, "learning_rate": 0.001, "loss": 3.185, "step": 21911 }, { "epoch": 0.926981978170742, "grad_norm": 0.23326435685157776, "learning_rate": 0.001, "loss": 2.6125, "step": 21912 }, { "epoch": 0.9270242829342584, "grad_norm": 0.13189657032489777, "learning_rate": 0.001, "loss": 1.8056, "step": 21913 }, { "epoch": 0.9270665876977747, "grad_norm": 0.15088336169719696, "learning_rate": 0.001, "loss": 2.4187, "step": 21914 }, { "epoch": 0.9271088924612911, "grad_norm": 0.275808185338974, "learning_rate": 0.001, "loss": 2.0664, "step": 21915 }, { "epoch": 0.9271511972248075, "grad_norm": 0.16052496433258057, "learning_rate": 0.001, "loss": 1.911, "step": 21916 }, { "epoch": 0.9271935019883238, "grad_norm": 0.14898687601089478, "learning_rate": 0.001, "loss": 1.9662, "step": 21917 }, { "epoch": 0.9272358067518403, "grad_norm": 0.13894212245941162, "learning_rate": 0.001, "loss": 2.3613, "step": 21918 }, { "epoch": 0.9272781115153567, "grad_norm": 0.12985843420028687, "learning_rate": 0.001, "loss": 1.5966, "step": 21919 }, { "epoch": 0.927320416278873, "grad_norm": 13.45024299621582, "learning_rate": 0.001, "loss": 2.3785, "step": 21920 }, { "epoch": 0.9273627210423894, "grad_norm": 0.14908571541309357, "learning_rate": 0.001, "loss": 2.8162, "step": 21921 }, { "epoch": 0.9274050258059058, "grad_norm": 0.1413915455341339, "learning_rate": 0.001, "loss": 2.1948, "step": 21922 }, { "epoch": 0.9274473305694221, "grad_norm": 0.13808265328407288, "learning_rate": 0.001, "loss": 1.619, "step": 21923 }, { "epoch": 0.9274896353329385, "grad_norm": 1.1322904825210571, "learning_rate": 0.001, "loss": 3.322, "step": 21924 }, { "epoch": 0.9275319400964549, "grad_norm": 0.19078315794467926, "learning_rate": 0.001, "loss": 2.4069, "step": 21925 }, { "epoch": 0.9275742448599712, "grad_norm": 0.341876357793808, "learning_rate": 0.001, "loss": 1.9122, "step": 21926 }, { "epoch": 0.9276165496234876, "grad_norm": 0.1864626556634903, "learning_rate": 0.001, "loss": 1.8488, "step": 21927 }, { "epoch": 0.927658854387004, "grad_norm": 0.19447799026966095, "learning_rate": 0.001, "loss": 2.0424, "step": 21928 }, { "epoch": 0.9277011591505203, "grad_norm": 0.534332275390625, "learning_rate": 0.001, "loss": 2.9907, "step": 21929 }, { "epoch": 0.9277434639140367, "grad_norm": 0.16288526356220245, "learning_rate": 0.001, "loss": 2.4716, "step": 21930 }, { "epoch": 0.9277857686775531, "grad_norm": 0.1490895301103592, "learning_rate": 0.001, "loss": 1.6003, "step": 21931 }, { "epoch": 0.9278280734410694, "grad_norm": 0.16813409328460693, "learning_rate": 0.001, "loss": 2.4994, "step": 21932 }, { "epoch": 0.9278703782045858, "grad_norm": 0.14073902368545532, "learning_rate": 0.001, "loss": 2.4238, "step": 21933 }, { "epoch": 0.9279126829681023, "grad_norm": 0.1578540802001953, "learning_rate": 0.001, "loss": 1.8206, "step": 21934 }, { "epoch": 0.9279549877316186, "grad_norm": 0.3728206753730774, "learning_rate": 0.001, "loss": 2.1717, "step": 21935 }, { "epoch": 0.927997292495135, "grad_norm": 0.16407276690006256, "learning_rate": 0.001, "loss": 1.3542, "step": 21936 }, { "epoch": 0.9280395972586514, "grad_norm": 0.5730623006820679, "learning_rate": 0.001, "loss": 2.5158, "step": 21937 }, { "epoch": 0.9280819020221677, "grad_norm": 0.4225897789001465, "learning_rate": 0.001, "loss": 2.63, "step": 21938 }, { "epoch": 0.9281242067856841, "grad_norm": 0.19734539091587067, "learning_rate": 0.001, "loss": 1.9227, "step": 21939 }, { "epoch": 0.9281665115492005, "grad_norm": 0.22450488805770874, "learning_rate": 0.001, "loss": 2.4276, "step": 21940 }, { "epoch": 0.9282088163127168, "grad_norm": 0.174544557929039, "learning_rate": 0.001, "loss": 2.449, "step": 21941 }, { "epoch": 0.9282511210762332, "grad_norm": 0.16052083671092987, "learning_rate": 0.001, "loss": 2.0025, "step": 21942 }, { "epoch": 0.9282934258397496, "grad_norm": 0.14678356051445007, "learning_rate": 0.001, "loss": 2.2724, "step": 21943 }, { "epoch": 0.9283357306032659, "grad_norm": 0.1925598680973053, "learning_rate": 0.001, "loss": 2.3425, "step": 21944 }, { "epoch": 0.9283780353667823, "grad_norm": 0.1436510980129242, "learning_rate": 0.001, "loss": 1.4963, "step": 21945 }, { "epoch": 0.9284203401302987, "grad_norm": 0.1730327010154724, "learning_rate": 0.001, "loss": 2.7398, "step": 21946 }, { "epoch": 0.928462644893815, "grad_norm": 0.19589565694332123, "learning_rate": 0.001, "loss": 1.7817, "step": 21947 }, { "epoch": 0.9285049496573314, "grad_norm": 0.4311773180961609, "learning_rate": 0.001, "loss": 2.5202, "step": 21948 }, { "epoch": 0.9285472544208477, "grad_norm": 0.16326993703842163, "learning_rate": 0.001, "loss": 1.5574, "step": 21949 }, { "epoch": 0.9285895591843641, "grad_norm": 0.17932333052158356, "learning_rate": 0.001, "loss": 1.8842, "step": 21950 }, { "epoch": 0.9286318639478806, "grad_norm": 0.18280398845672607, "learning_rate": 0.001, "loss": 1.8407, "step": 21951 }, { "epoch": 0.9286741687113969, "grad_norm": 0.2984718978404999, "learning_rate": 0.001, "loss": 2.01, "step": 21952 }, { "epoch": 0.9287164734749133, "grad_norm": 0.1796807199716568, "learning_rate": 0.001, "loss": 2.3846, "step": 21953 }, { "epoch": 0.9287587782384297, "grad_norm": 0.21217189729213715, "learning_rate": 0.001, "loss": 1.991, "step": 21954 }, { "epoch": 0.928801083001946, "grad_norm": 0.19677573442459106, "learning_rate": 0.001, "loss": 1.8435, "step": 21955 }, { "epoch": 0.9288433877654624, "grad_norm": 0.15436428785324097, "learning_rate": 0.001, "loss": 2.0693, "step": 21956 }, { "epoch": 0.9288856925289788, "grad_norm": 0.15341611206531525, "learning_rate": 0.001, "loss": 1.5144, "step": 21957 }, { "epoch": 0.9289279972924951, "grad_norm": 0.16485430300235748, "learning_rate": 0.001, "loss": 2.2537, "step": 21958 }, { "epoch": 0.9289703020560115, "grad_norm": 0.20801179111003876, "learning_rate": 0.001, "loss": 1.6632, "step": 21959 }, { "epoch": 0.9290126068195279, "grad_norm": 0.1642443984746933, "learning_rate": 0.001, "loss": 2.366, "step": 21960 }, { "epoch": 0.9290549115830442, "grad_norm": 0.22680698335170746, "learning_rate": 0.001, "loss": 1.711, "step": 21961 }, { "epoch": 0.9290972163465606, "grad_norm": 0.16985486447811127, "learning_rate": 0.001, "loss": 2.383, "step": 21962 }, { "epoch": 0.929139521110077, "grad_norm": 0.1616482138633728, "learning_rate": 0.001, "loss": 1.9125, "step": 21963 }, { "epoch": 0.9291818258735933, "grad_norm": 0.15879781544208527, "learning_rate": 0.001, "loss": 2.075, "step": 21964 }, { "epoch": 0.9292241306371097, "grad_norm": 0.12933063507080078, "learning_rate": 0.001, "loss": 2.4285, "step": 21965 }, { "epoch": 0.9292664354006261, "grad_norm": 0.1703050583600998, "learning_rate": 0.001, "loss": 1.8814, "step": 21966 }, { "epoch": 0.9293087401641424, "grad_norm": 0.15759029984474182, "learning_rate": 0.001, "loss": 1.975, "step": 21967 }, { "epoch": 0.9293510449276589, "grad_norm": 0.16814181208610535, "learning_rate": 0.001, "loss": 3.0872, "step": 21968 }, { "epoch": 0.9293933496911753, "grad_norm": 1.8021339178085327, "learning_rate": 0.001, "loss": 2.3614, "step": 21969 }, { "epoch": 0.9294356544546916, "grad_norm": 0.16891023516654968, "learning_rate": 0.001, "loss": 1.7206, "step": 21970 }, { "epoch": 0.929477959218208, "grad_norm": 0.297505259513855, "learning_rate": 0.001, "loss": 2.7611, "step": 21971 }, { "epoch": 0.9295202639817244, "grad_norm": 0.1714632362127304, "learning_rate": 0.001, "loss": 2.1129, "step": 21972 }, { "epoch": 0.9295625687452407, "grad_norm": 0.19479063153266907, "learning_rate": 0.001, "loss": 2.0595, "step": 21973 }, { "epoch": 0.9296048735087571, "grad_norm": 0.180049866437912, "learning_rate": 0.001, "loss": 2.3096, "step": 21974 }, { "epoch": 0.9296471782722735, "grad_norm": 0.6898881196975708, "learning_rate": 0.001, "loss": 2.1587, "step": 21975 }, { "epoch": 0.9296894830357898, "grad_norm": 1.291130781173706, "learning_rate": 0.001, "loss": 2.1467, "step": 21976 }, { "epoch": 0.9297317877993062, "grad_norm": 0.16307774186134338, "learning_rate": 0.001, "loss": 2.5267, "step": 21977 }, { "epoch": 0.9297740925628226, "grad_norm": 0.2098264843225479, "learning_rate": 0.001, "loss": 2.5711, "step": 21978 }, { "epoch": 0.9298163973263389, "grad_norm": 2.183814287185669, "learning_rate": 0.001, "loss": 1.6738, "step": 21979 }, { "epoch": 0.9298587020898553, "grad_norm": 0.1923678070306778, "learning_rate": 0.001, "loss": 3.2464, "step": 21980 }, { "epoch": 0.9299010068533717, "grad_norm": 0.13686540722846985, "learning_rate": 0.001, "loss": 2.1387, "step": 21981 }, { "epoch": 0.929943311616888, "grad_norm": 4.34953498840332, "learning_rate": 0.001, "loss": 1.6731, "step": 21982 }, { "epoch": 0.9299856163804044, "grad_norm": 0.16118478775024414, "learning_rate": 0.001, "loss": 2.368, "step": 21983 }, { "epoch": 0.9300279211439209, "grad_norm": 0.589535117149353, "learning_rate": 0.001, "loss": 2.4586, "step": 21984 }, { "epoch": 0.9300702259074372, "grad_norm": 0.535638689994812, "learning_rate": 0.001, "loss": 2.6041, "step": 21985 }, { "epoch": 0.9301125306709536, "grad_norm": 0.1409117430448532, "learning_rate": 0.001, "loss": 2.2921, "step": 21986 }, { "epoch": 0.93015483543447, "grad_norm": 0.27417516708374023, "learning_rate": 0.001, "loss": 1.468, "step": 21987 }, { "epoch": 0.9301971401979863, "grad_norm": 0.4808787405490875, "learning_rate": 0.001, "loss": 1.8091, "step": 21988 }, { "epoch": 0.9302394449615027, "grad_norm": 3.5292954444885254, "learning_rate": 0.001, "loss": 2.1259, "step": 21989 }, { "epoch": 0.9302817497250191, "grad_norm": 0.1566908210515976, "learning_rate": 0.001, "loss": 2.3119, "step": 21990 }, { "epoch": 0.9303240544885354, "grad_norm": 0.14310206472873688, "learning_rate": 0.001, "loss": 3.0195, "step": 21991 }, { "epoch": 0.9303663592520518, "grad_norm": 0.16697970032691956, "learning_rate": 0.001, "loss": 2.2591, "step": 21992 }, { "epoch": 0.9304086640155681, "grad_norm": 0.1776588410139084, "learning_rate": 0.001, "loss": 2.4498, "step": 21993 }, { "epoch": 0.9304509687790845, "grad_norm": 0.6791718602180481, "learning_rate": 0.001, "loss": 1.7671, "step": 21994 }, { "epoch": 0.9304932735426009, "grad_norm": 11.523677825927734, "learning_rate": 0.001, "loss": 2.4538, "step": 21995 }, { "epoch": 0.9305355783061172, "grad_norm": 0.16806228458881378, "learning_rate": 0.001, "loss": 2.1725, "step": 21996 }, { "epoch": 0.9305778830696336, "grad_norm": 0.24460969865322113, "learning_rate": 0.001, "loss": 2.6757, "step": 21997 }, { "epoch": 0.93062018783315, "grad_norm": 0.19408555328845978, "learning_rate": 0.001, "loss": 2.5526, "step": 21998 }, { "epoch": 0.9306624925966663, "grad_norm": 0.18568742275238037, "learning_rate": 0.001, "loss": 2.6363, "step": 21999 }, { "epoch": 0.9307047973601827, "grad_norm": 0.16618452966213226, "learning_rate": 0.001, "loss": 2.1546, "step": 22000 }, { "epoch": 0.9307471021236992, "grad_norm": 0.16462920606136322, "learning_rate": 0.001, "loss": 1.5723, "step": 22001 }, { "epoch": 0.9307894068872155, "grad_norm": 0.12047246843576431, "learning_rate": 0.001, "loss": 1.9203, "step": 22002 }, { "epoch": 0.9308317116507319, "grad_norm": 0.22285322844982147, "learning_rate": 0.001, "loss": 3.2207, "step": 22003 }, { "epoch": 0.9308740164142483, "grad_norm": 0.14793570339679718, "learning_rate": 0.001, "loss": 2.233, "step": 22004 }, { "epoch": 0.9309163211777646, "grad_norm": 0.16332119703292847, "learning_rate": 0.001, "loss": 1.9054, "step": 22005 }, { "epoch": 0.930958625941281, "grad_norm": 0.1610136777162552, "learning_rate": 0.001, "loss": 2.1695, "step": 22006 }, { "epoch": 0.9310009307047974, "grad_norm": 0.8120068907737732, "learning_rate": 0.001, "loss": 2.0812, "step": 22007 }, { "epoch": 0.9310432354683137, "grad_norm": 0.1367693543434143, "learning_rate": 0.001, "loss": 1.6623, "step": 22008 }, { "epoch": 0.9310855402318301, "grad_norm": 0.2394467443227768, "learning_rate": 0.001, "loss": 2.7049, "step": 22009 }, { "epoch": 0.9311278449953465, "grad_norm": 0.4347785413265228, "learning_rate": 0.001, "loss": 2.373, "step": 22010 }, { "epoch": 0.9311701497588628, "grad_norm": 0.1576579064130783, "learning_rate": 0.001, "loss": 2.3929, "step": 22011 }, { "epoch": 0.9312124545223792, "grad_norm": 0.41782185435295105, "learning_rate": 0.001, "loss": 1.6985, "step": 22012 }, { "epoch": 0.9312547592858956, "grad_norm": 0.16780440509319305, "learning_rate": 0.001, "loss": 1.8258, "step": 22013 }, { "epoch": 0.9312970640494119, "grad_norm": 0.15203818678855896, "learning_rate": 0.001, "loss": 2.5598, "step": 22014 }, { "epoch": 0.9313393688129283, "grad_norm": 0.1656513512134552, "learning_rate": 0.001, "loss": 2.0156, "step": 22015 }, { "epoch": 0.9313816735764447, "grad_norm": 0.2978683412075043, "learning_rate": 0.001, "loss": 1.8497, "step": 22016 }, { "epoch": 0.931423978339961, "grad_norm": 2.85372257232666, "learning_rate": 0.001, "loss": 2.6914, "step": 22017 }, { "epoch": 0.9314662831034775, "grad_norm": 0.16330118477344513, "learning_rate": 0.001, "loss": 2.3187, "step": 22018 }, { "epoch": 0.9315085878669939, "grad_norm": 0.6117385029792786, "learning_rate": 0.001, "loss": 2.3308, "step": 22019 }, { "epoch": 0.9315508926305102, "grad_norm": 0.15620072185993195, "learning_rate": 0.001, "loss": 2.5478, "step": 22020 }, { "epoch": 0.9315931973940266, "grad_norm": 0.167611226439476, "learning_rate": 0.001, "loss": 2.4346, "step": 22021 }, { "epoch": 0.931635502157543, "grad_norm": 0.36283206939697266, "learning_rate": 0.001, "loss": 2.1529, "step": 22022 }, { "epoch": 0.9316778069210593, "grad_norm": 0.15195079147815704, "learning_rate": 0.001, "loss": 1.6573, "step": 22023 }, { "epoch": 0.9317201116845757, "grad_norm": 0.16134585440158844, "learning_rate": 0.001, "loss": 2.0162, "step": 22024 }, { "epoch": 0.9317624164480921, "grad_norm": 0.20685450732707977, "learning_rate": 0.001, "loss": 1.9848, "step": 22025 }, { "epoch": 0.9318047212116084, "grad_norm": 0.24313196539878845, "learning_rate": 0.001, "loss": 2.4929, "step": 22026 }, { "epoch": 0.9318470259751248, "grad_norm": 0.20717033743858337, "learning_rate": 0.001, "loss": 1.6051, "step": 22027 }, { "epoch": 0.9318893307386412, "grad_norm": 0.1792394369840622, "learning_rate": 0.001, "loss": 2.2256, "step": 22028 }, { "epoch": 0.9319316355021575, "grad_norm": 0.17706766724586487, "learning_rate": 0.001, "loss": 3.126, "step": 22029 }, { "epoch": 0.9319739402656739, "grad_norm": 0.21002162992954254, "learning_rate": 0.001, "loss": 2.634, "step": 22030 }, { "epoch": 0.9320162450291903, "grad_norm": 0.16654951870441437, "learning_rate": 0.001, "loss": 2.363, "step": 22031 }, { "epoch": 0.9320585497927066, "grad_norm": 0.19493767619132996, "learning_rate": 0.001, "loss": 2.8382, "step": 22032 }, { "epoch": 0.932100854556223, "grad_norm": 0.24346645176410675, "learning_rate": 0.001, "loss": 1.4973, "step": 22033 }, { "epoch": 0.9321431593197395, "grad_norm": 0.19552533328533173, "learning_rate": 0.001, "loss": 2.1005, "step": 22034 }, { "epoch": 0.9321854640832558, "grad_norm": 0.4952528774738312, "learning_rate": 0.001, "loss": 2.4358, "step": 22035 }, { "epoch": 0.9322277688467722, "grad_norm": 0.16397695243358612, "learning_rate": 0.001, "loss": 1.7882, "step": 22036 }, { "epoch": 0.9322700736102885, "grad_norm": 0.16197176277637482, "learning_rate": 0.001, "loss": 2.0223, "step": 22037 }, { "epoch": 0.9323123783738049, "grad_norm": 0.5470212697982788, "learning_rate": 0.001, "loss": 3.6555, "step": 22038 }, { "epoch": 0.9323546831373213, "grad_norm": 0.1571442037820816, "learning_rate": 0.001, "loss": 2.9497, "step": 22039 }, { "epoch": 0.9323969879008376, "grad_norm": 0.16548150777816772, "learning_rate": 0.001, "loss": 2.1686, "step": 22040 }, { "epoch": 0.932439292664354, "grad_norm": 22.103620529174805, "learning_rate": 0.001, "loss": 2.1238, "step": 22041 }, { "epoch": 0.9324815974278704, "grad_norm": 0.16696321964263916, "learning_rate": 0.001, "loss": 2.387, "step": 22042 }, { "epoch": 0.9325239021913867, "grad_norm": 31.06536102294922, "learning_rate": 0.001, "loss": 2.0811, "step": 22043 }, { "epoch": 0.9325662069549031, "grad_norm": 0.3314652144908905, "learning_rate": 0.001, "loss": 2.2734, "step": 22044 }, { "epoch": 0.9326085117184195, "grad_norm": 0.17265798151493073, "learning_rate": 0.001, "loss": 2.7165, "step": 22045 }, { "epoch": 0.9326508164819358, "grad_norm": 0.1684129536151886, "learning_rate": 0.001, "loss": 1.8214, "step": 22046 }, { "epoch": 0.9326931212454522, "grad_norm": 3.3001720905303955, "learning_rate": 0.001, "loss": 2.131, "step": 22047 }, { "epoch": 0.9327354260089686, "grad_norm": 0.13358072936534882, "learning_rate": 0.001, "loss": 2.129, "step": 22048 }, { "epoch": 0.9327777307724849, "grad_norm": 0.15245433151721954, "learning_rate": 0.001, "loss": 2.2186, "step": 22049 }, { "epoch": 0.9328200355360013, "grad_norm": 0.16228505969047546, "learning_rate": 0.001, "loss": 1.5467, "step": 22050 }, { "epoch": 0.9328623402995178, "grad_norm": 0.18052756786346436, "learning_rate": 0.001, "loss": 2.555, "step": 22051 }, { "epoch": 0.932904645063034, "grad_norm": 0.20280319452285767, "learning_rate": 0.001, "loss": 1.93, "step": 22052 }, { "epoch": 0.9329469498265505, "grad_norm": 0.15624532103538513, "learning_rate": 0.001, "loss": 2.3892, "step": 22053 }, { "epoch": 0.9329892545900669, "grad_norm": 0.15655791759490967, "learning_rate": 0.001, "loss": 2.7288, "step": 22054 }, { "epoch": 0.9330315593535832, "grad_norm": 0.40425026416778564, "learning_rate": 0.001, "loss": 3.1181, "step": 22055 }, { "epoch": 0.9330738641170996, "grad_norm": 0.41149187088012695, "learning_rate": 0.001, "loss": 4.535, "step": 22056 }, { "epoch": 0.933116168880616, "grad_norm": 6.105749130249023, "learning_rate": 0.001, "loss": 2.8493, "step": 22057 }, { "epoch": 0.9331584736441323, "grad_norm": 0.14242605865001678, "learning_rate": 0.001, "loss": 2.2115, "step": 22058 }, { "epoch": 0.9332007784076487, "grad_norm": 0.14453691244125366, "learning_rate": 0.001, "loss": 2.0789, "step": 22059 }, { "epoch": 0.9332430831711651, "grad_norm": 0.1681307554244995, "learning_rate": 0.001, "loss": 2.631, "step": 22060 }, { "epoch": 0.9332853879346814, "grad_norm": 0.14247244596481323, "learning_rate": 0.001, "loss": 2.0911, "step": 22061 }, { "epoch": 0.9333276926981978, "grad_norm": 0.16670750081539154, "learning_rate": 0.001, "loss": 1.9668, "step": 22062 }, { "epoch": 0.9333699974617142, "grad_norm": 0.1602124273777008, "learning_rate": 0.001, "loss": 2.805, "step": 22063 }, { "epoch": 0.9334123022252305, "grad_norm": 0.4584788978099823, "learning_rate": 0.001, "loss": 2.2362, "step": 22064 }, { "epoch": 0.9334546069887469, "grad_norm": 0.16626909375190735, "learning_rate": 0.001, "loss": 2.8254, "step": 22065 }, { "epoch": 0.9334969117522633, "grad_norm": 0.1582920104265213, "learning_rate": 0.001, "loss": 2.3247, "step": 22066 }, { "epoch": 0.9335392165157796, "grad_norm": 0.15107718110084534, "learning_rate": 0.001, "loss": 1.2551, "step": 22067 }, { "epoch": 0.9335815212792961, "grad_norm": 0.1743275374174118, "learning_rate": 0.001, "loss": 1.9243, "step": 22068 }, { "epoch": 0.9336238260428125, "grad_norm": 0.148182213306427, "learning_rate": 0.001, "loss": 1.3991, "step": 22069 }, { "epoch": 0.9336661308063288, "grad_norm": 0.1482088416814804, "learning_rate": 0.001, "loss": 1.905, "step": 22070 }, { "epoch": 0.9337084355698452, "grad_norm": 0.17089058458805084, "learning_rate": 0.001, "loss": 2.596, "step": 22071 }, { "epoch": 0.9337507403333616, "grad_norm": 0.15498429536819458, "learning_rate": 0.001, "loss": 1.7609, "step": 22072 }, { "epoch": 0.9337930450968779, "grad_norm": 0.1602306365966797, "learning_rate": 0.001, "loss": 1.7998, "step": 22073 }, { "epoch": 0.9338353498603943, "grad_norm": 0.816593587398529, "learning_rate": 0.001, "loss": 1.4125, "step": 22074 }, { "epoch": 0.9338776546239107, "grad_norm": 3.837941884994507, "learning_rate": 0.001, "loss": 2.5317, "step": 22075 }, { "epoch": 0.933919959387427, "grad_norm": 0.1522829383611679, "learning_rate": 0.001, "loss": 1.5934, "step": 22076 }, { "epoch": 0.9339622641509434, "grad_norm": 0.1627858579158783, "learning_rate": 0.001, "loss": 2.3209, "step": 22077 }, { "epoch": 0.9340045689144598, "grad_norm": 0.22749550640583038, "learning_rate": 0.001, "loss": 1.7942, "step": 22078 }, { "epoch": 0.9340468736779761, "grad_norm": 0.2853844463825226, "learning_rate": 0.001, "loss": 2.4773, "step": 22079 }, { "epoch": 0.9340891784414925, "grad_norm": 0.16091381013393402, "learning_rate": 0.001, "loss": 2.1776, "step": 22080 }, { "epoch": 0.9341314832050089, "grad_norm": 0.15295559167861938, "learning_rate": 0.001, "loss": 2.8482, "step": 22081 }, { "epoch": 0.9341737879685252, "grad_norm": 0.1788291186094284, "learning_rate": 0.001, "loss": 1.6635, "step": 22082 }, { "epoch": 0.9342160927320416, "grad_norm": 0.1991390734910965, "learning_rate": 0.001, "loss": 2.5794, "step": 22083 }, { "epoch": 0.934258397495558, "grad_norm": 0.32976192235946655, "learning_rate": 0.001, "loss": 2.057, "step": 22084 }, { "epoch": 0.9343007022590744, "grad_norm": 0.1380927860736847, "learning_rate": 0.001, "loss": 1.8436, "step": 22085 }, { "epoch": 0.9343430070225908, "grad_norm": 0.13859638571739197, "learning_rate": 0.001, "loss": 1.8601, "step": 22086 }, { "epoch": 0.9343853117861071, "grad_norm": 3.625734567642212, "learning_rate": 0.001, "loss": 2.2505, "step": 22087 }, { "epoch": 0.9344276165496235, "grad_norm": 0.1253889799118042, "learning_rate": 0.001, "loss": 2.8063, "step": 22088 }, { "epoch": 0.9344699213131399, "grad_norm": 0.1292024403810501, "learning_rate": 0.001, "loss": 1.7487, "step": 22089 }, { "epoch": 0.9345122260766562, "grad_norm": 2.1888620853424072, "learning_rate": 0.001, "loss": 1.8234, "step": 22090 }, { "epoch": 0.9345545308401726, "grad_norm": 0.8209211230278015, "learning_rate": 0.001, "loss": 2.2189, "step": 22091 }, { "epoch": 0.934596835603689, "grad_norm": 0.14426971971988678, "learning_rate": 0.001, "loss": 1.8472, "step": 22092 }, { "epoch": 0.9346391403672053, "grad_norm": 0.1676510125398636, "learning_rate": 0.001, "loss": 2.6767, "step": 22093 }, { "epoch": 0.9346814451307217, "grad_norm": 0.1549409031867981, "learning_rate": 0.001, "loss": 2.5355, "step": 22094 }, { "epoch": 0.9347237498942381, "grad_norm": 0.15506504476070404, "learning_rate": 0.001, "loss": 2.0499, "step": 22095 }, { "epoch": 0.9347660546577544, "grad_norm": 0.16985729336738586, "learning_rate": 0.001, "loss": 2.1923, "step": 22096 }, { "epoch": 0.9348083594212708, "grad_norm": 0.15692901611328125, "learning_rate": 0.001, "loss": 1.603, "step": 22097 }, { "epoch": 0.9348506641847872, "grad_norm": 0.17224203050136566, "learning_rate": 0.001, "loss": 2.0071, "step": 22098 }, { "epoch": 0.9348929689483035, "grad_norm": 1.8327698707580566, "learning_rate": 0.001, "loss": 1.5114, "step": 22099 }, { "epoch": 0.93493527371182, "grad_norm": 0.16607293486595154, "learning_rate": 0.001, "loss": 1.9993, "step": 22100 }, { "epoch": 0.9349775784753364, "grad_norm": 1.2528990507125854, "learning_rate": 0.001, "loss": 2.7158, "step": 22101 }, { "epoch": 0.9350198832388527, "grad_norm": 1.1872773170471191, "learning_rate": 0.001, "loss": 2.4694, "step": 22102 }, { "epoch": 0.9350621880023691, "grad_norm": 0.6562841534614563, "learning_rate": 0.001, "loss": 2.5087, "step": 22103 }, { "epoch": 0.9351044927658855, "grad_norm": 0.16749240458011627, "learning_rate": 0.001, "loss": 2.0348, "step": 22104 }, { "epoch": 0.9351467975294018, "grad_norm": 0.7875110507011414, "learning_rate": 0.001, "loss": 2.6865, "step": 22105 }, { "epoch": 0.9351891022929182, "grad_norm": 0.15838965773582458, "learning_rate": 0.001, "loss": 2.4356, "step": 22106 }, { "epoch": 0.9352314070564346, "grad_norm": 0.23913998901844025, "learning_rate": 0.001, "loss": 2.4341, "step": 22107 }, { "epoch": 0.9352737118199509, "grad_norm": 0.19448761641979218, "learning_rate": 0.001, "loss": 2.0626, "step": 22108 }, { "epoch": 0.9353160165834673, "grad_norm": 0.16222184896469116, "learning_rate": 0.001, "loss": 1.2972, "step": 22109 }, { "epoch": 0.9353583213469837, "grad_norm": 0.17358072102069855, "learning_rate": 0.001, "loss": 2.4516, "step": 22110 }, { "epoch": 0.9354006261105, "grad_norm": 0.22711333632469177, "learning_rate": 0.001, "loss": 2.397, "step": 22111 }, { "epoch": 0.9354429308740164, "grad_norm": 0.151783287525177, "learning_rate": 0.001, "loss": 2.1857, "step": 22112 }, { "epoch": 0.9354852356375328, "grad_norm": 0.7333983182907104, "learning_rate": 0.001, "loss": 1.6396, "step": 22113 }, { "epoch": 0.9355275404010491, "grad_norm": 0.5164382457733154, "learning_rate": 0.001, "loss": 1.9616, "step": 22114 }, { "epoch": 0.9355698451645655, "grad_norm": 0.21135050058364868, "learning_rate": 0.001, "loss": 1.9535, "step": 22115 }, { "epoch": 0.935612149928082, "grad_norm": 0.19637274742126465, "learning_rate": 0.001, "loss": 2.3697, "step": 22116 }, { "epoch": 0.9356544546915982, "grad_norm": 0.2197602093219757, "learning_rate": 0.001, "loss": 2.5342, "step": 22117 }, { "epoch": 0.9356967594551147, "grad_norm": 11.109819412231445, "learning_rate": 0.001, "loss": 1.8034, "step": 22118 }, { "epoch": 0.9357390642186311, "grad_norm": 0.5363801121711731, "learning_rate": 0.001, "loss": 1.6609, "step": 22119 }, { "epoch": 0.9357813689821474, "grad_norm": 0.7339990735054016, "learning_rate": 0.001, "loss": 1.8741, "step": 22120 }, { "epoch": 0.9358236737456638, "grad_norm": 0.1876104772090912, "learning_rate": 0.001, "loss": 1.6976, "step": 22121 }, { "epoch": 0.9358659785091802, "grad_norm": 6.681327819824219, "learning_rate": 0.001, "loss": 2.7797, "step": 22122 }, { "epoch": 0.9359082832726965, "grad_norm": 0.15576091408729553, "learning_rate": 0.001, "loss": 1.8506, "step": 22123 }, { "epoch": 0.9359505880362129, "grad_norm": 0.15668316185474396, "learning_rate": 0.001, "loss": 1.701, "step": 22124 }, { "epoch": 0.9359928927997293, "grad_norm": 0.19054444134235382, "learning_rate": 0.001, "loss": 1.8375, "step": 22125 }, { "epoch": 0.9360351975632456, "grad_norm": 0.14556585252285004, "learning_rate": 0.001, "loss": 1.7359, "step": 22126 }, { "epoch": 0.936077502326762, "grad_norm": 1.7815239429473877, "learning_rate": 0.001, "loss": 2.0106, "step": 22127 }, { "epoch": 0.9361198070902783, "grad_norm": 0.13777472078800201, "learning_rate": 0.001, "loss": 2.0197, "step": 22128 }, { "epoch": 0.9361621118537947, "grad_norm": 0.1779537945985794, "learning_rate": 0.001, "loss": 2.0832, "step": 22129 }, { "epoch": 0.9362044166173111, "grad_norm": 0.1431722640991211, "learning_rate": 0.001, "loss": 2.2073, "step": 22130 }, { "epoch": 0.9362467213808274, "grad_norm": 0.5513030290603638, "learning_rate": 0.001, "loss": 1.8146, "step": 22131 }, { "epoch": 0.9362890261443438, "grad_norm": 0.14709900319576263, "learning_rate": 0.001, "loss": 2.2721, "step": 22132 }, { "epoch": 0.9363313309078602, "grad_norm": 0.13920997083187103, "learning_rate": 0.001, "loss": 1.7674, "step": 22133 }, { "epoch": 0.9363736356713765, "grad_norm": 0.13396479189395905, "learning_rate": 0.001, "loss": 1.5546, "step": 22134 }, { "epoch": 0.936415940434893, "grad_norm": 0.14524602890014648, "learning_rate": 0.001, "loss": 1.9367, "step": 22135 }, { "epoch": 0.9364582451984094, "grad_norm": 0.14268428087234497, "learning_rate": 0.001, "loss": 1.7628, "step": 22136 }, { "epoch": 0.9365005499619257, "grad_norm": 0.16720207035541534, "learning_rate": 0.001, "loss": 2.0936, "step": 22137 }, { "epoch": 0.9365428547254421, "grad_norm": 0.1389460563659668, "learning_rate": 0.001, "loss": 1.5024, "step": 22138 }, { "epoch": 0.9365851594889585, "grad_norm": 0.1430455595254898, "learning_rate": 0.001, "loss": 3.3252, "step": 22139 }, { "epoch": 0.9366274642524748, "grad_norm": 0.2216232270002365, "learning_rate": 0.001, "loss": 1.4568, "step": 22140 }, { "epoch": 0.9366697690159912, "grad_norm": 0.1489740014076233, "learning_rate": 0.001, "loss": 2.1509, "step": 22141 }, { "epoch": 0.9367120737795076, "grad_norm": 0.16746602952480316, "learning_rate": 0.001, "loss": 1.7385, "step": 22142 }, { "epoch": 0.9367543785430239, "grad_norm": 1.196807861328125, "learning_rate": 0.001, "loss": 2.4886, "step": 22143 }, { "epoch": 0.9367966833065403, "grad_norm": 0.15129493176937103, "learning_rate": 0.001, "loss": 2.6034, "step": 22144 }, { "epoch": 0.9368389880700567, "grad_norm": 0.17672112584114075, "learning_rate": 0.001, "loss": 3.1263, "step": 22145 }, { "epoch": 0.936881292833573, "grad_norm": 0.16527965664863586, "learning_rate": 0.001, "loss": 1.997, "step": 22146 }, { "epoch": 0.9369235975970894, "grad_norm": 0.8040674328804016, "learning_rate": 0.001, "loss": 2.0254, "step": 22147 }, { "epoch": 0.9369659023606058, "grad_norm": 0.1395406723022461, "learning_rate": 0.001, "loss": 2.0166, "step": 22148 }, { "epoch": 0.9370082071241221, "grad_norm": 0.14435064792633057, "learning_rate": 0.001, "loss": 1.8352, "step": 22149 }, { "epoch": 0.9370505118876385, "grad_norm": 0.14937026798725128, "learning_rate": 0.001, "loss": 4.2509, "step": 22150 }, { "epoch": 0.937092816651155, "grad_norm": 0.22345872223377228, "learning_rate": 0.001, "loss": 2.0077, "step": 22151 }, { "epoch": 0.9371351214146713, "grad_norm": 0.14060348272323608, "learning_rate": 0.001, "loss": 1.376, "step": 22152 }, { "epoch": 0.9371774261781877, "grad_norm": 0.3378833532333374, "learning_rate": 0.001, "loss": 2.4393, "step": 22153 }, { "epoch": 0.9372197309417041, "grad_norm": 0.16241557896137238, "learning_rate": 0.001, "loss": 2.1142, "step": 22154 }, { "epoch": 0.9372620357052204, "grad_norm": 0.15988023579120636, "learning_rate": 0.001, "loss": 1.9314, "step": 22155 }, { "epoch": 0.9373043404687368, "grad_norm": 0.15488198399543762, "learning_rate": 0.001, "loss": 1.4996, "step": 22156 }, { "epoch": 0.9373466452322532, "grad_norm": 0.157693013548851, "learning_rate": 0.001, "loss": 2.2934, "step": 22157 }, { "epoch": 0.9373889499957695, "grad_norm": 0.16368556022644043, "learning_rate": 0.001, "loss": 1.5275, "step": 22158 }, { "epoch": 0.9374312547592859, "grad_norm": 0.20545239746570587, "learning_rate": 0.001, "loss": 1.5569, "step": 22159 }, { "epoch": 0.9374735595228023, "grad_norm": 0.2001711130142212, "learning_rate": 0.001, "loss": 2.5836, "step": 22160 }, { "epoch": 0.9375158642863186, "grad_norm": 0.3313537538051605, "learning_rate": 0.001, "loss": 3.5361, "step": 22161 }, { "epoch": 0.937558169049835, "grad_norm": 0.1719702184200287, "learning_rate": 0.001, "loss": 1.6831, "step": 22162 }, { "epoch": 0.9376004738133514, "grad_norm": 0.1592639684677124, "learning_rate": 0.001, "loss": 1.5466, "step": 22163 }, { "epoch": 0.9376427785768677, "grad_norm": 0.13352732360363007, "learning_rate": 0.001, "loss": 1.5322, "step": 22164 }, { "epoch": 0.9376850833403841, "grad_norm": 0.18848414719104767, "learning_rate": 0.001, "loss": 2.1188, "step": 22165 }, { "epoch": 0.9377273881039005, "grad_norm": 0.14446255564689636, "learning_rate": 0.001, "loss": 2.4954, "step": 22166 }, { "epoch": 0.9377696928674168, "grad_norm": 0.1566217541694641, "learning_rate": 0.001, "loss": 1.6525, "step": 22167 }, { "epoch": 0.9378119976309333, "grad_norm": 0.15236781537532806, "learning_rate": 0.001, "loss": 1.9077, "step": 22168 }, { "epoch": 0.9378543023944497, "grad_norm": 0.4224134385585785, "learning_rate": 0.001, "loss": 2.2906, "step": 22169 }, { "epoch": 0.937896607157966, "grad_norm": 0.6217629313468933, "learning_rate": 0.001, "loss": 2.5966, "step": 22170 }, { "epoch": 0.9379389119214824, "grad_norm": 0.17094938457012177, "learning_rate": 0.001, "loss": 2.0306, "step": 22171 }, { "epoch": 0.9379812166849987, "grad_norm": 0.18009664118289948, "learning_rate": 0.001, "loss": 1.8595, "step": 22172 }, { "epoch": 0.9380235214485151, "grad_norm": 0.1493372768163681, "learning_rate": 0.001, "loss": 1.5855, "step": 22173 }, { "epoch": 0.9380658262120315, "grad_norm": 0.13836872577667236, "learning_rate": 0.001, "loss": 2.7204, "step": 22174 }, { "epoch": 0.9381081309755478, "grad_norm": 0.15783464908599854, "learning_rate": 0.001, "loss": 2.2411, "step": 22175 }, { "epoch": 0.9381504357390642, "grad_norm": 0.2531960606575012, "learning_rate": 0.001, "loss": 2.9065, "step": 22176 }, { "epoch": 0.9381927405025806, "grad_norm": 0.18750788271427155, "learning_rate": 0.001, "loss": 2.6953, "step": 22177 }, { "epoch": 0.9382350452660969, "grad_norm": 0.1514870524406433, "learning_rate": 0.001, "loss": 2.5924, "step": 22178 }, { "epoch": 0.9382773500296133, "grad_norm": 0.1663045436143875, "learning_rate": 0.001, "loss": 3.2424, "step": 22179 }, { "epoch": 0.9383196547931297, "grad_norm": 0.12670683860778809, "learning_rate": 0.001, "loss": 1.8389, "step": 22180 }, { "epoch": 0.938361959556646, "grad_norm": 0.16413888335227966, "learning_rate": 0.001, "loss": 1.6121, "step": 22181 }, { "epoch": 0.9384042643201624, "grad_norm": 0.1846090853214264, "learning_rate": 0.001, "loss": 2.6834, "step": 22182 }, { "epoch": 0.9384465690836788, "grad_norm": 0.17222410440444946, "learning_rate": 0.001, "loss": 1.5327, "step": 22183 }, { "epoch": 0.9384888738471951, "grad_norm": 0.18798105418682098, "learning_rate": 0.001, "loss": 2.3528, "step": 22184 }, { "epoch": 0.9385311786107116, "grad_norm": 0.12880779802799225, "learning_rate": 0.001, "loss": 3.7122, "step": 22185 }, { "epoch": 0.938573483374228, "grad_norm": 0.15221832692623138, "learning_rate": 0.001, "loss": 2.1376, "step": 22186 }, { "epoch": 0.9386157881377443, "grad_norm": 0.2237653285264969, "learning_rate": 0.001, "loss": 3.0669, "step": 22187 }, { "epoch": 0.9386580929012607, "grad_norm": 0.17113400995731354, "learning_rate": 0.001, "loss": 1.6939, "step": 22188 }, { "epoch": 0.9387003976647771, "grad_norm": 0.1556374728679657, "learning_rate": 0.001, "loss": 2.1034, "step": 22189 }, { "epoch": 0.9387427024282934, "grad_norm": 0.14844928681850433, "learning_rate": 0.001, "loss": 1.7177, "step": 22190 }, { "epoch": 0.9387850071918098, "grad_norm": 0.7886921763420105, "learning_rate": 0.001, "loss": 2.4152, "step": 22191 }, { "epoch": 0.9388273119553262, "grad_norm": 0.17194926738739014, "learning_rate": 0.001, "loss": 2.1804, "step": 22192 }, { "epoch": 0.9388696167188425, "grad_norm": 0.17443875968456268, "learning_rate": 0.001, "loss": 1.7619, "step": 22193 }, { "epoch": 0.9389119214823589, "grad_norm": 0.17937207221984863, "learning_rate": 0.001, "loss": 1.8103, "step": 22194 }, { "epoch": 0.9389542262458753, "grad_norm": 0.16381466388702393, "learning_rate": 0.001, "loss": 2.595, "step": 22195 }, { "epoch": 0.9389965310093916, "grad_norm": 0.15668261051177979, "learning_rate": 0.001, "loss": 2.379, "step": 22196 }, { "epoch": 0.939038835772908, "grad_norm": 0.1664295494556427, "learning_rate": 0.001, "loss": 1.8999, "step": 22197 }, { "epoch": 0.9390811405364244, "grad_norm": 0.20704372227191925, "learning_rate": 0.001, "loss": 2.2593, "step": 22198 }, { "epoch": 0.9391234452999407, "grad_norm": 0.17788812518119812, "learning_rate": 0.001, "loss": 2.0697, "step": 22199 }, { "epoch": 0.9391657500634571, "grad_norm": 0.20091095566749573, "learning_rate": 0.001, "loss": 2.408, "step": 22200 }, { "epoch": 0.9392080548269736, "grad_norm": 0.16119319200515747, "learning_rate": 0.001, "loss": 2.2376, "step": 22201 }, { "epoch": 0.9392503595904899, "grad_norm": 0.16883371770381927, "learning_rate": 0.001, "loss": 1.7975, "step": 22202 }, { "epoch": 0.9392926643540063, "grad_norm": 0.15238231420516968, "learning_rate": 0.001, "loss": 2.2641, "step": 22203 }, { "epoch": 0.9393349691175227, "grad_norm": 0.14293359220027924, "learning_rate": 0.001, "loss": 1.9112, "step": 22204 }, { "epoch": 0.939377273881039, "grad_norm": 0.41308876872062683, "learning_rate": 0.001, "loss": 2.4264, "step": 22205 }, { "epoch": 0.9394195786445554, "grad_norm": 0.1429714858531952, "learning_rate": 0.001, "loss": 2.8803, "step": 22206 }, { "epoch": 0.9394618834080718, "grad_norm": 0.16283197700977325, "learning_rate": 0.001, "loss": 1.6608, "step": 22207 }, { "epoch": 0.9395041881715881, "grad_norm": 0.18357500433921814, "learning_rate": 0.001, "loss": 2.3365, "step": 22208 }, { "epoch": 0.9395464929351045, "grad_norm": 0.1459013670682907, "learning_rate": 0.001, "loss": 1.9035, "step": 22209 }, { "epoch": 0.9395887976986209, "grad_norm": 0.13705354928970337, "learning_rate": 0.001, "loss": 2.3804, "step": 22210 }, { "epoch": 0.9396311024621372, "grad_norm": 0.15534013509750366, "learning_rate": 0.001, "loss": 2.1509, "step": 22211 }, { "epoch": 0.9396734072256536, "grad_norm": 0.19198080897331238, "learning_rate": 0.001, "loss": 1.5588, "step": 22212 }, { "epoch": 0.93971571198917, "grad_norm": 0.14550134539604187, "learning_rate": 0.001, "loss": 1.6061, "step": 22213 }, { "epoch": 0.9397580167526863, "grad_norm": 0.1513293832540512, "learning_rate": 0.001, "loss": 2.114, "step": 22214 }, { "epoch": 0.9398003215162027, "grad_norm": 6.008566379547119, "learning_rate": 0.001, "loss": 1.6516, "step": 22215 }, { "epoch": 0.9398426262797192, "grad_norm": 0.14722181856632233, "learning_rate": 0.001, "loss": 1.2652, "step": 22216 }, { "epoch": 0.9398849310432354, "grad_norm": 0.16536396741867065, "learning_rate": 0.001, "loss": 1.9738, "step": 22217 }, { "epoch": 0.9399272358067519, "grad_norm": 0.1685878336429596, "learning_rate": 0.001, "loss": 1.5726, "step": 22218 }, { "epoch": 0.9399695405702682, "grad_norm": 0.19034594297409058, "learning_rate": 0.001, "loss": 2.2402, "step": 22219 }, { "epoch": 0.9400118453337846, "grad_norm": 0.16235460340976715, "learning_rate": 0.001, "loss": 2.2383, "step": 22220 }, { "epoch": 0.940054150097301, "grad_norm": 0.1350528448820114, "learning_rate": 0.001, "loss": 3.5003, "step": 22221 }, { "epoch": 0.9400964548608173, "grad_norm": 0.5780348181724548, "learning_rate": 0.001, "loss": 3.6418, "step": 22222 }, { "epoch": 0.9401387596243337, "grad_norm": 0.9433380365371704, "learning_rate": 0.001, "loss": 2.0663, "step": 22223 }, { "epoch": 0.9401810643878501, "grad_norm": 1.4721174240112305, "learning_rate": 0.001, "loss": 2.5166, "step": 22224 }, { "epoch": 0.9402233691513664, "grad_norm": 0.18319587409496307, "learning_rate": 0.001, "loss": 2.239, "step": 22225 }, { "epoch": 0.9402656739148828, "grad_norm": 0.14535488188266754, "learning_rate": 0.001, "loss": 2.5118, "step": 22226 }, { "epoch": 0.9403079786783992, "grad_norm": 0.1889243721961975, "learning_rate": 0.001, "loss": 2.082, "step": 22227 }, { "epoch": 0.9403502834419155, "grad_norm": 0.15350744128227234, "learning_rate": 0.001, "loss": 2.3159, "step": 22228 }, { "epoch": 0.9403925882054319, "grad_norm": 0.9224516749382019, "learning_rate": 0.001, "loss": 1.3976, "step": 22229 }, { "epoch": 0.9404348929689483, "grad_norm": 0.132807195186615, "learning_rate": 0.001, "loss": 2.8195, "step": 22230 }, { "epoch": 0.9404771977324646, "grad_norm": 0.17904257774353027, "learning_rate": 0.001, "loss": 2.3766, "step": 22231 }, { "epoch": 0.940519502495981, "grad_norm": 0.15651574730873108, "learning_rate": 0.001, "loss": 1.5067, "step": 22232 }, { "epoch": 0.9405618072594975, "grad_norm": 0.1492500901222229, "learning_rate": 0.001, "loss": 1.3076, "step": 22233 }, { "epoch": 0.9406041120230137, "grad_norm": 0.177638441324234, "learning_rate": 0.001, "loss": 2.5056, "step": 22234 }, { "epoch": 0.9406464167865302, "grad_norm": 0.1903133988380432, "learning_rate": 0.001, "loss": 2.0664, "step": 22235 }, { "epoch": 0.9406887215500466, "grad_norm": 0.2579449415206909, "learning_rate": 0.001, "loss": 1.6951, "step": 22236 }, { "epoch": 0.9407310263135629, "grad_norm": 0.19664129614830017, "learning_rate": 0.001, "loss": 2.2619, "step": 22237 }, { "epoch": 0.9407733310770793, "grad_norm": 0.1550266593694687, "learning_rate": 0.001, "loss": 1.8561, "step": 22238 }, { "epoch": 0.9408156358405957, "grad_norm": 0.3928470015525818, "learning_rate": 0.001, "loss": 2.0354, "step": 22239 }, { "epoch": 0.940857940604112, "grad_norm": 0.16893982887268066, "learning_rate": 0.001, "loss": 2.6994, "step": 22240 }, { "epoch": 0.9409002453676284, "grad_norm": 0.16361390054225922, "learning_rate": 0.001, "loss": 2.2244, "step": 22241 }, { "epoch": 0.9409425501311448, "grad_norm": 0.14021305739879608, "learning_rate": 0.001, "loss": 2.2345, "step": 22242 }, { "epoch": 0.9409848548946611, "grad_norm": 0.1266973614692688, "learning_rate": 0.001, "loss": 1.6334, "step": 22243 }, { "epoch": 0.9410271596581775, "grad_norm": 0.16998490691184998, "learning_rate": 0.001, "loss": 1.9222, "step": 22244 }, { "epoch": 0.9410694644216939, "grad_norm": 0.17645969986915588, "learning_rate": 0.001, "loss": 2.8088, "step": 22245 }, { "epoch": 0.9411117691852102, "grad_norm": 0.14849461615085602, "learning_rate": 0.001, "loss": 1.4422, "step": 22246 }, { "epoch": 0.9411540739487266, "grad_norm": 0.12691397964954376, "learning_rate": 0.001, "loss": 1.4996, "step": 22247 }, { "epoch": 0.941196378712243, "grad_norm": 0.13772864639759064, "learning_rate": 0.001, "loss": 2.2293, "step": 22248 }, { "epoch": 0.9412386834757593, "grad_norm": 0.15115776658058167, "learning_rate": 0.001, "loss": 1.4696, "step": 22249 }, { "epoch": 0.9412809882392758, "grad_norm": 0.14529401063919067, "learning_rate": 0.001, "loss": 2.4046, "step": 22250 }, { "epoch": 0.9413232930027922, "grad_norm": 0.14119921624660492, "learning_rate": 0.001, "loss": 1.5726, "step": 22251 }, { "epoch": 0.9413655977663085, "grad_norm": 0.31882578134536743, "learning_rate": 0.001, "loss": 2.2414, "step": 22252 }, { "epoch": 0.9414079025298249, "grad_norm": 0.20864500105381012, "learning_rate": 0.001, "loss": 2.1722, "step": 22253 }, { "epoch": 0.9414502072933413, "grad_norm": 0.15085260570049286, "learning_rate": 0.001, "loss": 1.5922, "step": 22254 }, { "epoch": 0.9414925120568576, "grad_norm": 0.15553170442581177, "learning_rate": 0.001, "loss": 1.7225, "step": 22255 }, { "epoch": 0.941534816820374, "grad_norm": 41.11113739013672, "learning_rate": 0.001, "loss": 1.9813, "step": 22256 }, { "epoch": 0.9415771215838904, "grad_norm": 0.1472524106502533, "learning_rate": 0.001, "loss": 1.5666, "step": 22257 }, { "epoch": 0.9416194263474067, "grad_norm": 0.15531371533870697, "learning_rate": 0.001, "loss": 2.4356, "step": 22258 }, { "epoch": 0.9416617311109231, "grad_norm": 0.158295139670372, "learning_rate": 0.001, "loss": 1.8226, "step": 22259 }, { "epoch": 0.9417040358744395, "grad_norm": 0.1623242199420929, "learning_rate": 0.001, "loss": 2.2772, "step": 22260 }, { "epoch": 0.9417463406379558, "grad_norm": 0.14404623210430145, "learning_rate": 0.001, "loss": 2.1139, "step": 22261 }, { "epoch": 0.9417886454014722, "grad_norm": 0.15782445669174194, "learning_rate": 0.001, "loss": 1.8102, "step": 22262 }, { "epoch": 0.9418309501649885, "grad_norm": 0.15719851851463318, "learning_rate": 0.001, "loss": 1.3974, "step": 22263 }, { "epoch": 0.9418732549285049, "grad_norm": 0.17760580778121948, "learning_rate": 0.001, "loss": 1.7701, "step": 22264 }, { "epoch": 0.9419155596920213, "grad_norm": 0.1664871722459793, "learning_rate": 0.001, "loss": 2.3184, "step": 22265 }, { "epoch": 0.9419578644555376, "grad_norm": 0.47373244166374207, "learning_rate": 0.001, "loss": 2.0407, "step": 22266 }, { "epoch": 0.942000169219054, "grad_norm": 0.5430980324745178, "learning_rate": 0.001, "loss": 1.881, "step": 22267 }, { "epoch": 0.9420424739825705, "grad_norm": 0.17633113265037537, "learning_rate": 0.001, "loss": 1.8635, "step": 22268 }, { "epoch": 0.9420847787460868, "grad_norm": 0.1332206279039383, "learning_rate": 0.001, "loss": 2.9, "step": 22269 }, { "epoch": 0.9421270835096032, "grad_norm": 0.17408061027526855, "learning_rate": 0.001, "loss": 2.2226, "step": 22270 }, { "epoch": 0.9421693882731196, "grad_norm": 0.19134634733200073, "learning_rate": 0.001, "loss": 3.0214, "step": 22271 }, { "epoch": 0.9422116930366359, "grad_norm": 0.16139277815818787, "learning_rate": 0.001, "loss": 3.3642, "step": 22272 }, { "epoch": 0.9422539978001523, "grad_norm": 0.14892078936100006, "learning_rate": 0.001, "loss": 2.0493, "step": 22273 }, { "epoch": 0.9422963025636687, "grad_norm": 0.18064714968204498, "learning_rate": 0.001, "loss": 1.9058, "step": 22274 }, { "epoch": 0.942338607327185, "grad_norm": 4.666594505310059, "learning_rate": 0.001, "loss": 1.9391, "step": 22275 }, { "epoch": 0.9423809120907014, "grad_norm": 0.16249027848243713, "learning_rate": 0.001, "loss": 2.5574, "step": 22276 }, { "epoch": 0.9424232168542178, "grad_norm": 0.14459872245788574, "learning_rate": 0.001, "loss": 1.8483, "step": 22277 }, { "epoch": 0.9424655216177341, "grad_norm": 0.1476597934961319, "learning_rate": 0.001, "loss": 1.9867, "step": 22278 }, { "epoch": 0.9425078263812505, "grad_norm": 0.4886634945869446, "learning_rate": 0.001, "loss": 2.0321, "step": 22279 }, { "epoch": 0.9425501311447669, "grad_norm": 0.23067691922187805, "learning_rate": 0.001, "loss": 2.1118, "step": 22280 }, { "epoch": 0.9425924359082832, "grad_norm": 0.14237605035305023, "learning_rate": 0.001, "loss": 1.9353, "step": 22281 }, { "epoch": 0.9426347406717996, "grad_norm": 0.1730206310749054, "learning_rate": 0.001, "loss": 2.1826, "step": 22282 }, { "epoch": 0.942677045435316, "grad_norm": 0.14405101537704468, "learning_rate": 0.001, "loss": 3.3043, "step": 22283 }, { "epoch": 0.9427193501988324, "grad_norm": 0.2477824091911316, "learning_rate": 0.001, "loss": 3.2702, "step": 22284 }, { "epoch": 0.9427616549623488, "grad_norm": 0.12695826590061188, "learning_rate": 0.001, "loss": 2.3018, "step": 22285 }, { "epoch": 0.9428039597258652, "grad_norm": 2.1101129055023193, "learning_rate": 0.001, "loss": 2.0449, "step": 22286 }, { "epoch": 0.9428462644893815, "grad_norm": 0.13695190846920013, "learning_rate": 0.001, "loss": 2.5038, "step": 22287 }, { "epoch": 0.9428885692528979, "grad_norm": 0.1824672371149063, "learning_rate": 0.001, "loss": 2.4115, "step": 22288 }, { "epoch": 0.9429308740164143, "grad_norm": 0.1646248996257782, "learning_rate": 0.001, "loss": 4.0476, "step": 22289 }, { "epoch": 0.9429731787799306, "grad_norm": 0.13337703049182892, "learning_rate": 0.001, "loss": 2.0791, "step": 22290 }, { "epoch": 0.943015483543447, "grad_norm": 0.13796573877334595, "learning_rate": 0.001, "loss": 1.7621, "step": 22291 }, { "epoch": 0.9430577883069634, "grad_norm": 0.16016069054603577, "learning_rate": 0.001, "loss": 1.8191, "step": 22292 }, { "epoch": 0.9431000930704797, "grad_norm": 0.15696018934249878, "learning_rate": 0.001, "loss": 1.7768, "step": 22293 }, { "epoch": 0.9431423978339961, "grad_norm": 0.2677614092826843, "learning_rate": 0.001, "loss": 1.8438, "step": 22294 }, { "epoch": 0.9431847025975125, "grad_norm": 0.17819730937480927, "learning_rate": 0.001, "loss": 1.4621, "step": 22295 }, { "epoch": 0.9432270073610288, "grad_norm": 0.9130420088768005, "learning_rate": 0.001, "loss": 3.1412, "step": 22296 }, { "epoch": 0.9432693121245452, "grad_norm": 0.1402437835931778, "learning_rate": 0.001, "loss": 2.0391, "step": 22297 }, { "epoch": 0.9433116168880616, "grad_norm": 1.1969804763793945, "learning_rate": 0.001, "loss": 2.7918, "step": 22298 }, { "epoch": 0.9433539216515779, "grad_norm": 0.7922569513320923, "learning_rate": 0.001, "loss": 1.9959, "step": 22299 }, { "epoch": 0.9433962264150944, "grad_norm": 0.17550984025001526, "learning_rate": 0.001, "loss": 1.9096, "step": 22300 }, { "epoch": 0.9434385311786108, "grad_norm": 0.14983303844928741, "learning_rate": 0.001, "loss": 1.9521, "step": 22301 }, { "epoch": 0.9434808359421271, "grad_norm": 0.14537028968334198, "learning_rate": 0.001, "loss": 2.2255, "step": 22302 }, { "epoch": 0.9435231407056435, "grad_norm": 0.18867170810699463, "learning_rate": 0.001, "loss": 1.7019, "step": 22303 }, { "epoch": 0.9435654454691599, "grad_norm": 0.183610200881958, "learning_rate": 0.001, "loss": 1.4077, "step": 22304 }, { "epoch": 0.9436077502326762, "grad_norm": 0.15308107435703278, "learning_rate": 0.001, "loss": 1.8193, "step": 22305 }, { "epoch": 0.9436500549961926, "grad_norm": 0.15715880692005157, "learning_rate": 0.001, "loss": 1.7452, "step": 22306 }, { "epoch": 0.943692359759709, "grad_norm": 0.15837711095809937, "learning_rate": 0.001, "loss": 1.8037, "step": 22307 }, { "epoch": 0.9437346645232253, "grad_norm": 0.1577230989933014, "learning_rate": 0.001, "loss": 1.6906, "step": 22308 }, { "epoch": 0.9437769692867417, "grad_norm": 0.1631125658750534, "learning_rate": 0.001, "loss": 1.8791, "step": 22309 }, { "epoch": 0.943819274050258, "grad_norm": 0.15031826496124268, "learning_rate": 0.001, "loss": 3.3296, "step": 22310 }, { "epoch": 0.9438615788137744, "grad_norm": 0.15790674090385437, "learning_rate": 0.001, "loss": 1.7894, "step": 22311 }, { "epoch": 0.9439038835772908, "grad_norm": 0.16160251200199127, "learning_rate": 0.001, "loss": 2.2725, "step": 22312 }, { "epoch": 0.9439461883408071, "grad_norm": 0.14644403755664825, "learning_rate": 0.001, "loss": 2.6085, "step": 22313 }, { "epoch": 0.9439884931043235, "grad_norm": 0.14695337414741516, "learning_rate": 0.001, "loss": 1.806, "step": 22314 }, { "epoch": 0.9440307978678399, "grad_norm": 0.22415880858898163, "learning_rate": 0.001, "loss": 1.6828, "step": 22315 }, { "epoch": 0.9440731026313562, "grad_norm": 0.1559433937072754, "learning_rate": 0.001, "loss": 2.0603, "step": 22316 }, { "epoch": 0.9441154073948727, "grad_norm": 0.14911769330501556, "learning_rate": 0.001, "loss": 3.1866, "step": 22317 }, { "epoch": 0.9441577121583891, "grad_norm": 0.17388781905174255, "learning_rate": 0.001, "loss": 2.1254, "step": 22318 }, { "epoch": 0.9442000169219054, "grad_norm": 1.0141180753707886, "learning_rate": 0.001, "loss": 2.1553, "step": 22319 }, { "epoch": 0.9442423216854218, "grad_norm": 0.13785704970359802, "learning_rate": 0.001, "loss": 1.6176, "step": 22320 }, { "epoch": 0.9442846264489382, "grad_norm": 1.1813472509384155, "learning_rate": 0.001, "loss": 1.9315, "step": 22321 }, { "epoch": 0.9443269312124545, "grad_norm": 0.15482167899608612, "learning_rate": 0.001, "loss": 2.5054, "step": 22322 }, { "epoch": 0.9443692359759709, "grad_norm": 0.164631187915802, "learning_rate": 0.001, "loss": 2.0827, "step": 22323 }, { "epoch": 0.9444115407394873, "grad_norm": 0.18228505551815033, "learning_rate": 0.001, "loss": 2.0032, "step": 22324 }, { "epoch": 0.9444538455030036, "grad_norm": 0.2625649571418762, "learning_rate": 0.001, "loss": 2.6495, "step": 22325 }, { "epoch": 0.94449615026652, "grad_norm": 0.2495313137769699, "learning_rate": 0.001, "loss": 1.887, "step": 22326 }, { "epoch": 0.9445384550300364, "grad_norm": 0.16760574281215668, "learning_rate": 0.001, "loss": 1.9866, "step": 22327 }, { "epoch": 0.9445807597935527, "grad_norm": 0.16984060406684875, "learning_rate": 0.001, "loss": 2.2137, "step": 22328 }, { "epoch": 0.9446230645570691, "grad_norm": 0.20226795971393585, "learning_rate": 0.001, "loss": 2.0092, "step": 22329 }, { "epoch": 0.9446653693205855, "grad_norm": 0.24898548424243927, "learning_rate": 0.001, "loss": 2.6886, "step": 22330 }, { "epoch": 0.9447076740841018, "grad_norm": 0.40224114060401917, "learning_rate": 0.001, "loss": 3.2176, "step": 22331 }, { "epoch": 0.9447499788476182, "grad_norm": 0.18034416437149048, "learning_rate": 0.001, "loss": 1.7549, "step": 22332 }, { "epoch": 0.9447922836111347, "grad_norm": 0.14370644092559814, "learning_rate": 0.001, "loss": 1.5195, "step": 22333 }, { "epoch": 0.944834588374651, "grad_norm": 0.7416594624519348, "learning_rate": 0.001, "loss": 1.7912, "step": 22334 }, { "epoch": 0.9448768931381674, "grad_norm": 0.15781673789024353, "learning_rate": 0.001, "loss": 2.0478, "step": 22335 }, { "epoch": 0.9449191979016838, "grad_norm": 0.17141476273536682, "learning_rate": 0.001, "loss": 2.7985, "step": 22336 }, { "epoch": 0.9449615026652001, "grad_norm": 0.15994961559772491, "learning_rate": 0.001, "loss": 2.1938, "step": 22337 }, { "epoch": 0.9450038074287165, "grad_norm": 0.3029901385307312, "learning_rate": 0.001, "loss": 1.5779, "step": 22338 }, { "epoch": 0.9450461121922329, "grad_norm": 0.20497363805770874, "learning_rate": 0.001, "loss": 2.5866, "step": 22339 }, { "epoch": 0.9450884169557492, "grad_norm": 0.1555112898349762, "learning_rate": 0.001, "loss": 3.3234, "step": 22340 }, { "epoch": 0.9451307217192656, "grad_norm": 0.18291279673576355, "learning_rate": 0.001, "loss": 2.5366, "step": 22341 }, { "epoch": 0.945173026482782, "grad_norm": 0.16539302468299866, "learning_rate": 0.001, "loss": 3.267, "step": 22342 }, { "epoch": 0.9452153312462983, "grad_norm": 0.15976227819919586, "learning_rate": 0.001, "loss": 2.2329, "step": 22343 }, { "epoch": 0.9452576360098147, "grad_norm": 0.262100487947464, "learning_rate": 0.001, "loss": 1.7827, "step": 22344 }, { "epoch": 0.9452999407733311, "grad_norm": 1.0867878198623657, "learning_rate": 0.001, "loss": 2.03, "step": 22345 }, { "epoch": 0.9453422455368474, "grad_norm": 0.16407005488872528, "learning_rate": 0.001, "loss": 2.5212, "step": 22346 }, { "epoch": 0.9453845503003638, "grad_norm": 0.1369040310382843, "learning_rate": 0.001, "loss": 2.0789, "step": 22347 }, { "epoch": 0.9454268550638802, "grad_norm": 0.13893716037273407, "learning_rate": 0.001, "loss": 1.5555, "step": 22348 }, { "epoch": 0.9454691598273965, "grad_norm": 0.5329959392547607, "learning_rate": 0.001, "loss": 2.0893, "step": 22349 }, { "epoch": 0.945511464590913, "grad_norm": 0.17351019382476807, "learning_rate": 0.001, "loss": 1.7663, "step": 22350 }, { "epoch": 0.9455537693544294, "grad_norm": 0.1668737381696701, "learning_rate": 0.001, "loss": 2.5798, "step": 22351 }, { "epoch": 0.9455960741179457, "grad_norm": 1.1766389608383179, "learning_rate": 0.001, "loss": 1.6858, "step": 22352 }, { "epoch": 0.9456383788814621, "grad_norm": 0.19534622132778168, "learning_rate": 0.001, "loss": 2.2868, "step": 22353 }, { "epoch": 0.9456806836449784, "grad_norm": 0.1639569103717804, "learning_rate": 0.001, "loss": 1.7299, "step": 22354 }, { "epoch": 0.9457229884084948, "grad_norm": 0.14215198159217834, "learning_rate": 0.001, "loss": 2.8559, "step": 22355 }, { "epoch": 0.9457652931720112, "grad_norm": 0.1718878597021103, "learning_rate": 0.001, "loss": 2.5348, "step": 22356 }, { "epoch": 0.9458075979355275, "grad_norm": 1.5104109048843384, "learning_rate": 0.001, "loss": 3.4204, "step": 22357 }, { "epoch": 0.9458499026990439, "grad_norm": 0.18013618886470795, "learning_rate": 0.001, "loss": 3.1525, "step": 22358 }, { "epoch": 0.9458922074625603, "grad_norm": 0.5852957963943481, "learning_rate": 0.001, "loss": 1.9951, "step": 22359 }, { "epoch": 0.9459345122260766, "grad_norm": 0.3737950921058655, "learning_rate": 0.001, "loss": 2.5527, "step": 22360 }, { "epoch": 0.945976816989593, "grad_norm": 0.20486804842948914, "learning_rate": 0.001, "loss": 2.8936, "step": 22361 }, { "epoch": 0.9460191217531094, "grad_norm": 0.18885180354118347, "learning_rate": 0.001, "loss": 1.9923, "step": 22362 }, { "epoch": 0.9460614265166257, "grad_norm": 0.1904320865869522, "learning_rate": 0.001, "loss": 1.5084, "step": 22363 }, { "epoch": 0.9461037312801421, "grad_norm": 0.1865696907043457, "learning_rate": 0.001, "loss": 2.2982, "step": 22364 }, { "epoch": 0.9461460360436585, "grad_norm": 0.14152313768863678, "learning_rate": 0.001, "loss": 1.7908, "step": 22365 }, { "epoch": 0.9461883408071748, "grad_norm": 0.16738182306289673, "learning_rate": 0.001, "loss": 2.1825, "step": 22366 }, { "epoch": 0.9462306455706913, "grad_norm": 0.4649767577648163, "learning_rate": 0.001, "loss": 2.4458, "step": 22367 }, { "epoch": 0.9462729503342077, "grad_norm": 0.1782921850681305, "learning_rate": 0.001, "loss": 2.2509, "step": 22368 }, { "epoch": 0.946315255097724, "grad_norm": 0.16340084373950958, "learning_rate": 0.001, "loss": 2.4144, "step": 22369 }, { "epoch": 0.9463575598612404, "grad_norm": 0.19446922838687897, "learning_rate": 0.001, "loss": 2.3684, "step": 22370 }, { "epoch": 0.9463998646247568, "grad_norm": 0.20399482548236847, "learning_rate": 0.001, "loss": 1.9716, "step": 22371 }, { "epoch": 0.9464421693882731, "grad_norm": 0.14847496151924133, "learning_rate": 0.001, "loss": 2.6068, "step": 22372 }, { "epoch": 0.9464844741517895, "grad_norm": 52.29764175415039, "learning_rate": 0.001, "loss": 2.1968, "step": 22373 }, { "epoch": 0.9465267789153059, "grad_norm": 0.18324236571788788, "learning_rate": 0.001, "loss": 2.1071, "step": 22374 }, { "epoch": 0.9465690836788222, "grad_norm": 0.1397266834974289, "learning_rate": 0.001, "loss": 1.4608, "step": 22375 }, { "epoch": 0.9466113884423386, "grad_norm": 0.19050876796245575, "learning_rate": 0.001, "loss": 1.6181, "step": 22376 }, { "epoch": 0.946653693205855, "grad_norm": 0.17032888531684875, "learning_rate": 0.001, "loss": 1.9212, "step": 22377 }, { "epoch": 0.9466959979693713, "grad_norm": 0.1688220053911209, "learning_rate": 0.001, "loss": 1.6201, "step": 22378 }, { "epoch": 0.9467383027328877, "grad_norm": 0.1576201170682907, "learning_rate": 0.001, "loss": 2.0552, "step": 22379 }, { "epoch": 0.9467806074964041, "grad_norm": 0.17971685528755188, "learning_rate": 0.001, "loss": 1.8798, "step": 22380 }, { "epoch": 0.9468229122599204, "grad_norm": 0.21118150651454926, "learning_rate": 0.001, "loss": 2.0006, "step": 22381 }, { "epoch": 0.9468652170234368, "grad_norm": 0.17267964780330658, "learning_rate": 0.001, "loss": 1.6616, "step": 22382 }, { "epoch": 0.9469075217869533, "grad_norm": 0.15508294105529785, "learning_rate": 0.001, "loss": 1.9034, "step": 22383 }, { "epoch": 0.9469498265504696, "grad_norm": 0.2111867219209671, "learning_rate": 0.001, "loss": 1.9499, "step": 22384 }, { "epoch": 0.946992131313986, "grad_norm": 0.1746092438697815, "learning_rate": 0.001, "loss": 1.4817, "step": 22385 }, { "epoch": 0.9470344360775024, "grad_norm": 0.33903783559799194, "learning_rate": 0.001, "loss": 1.9695, "step": 22386 }, { "epoch": 0.9470767408410187, "grad_norm": 0.362958699464798, "learning_rate": 0.001, "loss": 2.04, "step": 22387 }, { "epoch": 0.9471190456045351, "grad_norm": 0.15616007149219513, "learning_rate": 0.001, "loss": 1.8717, "step": 22388 }, { "epoch": 0.9471613503680515, "grad_norm": 0.19031497836112976, "learning_rate": 0.001, "loss": 2.02, "step": 22389 }, { "epoch": 0.9472036551315678, "grad_norm": 0.1764826625585556, "learning_rate": 0.001, "loss": 1.3835, "step": 22390 }, { "epoch": 0.9472459598950842, "grad_norm": 0.1563934087753296, "learning_rate": 0.001, "loss": 2.3536, "step": 22391 }, { "epoch": 0.9472882646586006, "grad_norm": 0.15653590857982635, "learning_rate": 0.001, "loss": 2.4971, "step": 22392 }, { "epoch": 0.9473305694221169, "grad_norm": 0.1690565049648285, "learning_rate": 0.001, "loss": 2.161, "step": 22393 }, { "epoch": 0.9473728741856333, "grad_norm": 0.1902676671743393, "learning_rate": 0.001, "loss": 1.7483, "step": 22394 }, { "epoch": 0.9474151789491497, "grad_norm": 0.14070867002010345, "learning_rate": 0.001, "loss": 1.9859, "step": 22395 }, { "epoch": 0.947457483712666, "grad_norm": 0.1544153243303299, "learning_rate": 0.001, "loss": 1.7372, "step": 22396 }, { "epoch": 0.9474997884761824, "grad_norm": 0.12571722269058228, "learning_rate": 0.001, "loss": 2.009, "step": 22397 }, { "epoch": 0.9475420932396987, "grad_norm": 0.18831411004066467, "learning_rate": 0.001, "loss": 2.0311, "step": 22398 }, { "epoch": 0.9475843980032151, "grad_norm": 0.19460955262184143, "learning_rate": 0.001, "loss": 1.6814, "step": 22399 }, { "epoch": 0.9476267027667316, "grad_norm": 0.14858324825763702, "learning_rate": 0.001, "loss": 1.8082, "step": 22400 }, { "epoch": 0.9476690075302479, "grad_norm": 0.18766085803508759, "learning_rate": 0.001, "loss": 2.5125, "step": 22401 }, { "epoch": 0.9477113122937643, "grad_norm": 0.15538080036640167, "learning_rate": 0.001, "loss": 1.6178, "step": 22402 }, { "epoch": 0.9477536170572807, "grad_norm": 0.21831777691841125, "learning_rate": 0.001, "loss": 3.1002, "step": 22403 }, { "epoch": 0.947795921820797, "grad_norm": 0.1795506328344345, "learning_rate": 0.001, "loss": 1.9556, "step": 22404 }, { "epoch": 0.9478382265843134, "grad_norm": 0.12825722992420197, "learning_rate": 0.001, "loss": 2.2971, "step": 22405 }, { "epoch": 0.9478805313478298, "grad_norm": 0.16179698705673218, "learning_rate": 0.001, "loss": 2.6207, "step": 22406 }, { "epoch": 0.9479228361113461, "grad_norm": 0.17438729107379913, "learning_rate": 0.001, "loss": 1.7021, "step": 22407 }, { "epoch": 0.9479651408748625, "grad_norm": 0.14387691020965576, "learning_rate": 0.001, "loss": 2.1247, "step": 22408 }, { "epoch": 0.9480074456383789, "grad_norm": 0.14491653442382812, "learning_rate": 0.001, "loss": 1.5185, "step": 22409 }, { "epoch": 0.9480497504018952, "grad_norm": 0.15343250334262848, "learning_rate": 0.001, "loss": 1.7086, "step": 22410 }, { "epoch": 0.9480920551654116, "grad_norm": 0.2698756456375122, "learning_rate": 0.001, "loss": 2.0098, "step": 22411 }, { "epoch": 0.948134359928928, "grad_norm": 0.12875612080097198, "learning_rate": 0.001, "loss": 1.5075, "step": 22412 }, { "epoch": 0.9481766646924443, "grad_norm": 0.2064325362443924, "learning_rate": 0.001, "loss": 1.9919, "step": 22413 }, { "epoch": 0.9482189694559607, "grad_norm": 0.1798713207244873, "learning_rate": 0.001, "loss": 2.8559, "step": 22414 }, { "epoch": 0.9482612742194771, "grad_norm": 4.14750337600708, "learning_rate": 0.001, "loss": 2.7561, "step": 22415 }, { "epoch": 0.9483035789829934, "grad_norm": 0.15084944665431976, "learning_rate": 0.001, "loss": 2.2551, "step": 22416 }, { "epoch": 0.9483458837465099, "grad_norm": 0.1535675972700119, "learning_rate": 0.001, "loss": 3.2881, "step": 22417 }, { "epoch": 0.9483881885100263, "grad_norm": 0.1589122712612152, "learning_rate": 0.001, "loss": 1.8991, "step": 22418 }, { "epoch": 0.9484304932735426, "grad_norm": 0.15410476922988892, "learning_rate": 0.001, "loss": 2.1234, "step": 22419 }, { "epoch": 0.948472798037059, "grad_norm": 0.15095588564872742, "learning_rate": 0.001, "loss": 1.6618, "step": 22420 }, { "epoch": 0.9485151028005754, "grad_norm": 0.14121654629707336, "learning_rate": 0.001, "loss": 1.6598, "step": 22421 }, { "epoch": 0.9485574075640917, "grad_norm": 0.1701010763645172, "learning_rate": 0.001, "loss": 1.8561, "step": 22422 }, { "epoch": 0.9485997123276081, "grad_norm": 0.15959163010120392, "learning_rate": 0.001, "loss": 1.7565, "step": 22423 }, { "epoch": 0.9486420170911245, "grad_norm": 0.1474931389093399, "learning_rate": 0.001, "loss": 2.6955, "step": 22424 }, { "epoch": 0.9486843218546408, "grad_norm": 0.17197000980377197, "learning_rate": 0.001, "loss": 3.4178, "step": 22425 }, { "epoch": 0.9487266266181572, "grad_norm": 0.27430710196495056, "learning_rate": 0.001, "loss": 2.2628, "step": 22426 }, { "epoch": 0.9487689313816736, "grad_norm": 0.15919172763824463, "learning_rate": 0.001, "loss": 1.9106, "step": 22427 }, { "epoch": 0.9488112361451899, "grad_norm": 11.105170249938965, "learning_rate": 0.001, "loss": 2.4885, "step": 22428 }, { "epoch": 0.9488535409087063, "grad_norm": 0.18378356099128723, "learning_rate": 0.001, "loss": 1.9228, "step": 22429 }, { "epoch": 0.9488958456722227, "grad_norm": 0.16684526205062866, "learning_rate": 0.001, "loss": 1.6823, "step": 22430 }, { "epoch": 0.948938150435739, "grad_norm": 0.12937648594379425, "learning_rate": 0.001, "loss": 2.4837, "step": 22431 }, { "epoch": 0.9489804551992554, "grad_norm": 6.135654449462891, "learning_rate": 0.001, "loss": 1.8389, "step": 22432 }, { "epoch": 0.9490227599627719, "grad_norm": 0.15108750760555267, "learning_rate": 0.001, "loss": 1.4086, "step": 22433 }, { "epoch": 0.9490650647262882, "grad_norm": 0.1532086580991745, "learning_rate": 0.001, "loss": 2.467, "step": 22434 }, { "epoch": 0.9491073694898046, "grad_norm": 0.21752135455608368, "learning_rate": 0.001, "loss": 3.2817, "step": 22435 }, { "epoch": 0.949149674253321, "grad_norm": 0.45808929204940796, "learning_rate": 0.001, "loss": 2.6698, "step": 22436 }, { "epoch": 0.9491919790168373, "grad_norm": 0.1843804270029068, "learning_rate": 0.001, "loss": 1.7856, "step": 22437 }, { "epoch": 0.9492342837803537, "grad_norm": 0.21448786556720734, "learning_rate": 0.001, "loss": 2.3707, "step": 22438 }, { "epoch": 0.9492765885438701, "grad_norm": 0.13399538397789001, "learning_rate": 0.001, "loss": 1.7928, "step": 22439 }, { "epoch": 0.9493188933073864, "grad_norm": 0.13753435015678406, "learning_rate": 0.001, "loss": 2.8811, "step": 22440 }, { "epoch": 0.9493611980709028, "grad_norm": 0.14787648618221283, "learning_rate": 0.001, "loss": 1.6632, "step": 22441 }, { "epoch": 0.9494035028344192, "grad_norm": 0.150163933634758, "learning_rate": 0.001, "loss": 1.6527, "step": 22442 }, { "epoch": 0.9494458075979355, "grad_norm": 0.14982707798480988, "learning_rate": 0.001, "loss": 1.4879, "step": 22443 }, { "epoch": 0.9494881123614519, "grad_norm": 0.9315142035484314, "learning_rate": 0.001, "loss": 1.8539, "step": 22444 }, { "epoch": 0.9495304171249682, "grad_norm": 0.1498139351606369, "learning_rate": 0.001, "loss": 2.1401, "step": 22445 }, { "epoch": 0.9495727218884846, "grad_norm": 0.1625237613916397, "learning_rate": 0.001, "loss": 2.2502, "step": 22446 }, { "epoch": 0.949615026652001, "grad_norm": 1.6854742765426636, "learning_rate": 0.001, "loss": 1.7948, "step": 22447 }, { "epoch": 0.9496573314155173, "grad_norm": 0.24923864006996155, "learning_rate": 0.001, "loss": 1.9564, "step": 22448 }, { "epoch": 0.9496996361790337, "grad_norm": 0.16029444336891174, "learning_rate": 0.001, "loss": 1.8311, "step": 22449 }, { "epoch": 0.9497419409425502, "grad_norm": 0.18315951526165009, "learning_rate": 0.001, "loss": 2.9322, "step": 22450 }, { "epoch": 0.9497842457060665, "grad_norm": 0.14090849459171295, "learning_rate": 0.001, "loss": 1.3484, "step": 22451 }, { "epoch": 0.9498265504695829, "grad_norm": 0.19691091775894165, "learning_rate": 0.001, "loss": 1.3982, "step": 22452 }, { "epoch": 0.9498688552330993, "grad_norm": 0.19947366416454315, "learning_rate": 0.001, "loss": 2.8261, "step": 22453 }, { "epoch": 0.9499111599966156, "grad_norm": 0.1901845932006836, "learning_rate": 0.001, "loss": 2.3973, "step": 22454 }, { "epoch": 0.949953464760132, "grad_norm": 12.674116134643555, "learning_rate": 0.001, "loss": 2.4079, "step": 22455 }, { "epoch": 0.9499957695236484, "grad_norm": 0.20128510892391205, "learning_rate": 0.001, "loss": 2.0499, "step": 22456 }, { "epoch": 0.9500380742871647, "grad_norm": 1.169372320175171, "learning_rate": 0.001, "loss": 2.0313, "step": 22457 }, { "epoch": 0.9500803790506811, "grad_norm": 0.7405765652656555, "learning_rate": 0.001, "loss": 2.3182, "step": 22458 }, { "epoch": 0.9501226838141975, "grad_norm": 0.16553084552288055, "learning_rate": 0.001, "loss": 1.6338, "step": 22459 }, { "epoch": 0.9501649885777138, "grad_norm": 0.34061887860298157, "learning_rate": 0.001, "loss": 1.7407, "step": 22460 }, { "epoch": 0.9502072933412302, "grad_norm": 0.13990429043769836, "learning_rate": 0.001, "loss": 1.731, "step": 22461 }, { "epoch": 0.9502495981047466, "grad_norm": 0.1741171032190323, "learning_rate": 0.001, "loss": 1.9323, "step": 22462 }, { "epoch": 0.9502919028682629, "grad_norm": 0.1664520800113678, "learning_rate": 0.001, "loss": 2.2529, "step": 22463 }, { "epoch": 0.9503342076317793, "grad_norm": 0.17836974561214447, "learning_rate": 0.001, "loss": 2.3956, "step": 22464 }, { "epoch": 0.9503765123952957, "grad_norm": 0.16051559150218964, "learning_rate": 0.001, "loss": 2.4441, "step": 22465 }, { "epoch": 0.950418817158812, "grad_norm": 0.14825597405433655, "learning_rate": 0.001, "loss": 2.1567, "step": 22466 }, { "epoch": 0.9504611219223285, "grad_norm": 0.1824505478143692, "learning_rate": 0.001, "loss": 2.5797, "step": 22467 }, { "epoch": 0.9505034266858449, "grad_norm": 0.21144576370716095, "learning_rate": 0.001, "loss": 2.8215, "step": 22468 }, { "epoch": 0.9505457314493612, "grad_norm": 0.16636143624782562, "learning_rate": 0.001, "loss": 2.8168, "step": 22469 }, { "epoch": 0.9505880362128776, "grad_norm": 0.3710878789424896, "learning_rate": 0.001, "loss": 3.2767, "step": 22470 }, { "epoch": 0.950630340976394, "grad_norm": 0.1762601137161255, "learning_rate": 0.001, "loss": 2.8635, "step": 22471 }, { "epoch": 0.9506726457399103, "grad_norm": 2.875742197036743, "learning_rate": 0.001, "loss": 1.802, "step": 22472 }, { "epoch": 0.9507149505034267, "grad_norm": 0.16046804189682007, "learning_rate": 0.001, "loss": 1.5675, "step": 22473 }, { "epoch": 0.9507572552669431, "grad_norm": 0.18604080379009247, "learning_rate": 0.001, "loss": 2.3506, "step": 22474 }, { "epoch": 0.9507995600304594, "grad_norm": 0.1995047777891159, "learning_rate": 0.001, "loss": 1.4797, "step": 22475 }, { "epoch": 0.9508418647939758, "grad_norm": 41.969749450683594, "learning_rate": 0.001, "loss": 1.6885, "step": 22476 }, { "epoch": 0.9508841695574922, "grad_norm": 0.17194336652755737, "learning_rate": 0.001, "loss": 1.8758, "step": 22477 }, { "epoch": 0.9509264743210085, "grad_norm": 0.1948256492614746, "learning_rate": 0.001, "loss": 2.4022, "step": 22478 }, { "epoch": 0.9509687790845249, "grad_norm": 0.21217121183872223, "learning_rate": 0.001, "loss": 2.4836, "step": 22479 }, { "epoch": 0.9510110838480413, "grad_norm": 0.1763080209493637, "learning_rate": 0.001, "loss": 2.3558, "step": 22480 }, { "epoch": 0.9510533886115576, "grad_norm": 0.22325830161571503, "learning_rate": 0.001, "loss": 2.0567, "step": 22481 }, { "epoch": 0.951095693375074, "grad_norm": 0.16838374733924866, "learning_rate": 0.001, "loss": 2.779, "step": 22482 }, { "epoch": 0.9511379981385905, "grad_norm": 0.17531989514827728, "learning_rate": 0.001, "loss": 2.0818, "step": 22483 }, { "epoch": 0.9511803029021068, "grad_norm": 0.17867770791053772, "learning_rate": 0.001, "loss": 2.5842, "step": 22484 }, { "epoch": 0.9512226076656232, "grad_norm": 0.1932533085346222, "learning_rate": 0.001, "loss": 1.9442, "step": 22485 }, { "epoch": 0.9512649124291396, "grad_norm": 0.1727113425731659, "learning_rate": 0.001, "loss": 1.5683, "step": 22486 }, { "epoch": 0.9513072171926559, "grad_norm": 0.1291782408952713, "learning_rate": 0.001, "loss": 2.3555, "step": 22487 }, { "epoch": 0.9513495219561723, "grad_norm": 0.7629174590110779, "learning_rate": 0.001, "loss": 2.651, "step": 22488 }, { "epoch": 0.9513918267196886, "grad_norm": 0.12951643764972687, "learning_rate": 0.001, "loss": 1.9717, "step": 22489 }, { "epoch": 0.951434131483205, "grad_norm": 0.6491950750350952, "learning_rate": 0.001, "loss": 1.9874, "step": 22490 }, { "epoch": 0.9514764362467214, "grad_norm": 0.1988588273525238, "learning_rate": 0.001, "loss": 2.0231, "step": 22491 }, { "epoch": 0.9515187410102377, "grad_norm": 0.18948745727539062, "learning_rate": 0.001, "loss": 2.0585, "step": 22492 }, { "epoch": 0.9515610457737541, "grad_norm": 0.3615002930164337, "learning_rate": 0.001, "loss": 3.7874, "step": 22493 }, { "epoch": 0.9516033505372705, "grad_norm": 0.13920491933822632, "learning_rate": 0.001, "loss": 1.6694, "step": 22494 }, { "epoch": 0.9516456553007868, "grad_norm": 0.16164518892765045, "learning_rate": 0.001, "loss": 2.0692, "step": 22495 }, { "epoch": 0.9516879600643032, "grad_norm": 0.47578608989715576, "learning_rate": 0.001, "loss": 2.1681, "step": 22496 }, { "epoch": 0.9517302648278196, "grad_norm": 0.20885364711284637, "learning_rate": 0.001, "loss": 3.6518, "step": 22497 }, { "epoch": 0.9517725695913359, "grad_norm": 0.16062334179878235, "learning_rate": 0.001, "loss": 1.9614, "step": 22498 }, { "epoch": 0.9518148743548523, "grad_norm": 0.22703850269317627, "learning_rate": 0.001, "loss": 2.476, "step": 22499 }, { "epoch": 0.9518571791183688, "grad_norm": 0.2395503968000412, "learning_rate": 0.001, "loss": 1.9883, "step": 22500 }, { "epoch": 0.9518994838818851, "grad_norm": 0.15861296653747559, "learning_rate": 0.001, "loss": 1.8161, "step": 22501 }, { "epoch": 0.9519417886454015, "grad_norm": 0.15886007249355316, "learning_rate": 0.001, "loss": 1.8054, "step": 22502 }, { "epoch": 0.9519840934089179, "grad_norm": 0.18226757645606995, "learning_rate": 0.001, "loss": 1.7382, "step": 22503 }, { "epoch": 0.9520263981724342, "grad_norm": 0.17415927350521088, "learning_rate": 0.001, "loss": 1.6456, "step": 22504 }, { "epoch": 0.9520687029359506, "grad_norm": 0.22157450020313263, "learning_rate": 0.001, "loss": 2.0265, "step": 22505 }, { "epoch": 0.952111007699467, "grad_norm": 0.18525850772857666, "learning_rate": 0.001, "loss": 3.7023, "step": 22506 }, { "epoch": 0.9521533124629833, "grad_norm": 0.2394370287656784, "learning_rate": 0.001, "loss": 3.2888, "step": 22507 }, { "epoch": 0.9521956172264997, "grad_norm": 0.15804435312747955, "learning_rate": 0.001, "loss": 1.9221, "step": 22508 }, { "epoch": 0.9522379219900161, "grad_norm": 0.18028190732002258, "learning_rate": 0.001, "loss": 2.1621, "step": 22509 }, { "epoch": 0.9522802267535324, "grad_norm": 0.16329197585582733, "learning_rate": 0.001, "loss": 1.6036, "step": 22510 }, { "epoch": 0.9523225315170488, "grad_norm": 0.15661050379276276, "learning_rate": 0.001, "loss": 1.8212, "step": 22511 }, { "epoch": 0.9523648362805652, "grad_norm": 0.23924638330936432, "learning_rate": 0.001, "loss": 1.4197, "step": 22512 }, { "epoch": 0.9524071410440815, "grad_norm": 0.17719987034797668, "learning_rate": 0.001, "loss": 1.427, "step": 22513 }, { "epoch": 0.9524494458075979, "grad_norm": 0.1736730933189392, "learning_rate": 0.001, "loss": 1.5887, "step": 22514 }, { "epoch": 0.9524917505711143, "grad_norm": 0.1556921899318695, "learning_rate": 0.001, "loss": 2.8394, "step": 22515 }, { "epoch": 0.9525340553346306, "grad_norm": 0.15462371706962585, "learning_rate": 0.001, "loss": 1.9483, "step": 22516 }, { "epoch": 0.9525763600981471, "grad_norm": 0.16534818708896637, "learning_rate": 0.001, "loss": 2.2795, "step": 22517 }, { "epoch": 0.9526186648616635, "grad_norm": 0.276944100856781, "learning_rate": 0.001, "loss": 2.1088, "step": 22518 }, { "epoch": 0.9526609696251798, "grad_norm": 0.13408029079437256, "learning_rate": 0.001, "loss": 1.9941, "step": 22519 }, { "epoch": 0.9527032743886962, "grad_norm": 8.694868087768555, "learning_rate": 0.001, "loss": 1.924, "step": 22520 }, { "epoch": 0.9527455791522126, "grad_norm": 0.24468626081943512, "learning_rate": 0.001, "loss": 2.0428, "step": 22521 }, { "epoch": 0.9527878839157289, "grad_norm": 0.1722177416086197, "learning_rate": 0.001, "loss": 2.467, "step": 22522 }, { "epoch": 0.9528301886792453, "grad_norm": 0.16793347895145416, "learning_rate": 0.001, "loss": 1.8234, "step": 22523 }, { "epoch": 0.9528724934427617, "grad_norm": 0.6583682894706726, "learning_rate": 0.001, "loss": 1.7482, "step": 22524 }, { "epoch": 0.952914798206278, "grad_norm": 0.17453870177268982, "learning_rate": 0.001, "loss": 1.8821, "step": 22525 }, { "epoch": 0.9529571029697944, "grad_norm": 0.17266742885112762, "learning_rate": 0.001, "loss": 1.8525, "step": 22526 }, { "epoch": 0.9529994077333108, "grad_norm": 0.1756075918674469, "learning_rate": 0.001, "loss": 2.13, "step": 22527 }, { "epoch": 0.9530417124968271, "grad_norm": 0.18365883827209473, "learning_rate": 0.001, "loss": 1.8475, "step": 22528 }, { "epoch": 0.9530840172603435, "grad_norm": 1.1204431056976318, "learning_rate": 0.001, "loss": 1.7542, "step": 22529 }, { "epoch": 0.9531263220238599, "grad_norm": 0.16155286133289337, "learning_rate": 0.001, "loss": 2.4133, "step": 22530 }, { "epoch": 0.9531686267873762, "grad_norm": 1.2708326578140259, "learning_rate": 0.001, "loss": 1.942, "step": 22531 }, { "epoch": 0.9532109315508926, "grad_norm": 0.3815459609031677, "learning_rate": 0.001, "loss": 1.5244, "step": 22532 }, { "epoch": 0.953253236314409, "grad_norm": 0.1591525673866272, "learning_rate": 0.001, "loss": 1.6586, "step": 22533 }, { "epoch": 0.9532955410779254, "grad_norm": 0.16831563413143158, "learning_rate": 0.001, "loss": 1.9452, "step": 22534 }, { "epoch": 0.9533378458414418, "grad_norm": 0.1781228631734848, "learning_rate": 0.001, "loss": 1.9083, "step": 22535 }, { "epoch": 0.9533801506049581, "grad_norm": 0.196019247174263, "learning_rate": 0.001, "loss": 1.8808, "step": 22536 }, { "epoch": 0.9534224553684745, "grad_norm": 0.16879303753376007, "learning_rate": 0.001, "loss": 2.5292, "step": 22537 }, { "epoch": 0.9534647601319909, "grad_norm": 0.14344555139541626, "learning_rate": 0.001, "loss": 3.0776, "step": 22538 }, { "epoch": 0.9535070648955072, "grad_norm": 0.13954542577266693, "learning_rate": 0.001, "loss": 1.8676, "step": 22539 }, { "epoch": 0.9535493696590236, "grad_norm": 0.16517263650894165, "learning_rate": 0.001, "loss": 2.1363, "step": 22540 }, { "epoch": 0.95359167442254, "grad_norm": 0.4212926924228668, "learning_rate": 0.001, "loss": 2.4361, "step": 22541 }, { "epoch": 0.9536339791860563, "grad_norm": 0.19001418352127075, "learning_rate": 0.001, "loss": 2.4905, "step": 22542 }, { "epoch": 0.9536762839495727, "grad_norm": 0.1455460637807846, "learning_rate": 0.001, "loss": 1.8486, "step": 22543 }, { "epoch": 0.9537185887130891, "grad_norm": 0.13077104091644287, "learning_rate": 0.001, "loss": 1.5365, "step": 22544 }, { "epoch": 0.9537608934766054, "grad_norm": 0.139632910490036, "learning_rate": 0.001, "loss": 1.5715, "step": 22545 }, { "epoch": 0.9538031982401218, "grad_norm": 0.14451947808265686, "learning_rate": 0.001, "loss": 2.0676, "step": 22546 }, { "epoch": 0.9538455030036382, "grad_norm": 0.2569085657596588, "learning_rate": 0.001, "loss": 2.4596, "step": 22547 }, { "epoch": 0.9538878077671545, "grad_norm": 9.396105766296387, "learning_rate": 0.001, "loss": 1.7845, "step": 22548 }, { "epoch": 0.953930112530671, "grad_norm": 0.17778228223323822, "learning_rate": 0.001, "loss": 2.6535, "step": 22549 }, { "epoch": 0.9539724172941874, "grad_norm": 1.270423173904419, "learning_rate": 0.001, "loss": 1.7749, "step": 22550 }, { "epoch": 0.9540147220577037, "grad_norm": 0.20643652975559235, "learning_rate": 0.001, "loss": 2.5151, "step": 22551 }, { "epoch": 0.9540570268212201, "grad_norm": 0.5847330093383789, "learning_rate": 0.001, "loss": 2.6464, "step": 22552 }, { "epoch": 0.9540993315847365, "grad_norm": 0.15910720825195312, "learning_rate": 0.001, "loss": 2.5165, "step": 22553 }, { "epoch": 0.9541416363482528, "grad_norm": 0.2874158024787903, "learning_rate": 0.001, "loss": 2.2493, "step": 22554 }, { "epoch": 0.9541839411117692, "grad_norm": 0.14962883293628693, "learning_rate": 0.001, "loss": 2.0772, "step": 22555 }, { "epoch": 0.9542262458752856, "grad_norm": 0.16051600873470306, "learning_rate": 0.001, "loss": 1.4298, "step": 22556 }, { "epoch": 0.9542685506388019, "grad_norm": 0.16125862300395966, "learning_rate": 0.001, "loss": 1.383, "step": 22557 }, { "epoch": 0.9543108554023183, "grad_norm": 0.15643396973609924, "learning_rate": 0.001, "loss": 1.9494, "step": 22558 }, { "epoch": 0.9543531601658347, "grad_norm": 0.2045285403728485, "learning_rate": 0.001, "loss": 2.7422, "step": 22559 }, { "epoch": 0.954395464929351, "grad_norm": 0.2184513658285141, "learning_rate": 0.001, "loss": 2.085, "step": 22560 }, { "epoch": 0.9544377696928674, "grad_norm": 0.16785749793052673, "learning_rate": 0.001, "loss": 2.3423, "step": 22561 }, { "epoch": 0.9544800744563838, "grad_norm": 0.16938123106956482, "learning_rate": 0.001, "loss": 1.6398, "step": 22562 }, { "epoch": 0.9545223792199001, "grad_norm": 0.15510663390159607, "learning_rate": 0.001, "loss": 1.739, "step": 22563 }, { "epoch": 0.9545646839834165, "grad_norm": 0.16871055960655212, "learning_rate": 0.001, "loss": 1.6647, "step": 22564 }, { "epoch": 0.954606988746933, "grad_norm": 0.2923224866390228, "learning_rate": 0.001, "loss": 3.0942, "step": 22565 }, { "epoch": 0.9546492935104492, "grad_norm": 0.37078985571861267, "learning_rate": 0.001, "loss": 1.9319, "step": 22566 }, { "epoch": 0.9546915982739657, "grad_norm": 0.16258271038532257, "learning_rate": 0.001, "loss": 3.1049, "step": 22567 }, { "epoch": 0.9547339030374821, "grad_norm": 0.18606746196746826, "learning_rate": 0.001, "loss": 3.302, "step": 22568 }, { "epoch": 0.9547762078009984, "grad_norm": 0.7223076224327087, "learning_rate": 0.001, "loss": 1.7057, "step": 22569 }, { "epoch": 0.9548185125645148, "grad_norm": 0.1600056141614914, "learning_rate": 0.001, "loss": 2.7663, "step": 22570 }, { "epoch": 0.9548608173280312, "grad_norm": 0.16260926425457, "learning_rate": 0.001, "loss": 3.0786, "step": 22571 }, { "epoch": 0.9549031220915475, "grad_norm": 0.16478076577186584, "learning_rate": 0.001, "loss": 1.8152, "step": 22572 }, { "epoch": 0.9549454268550639, "grad_norm": 0.14797167479991913, "learning_rate": 0.001, "loss": 1.6904, "step": 22573 }, { "epoch": 0.9549877316185803, "grad_norm": 0.1376873105764389, "learning_rate": 0.001, "loss": 1.672, "step": 22574 }, { "epoch": 0.9550300363820966, "grad_norm": 0.15510663390159607, "learning_rate": 0.001, "loss": 2.1794, "step": 22575 }, { "epoch": 0.955072341145613, "grad_norm": 0.15933188796043396, "learning_rate": 0.001, "loss": 2.7918, "step": 22576 }, { "epoch": 0.9551146459091294, "grad_norm": 0.15145090222358704, "learning_rate": 0.001, "loss": 1.8279, "step": 22577 }, { "epoch": 0.9551569506726457, "grad_norm": 0.17468474805355072, "learning_rate": 0.001, "loss": 3.198, "step": 22578 }, { "epoch": 0.9551992554361621, "grad_norm": 0.17872756719589233, "learning_rate": 0.001, "loss": 1.7174, "step": 22579 }, { "epoch": 0.9552415601996784, "grad_norm": 0.17756347358226776, "learning_rate": 0.001, "loss": 2.8859, "step": 22580 }, { "epoch": 0.9552838649631948, "grad_norm": 0.17524990439414978, "learning_rate": 0.001, "loss": 2.7646, "step": 22581 }, { "epoch": 0.9553261697267112, "grad_norm": 0.1490987241268158, "learning_rate": 0.001, "loss": 2.0239, "step": 22582 }, { "epoch": 0.9553684744902275, "grad_norm": 0.17126286029815674, "learning_rate": 0.001, "loss": 2.1528, "step": 22583 }, { "epoch": 0.955410779253744, "grad_norm": 0.14318470656871796, "learning_rate": 0.001, "loss": 2.0535, "step": 22584 }, { "epoch": 0.9554530840172604, "grad_norm": 0.1496138721704483, "learning_rate": 0.001, "loss": 2.2055, "step": 22585 }, { "epoch": 0.9554953887807767, "grad_norm": 0.1591835618019104, "learning_rate": 0.001, "loss": 1.8175, "step": 22586 }, { "epoch": 0.9555376935442931, "grad_norm": 0.18127931654453278, "learning_rate": 0.001, "loss": 3.1087, "step": 22587 }, { "epoch": 0.9555799983078095, "grad_norm": 0.12975792586803436, "learning_rate": 0.001, "loss": 2.2168, "step": 22588 }, { "epoch": 0.9556223030713258, "grad_norm": 0.1456218957901001, "learning_rate": 0.001, "loss": 1.4985, "step": 22589 }, { "epoch": 0.9556646078348422, "grad_norm": 0.1666790097951889, "learning_rate": 0.001, "loss": 1.8393, "step": 22590 }, { "epoch": 0.9557069125983586, "grad_norm": 0.1480528861284256, "learning_rate": 0.001, "loss": 1.7311, "step": 22591 }, { "epoch": 0.9557492173618749, "grad_norm": 0.1545533984899521, "learning_rate": 0.001, "loss": 2.0204, "step": 22592 }, { "epoch": 0.9557915221253913, "grad_norm": 0.17958112061023712, "learning_rate": 0.001, "loss": 2.5396, "step": 22593 }, { "epoch": 0.9558338268889077, "grad_norm": 0.20622247457504272, "learning_rate": 0.001, "loss": 2.4856, "step": 22594 }, { "epoch": 0.955876131652424, "grad_norm": 0.1637284755706787, "learning_rate": 0.001, "loss": 1.561, "step": 22595 }, { "epoch": 0.9559184364159404, "grad_norm": 0.13965626060962677, "learning_rate": 0.001, "loss": 1.7077, "step": 22596 }, { "epoch": 0.9559607411794568, "grad_norm": 0.15451842546463013, "learning_rate": 0.001, "loss": 2.9797, "step": 22597 }, { "epoch": 0.9560030459429731, "grad_norm": 0.1734355241060257, "learning_rate": 0.001, "loss": 1.9119, "step": 22598 }, { "epoch": 0.9560453507064895, "grad_norm": 0.16818620264530182, "learning_rate": 0.001, "loss": 2.4106, "step": 22599 }, { "epoch": 0.956087655470006, "grad_norm": 0.4067355990409851, "learning_rate": 0.001, "loss": 2.4487, "step": 22600 }, { "epoch": 0.9561299602335223, "grad_norm": 0.16298459470272064, "learning_rate": 0.001, "loss": 2.2041, "step": 22601 }, { "epoch": 0.9561722649970387, "grad_norm": 0.7233208417892456, "learning_rate": 0.001, "loss": 1.9019, "step": 22602 }, { "epoch": 0.9562145697605551, "grad_norm": 0.15039215981960297, "learning_rate": 0.001, "loss": 2.3582, "step": 22603 }, { "epoch": 0.9562568745240714, "grad_norm": 0.25849875807762146, "learning_rate": 0.001, "loss": 2.4879, "step": 22604 }, { "epoch": 0.9562991792875878, "grad_norm": 0.14870387315750122, "learning_rate": 0.001, "loss": 2.3015, "step": 22605 }, { "epoch": 0.9563414840511042, "grad_norm": 0.16163372993469238, "learning_rate": 0.001, "loss": 2.9453, "step": 22606 }, { "epoch": 0.9563837888146205, "grad_norm": 0.17681002616882324, "learning_rate": 0.001, "loss": 2.3321, "step": 22607 }, { "epoch": 0.9564260935781369, "grad_norm": 0.16098996996879578, "learning_rate": 0.001, "loss": 1.9478, "step": 22608 }, { "epoch": 0.9564683983416533, "grad_norm": 0.14798353612422943, "learning_rate": 0.001, "loss": 1.9331, "step": 22609 }, { "epoch": 0.9565107031051696, "grad_norm": 0.31577110290527344, "learning_rate": 0.001, "loss": 3.3183, "step": 22610 }, { "epoch": 0.956553007868686, "grad_norm": 0.1621190905570984, "learning_rate": 0.001, "loss": 1.9568, "step": 22611 }, { "epoch": 0.9565953126322024, "grad_norm": 0.17691479623317719, "learning_rate": 0.001, "loss": 2.06, "step": 22612 }, { "epoch": 0.9566376173957187, "grad_norm": 0.15779487788677216, "learning_rate": 0.001, "loss": 1.6084, "step": 22613 }, { "epoch": 0.9566799221592351, "grad_norm": 0.15047040581703186, "learning_rate": 0.001, "loss": 2.1398, "step": 22614 }, { "epoch": 0.9567222269227516, "grad_norm": 0.17002348601818085, "learning_rate": 0.001, "loss": 1.7664, "step": 22615 }, { "epoch": 0.9567645316862678, "grad_norm": 0.14370211958885193, "learning_rate": 0.001, "loss": 1.598, "step": 22616 }, { "epoch": 0.9568068364497843, "grad_norm": 1.530818223953247, "learning_rate": 0.001, "loss": 3.326, "step": 22617 }, { "epoch": 0.9568491412133007, "grad_norm": 0.1687633991241455, "learning_rate": 0.001, "loss": 1.355, "step": 22618 }, { "epoch": 0.956891445976817, "grad_norm": 0.13722075521945953, "learning_rate": 0.001, "loss": 3.3707, "step": 22619 }, { "epoch": 0.9569337507403334, "grad_norm": 0.16178375482559204, "learning_rate": 0.001, "loss": 2.5891, "step": 22620 }, { "epoch": 0.9569760555038498, "grad_norm": 0.7043463587760925, "learning_rate": 0.001, "loss": 2.3528, "step": 22621 }, { "epoch": 0.9570183602673661, "grad_norm": 0.14952924847602844, "learning_rate": 0.001, "loss": 1.4348, "step": 22622 }, { "epoch": 0.9570606650308825, "grad_norm": 0.14588811993598938, "learning_rate": 0.001, "loss": 2.168, "step": 22623 }, { "epoch": 0.9571029697943988, "grad_norm": 0.136274516582489, "learning_rate": 0.001, "loss": 3.4291, "step": 22624 }, { "epoch": 0.9571452745579152, "grad_norm": 0.15219195187091827, "learning_rate": 0.001, "loss": 2.353, "step": 22625 }, { "epoch": 0.9571875793214316, "grad_norm": 0.192640021443367, "learning_rate": 0.001, "loss": 2.5704, "step": 22626 }, { "epoch": 0.9572298840849479, "grad_norm": 2.2798144817352295, "learning_rate": 0.001, "loss": 2.4483, "step": 22627 }, { "epoch": 0.9572721888484643, "grad_norm": 0.12433091551065445, "learning_rate": 0.001, "loss": 2.9738, "step": 22628 }, { "epoch": 0.9573144936119807, "grad_norm": 0.14844204485416412, "learning_rate": 0.001, "loss": 2.3593, "step": 22629 }, { "epoch": 0.957356798375497, "grad_norm": 0.18237383663654327, "learning_rate": 0.001, "loss": 2.4718, "step": 22630 }, { "epoch": 0.9573991031390134, "grad_norm": 0.15633726119995117, "learning_rate": 0.001, "loss": 2.9718, "step": 22631 }, { "epoch": 0.9574414079025299, "grad_norm": 0.1357184201478958, "learning_rate": 0.001, "loss": 1.8955, "step": 22632 }, { "epoch": 0.9574837126660461, "grad_norm": 0.15892942249774933, "learning_rate": 0.001, "loss": 1.6971, "step": 22633 }, { "epoch": 0.9575260174295626, "grad_norm": 0.1862511783838272, "learning_rate": 0.001, "loss": 2.416, "step": 22634 }, { "epoch": 0.957568322193079, "grad_norm": 0.18506558239459991, "learning_rate": 0.001, "loss": 2.481, "step": 22635 }, { "epoch": 0.9576106269565953, "grad_norm": 2.206505298614502, "learning_rate": 0.001, "loss": 2.0285, "step": 22636 }, { "epoch": 0.9576529317201117, "grad_norm": 0.19020433723926544, "learning_rate": 0.001, "loss": 1.833, "step": 22637 }, { "epoch": 0.9576952364836281, "grad_norm": 0.22142185270786285, "learning_rate": 0.001, "loss": 2.196, "step": 22638 }, { "epoch": 0.9577375412471444, "grad_norm": 0.1741228997707367, "learning_rate": 0.001, "loss": 2.101, "step": 22639 }, { "epoch": 0.9577798460106608, "grad_norm": 0.1707838922739029, "learning_rate": 0.001, "loss": 2.0027, "step": 22640 }, { "epoch": 0.9578221507741772, "grad_norm": 0.16270555555820465, "learning_rate": 0.001, "loss": 2.9908, "step": 22641 }, { "epoch": 0.9578644555376935, "grad_norm": 0.1549161970615387, "learning_rate": 0.001, "loss": 1.7517, "step": 22642 }, { "epoch": 0.9579067603012099, "grad_norm": 0.16210713982582092, "learning_rate": 0.001, "loss": 2.5452, "step": 22643 }, { "epoch": 0.9579490650647263, "grad_norm": 0.14848294854164124, "learning_rate": 0.001, "loss": 2.117, "step": 22644 }, { "epoch": 0.9579913698282426, "grad_norm": 0.7370397448539734, "learning_rate": 0.001, "loss": 1.9066, "step": 22645 }, { "epoch": 0.958033674591759, "grad_norm": 0.15546540915966034, "learning_rate": 0.001, "loss": 1.5374, "step": 22646 }, { "epoch": 0.9580759793552754, "grad_norm": 0.1666499376296997, "learning_rate": 0.001, "loss": 1.7665, "step": 22647 }, { "epoch": 0.9581182841187917, "grad_norm": 0.16058315336704254, "learning_rate": 0.001, "loss": 1.7836, "step": 22648 }, { "epoch": 0.9581605888823082, "grad_norm": 0.1704726219177246, "learning_rate": 0.001, "loss": 2.0733, "step": 22649 }, { "epoch": 0.9582028936458246, "grad_norm": 1.3915730714797974, "learning_rate": 0.001, "loss": 2.1041, "step": 22650 }, { "epoch": 0.9582451984093409, "grad_norm": 0.14765958487987518, "learning_rate": 0.001, "loss": 2.1703, "step": 22651 }, { "epoch": 0.9582875031728573, "grad_norm": 0.136846661567688, "learning_rate": 0.001, "loss": 2.0503, "step": 22652 }, { "epoch": 0.9583298079363737, "grad_norm": 2.0541558265686035, "learning_rate": 0.001, "loss": 1.9055, "step": 22653 }, { "epoch": 0.95837211269989, "grad_norm": 0.168845996260643, "learning_rate": 0.001, "loss": 2.0872, "step": 22654 }, { "epoch": 0.9584144174634064, "grad_norm": 0.18082398176193237, "learning_rate": 0.001, "loss": 2.6359, "step": 22655 }, { "epoch": 0.9584567222269228, "grad_norm": 0.2050907164812088, "learning_rate": 0.001, "loss": 2.1573, "step": 22656 }, { "epoch": 0.9584990269904391, "grad_norm": 0.23154161870479584, "learning_rate": 0.001, "loss": 2.4487, "step": 22657 }, { "epoch": 0.9585413317539555, "grad_norm": 0.228712260723114, "learning_rate": 0.001, "loss": 2.5394, "step": 22658 }, { "epoch": 0.9585836365174719, "grad_norm": 0.18508756160736084, "learning_rate": 0.001, "loss": 1.982, "step": 22659 }, { "epoch": 0.9586259412809882, "grad_norm": 0.2040599286556244, "learning_rate": 0.001, "loss": 1.9579, "step": 22660 }, { "epoch": 0.9586682460445046, "grad_norm": 0.2641620337963104, "learning_rate": 0.001, "loss": 2.3784, "step": 22661 }, { "epoch": 0.958710550808021, "grad_norm": 0.17128469049930573, "learning_rate": 0.001, "loss": 2.367, "step": 22662 }, { "epoch": 0.9587528555715373, "grad_norm": 0.5706077814102173, "learning_rate": 0.001, "loss": 2.7794, "step": 22663 }, { "epoch": 0.9587951603350537, "grad_norm": 0.15378667414188385, "learning_rate": 0.001, "loss": 1.2554, "step": 22664 }, { "epoch": 0.9588374650985702, "grad_norm": 0.14689679443836212, "learning_rate": 0.001, "loss": 2.1687, "step": 22665 }, { "epoch": 0.9588797698620865, "grad_norm": 0.19567295908927917, "learning_rate": 0.001, "loss": 2.2162, "step": 22666 }, { "epoch": 0.9589220746256029, "grad_norm": 0.3699814975261688, "learning_rate": 0.001, "loss": 2.1986, "step": 22667 }, { "epoch": 0.9589643793891193, "grad_norm": 0.19808320701122284, "learning_rate": 0.001, "loss": 2.4602, "step": 22668 }, { "epoch": 0.9590066841526356, "grad_norm": 0.17543701827526093, "learning_rate": 0.001, "loss": 2.4826, "step": 22669 }, { "epoch": 0.959048988916152, "grad_norm": 0.16463440656661987, "learning_rate": 0.001, "loss": 2.3312, "step": 22670 }, { "epoch": 0.9590912936796683, "grad_norm": 15.79118537902832, "learning_rate": 0.001, "loss": 2.4984, "step": 22671 }, { "epoch": 0.9591335984431847, "grad_norm": 0.457317054271698, "learning_rate": 0.001, "loss": 1.6768, "step": 22672 }, { "epoch": 0.9591759032067011, "grad_norm": 0.3178519904613495, "learning_rate": 0.001, "loss": 1.9121, "step": 22673 }, { "epoch": 0.9592182079702174, "grad_norm": 0.1642330437898636, "learning_rate": 0.001, "loss": 1.1973, "step": 22674 }, { "epoch": 0.9592605127337338, "grad_norm": 0.9215223789215088, "learning_rate": 0.001, "loss": 1.7921, "step": 22675 }, { "epoch": 0.9593028174972502, "grad_norm": 0.15992654860019684, "learning_rate": 0.001, "loss": 1.6263, "step": 22676 }, { "epoch": 0.9593451222607665, "grad_norm": 0.18335092067718506, "learning_rate": 0.001, "loss": 1.7375, "step": 22677 }, { "epoch": 0.9593874270242829, "grad_norm": 0.19411593675613403, "learning_rate": 0.001, "loss": 1.8051, "step": 22678 }, { "epoch": 0.9594297317877993, "grad_norm": 0.16195566952228546, "learning_rate": 0.001, "loss": 3.0332, "step": 22679 }, { "epoch": 0.9594720365513156, "grad_norm": 0.5709326863288879, "learning_rate": 0.001, "loss": 2.7656, "step": 22680 }, { "epoch": 0.959514341314832, "grad_norm": 0.145700141787529, "learning_rate": 0.001, "loss": 1.9661, "step": 22681 }, { "epoch": 0.9595566460783485, "grad_norm": 0.16970385611057281, "learning_rate": 0.001, "loss": 1.9235, "step": 22682 }, { "epoch": 0.9595989508418648, "grad_norm": 0.17255772650241852, "learning_rate": 0.001, "loss": 2.1868, "step": 22683 }, { "epoch": 0.9596412556053812, "grad_norm": 0.16571679711341858, "learning_rate": 0.001, "loss": 1.8325, "step": 22684 }, { "epoch": 0.9596835603688976, "grad_norm": 0.12924878299236298, "learning_rate": 0.001, "loss": 1.8835, "step": 22685 }, { "epoch": 0.9597258651324139, "grad_norm": 0.20774781703948975, "learning_rate": 0.001, "loss": 2.1721, "step": 22686 }, { "epoch": 0.9597681698959303, "grad_norm": 0.35853028297424316, "learning_rate": 0.001, "loss": 2.2272, "step": 22687 }, { "epoch": 0.9598104746594467, "grad_norm": 0.5301867127418518, "learning_rate": 0.001, "loss": 1.568, "step": 22688 }, { "epoch": 0.959852779422963, "grad_norm": 0.19120998680591583, "learning_rate": 0.001, "loss": 2.1802, "step": 22689 }, { "epoch": 0.9598950841864794, "grad_norm": 0.16802310943603516, "learning_rate": 0.001, "loss": 1.6033, "step": 22690 }, { "epoch": 0.9599373889499958, "grad_norm": 0.17627355456352234, "learning_rate": 0.001, "loss": 2.273, "step": 22691 }, { "epoch": 0.9599796937135121, "grad_norm": 0.22242601215839386, "learning_rate": 0.001, "loss": 1.9304, "step": 22692 }, { "epoch": 0.9600219984770285, "grad_norm": 0.1294320523738861, "learning_rate": 0.001, "loss": 2.0693, "step": 22693 }, { "epoch": 0.9600643032405449, "grad_norm": 0.17644540965557098, "learning_rate": 0.001, "loss": 2.0387, "step": 22694 }, { "epoch": 0.9601066080040612, "grad_norm": 1.0202915668487549, "learning_rate": 0.001, "loss": 1.9817, "step": 22695 }, { "epoch": 0.9601489127675776, "grad_norm": 0.15253128111362457, "learning_rate": 0.001, "loss": 2.055, "step": 22696 }, { "epoch": 0.960191217531094, "grad_norm": 0.15679995715618134, "learning_rate": 0.001, "loss": 2.1456, "step": 22697 }, { "epoch": 0.9602335222946103, "grad_norm": 0.15941359102725983, "learning_rate": 0.001, "loss": 2.0955, "step": 22698 }, { "epoch": 0.9602758270581268, "grad_norm": 0.17292888462543488, "learning_rate": 0.001, "loss": 1.624, "step": 22699 }, { "epoch": 0.9603181318216432, "grad_norm": 0.1328788846731186, "learning_rate": 0.001, "loss": 1.4056, "step": 22700 }, { "epoch": 0.9603604365851595, "grad_norm": 0.18797191977500916, "learning_rate": 0.001, "loss": 2.5867, "step": 22701 }, { "epoch": 0.9604027413486759, "grad_norm": 0.7679688930511475, "learning_rate": 0.001, "loss": 3.1701, "step": 22702 }, { "epoch": 0.9604450461121923, "grad_norm": 0.6459025740623474, "learning_rate": 0.001, "loss": 4.0775, "step": 22703 }, { "epoch": 0.9604873508757086, "grad_norm": 0.13661043345928192, "learning_rate": 0.001, "loss": 2.6037, "step": 22704 }, { "epoch": 0.960529655639225, "grad_norm": 0.15741907060146332, "learning_rate": 0.001, "loss": 2.0212, "step": 22705 }, { "epoch": 0.9605719604027414, "grad_norm": 0.9307416677474976, "learning_rate": 0.001, "loss": 2.2273, "step": 22706 }, { "epoch": 0.9606142651662577, "grad_norm": 0.19062964618206024, "learning_rate": 0.001, "loss": 3.1016, "step": 22707 }, { "epoch": 0.9606565699297741, "grad_norm": 0.17339986562728882, "learning_rate": 0.001, "loss": 1.9746, "step": 22708 }, { "epoch": 0.9606988746932905, "grad_norm": 0.17814798653125763, "learning_rate": 0.001, "loss": 2.9384, "step": 22709 }, { "epoch": 0.9607411794568068, "grad_norm": 0.19734229147434235, "learning_rate": 0.001, "loss": 2.3626, "step": 22710 }, { "epoch": 0.9607834842203232, "grad_norm": 0.2042291760444641, "learning_rate": 0.001, "loss": 1.851, "step": 22711 }, { "epoch": 0.9608257889838396, "grad_norm": 1.1738054752349854, "learning_rate": 0.001, "loss": 3.5114, "step": 22712 }, { "epoch": 0.9608680937473559, "grad_norm": 3.5392398834228516, "learning_rate": 0.001, "loss": 2.1209, "step": 22713 }, { "epoch": 0.9609103985108723, "grad_norm": 0.2352452278137207, "learning_rate": 0.001, "loss": 2.2434, "step": 22714 }, { "epoch": 0.9609527032743886, "grad_norm": 0.696122407913208, "learning_rate": 0.001, "loss": 2.0429, "step": 22715 }, { "epoch": 0.960995008037905, "grad_norm": 0.24021172523498535, "learning_rate": 0.001, "loss": 1.927, "step": 22716 }, { "epoch": 0.9610373128014215, "grad_norm": 0.1911345273256302, "learning_rate": 0.001, "loss": 2.3258, "step": 22717 }, { "epoch": 0.9610796175649378, "grad_norm": 0.1680738478899002, "learning_rate": 0.001, "loss": 1.7179, "step": 22718 }, { "epoch": 0.9611219223284542, "grad_norm": 0.20426170527935028, "learning_rate": 0.001, "loss": 1.8262, "step": 22719 }, { "epoch": 0.9611642270919706, "grad_norm": 0.9206085205078125, "learning_rate": 0.001, "loss": 1.7566, "step": 22720 }, { "epoch": 0.9612065318554869, "grad_norm": 0.17064528167247772, "learning_rate": 0.001, "loss": 3.1635, "step": 22721 }, { "epoch": 0.9612488366190033, "grad_norm": 0.1871117353439331, "learning_rate": 0.001, "loss": 3.1601, "step": 22722 }, { "epoch": 0.9612911413825197, "grad_norm": 0.14905090630054474, "learning_rate": 0.001, "loss": 2.5749, "step": 22723 }, { "epoch": 0.961333446146036, "grad_norm": 0.3839198350906372, "learning_rate": 0.001, "loss": 3.2174, "step": 22724 }, { "epoch": 0.9613757509095524, "grad_norm": 0.15603891015052795, "learning_rate": 0.001, "loss": 1.784, "step": 22725 }, { "epoch": 0.9614180556730688, "grad_norm": 0.16384871304035187, "learning_rate": 0.001, "loss": 2.1131, "step": 22726 }, { "epoch": 0.9614603604365851, "grad_norm": 0.1653030961751938, "learning_rate": 0.001, "loss": 2.8662, "step": 22727 }, { "epoch": 0.9615026652001015, "grad_norm": 0.15746630728244781, "learning_rate": 0.001, "loss": 2.3647, "step": 22728 }, { "epoch": 0.9615449699636179, "grad_norm": 0.1539701670408249, "learning_rate": 0.001, "loss": 1.9527, "step": 22729 }, { "epoch": 0.9615872747271342, "grad_norm": 0.15698012709617615, "learning_rate": 0.001, "loss": 1.9104, "step": 22730 }, { "epoch": 0.9616295794906506, "grad_norm": 0.17898909747600555, "learning_rate": 0.001, "loss": 2.0614, "step": 22731 }, { "epoch": 0.961671884254167, "grad_norm": 0.1490541249513626, "learning_rate": 0.001, "loss": 1.9078, "step": 22732 }, { "epoch": 0.9617141890176834, "grad_norm": 0.17301572859287262, "learning_rate": 0.001, "loss": 1.5119, "step": 22733 }, { "epoch": 0.9617564937811998, "grad_norm": 1.670694351196289, "learning_rate": 0.001, "loss": 1.6093, "step": 22734 }, { "epoch": 0.9617987985447162, "grad_norm": 0.16678212583065033, "learning_rate": 0.001, "loss": 1.5122, "step": 22735 }, { "epoch": 0.9618411033082325, "grad_norm": 0.15618404746055603, "learning_rate": 0.001, "loss": 1.4957, "step": 22736 }, { "epoch": 0.9618834080717489, "grad_norm": 0.15901844203472137, "learning_rate": 0.001, "loss": 2.9647, "step": 22737 }, { "epoch": 0.9619257128352653, "grad_norm": 0.162552148103714, "learning_rate": 0.001, "loss": 2.2336, "step": 22738 }, { "epoch": 0.9619680175987816, "grad_norm": 0.6180990934371948, "learning_rate": 0.001, "loss": 2.6319, "step": 22739 }, { "epoch": 0.962010322362298, "grad_norm": 0.16461017727851868, "learning_rate": 0.001, "loss": 2.8503, "step": 22740 }, { "epoch": 0.9620526271258144, "grad_norm": 0.15487508475780487, "learning_rate": 0.001, "loss": 1.7637, "step": 22741 }, { "epoch": 0.9620949318893307, "grad_norm": 0.16154402494430542, "learning_rate": 0.001, "loss": 1.7477, "step": 22742 }, { "epoch": 0.9621372366528471, "grad_norm": 0.162453293800354, "learning_rate": 0.001, "loss": 1.1528, "step": 22743 }, { "epoch": 0.9621795414163635, "grad_norm": 0.15126554667949677, "learning_rate": 0.001, "loss": 2.702, "step": 22744 }, { "epoch": 0.9622218461798798, "grad_norm": 0.13284894824028015, "learning_rate": 0.001, "loss": 3.2294, "step": 22745 }, { "epoch": 0.9622641509433962, "grad_norm": 1.1502666473388672, "learning_rate": 0.001, "loss": 2.4184, "step": 22746 }, { "epoch": 0.9623064557069126, "grad_norm": 0.1922639161348343, "learning_rate": 0.001, "loss": 2.0937, "step": 22747 }, { "epoch": 0.9623487604704289, "grad_norm": 0.16614429652690887, "learning_rate": 0.001, "loss": 1.959, "step": 22748 }, { "epoch": 0.9623910652339454, "grad_norm": 0.1562388688325882, "learning_rate": 0.001, "loss": 2.9115, "step": 22749 }, { "epoch": 0.9624333699974618, "grad_norm": 0.15959501266479492, "learning_rate": 0.001, "loss": 1.7341, "step": 22750 }, { "epoch": 0.9624756747609781, "grad_norm": 0.5524002909660339, "learning_rate": 0.001, "loss": 1.8583, "step": 22751 }, { "epoch": 0.9625179795244945, "grad_norm": 0.16397516429424286, "learning_rate": 0.001, "loss": 1.9359, "step": 22752 }, { "epoch": 0.9625602842880109, "grad_norm": 0.19087645411491394, "learning_rate": 0.001, "loss": 2.3213, "step": 22753 }, { "epoch": 0.9626025890515272, "grad_norm": 0.18343958258628845, "learning_rate": 0.001, "loss": 2.2451, "step": 22754 }, { "epoch": 0.9626448938150436, "grad_norm": 0.17732329666614532, "learning_rate": 0.001, "loss": 2.5962, "step": 22755 }, { "epoch": 0.96268719857856, "grad_norm": 0.1700247973203659, "learning_rate": 0.001, "loss": 1.6104, "step": 22756 }, { "epoch": 0.9627295033420763, "grad_norm": 18.20061492919922, "learning_rate": 0.001, "loss": 2.0838, "step": 22757 }, { "epoch": 0.9627718081055927, "grad_norm": 0.8646219372749329, "learning_rate": 0.001, "loss": 3.463, "step": 22758 }, { "epoch": 0.962814112869109, "grad_norm": 0.6529208421707153, "learning_rate": 0.001, "loss": 3.052, "step": 22759 }, { "epoch": 0.9628564176326254, "grad_norm": 0.16437289118766785, "learning_rate": 0.001, "loss": 1.9609, "step": 22760 }, { "epoch": 0.9628987223961418, "grad_norm": 0.2545870840549469, "learning_rate": 0.001, "loss": 2.0325, "step": 22761 }, { "epoch": 0.9629410271596581, "grad_norm": 0.4441566467285156, "learning_rate": 0.001, "loss": 2.3939, "step": 22762 }, { "epoch": 0.9629833319231745, "grad_norm": 0.17705874145030975, "learning_rate": 0.001, "loss": 2.0372, "step": 22763 }, { "epoch": 0.963025636686691, "grad_norm": 0.14965033531188965, "learning_rate": 0.001, "loss": 1.515, "step": 22764 }, { "epoch": 0.9630679414502072, "grad_norm": 0.1866179257631302, "learning_rate": 0.001, "loss": 2.3098, "step": 22765 }, { "epoch": 0.9631102462137237, "grad_norm": 2.7773499488830566, "learning_rate": 0.001, "loss": 2.5506, "step": 22766 }, { "epoch": 0.9631525509772401, "grad_norm": 0.18301425874233246, "learning_rate": 0.001, "loss": 1.7283, "step": 22767 }, { "epoch": 0.9631948557407564, "grad_norm": 0.15599261224269867, "learning_rate": 0.001, "loss": 2.4437, "step": 22768 }, { "epoch": 0.9632371605042728, "grad_norm": 0.35363784432411194, "learning_rate": 0.001, "loss": 2.3903, "step": 22769 }, { "epoch": 0.9632794652677892, "grad_norm": 5.986473560333252, "learning_rate": 0.001, "loss": 1.5687, "step": 22770 }, { "epoch": 0.9633217700313055, "grad_norm": 0.20316632091999054, "learning_rate": 0.001, "loss": 2.2692, "step": 22771 }, { "epoch": 0.9633640747948219, "grad_norm": 0.16965316236019135, "learning_rate": 0.001, "loss": 2.4032, "step": 22772 }, { "epoch": 0.9634063795583383, "grad_norm": 0.18180473148822784, "learning_rate": 0.001, "loss": 2.0577, "step": 22773 }, { "epoch": 0.9634486843218546, "grad_norm": 0.16141119599342346, "learning_rate": 0.001, "loss": 1.8242, "step": 22774 }, { "epoch": 0.963490989085371, "grad_norm": 0.18191201984882355, "learning_rate": 0.001, "loss": 2.1905, "step": 22775 }, { "epoch": 0.9635332938488874, "grad_norm": 0.783811092376709, "learning_rate": 0.001, "loss": 3.1841, "step": 22776 }, { "epoch": 0.9635755986124037, "grad_norm": 0.3125717043876648, "learning_rate": 0.001, "loss": 1.7756, "step": 22777 }, { "epoch": 0.9636179033759201, "grad_norm": 0.25806742906570435, "learning_rate": 0.001, "loss": 1.7628, "step": 22778 }, { "epoch": 0.9636602081394365, "grad_norm": 0.16139912605285645, "learning_rate": 0.001, "loss": 1.7258, "step": 22779 }, { "epoch": 0.9637025129029528, "grad_norm": 0.15739895403385162, "learning_rate": 0.001, "loss": 1.9454, "step": 22780 }, { "epoch": 0.9637448176664692, "grad_norm": 0.15625838935375214, "learning_rate": 0.001, "loss": 2.3343, "step": 22781 }, { "epoch": 0.9637871224299857, "grad_norm": 0.15970557928085327, "learning_rate": 0.001, "loss": 2.6073, "step": 22782 }, { "epoch": 0.963829427193502, "grad_norm": 2.783663272857666, "learning_rate": 0.001, "loss": 2.5539, "step": 22783 }, { "epoch": 0.9638717319570184, "grad_norm": 0.15198123455047607, "learning_rate": 0.001, "loss": 1.7133, "step": 22784 }, { "epoch": 0.9639140367205348, "grad_norm": 0.25221678614616394, "learning_rate": 0.001, "loss": 3.6873, "step": 22785 }, { "epoch": 0.9639563414840511, "grad_norm": 0.1301819235086441, "learning_rate": 0.001, "loss": 2.8356, "step": 22786 }, { "epoch": 0.9639986462475675, "grad_norm": 0.2297348827123642, "learning_rate": 0.001, "loss": 2.3075, "step": 22787 }, { "epoch": 0.9640409510110839, "grad_norm": 0.15260782837867737, "learning_rate": 0.001, "loss": 1.8023, "step": 22788 }, { "epoch": 0.9640832557746002, "grad_norm": 0.18781907856464386, "learning_rate": 0.001, "loss": 2.7565, "step": 22789 }, { "epoch": 0.9641255605381166, "grad_norm": 0.38502037525177, "learning_rate": 0.001, "loss": 2.6757, "step": 22790 }, { "epoch": 0.964167865301633, "grad_norm": 0.1971847265958786, "learning_rate": 0.001, "loss": 1.6928, "step": 22791 }, { "epoch": 0.9642101700651493, "grad_norm": 0.1461469531059265, "learning_rate": 0.001, "loss": 2.707, "step": 22792 }, { "epoch": 0.9642524748286657, "grad_norm": 0.15411047637462616, "learning_rate": 0.001, "loss": 1.8741, "step": 22793 }, { "epoch": 0.9642947795921821, "grad_norm": 0.15573959052562714, "learning_rate": 0.001, "loss": 2.3374, "step": 22794 }, { "epoch": 0.9643370843556984, "grad_norm": 0.1828451305627823, "learning_rate": 0.001, "loss": 2.5809, "step": 22795 }, { "epoch": 0.9643793891192148, "grad_norm": 2.2866430282592773, "learning_rate": 0.001, "loss": 2.0316, "step": 22796 }, { "epoch": 0.9644216938827312, "grad_norm": 5.461979866027832, "learning_rate": 0.001, "loss": 1.6355, "step": 22797 }, { "epoch": 0.9644639986462475, "grad_norm": 0.24063386023044586, "learning_rate": 0.001, "loss": 1.7713, "step": 22798 }, { "epoch": 0.964506303409764, "grad_norm": 0.15354004502296448, "learning_rate": 0.001, "loss": 1.7979, "step": 22799 }, { "epoch": 0.9645486081732804, "grad_norm": 0.20435552299022675, "learning_rate": 0.001, "loss": 1.8636, "step": 22800 }, { "epoch": 0.9645909129367967, "grad_norm": 0.17988991737365723, "learning_rate": 0.001, "loss": 3.3384, "step": 22801 }, { "epoch": 0.9646332177003131, "grad_norm": 0.19008944928646088, "learning_rate": 0.001, "loss": 1.8226, "step": 22802 }, { "epoch": 0.9646755224638295, "grad_norm": 0.20340676605701447, "learning_rate": 0.001, "loss": 3.0798, "step": 22803 }, { "epoch": 0.9647178272273458, "grad_norm": 0.18513543903827667, "learning_rate": 0.001, "loss": 1.7459, "step": 22804 }, { "epoch": 0.9647601319908622, "grad_norm": 0.18096236884593964, "learning_rate": 0.001, "loss": 2.0214, "step": 22805 }, { "epoch": 0.9648024367543785, "grad_norm": 0.20372340083122253, "learning_rate": 0.001, "loss": 2.1574, "step": 22806 }, { "epoch": 0.9648447415178949, "grad_norm": 0.1942087858915329, "learning_rate": 0.001, "loss": 1.8102, "step": 22807 }, { "epoch": 0.9648870462814113, "grad_norm": 0.16786138713359833, "learning_rate": 0.001, "loss": 1.7745, "step": 22808 }, { "epoch": 0.9649293510449276, "grad_norm": 21.640262603759766, "learning_rate": 0.001, "loss": 2.6787, "step": 22809 }, { "epoch": 0.964971655808444, "grad_norm": 0.17724758386611938, "learning_rate": 0.001, "loss": 2.0269, "step": 22810 }, { "epoch": 0.9650139605719604, "grad_norm": 0.17593099176883698, "learning_rate": 0.001, "loss": 2.6293, "step": 22811 }, { "epoch": 0.9650562653354767, "grad_norm": 0.1671978384256363, "learning_rate": 0.001, "loss": 2.3804, "step": 22812 }, { "epoch": 0.9650985700989931, "grad_norm": 0.19632820785045624, "learning_rate": 0.001, "loss": 1.808, "step": 22813 }, { "epoch": 0.9651408748625095, "grad_norm": 0.35916808247566223, "learning_rate": 0.001, "loss": 1.8704, "step": 22814 }, { "epoch": 0.9651831796260258, "grad_norm": 0.16983725130558014, "learning_rate": 0.001, "loss": 3.2959, "step": 22815 }, { "epoch": 0.9652254843895423, "grad_norm": 0.6574493050575256, "learning_rate": 0.001, "loss": 3.0752, "step": 22816 }, { "epoch": 0.9652677891530587, "grad_norm": 0.14061124622821808, "learning_rate": 0.001, "loss": 2.3643, "step": 22817 }, { "epoch": 0.965310093916575, "grad_norm": 0.12727145850658417, "learning_rate": 0.001, "loss": 2.3003, "step": 22818 }, { "epoch": 0.9653523986800914, "grad_norm": 0.15103477239608765, "learning_rate": 0.001, "loss": 2.3464, "step": 22819 }, { "epoch": 0.9653947034436078, "grad_norm": 0.15163427591323853, "learning_rate": 0.001, "loss": 1.8442, "step": 22820 }, { "epoch": 0.9654370082071241, "grad_norm": 0.16377829015254974, "learning_rate": 0.001, "loss": 2.1828, "step": 22821 }, { "epoch": 0.9654793129706405, "grad_norm": 0.44327592849731445, "learning_rate": 0.001, "loss": 2.5437, "step": 22822 }, { "epoch": 0.9655216177341569, "grad_norm": 0.2196311205625534, "learning_rate": 0.001, "loss": 1.9828, "step": 22823 }, { "epoch": 0.9655639224976732, "grad_norm": 0.13134104013442993, "learning_rate": 0.001, "loss": 1.5855, "step": 22824 }, { "epoch": 0.9656062272611896, "grad_norm": 0.4789327085018158, "learning_rate": 0.001, "loss": 2.3847, "step": 22825 }, { "epoch": 0.965648532024706, "grad_norm": 0.14557301998138428, "learning_rate": 0.001, "loss": 2.6556, "step": 22826 }, { "epoch": 0.9656908367882223, "grad_norm": 0.1414880007505417, "learning_rate": 0.001, "loss": 1.5357, "step": 22827 }, { "epoch": 0.9657331415517387, "grad_norm": 0.14299647510051727, "learning_rate": 0.001, "loss": 2.0178, "step": 22828 }, { "epoch": 0.9657754463152551, "grad_norm": 1.3442524671554565, "learning_rate": 0.001, "loss": 1.8132, "step": 22829 }, { "epoch": 0.9658177510787714, "grad_norm": 0.14745590090751648, "learning_rate": 0.001, "loss": 2.4792, "step": 22830 }, { "epoch": 0.9658600558422878, "grad_norm": 0.14321614801883698, "learning_rate": 0.001, "loss": 2.3954, "step": 22831 }, { "epoch": 0.9659023606058043, "grad_norm": 0.15746840834617615, "learning_rate": 0.001, "loss": 1.694, "step": 22832 }, { "epoch": 0.9659446653693206, "grad_norm": 0.15602366626262665, "learning_rate": 0.001, "loss": 2.3334, "step": 22833 }, { "epoch": 0.965986970132837, "grad_norm": 0.1474500298500061, "learning_rate": 0.001, "loss": 2.1187, "step": 22834 }, { "epoch": 0.9660292748963534, "grad_norm": 1.2670032978057861, "learning_rate": 0.001, "loss": 1.8418, "step": 22835 }, { "epoch": 0.9660715796598697, "grad_norm": 0.20300118625164032, "learning_rate": 0.001, "loss": 1.9915, "step": 22836 }, { "epoch": 0.9661138844233861, "grad_norm": 0.20837882161140442, "learning_rate": 0.001, "loss": 2.6805, "step": 22837 }, { "epoch": 0.9661561891869025, "grad_norm": 0.1467253714799881, "learning_rate": 0.001, "loss": 1.7238, "step": 22838 }, { "epoch": 0.9661984939504188, "grad_norm": 8.646101951599121, "learning_rate": 0.001, "loss": 2.1153, "step": 22839 }, { "epoch": 0.9662407987139352, "grad_norm": 0.12623724341392517, "learning_rate": 0.001, "loss": 1.9916, "step": 22840 }, { "epoch": 0.9662831034774516, "grad_norm": 0.1795465052127838, "learning_rate": 0.001, "loss": 2.2702, "step": 22841 }, { "epoch": 0.9663254082409679, "grad_norm": 0.14602890610694885, "learning_rate": 0.001, "loss": 2.0795, "step": 22842 }, { "epoch": 0.9663677130044843, "grad_norm": 0.18272976577281952, "learning_rate": 0.001, "loss": 2.061, "step": 22843 }, { "epoch": 0.9664100177680007, "grad_norm": 0.19199426472187042, "learning_rate": 0.001, "loss": 2.0847, "step": 22844 }, { "epoch": 0.966452322531517, "grad_norm": 0.19457626342773438, "learning_rate": 0.001, "loss": 1.9797, "step": 22845 }, { "epoch": 0.9664946272950334, "grad_norm": 0.29736241698265076, "learning_rate": 0.001, "loss": 3.3281, "step": 22846 }, { "epoch": 0.9665369320585498, "grad_norm": 0.29135966300964355, "learning_rate": 0.001, "loss": 5.947, "step": 22847 }, { "epoch": 0.9665792368220661, "grad_norm": 3.7206881046295166, "learning_rate": 0.001, "loss": 1.9796, "step": 22848 }, { "epoch": 0.9666215415855826, "grad_norm": 0.14259220659732819, "learning_rate": 0.001, "loss": 3.0848, "step": 22849 }, { "epoch": 0.9666638463490989, "grad_norm": 2.7031147480010986, "learning_rate": 0.001, "loss": 2.8408, "step": 22850 }, { "epoch": 0.9667061511126153, "grad_norm": 0.1781085729598999, "learning_rate": 0.001, "loss": 2.7997, "step": 22851 }, { "epoch": 0.9667484558761317, "grad_norm": 0.1732090413570404, "learning_rate": 0.001, "loss": 1.9524, "step": 22852 }, { "epoch": 0.966790760639648, "grad_norm": 0.13723929226398468, "learning_rate": 0.001, "loss": 1.7664, "step": 22853 }, { "epoch": 0.9668330654031644, "grad_norm": 0.1692230999469757, "learning_rate": 0.001, "loss": 2.081, "step": 22854 }, { "epoch": 0.9668753701666808, "grad_norm": 0.1593841016292572, "learning_rate": 0.001, "loss": 1.6948, "step": 22855 }, { "epoch": 0.9669176749301971, "grad_norm": 0.13628552854061127, "learning_rate": 0.001, "loss": 2.2481, "step": 22856 }, { "epoch": 0.9669599796937135, "grad_norm": 0.1631813496351242, "learning_rate": 0.001, "loss": 1.9093, "step": 22857 }, { "epoch": 0.9670022844572299, "grad_norm": 0.16931277513504028, "learning_rate": 0.001, "loss": 3.1397, "step": 22858 }, { "epoch": 0.9670445892207462, "grad_norm": 0.14777718484401703, "learning_rate": 0.001, "loss": 1.9937, "step": 22859 }, { "epoch": 0.9670868939842626, "grad_norm": 0.23261679708957672, "learning_rate": 0.001, "loss": 1.5095, "step": 22860 }, { "epoch": 0.967129198747779, "grad_norm": 0.23100370168685913, "learning_rate": 0.001, "loss": 3.1784, "step": 22861 }, { "epoch": 0.9671715035112953, "grad_norm": 0.17123155295848846, "learning_rate": 0.001, "loss": 2.0223, "step": 22862 }, { "epoch": 0.9672138082748117, "grad_norm": 0.16940762102603912, "learning_rate": 0.001, "loss": 2.0325, "step": 22863 }, { "epoch": 0.9672561130383281, "grad_norm": 0.15157361328601837, "learning_rate": 0.001, "loss": 2.8099, "step": 22864 }, { "epoch": 0.9672984178018444, "grad_norm": 0.1540924608707428, "learning_rate": 0.001, "loss": 1.6055, "step": 22865 }, { "epoch": 0.9673407225653609, "grad_norm": 0.17207752168178558, "learning_rate": 0.001, "loss": 1.7778, "step": 22866 }, { "epoch": 0.9673830273288773, "grad_norm": 0.2308436930179596, "learning_rate": 0.001, "loss": 2.6066, "step": 22867 }, { "epoch": 0.9674253320923936, "grad_norm": 0.15052172541618347, "learning_rate": 0.001, "loss": 1.6072, "step": 22868 }, { "epoch": 0.96746763685591, "grad_norm": 0.13643066585063934, "learning_rate": 0.001, "loss": 1.8093, "step": 22869 }, { "epoch": 0.9675099416194264, "grad_norm": 0.16846615076065063, "learning_rate": 0.001, "loss": 1.918, "step": 22870 }, { "epoch": 0.9675522463829427, "grad_norm": 1.7029542922973633, "learning_rate": 0.001, "loss": 1.7533, "step": 22871 }, { "epoch": 0.9675945511464591, "grad_norm": 0.16174376010894775, "learning_rate": 0.001, "loss": 2.0584, "step": 22872 }, { "epoch": 0.9676368559099755, "grad_norm": 0.17282286286354065, "learning_rate": 0.001, "loss": 1.9735, "step": 22873 }, { "epoch": 0.9676791606734918, "grad_norm": 3.2396557331085205, "learning_rate": 0.001, "loss": 2.9205, "step": 22874 }, { "epoch": 0.9677214654370082, "grad_norm": 0.18239837884902954, "learning_rate": 0.001, "loss": 1.9157, "step": 22875 }, { "epoch": 0.9677637702005246, "grad_norm": 0.16040238738059998, "learning_rate": 0.001, "loss": 2.5035, "step": 22876 }, { "epoch": 0.9678060749640409, "grad_norm": 0.31164249777793884, "learning_rate": 0.001, "loss": 2.2374, "step": 22877 }, { "epoch": 0.9678483797275573, "grad_norm": 0.1581432968378067, "learning_rate": 0.001, "loss": 1.8085, "step": 22878 }, { "epoch": 0.9678906844910737, "grad_norm": 0.17101150751113892, "learning_rate": 0.001, "loss": 1.9065, "step": 22879 }, { "epoch": 0.96793298925459, "grad_norm": 0.1634765863418579, "learning_rate": 0.001, "loss": 1.3569, "step": 22880 }, { "epoch": 0.9679752940181064, "grad_norm": 0.1971825212240219, "learning_rate": 0.001, "loss": 1.9538, "step": 22881 }, { "epoch": 0.9680175987816229, "grad_norm": 0.17837342619895935, "learning_rate": 0.001, "loss": 1.7106, "step": 22882 }, { "epoch": 0.9680599035451392, "grad_norm": 0.18054227530956268, "learning_rate": 0.001, "loss": 2.0404, "step": 22883 }, { "epoch": 0.9681022083086556, "grad_norm": 0.16137461364269257, "learning_rate": 0.001, "loss": 1.4786, "step": 22884 }, { "epoch": 0.968144513072172, "grad_norm": 0.1961548924446106, "learning_rate": 0.001, "loss": 2.127, "step": 22885 }, { "epoch": 0.9681868178356883, "grad_norm": 0.1829284280538559, "learning_rate": 0.001, "loss": 1.826, "step": 22886 }, { "epoch": 0.9682291225992047, "grad_norm": 0.16938935220241547, "learning_rate": 0.001, "loss": 2.0336, "step": 22887 }, { "epoch": 0.9682714273627211, "grad_norm": 0.1795877367258072, "learning_rate": 0.001, "loss": 2.4894, "step": 22888 }, { "epoch": 0.9683137321262374, "grad_norm": 0.16748693585395813, "learning_rate": 0.001, "loss": 2.4214, "step": 22889 }, { "epoch": 0.9683560368897538, "grad_norm": 0.15759611129760742, "learning_rate": 0.001, "loss": 1.9249, "step": 22890 }, { "epoch": 0.9683983416532702, "grad_norm": 0.16091367602348328, "learning_rate": 0.001, "loss": 1.8189, "step": 22891 }, { "epoch": 0.9684406464167865, "grad_norm": 0.1906583309173584, "learning_rate": 0.001, "loss": 2.3985, "step": 22892 }, { "epoch": 0.9684829511803029, "grad_norm": 0.15400663018226624, "learning_rate": 0.001, "loss": 2.1882, "step": 22893 }, { "epoch": 0.9685252559438192, "grad_norm": 0.1885334551334381, "learning_rate": 0.001, "loss": 1.759, "step": 22894 }, { "epoch": 0.9685675607073356, "grad_norm": 0.2451082170009613, "learning_rate": 0.001, "loss": 2.4656, "step": 22895 }, { "epoch": 0.968609865470852, "grad_norm": 0.25269943475723267, "learning_rate": 0.001, "loss": 2.1214, "step": 22896 }, { "epoch": 0.9686521702343683, "grad_norm": 0.21075433492660522, "learning_rate": 0.001, "loss": 1.8323, "step": 22897 }, { "epoch": 0.9686944749978847, "grad_norm": 0.168150395154953, "learning_rate": 0.001, "loss": 1.8325, "step": 22898 }, { "epoch": 0.9687367797614012, "grad_norm": 0.1418176144361496, "learning_rate": 0.001, "loss": 2.0136, "step": 22899 }, { "epoch": 0.9687790845249175, "grad_norm": 0.21564440429210663, "learning_rate": 0.001, "loss": 2.308, "step": 22900 }, { "epoch": 0.9688213892884339, "grad_norm": 0.18524868786334991, "learning_rate": 0.001, "loss": 1.8404, "step": 22901 }, { "epoch": 0.9688636940519503, "grad_norm": 0.13573579490184784, "learning_rate": 0.001, "loss": 1.5914, "step": 22902 }, { "epoch": 0.9689059988154666, "grad_norm": 0.7693604826927185, "learning_rate": 0.001, "loss": 1.7203, "step": 22903 }, { "epoch": 0.968948303578983, "grad_norm": 0.2601660490036011, "learning_rate": 0.001, "loss": 2.7439, "step": 22904 }, { "epoch": 0.9689906083424994, "grad_norm": 0.16246432065963745, "learning_rate": 0.001, "loss": 2.6348, "step": 22905 }, { "epoch": 0.9690329131060157, "grad_norm": 0.2047150582075119, "learning_rate": 0.001, "loss": 2.1811, "step": 22906 }, { "epoch": 0.9690752178695321, "grad_norm": 0.19627639651298523, "learning_rate": 0.001, "loss": 1.8717, "step": 22907 }, { "epoch": 0.9691175226330485, "grad_norm": 0.2079043984413147, "learning_rate": 0.001, "loss": 3.3459, "step": 22908 }, { "epoch": 0.9691598273965648, "grad_norm": 0.16984863579273224, "learning_rate": 0.001, "loss": 1.721, "step": 22909 }, { "epoch": 0.9692021321600812, "grad_norm": 0.23695328831672668, "learning_rate": 0.001, "loss": 1.92, "step": 22910 }, { "epoch": 0.9692444369235976, "grad_norm": 0.13472682237625122, "learning_rate": 0.001, "loss": 1.5671, "step": 22911 }, { "epoch": 0.9692867416871139, "grad_norm": 0.1398351788520813, "learning_rate": 0.001, "loss": 2.6105, "step": 22912 }, { "epoch": 0.9693290464506303, "grad_norm": 0.16584454476833344, "learning_rate": 0.001, "loss": 2.4794, "step": 22913 }, { "epoch": 0.9693713512141467, "grad_norm": 0.20908065140247345, "learning_rate": 0.001, "loss": 4.0282, "step": 22914 }, { "epoch": 0.969413655977663, "grad_norm": 0.14626561105251312, "learning_rate": 0.001, "loss": 3.0325, "step": 22915 }, { "epoch": 0.9694559607411795, "grad_norm": 0.190009206533432, "learning_rate": 0.001, "loss": 1.9914, "step": 22916 }, { "epoch": 0.9694982655046959, "grad_norm": 0.30894187092781067, "learning_rate": 0.001, "loss": 1.9338, "step": 22917 }, { "epoch": 0.9695405702682122, "grad_norm": 0.1590581089258194, "learning_rate": 0.001, "loss": 2.0382, "step": 22918 }, { "epoch": 0.9695828750317286, "grad_norm": 0.1639058142900467, "learning_rate": 0.001, "loss": 1.5866, "step": 22919 }, { "epoch": 0.969625179795245, "grad_norm": 0.1441287249326706, "learning_rate": 0.001, "loss": 1.7956, "step": 22920 }, { "epoch": 0.9696674845587613, "grad_norm": 0.14907407760620117, "learning_rate": 0.001, "loss": 2.4144, "step": 22921 }, { "epoch": 0.9697097893222777, "grad_norm": 0.17159616947174072, "learning_rate": 0.001, "loss": 1.4419, "step": 22922 }, { "epoch": 0.9697520940857941, "grad_norm": 0.1924789398908615, "learning_rate": 0.001, "loss": 2.0441, "step": 22923 }, { "epoch": 0.9697943988493104, "grad_norm": 0.8495581150054932, "learning_rate": 0.001, "loss": 2.2902, "step": 22924 }, { "epoch": 0.9698367036128268, "grad_norm": 0.19208434224128723, "learning_rate": 0.001, "loss": 3.4082, "step": 22925 }, { "epoch": 0.9698790083763432, "grad_norm": 0.16336600482463837, "learning_rate": 0.001, "loss": 1.8806, "step": 22926 }, { "epoch": 0.9699213131398595, "grad_norm": 0.3081340491771698, "learning_rate": 0.001, "loss": 2.5719, "step": 22927 }, { "epoch": 0.9699636179033759, "grad_norm": 0.743816614151001, "learning_rate": 0.001, "loss": 1.715, "step": 22928 }, { "epoch": 0.9700059226668923, "grad_norm": 0.14184142649173737, "learning_rate": 0.001, "loss": 2.3673, "step": 22929 }, { "epoch": 0.9700482274304086, "grad_norm": 0.14244288206100464, "learning_rate": 0.001, "loss": 2.542, "step": 22930 }, { "epoch": 0.970090532193925, "grad_norm": 0.15329469740390778, "learning_rate": 0.001, "loss": 2.1073, "step": 22931 }, { "epoch": 0.9701328369574415, "grad_norm": 7.721384048461914, "learning_rate": 0.001, "loss": 2.1494, "step": 22932 }, { "epoch": 0.9701751417209578, "grad_norm": 0.22561503946781158, "learning_rate": 0.001, "loss": 1.9826, "step": 22933 }, { "epoch": 0.9702174464844742, "grad_norm": 0.3391435444355011, "learning_rate": 0.001, "loss": 1.8618, "step": 22934 }, { "epoch": 0.9702597512479906, "grad_norm": 0.17945805191993713, "learning_rate": 0.001, "loss": 1.4256, "step": 22935 }, { "epoch": 0.9703020560115069, "grad_norm": 0.1751527637243271, "learning_rate": 0.001, "loss": 2.4109, "step": 22936 }, { "epoch": 0.9703443607750233, "grad_norm": 0.15709732472896576, "learning_rate": 0.001, "loss": 1.6146, "step": 22937 }, { "epoch": 0.9703866655385397, "grad_norm": 0.16432954370975494, "learning_rate": 0.001, "loss": 2.0583, "step": 22938 }, { "epoch": 0.970428970302056, "grad_norm": 0.44643503427505493, "learning_rate": 0.001, "loss": 2.3525, "step": 22939 }, { "epoch": 0.9704712750655724, "grad_norm": 0.1962294727563858, "learning_rate": 0.001, "loss": 1.874, "step": 22940 }, { "epoch": 0.9705135798290887, "grad_norm": 0.1391528844833374, "learning_rate": 0.001, "loss": 2.7016, "step": 22941 }, { "epoch": 0.9705558845926051, "grad_norm": 0.15284281969070435, "learning_rate": 0.001, "loss": 2.5978, "step": 22942 }, { "epoch": 0.9705981893561215, "grad_norm": 0.19085395336151123, "learning_rate": 0.001, "loss": 1.2241, "step": 22943 }, { "epoch": 0.9706404941196378, "grad_norm": 0.17279383540153503, "learning_rate": 0.001, "loss": 2.0888, "step": 22944 }, { "epoch": 0.9706827988831542, "grad_norm": 0.15863092243671417, "learning_rate": 0.001, "loss": 1.6636, "step": 22945 }, { "epoch": 0.9707251036466706, "grad_norm": 0.1498878002166748, "learning_rate": 0.001, "loss": 1.7367, "step": 22946 }, { "epoch": 0.9707674084101869, "grad_norm": 0.14784745872020721, "learning_rate": 0.001, "loss": 1.7366, "step": 22947 }, { "epoch": 0.9708097131737033, "grad_norm": 0.14569970965385437, "learning_rate": 0.001, "loss": 1.6962, "step": 22948 }, { "epoch": 0.9708520179372198, "grad_norm": 0.13814716041088104, "learning_rate": 0.001, "loss": 1.8074, "step": 22949 }, { "epoch": 0.9708943227007361, "grad_norm": 0.18621012568473816, "learning_rate": 0.001, "loss": 2.2638, "step": 22950 }, { "epoch": 0.9709366274642525, "grad_norm": 0.1415053755044937, "learning_rate": 0.001, "loss": 1.6322, "step": 22951 }, { "epoch": 0.9709789322277689, "grad_norm": 0.1956765204668045, "learning_rate": 0.001, "loss": 2.0337, "step": 22952 }, { "epoch": 0.9710212369912852, "grad_norm": 0.16318294405937195, "learning_rate": 0.001, "loss": 2.2628, "step": 22953 }, { "epoch": 0.9710635417548016, "grad_norm": 6.47411584854126, "learning_rate": 0.001, "loss": 2.0023, "step": 22954 }, { "epoch": 0.971105846518318, "grad_norm": 0.14443252980709076, "learning_rate": 0.001, "loss": 2.8807, "step": 22955 }, { "epoch": 0.9711481512818343, "grad_norm": 0.12970362603664398, "learning_rate": 0.001, "loss": 3.2348, "step": 22956 }, { "epoch": 0.9711904560453507, "grad_norm": 0.21394479274749756, "learning_rate": 0.001, "loss": 1.8296, "step": 22957 }, { "epoch": 0.9712327608088671, "grad_norm": 0.15435615181922913, "learning_rate": 0.001, "loss": 2.0672, "step": 22958 }, { "epoch": 0.9712750655723834, "grad_norm": 0.21590697765350342, "learning_rate": 0.001, "loss": 2.2599, "step": 22959 }, { "epoch": 0.9713173703358998, "grad_norm": 0.14091353118419647, "learning_rate": 0.001, "loss": 2.0747, "step": 22960 }, { "epoch": 0.9713596750994162, "grad_norm": 0.1489262878894806, "learning_rate": 0.001, "loss": 1.9124, "step": 22961 }, { "epoch": 0.9714019798629325, "grad_norm": 0.14029927551746368, "learning_rate": 0.001, "loss": 2.5653, "step": 22962 }, { "epoch": 0.9714442846264489, "grad_norm": 0.1342879831790924, "learning_rate": 0.001, "loss": 1.4259, "step": 22963 }, { "epoch": 0.9714865893899653, "grad_norm": 0.13123171031475067, "learning_rate": 0.001, "loss": 2.2616, "step": 22964 }, { "epoch": 0.9715288941534816, "grad_norm": 0.13907508552074432, "learning_rate": 0.001, "loss": 1.5811, "step": 22965 }, { "epoch": 0.9715711989169981, "grad_norm": 0.14411531388759613, "learning_rate": 0.001, "loss": 2.1203, "step": 22966 }, { "epoch": 0.9716135036805145, "grad_norm": 0.15099336206912994, "learning_rate": 0.001, "loss": 2.7498, "step": 22967 }, { "epoch": 0.9716558084440308, "grad_norm": 0.14100094139575958, "learning_rate": 0.001, "loss": 1.9042, "step": 22968 }, { "epoch": 0.9716981132075472, "grad_norm": 0.16299395263195038, "learning_rate": 0.001, "loss": 3.2449, "step": 22969 }, { "epoch": 0.9717404179710636, "grad_norm": 0.1855599284172058, "learning_rate": 0.001, "loss": 1.2989, "step": 22970 }, { "epoch": 0.9717827227345799, "grad_norm": 0.13893848657608032, "learning_rate": 0.001, "loss": 1.8379, "step": 22971 }, { "epoch": 0.9718250274980963, "grad_norm": 0.22873970866203308, "learning_rate": 0.001, "loss": 2.7587, "step": 22972 }, { "epoch": 0.9718673322616127, "grad_norm": 0.21288566291332245, "learning_rate": 0.001, "loss": 1.7145, "step": 22973 }, { "epoch": 0.971909637025129, "grad_norm": 0.16666394472122192, "learning_rate": 0.001, "loss": 2.0038, "step": 22974 }, { "epoch": 0.9719519417886454, "grad_norm": 0.1581132858991623, "learning_rate": 0.001, "loss": 1.5963, "step": 22975 }, { "epoch": 0.9719942465521618, "grad_norm": 0.1787661761045456, "learning_rate": 0.001, "loss": 2.5239, "step": 22976 }, { "epoch": 0.9720365513156781, "grad_norm": 0.1651124209165573, "learning_rate": 0.001, "loss": 2.6818, "step": 22977 }, { "epoch": 0.9720788560791945, "grad_norm": 0.14961262047290802, "learning_rate": 0.001, "loss": 2.9672, "step": 22978 }, { "epoch": 0.9721211608427109, "grad_norm": 0.15636776387691498, "learning_rate": 0.001, "loss": 2.4673, "step": 22979 }, { "epoch": 0.9721634656062272, "grad_norm": 1.0559109449386597, "learning_rate": 0.001, "loss": 2.7908, "step": 22980 }, { "epoch": 0.9722057703697436, "grad_norm": 0.15720365941524506, "learning_rate": 0.001, "loss": 2.1491, "step": 22981 }, { "epoch": 0.9722480751332601, "grad_norm": 0.14667530357837677, "learning_rate": 0.001, "loss": 1.7503, "step": 22982 }, { "epoch": 0.9722903798967764, "grad_norm": 0.15242712199687958, "learning_rate": 0.001, "loss": 2.048, "step": 22983 }, { "epoch": 0.9723326846602928, "grad_norm": 7.654297828674316, "learning_rate": 0.001, "loss": 1.64, "step": 22984 }, { "epoch": 0.9723749894238091, "grad_norm": 3.1997509002685547, "learning_rate": 0.001, "loss": 1.8606, "step": 22985 }, { "epoch": 0.9724172941873255, "grad_norm": 0.3333907723426819, "learning_rate": 0.001, "loss": 1.8566, "step": 22986 }, { "epoch": 0.9724595989508419, "grad_norm": 0.2523484528064728, "learning_rate": 0.001, "loss": 2.3548, "step": 22987 }, { "epoch": 0.9725019037143582, "grad_norm": 0.1559314876794815, "learning_rate": 0.001, "loss": 1.3173, "step": 22988 }, { "epoch": 0.9725442084778746, "grad_norm": 0.13869056105613708, "learning_rate": 0.001, "loss": 2.4087, "step": 22989 }, { "epoch": 0.972586513241391, "grad_norm": 0.13740174472332, "learning_rate": 0.001, "loss": 1.849, "step": 22990 }, { "epoch": 0.9726288180049073, "grad_norm": 0.1633240431547165, "learning_rate": 0.001, "loss": 2.104, "step": 22991 }, { "epoch": 0.9726711227684237, "grad_norm": 0.14043399691581726, "learning_rate": 0.001, "loss": 2.5682, "step": 22992 }, { "epoch": 0.9727134275319401, "grad_norm": 0.16599078476428986, "learning_rate": 0.001, "loss": 3.1016, "step": 22993 }, { "epoch": 0.9727557322954564, "grad_norm": 0.6337260007858276, "learning_rate": 0.001, "loss": 2.6611, "step": 22994 }, { "epoch": 0.9727980370589728, "grad_norm": 0.21094225347042084, "learning_rate": 0.001, "loss": 3.6387, "step": 22995 }, { "epoch": 0.9728403418224892, "grad_norm": 0.3254936635494232, "learning_rate": 0.001, "loss": 3.3134, "step": 22996 }, { "epoch": 0.9728826465860055, "grad_norm": 0.16493062674999237, "learning_rate": 0.001, "loss": 2.4989, "step": 22997 }, { "epoch": 0.972924951349522, "grad_norm": 0.18628916144371033, "learning_rate": 0.001, "loss": 2.0522, "step": 22998 }, { "epoch": 0.9729672561130384, "grad_norm": 0.20497991144657135, "learning_rate": 0.001, "loss": 2.1398, "step": 22999 }, { "epoch": 0.9730095608765547, "grad_norm": 0.13439905643463135, "learning_rate": 0.001, "loss": 1.7492, "step": 23000 }, { "epoch": 0.9730518656400711, "grad_norm": 0.20762348175048828, "learning_rate": 0.001, "loss": 2.6112, "step": 23001 }, { "epoch": 0.9730941704035875, "grad_norm": 0.17342343926429749, "learning_rate": 0.001, "loss": 1.9358, "step": 23002 }, { "epoch": 0.9731364751671038, "grad_norm": 0.15936243534088135, "learning_rate": 0.001, "loss": 2.0159, "step": 23003 }, { "epoch": 0.9731787799306202, "grad_norm": 0.15786008536815643, "learning_rate": 0.001, "loss": 1.3607, "step": 23004 }, { "epoch": 0.9732210846941366, "grad_norm": 0.18366417288780212, "learning_rate": 0.001, "loss": 1.746, "step": 23005 }, { "epoch": 0.9732633894576529, "grad_norm": 0.2870394289493561, "learning_rate": 0.001, "loss": 1.9022, "step": 23006 }, { "epoch": 0.9733056942211693, "grad_norm": 0.18606680631637573, "learning_rate": 0.001, "loss": 2.4152, "step": 23007 }, { "epoch": 0.9733479989846857, "grad_norm": 0.15611374378204346, "learning_rate": 0.001, "loss": 2.4503, "step": 23008 }, { "epoch": 0.973390303748202, "grad_norm": 0.1782141774892807, "learning_rate": 0.001, "loss": 2.1087, "step": 23009 }, { "epoch": 0.9734326085117184, "grad_norm": 0.817843496799469, "learning_rate": 0.001, "loss": 2.758, "step": 23010 }, { "epoch": 0.9734749132752348, "grad_norm": 0.13205979764461517, "learning_rate": 0.001, "loss": 1.4987, "step": 23011 }, { "epoch": 0.9735172180387511, "grad_norm": 3.9600741863250732, "learning_rate": 0.001, "loss": 1.5233, "step": 23012 }, { "epoch": 0.9735595228022675, "grad_norm": 0.17472542822360992, "learning_rate": 0.001, "loss": 1.5872, "step": 23013 }, { "epoch": 0.973601827565784, "grad_norm": 0.17888934910297394, "learning_rate": 0.001, "loss": 1.989, "step": 23014 }, { "epoch": 0.9736441323293002, "grad_norm": 0.162007674574852, "learning_rate": 0.001, "loss": 2.0518, "step": 23015 }, { "epoch": 0.9736864370928167, "grad_norm": 0.17636260390281677, "learning_rate": 0.001, "loss": 1.9944, "step": 23016 }, { "epoch": 0.9737287418563331, "grad_norm": 0.19297371804714203, "learning_rate": 0.001, "loss": 1.6305, "step": 23017 }, { "epoch": 0.9737710466198494, "grad_norm": 0.16038502752780914, "learning_rate": 0.001, "loss": 2.8023, "step": 23018 }, { "epoch": 0.9738133513833658, "grad_norm": 0.24749846756458282, "learning_rate": 0.001, "loss": 1.8999, "step": 23019 }, { "epoch": 0.9738556561468822, "grad_norm": 0.16544784605503082, "learning_rate": 0.001, "loss": 1.9664, "step": 23020 }, { "epoch": 0.9738979609103985, "grad_norm": 0.16566026210784912, "learning_rate": 0.001, "loss": 1.6773, "step": 23021 }, { "epoch": 0.9739402656739149, "grad_norm": 0.16837327182292938, "learning_rate": 0.001, "loss": 2.1888, "step": 23022 }, { "epoch": 0.9739825704374313, "grad_norm": 0.16060344874858856, "learning_rate": 0.001, "loss": 1.9338, "step": 23023 }, { "epoch": 0.9740248752009476, "grad_norm": 0.21505653858184814, "learning_rate": 0.001, "loss": 2.1727, "step": 23024 }, { "epoch": 0.974067179964464, "grad_norm": 0.17655415832996368, "learning_rate": 0.001, "loss": 1.7649, "step": 23025 }, { "epoch": 0.9741094847279804, "grad_norm": 0.1724175214767456, "learning_rate": 0.001, "loss": 1.7799, "step": 23026 }, { "epoch": 0.9741517894914967, "grad_norm": 0.21680817008018494, "learning_rate": 0.001, "loss": 1.9723, "step": 23027 }, { "epoch": 0.9741940942550131, "grad_norm": 5.8497748374938965, "learning_rate": 0.001, "loss": 2.4025, "step": 23028 }, { "epoch": 0.9742363990185295, "grad_norm": 0.18116092681884766, "learning_rate": 0.001, "loss": 2.083, "step": 23029 }, { "epoch": 0.9742787037820458, "grad_norm": 0.1881796270608902, "learning_rate": 0.001, "loss": 1.8736, "step": 23030 }, { "epoch": 0.9743210085455623, "grad_norm": 0.2181680053472519, "learning_rate": 0.001, "loss": 1.9318, "step": 23031 }, { "epoch": 0.9743633133090785, "grad_norm": 0.2097787708044052, "learning_rate": 0.001, "loss": 1.819, "step": 23032 }, { "epoch": 0.974405618072595, "grad_norm": 0.16672278940677643, "learning_rate": 0.001, "loss": 1.8597, "step": 23033 }, { "epoch": 0.9744479228361114, "grad_norm": 0.1386634260416031, "learning_rate": 0.001, "loss": 1.5876, "step": 23034 }, { "epoch": 0.9744902275996277, "grad_norm": 0.1755525916814804, "learning_rate": 0.001, "loss": 1.8865, "step": 23035 }, { "epoch": 0.9745325323631441, "grad_norm": 0.17198018729686737, "learning_rate": 0.001, "loss": 2.1409, "step": 23036 }, { "epoch": 0.9745748371266605, "grad_norm": 0.20147661864757538, "learning_rate": 0.001, "loss": 1.6044, "step": 23037 }, { "epoch": 0.9746171418901768, "grad_norm": 0.292715460062027, "learning_rate": 0.001, "loss": 1.906, "step": 23038 }, { "epoch": 0.9746594466536932, "grad_norm": 3.269289493560791, "learning_rate": 0.001, "loss": 2.3023, "step": 23039 }, { "epoch": 0.9747017514172096, "grad_norm": 0.30846601724624634, "learning_rate": 0.001, "loss": 2.5841, "step": 23040 }, { "epoch": 0.9747440561807259, "grad_norm": 0.2580687701702118, "learning_rate": 0.001, "loss": 2.0838, "step": 23041 }, { "epoch": 0.9747863609442423, "grad_norm": 0.4336620271205902, "learning_rate": 0.001, "loss": 2.2656, "step": 23042 }, { "epoch": 0.9748286657077587, "grad_norm": 0.9304342865943909, "learning_rate": 0.001, "loss": 2.4475, "step": 23043 }, { "epoch": 0.974870970471275, "grad_norm": 5.938025951385498, "learning_rate": 0.001, "loss": 2.5208, "step": 23044 }, { "epoch": 0.9749132752347914, "grad_norm": 24.187389373779297, "learning_rate": 0.001, "loss": 1.5931, "step": 23045 }, { "epoch": 0.9749555799983078, "grad_norm": 0.4992624819278717, "learning_rate": 0.001, "loss": 2.4145, "step": 23046 }, { "epoch": 0.9749978847618241, "grad_norm": 2.016716718673706, "learning_rate": 0.001, "loss": 2.8388, "step": 23047 }, { "epoch": 0.9750401895253406, "grad_norm": 1.3812201023101807, "learning_rate": 0.001, "loss": 2.0137, "step": 23048 }, { "epoch": 0.975082494288857, "grad_norm": 0.2711379826068878, "learning_rate": 0.001, "loss": 1.798, "step": 23049 }, { "epoch": 0.9751247990523733, "grad_norm": 0.3764013350009918, "learning_rate": 0.001, "loss": 2.0769, "step": 23050 }, { "epoch": 0.9751671038158897, "grad_norm": 2.7994561195373535, "learning_rate": 0.001, "loss": 1.6256, "step": 23051 }, { "epoch": 0.9752094085794061, "grad_norm": 0.26443973183631897, "learning_rate": 0.001, "loss": 2.8268, "step": 23052 }, { "epoch": 0.9752517133429224, "grad_norm": 0.17250576615333557, "learning_rate": 0.001, "loss": 2.3258, "step": 23053 }, { "epoch": 0.9752940181064388, "grad_norm": 0.23773407936096191, "learning_rate": 0.001, "loss": 2.3701, "step": 23054 }, { "epoch": 0.9753363228699552, "grad_norm": 0.17003774642944336, "learning_rate": 0.001, "loss": 1.8557, "step": 23055 }, { "epoch": 0.9753786276334715, "grad_norm": 0.6521497368812561, "learning_rate": 0.001, "loss": 2.9078, "step": 23056 }, { "epoch": 0.9754209323969879, "grad_norm": 0.1700069010257721, "learning_rate": 0.001, "loss": 2.3583, "step": 23057 }, { "epoch": 0.9754632371605043, "grad_norm": 1.8804014921188354, "learning_rate": 0.001, "loss": 1.5116, "step": 23058 }, { "epoch": 0.9755055419240206, "grad_norm": 0.16962087154388428, "learning_rate": 0.001, "loss": 2.3743, "step": 23059 }, { "epoch": 0.975547846687537, "grad_norm": 0.1760241985321045, "learning_rate": 0.001, "loss": 1.7771, "step": 23060 }, { "epoch": 0.9755901514510534, "grad_norm": 0.3765549063682556, "learning_rate": 0.001, "loss": 2.3506, "step": 23061 }, { "epoch": 0.9756324562145697, "grad_norm": 0.26375001668930054, "learning_rate": 0.001, "loss": 2.108, "step": 23062 }, { "epoch": 0.9756747609780861, "grad_norm": 0.17104345560073853, "learning_rate": 0.001, "loss": 1.2225, "step": 23063 }, { "epoch": 0.9757170657416026, "grad_norm": 0.15106819570064545, "learning_rate": 0.001, "loss": 2.0871, "step": 23064 }, { "epoch": 0.9757593705051189, "grad_norm": 0.16280482709407806, "learning_rate": 0.001, "loss": 1.6354, "step": 23065 }, { "epoch": 0.9758016752686353, "grad_norm": 0.19860334694385529, "learning_rate": 0.001, "loss": 1.9093, "step": 23066 }, { "epoch": 0.9758439800321517, "grad_norm": 0.14197562634944916, "learning_rate": 0.001, "loss": 1.5894, "step": 23067 }, { "epoch": 0.975886284795668, "grad_norm": 0.16649185121059418, "learning_rate": 0.001, "loss": 2.0655, "step": 23068 }, { "epoch": 0.9759285895591844, "grad_norm": 0.15236613154411316, "learning_rate": 0.001, "loss": 1.4908, "step": 23069 }, { "epoch": 0.9759708943227008, "grad_norm": 0.7669524550437927, "learning_rate": 0.001, "loss": 1.9742, "step": 23070 }, { "epoch": 0.9760131990862171, "grad_norm": 0.2197335958480835, "learning_rate": 0.001, "loss": 1.8815, "step": 23071 }, { "epoch": 0.9760555038497335, "grad_norm": 0.18375352025032043, "learning_rate": 0.001, "loss": 2.4837, "step": 23072 }, { "epoch": 0.9760978086132499, "grad_norm": 0.31479236483573914, "learning_rate": 0.001, "loss": 1.4933, "step": 23073 }, { "epoch": 0.9761401133767662, "grad_norm": 6.906200408935547, "learning_rate": 0.001, "loss": 2.0665, "step": 23074 }, { "epoch": 0.9761824181402826, "grad_norm": 0.17486466467380524, "learning_rate": 0.001, "loss": 3.0518, "step": 23075 }, { "epoch": 0.9762247229037989, "grad_norm": 0.16075573861598969, "learning_rate": 0.001, "loss": 2.0813, "step": 23076 }, { "epoch": 0.9762670276673153, "grad_norm": 0.16066281497478485, "learning_rate": 0.001, "loss": 3.0841, "step": 23077 }, { "epoch": 0.9763093324308317, "grad_norm": 0.20229189097881317, "learning_rate": 0.001, "loss": 2.0031, "step": 23078 }, { "epoch": 0.976351637194348, "grad_norm": 0.1783391684293747, "learning_rate": 0.001, "loss": 2.0519, "step": 23079 }, { "epoch": 0.9763939419578644, "grad_norm": 0.16642500460147858, "learning_rate": 0.001, "loss": 1.9304, "step": 23080 }, { "epoch": 0.9764362467213809, "grad_norm": 1.8268184661865234, "learning_rate": 0.001, "loss": 1.4835, "step": 23081 }, { "epoch": 0.9764785514848972, "grad_norm": 0.14119704067707062, "learning_rate": 0.001, "loss": 2.4155, "step": 23082 }, { "epoch": 0.9765208562484136, "grad_norm": 0.3065393567085266, "learning_rate": 0.001, "loss": 3.0736, "step": 23083 }, { "epoch": 0.97656316101193, "grad_norm": 1.7388895750045776, "learning_rate": 0.001, "loss": 1.6748, "step": 23084 }, { "epoch": 0.9766054657754463, "grad_norm": 0.18974030017852783, "learning_rate": 0.001, "loss": 2.2323, "step": 23085 }, { "epoch": 0.9766477705389627, "grad_norm": 0.20437590777873993, "learning_rate": 0.001, "loss": 2.0711, "step": 23086 }, { "epoch": 0.9766900753024791, "grad_norm": 0.144393190741539, "learning_rate": 0.001, "loss": 1.9763, "step": 23087 }, { "epoch": 0.9767323800659954, "grad_norm": 0.1816982924938202, "learning_rate": 0.001, "loss": 2.3921, "step": 23088 }, { "epoch": 0.9767746848295118, "grad_norm": 0.24456720054149628, "learning_rate": 0.001, "loss": 2.6854, "step": 23089 }, { "epoch": 0.9768169895930282, "grad_norm": 0.17447160184383392, "learning_rate": 0.001, "loss": 1.757, "step": 23090 }, { "epoch": 0.9768592943565445, "grad_norm": 25.030933380126953, "learning_rate": 0.001, "loss": 2.1183, "step": 23091 }, { "epoch": 0.9769015991200609, "grad_norm": 0.20335282385349274, "learning_rate": 0.001, "loss": 2.738, "step": 23092 }, { "epoch": 0.9769439038835773, "grad_norm": 0.4485716223716736, "learning_rate": 0.001, "loss": 1.7982, "step": 23093 }, { "epoch": 0.9769862086470936, "grad_norm": 0.2497912049293518, "learning_rate": 0.001, "loss": 2.0586, "step": 23094 }, { "epoch": 0.97702851341061, "grad_norm": 0.19284145534038544, "learning_rate": 0.001, "loss": 1.5553, "step": 23095 }, { "epoch": 0.9770708181741264, "grad_norm": 0.14969174563884735, "learning_rate": 0.001, "loss": 1.8045, "step": 23096 }, { "epoch": 0.9771131229376427, "grad_norm": 0.18470831215381622, "learning_rate": 0.001, "loss": 2.3824, "step": 23097 }, { "epoch": 0.9771554277011592, "grad_norm": 0.1780456155538559, "learning_rate": 0.001, "loss": 2.5985, "step": 23098 }, { "epoch": 0.9771977324646756, "grad_norm": 0.18881769478321075, "learning_rate": 0.001, "loss": 2.6814, "step": 23099 }, { "epoch": 0.9772400372281919, "grad_norm": 0.160416379570961, "learning_rate": 0.001, "loss": 2.7233, "step": 23100 }, { "epoch": 0.9772823419917083, "grad_norm": 0.1704859733581543, "learning_rate": 0.001, "loss": 2.3356, "step": 23101 }, { "epoch": 0.9773246467552247, "grad_norm": 0.15877144038677216, "learning_rate": 0.001, "loss": 1.8551, "step": 23102 }, { "epoch": 0.977366951518741, "grad_norm": 0.15213461220264435, "learning_rate": 0.001, "loss": 2.2297, "step": 23103 }, { "epoch": 0.9774092562822574, "grad_norm": 0.23088282346725464, "learning_rate": 0.001, "loss": 2.2761, "step": 23104 }, { "epoch": 0.9774515610457738, "grad_norm": 3.229534864425659, "learning_rate": 0.001, "loss": 3.1849, "step": 23105 }, { "epoch": 0.9774938658092901, "grad_norm": 0.14823846518993378, "learning_rate": 0.001, "loss": 2.04, "step": 23106 }, { "epoch": 0.9775361705728065, "grad_norm": 0.13392066955566406, "learning_rate": 0.001, "loss": 2.1271, "step": 23107 }, { "epoch": 0.9775784753363229, "grad_norm": 0.251636266708374, "learning_rate": 0.001, "loss": 2.1417, "step": 23108 }, { "epoch": 0.9776207800998392, "grad_norm": 0.12352551519870758, "learning_rate": 0.001, "loss": 1.8046, "step": 23109 }, { "epoch": 0.9776630848633556, "grad_norm": 0.44623270630836487, "learning_rate": 0.001, "loss": 3.5603, "step": 23110 }, { "epoch": 0.977705389626872, "grad_norm": 0.46664145588874817, "learning_rate": 0.001, "loss": 1.9793, "step": 23111 }, { "epoch": 0.9777476943903883, "grad_norm": 0.19913674890995026, "learning_rate": 0.001, "loss": 2.2404, "step": 23112 }, { "epoch": 0.9777899991539047, "grad_norm": 0.14924490451812744, "learning_rate": 0.001, "loss": 1.7245, "step": 23113 }, { "epoch": 0.9778323039174212, "grad_norm": 0.17213156819343567, "learning_rate": 0.001, "loss": 1.6626, "step": 23114 }, { "epoch": 0.9778746086809375, "grad_norm": 0.16330088675022125, "learning_rate": 0.001, "loss": 1.6231, "step": 23115 }, { "epoch": 0.9779169134444539, "grad_norm": 0.5413417816162109, "learning_rate": 0.001, "loss": 2.2929, "step": 23116 }, { "epoch": 0.9779592182079703, "grad_norm": 0.8194732069969177, "learning_rate": 0.001, "loss": 2.0811, "step": 23117 }, { "epoch": 0.9780015229714866, "grad_norm": 0.3619847297668457, "learning_rate": 0.001, "loss": 2.1488, "step": 23118 }, { "epoch": 0.978043827735003, "grad_norm": 0.2249876707792282, "learning_rate": 0.001, "loss": 2.3759, "step": 23119 }, { "epoch": 0.9780861324985193, "grad_norm": 2.6536123752593994, "learning_rate": 0.001, "loss": 2.256, "step": 23120 }, { "epoch": 0.9781284372620357, "grad_norm": 0.1619919091463089, "learning_rate": 0.001, "loss": 2.3861, "step": 23121 }, { "epoch": 0.9781707420255521, "grad_norm": 0.16015075147151947, "learning_rate": 0.001, "loss": 1.9889, "step": 23122 }, { "epoch": 0.9782130467890684, "grad_norm": 0.14887668192386627, "learning_rate": 0.001, "loss": 2.1329, "step": 23123 }, { "epoch": 0.9782553515525848, "grad_norm": 0.14117036759853363, "learning_rate": 0.001, "loss": 2.0059, "step": 23124 }, { "epoch": 0.9782976563161012, "grad_norm": 0.8189803957939148, "learning_rate": 0.001, "loss": 2.0877, "step": 23125 }, { "epoch": 0.9783399610796175, "grad_norm": 0.93526691198349, "learning_rate": 0.001, "loss": 1.3733, "step": 23126 }, { "epoch": 0.9783822658431339, "grad_norm": 0.15379607677459717, "learning_rate": 0.001, "loss": 2.3471, "step": 23127 }, { "epoch": 0.9784245706066503, "grad_norm": 0.1474200189113617, "learning_rate": 0.001, "loss": 2.0631, "step": 23128 }, { "epoch": 0.9784668753701666, "grad_norm": 0.14249064028263092, "learning_rate": 0.001, "loss": 1.8189, "step": 23129 }, { "epoch": 0.978509180133683, "grad_norm": 0.17934098839759827, "learning_rate": 0.001, "loss": 1.7359, "step": 23130 }, { "epoch": 0.9785514848971995, "grad_norm": 0.16001644730567932, "learning_rate": 0.001, "loss": 1.8006, "step": 23131 }, { "epoch": 0.9785937896607158, "grad_norm": 0.1634327471256256, "learning_rate": 0.001, "loss": 1.937, "step": 23132 }, { "epoch": 0.9786360944242322, "grad_norm": 0.1568276584148407, "learning_rate": 0.001, "loss": 1.8826, "step": 23133 }, { "epoch": 0.9786783991877486, "grad_norm": 0.17905938625335693, "learning_rate": 0.001, "loss": 1.6522, "step": 23134 }, { "epoch": 0.9787207039512649, "grad_norm": 0.2984275817871094, "learning_rate": 0.001, "loss": 1.7585, "step": 23135 }, { "epoch": 0.9787630087147813, "grad_norm": 0.16698578000068665, "learning_rate": 0.001, "loss": 2.5787, "step": 23136 }, { "epoch": 0.9788053134782977, "grad_norm": 0.13776206970214844, "learning_rate": 0.001, "loss": 1.9259, "step": 23137 }, { "epoch": 0.978847618241814, "grad_norm": 0.1826232522726059, "learning_rate": 0.001, "loss": 2.3701, "step": 23138 }, { "epoch": 0.9788899230053304, "grad_norm": 0.14153793454170227, "learning_rate": 0.001, "loss": 1.9833, "step": 23139 }, { "epoch": 0.9789322277688468, "grad_norm": 0.19493409991264343, "learning_rate": 0.001, "loss": 3.4682, "step": 23140 }, { "epoch": 0.9789745325323631, "grad_norm": 0.13115043938159943, "learning_rate": 0.001, "loss": 1.2842, "step": 23141 }, { "epoch": 0.9790168372958795, "grad_norm": 0.13824576139450073, "learning_rate": 0.001, "loss": 2.2034, "step": 23142 }, { "epoch": 0.9790591420593959, "grad_norm": 0.17789553105831146, "learning_rate": 0.001, "loss": 3.4185, "step": 23143 }, { "epoch": 0.9791014468229122, "grad_norm": 0.1795099377632141, "learning_rate": 0.001, "loss": 2.0876, "step": 23144 }, { "epoch": 0.9791437515864286, "grad_norm": 0.15983949601650238, "learning_rate": 0.001, "loss": 1.6265, "step": 23145 }, { "epoch": 0.979186056349945, "grad_norm": 0.1699821501970291, "learning_rate": 0.001, "loss": 2.6895, "step": 23146 }, { "epoch": 0.9792283611134613, "grad_norm": 2.1327931880950928, "learning_rate": 0.001, "loss": 1.7483, "step": 23147 }, { "epoch": 0.9792706658769778, "grad_norm": 0.14410525560379028, "learning_rate": 0.001, "loss": 2.2787, "step": 23148 }, { "epoch": 0.9793129706404942, "grad_norm": 0.14737246930599213, "learning_rate": 0.001, "loss": 2.1674, "step": 23149 }, { "epoch": 0.9793552754040105, "grad_norm": 0.17130975425243378, "learning_rate": 0.001, "loss": 2.8032, "step": 23150 }, { "epoch": 0.9793975801675269, "grad_norm": 0.1790180206298828, "learning_rate": 0.001, "loss": 2.6401, "step": 23151 }, { "epoch": 0.9794398849310433, "grad_norm": 0.1794183999300003, "learning_rate": 0.001, "loss": 1.896, "step": 23152 }, { "epoch": 0.9794821896945596, "grad_norm": 0.12838570773601532, "learning_rate": 0.001, "loss": 1.8266, "step": 23153 }, { "epoch": 0.979524494458076, "grad_norm": 0.18338772654533386, "learning_rate": 0.001, "loss": 1.7244, "step": 23154 }, { "epoch": 0.9795667992215924, "grad_norm": 0.17173555493354797, "learning_rate": 0.001, "loss": 1.6205, "step": 23155 }, { "epoch": 0.9796091039851087, "grad_norm": 0.18049339950084686, "learning_rate": 0.001, "loss": 2.2256, "step": 23156 }, { "epoch": 0.9796514087486251, "grad_norm": 0.13144956529140472, "learning_rate": 0.001, "loss": 2.6915, "step": 23157 }, { "epoch": 0.9796937135121415, "grad_norm": 0.1813870072364807, "learning_rate": 0.001, "loss": 1.9047, "step": 23158 }, { "epoch": 0.9797360182756578, "grad_norm": 1.0048320293426514, "learning_rate": 0.001, "loss": 2.8726, "step": 23159 }, { "epoch": 0.9797783230391742, "grad_norm": 0.1345098316669464, "learning_rate": 0.001, "loss": 1.4787, "step": 23160 }, { "epoch": 0.9798206278026906, "grad_norm": 0.1486302614212036, "learning_rate": 0.001, "loss": 2.3496, "step": 23161 }, { "epoch": 0.9798629325662069, "grad_norm": 0.198981374502182, "learning_rate": 0.001, "loss": 1.918, "step": 23162 }, { "epoch": 0.9799052373297233, "grad_norm": 0.37297117710113525, "learning_rate": 0.001, "loss": 3.412, "step": 23163 }, { "epoch": 0.9799475420932398, "grad_norm": 0.18499860167503357, "learning_rate": 0.001, "loss": 2.2248, "step": 23164 }, { "epoch": 0.979989846856756, "grad_norm": 0.16248784959316254, "learning_rate": 0.001, "loss": 2.2262, "step": 23165 }, { "epoch": 0.9800321516202725, "grad_norm": 0.13412149250507355, "learning_rate": 0.001, "loss": 1.9383, "step": 23166 }, { "epoch": 0.9800744563837888, "grad_norm": 0.14490722119808197, "learning_rate": 0.001, "loss": 1.7336, "step": 23167 }, { "epoch": 0.9801167611473052, "grad_norm": 0.16167306900024414, "learning_rate": 0.001, "loss": 3.7023, "step": 23168 }, { "epoch": 0.9801590659108216, "grad_norm": 0.2081134021282196, "learning_rate": 0.001, "loss": 3.0097, "step": 23169 }, { "epoch": 0.9802013706743379, "grad_norm": 0.15773500502109528, "learning_rate": 0.001, "loss": 1.7977, "step": 23170 }, { "epoch": 0.9802436754378543, "grad_norm": 0.2358037680387497, "learning_rate": 0.001, "loss": 2.6206, "step": 23171 }, { "epoch": 0.9802859802013707, "grad_norm": 0.17311686277389526, "learning_rate": 0.001, "loss": 2.619, "step": 23172 }, { "epoch": 0.980328284964887, "grad_norm": 15.802042961120605, "learning_rate": 0.001, "loss": 1.6373, "step": 23173 }, { "epoch": 0.9803705897284034, "grad_norm": 0.14769822359085083, "learning_rate": 0.001, "loss": 1.7526, "step": 23174 }, { "epoch": 0.9804128944919198, "grad_norm": 2.210271120071411, "learning_rate": 0.001, "loss": 1.7164, "step": 23175 }, { "epoch": 0.9804551992554361, "grad_norm": 0.15342247486114502, "learning_rate": 0.001, "loss": 3.1296, "step": 23176 }, { "epoch": 0.9804975040189525, "grad_norm": 0.1497957855463028, "learning_rate": 0.001, "loss": 2.6274, "step": 23177 }, { "epoch": 0.9805398087824689, "grad_norm": 0.1614879071712494, "learning_rate": 0.001, "loss": 1.4673, "step": 23178 }, { "epoch": 0.9805821135459852, "grad_norm": 0.1438165307044983, "learning_rate": 0.001, "loss": 3.2337, "step": 23179 }, { "epoch": 0.9806244183095016, "grad_norm": 0.1747131198644638, "learning_rate": 0.001, "loss": 2.1779, "step": 23180 }, { "epoch": 0.980666723073018, "grad_norm": 0.15409283339977264, "learning_rate": 0.001, "loss": 2.4793, "step": 23181 }, { "epoch": 0.9807090278365344, "grad_norm": 0.1740875393152237, "learning_rate": 0.001, "loss": 2.0542, "step": 23182 }, { "epoch": 0.9807513326000508, "grad_norm": 0.13922379910945892, "learning_rate": 0.001, "loss": 2.1908, "step": 23183 }, { "epoch": 0.9807936373635672, "grad_norm": 0.20002827048301697, "learning_rate": 0.001, "loss": 1.9136, "step": 23184 }, { "epoch": 0.9808359421270835, "grad_norm": 0.13429036736488342, "learning_rate": 0.001, "loss": 1.3618, "step": 23185 }, { "epoch": 0.9808782468905999, "grad_norm": 0.16126492619514465, "learning_rate": 0.001, "loss": 2.7224, "step": 23186 }, { "epoch": 0.9809205516541163, "grad_norm": 0.14296741783618927, "learning_rate": 0.001, "loss": 3.2186, "step": 23187 }, { "epoch": 0.9809628564176326, "grad_norm": 5.455838203430176, "learning_rate": 0.001, "loss": 2.5349, "step": 23188 }, { "epoch": 0.981005161181149, "grad_norm": 0.13420797884464264, "learning_rate": 0.001, "loss": 1.4715, "step": 23189 }, { "epoch": 0.9810474659446654, "grad_norm": 0.2915991544723511, "learning_rate": 0.001, "loss": 1.9732, "step": 23190 }, { "epoch": 0.9810897707081817, "grad_norm": 0.12874186038970947, "learning_rate": 0.001, "loss": 1.8352, "step": 23191 }, { "epoch": 0.9811320754716981, "grad_norm": 0.14853565394878387, "learning_rate": 0.001, "loss": 2.8209, "step": 23192 }, { "epoch": 0.9811743802352145, "grad_norm": 0.14589852094650269, "learning_rate": 0.001, "loss": 2.0377, "step": 23193 }, { "epoch": 0.9812166849987308, "grad_norm": 0.15394212305545807, "learning_rate": 0.001, "loss": 1.9553, "step": 23194 }, { "epoch": 0.9812589897622472, "grad_norm": 0.1757000833749771, "learning_rate": 0.001, "loss": 2.2428, "step": 23195 }, { "epoch": 0.9813012945257636, "grad_norm": 0.15882089734077454, "learning_rate": 0.001, "loss": 2.0133, "step": 23196 }, { "epoch": 0.9813435992892799, "grad_norm": 0.14844751358032227, "learning_rate": 0.001, "loss": 1.7509, "step": 23197 }, { "epoch": 0.9813859040527964, "grad_norm": 0.16744114458560944, "learning_rate": 0.001, "loss": 1.7146, "step": 23198 }, { "epoch": 0.9814282088163128, "grad_norm": 0.1688256859779358, "learning_rate": 0.001, "loss": 3.276, "step": 23199 }, { "epoch": 0.9814705135798291, "grad_norm": 4.072800159454346, "learning_rate": 0.001, "loss": 2.5557, "step": 23200 }, { "epoch": 0.9815128183433455, "grad_norm": 0.1719663292169571, "learning_rate": 0.001, "loss": 1.8419, "step": 23201 }, { "epoch": 0.9815551231068619, "grad_norm": 0.15536484122276306, "learning_rate": 0.001, "loss": 1.8884, "step": 23202 }, { "epoch": 0.9815974278703782, "grad_norm": 1.0643222332000732, "learning_rate": 0.001, "loss": 1.9732, "step": 23203 }, { "epoch": 0.9816397326338946, "grad_norm": 0.19347727298736572, "learning_rate": 0.001, "loss": 2.0092, "step": 23204 }, { "epoch": 0.981682037397411, "grad_norm": 0.14213740825653076, "learning_rate": 0.001, "loss": 2.3664, "step": 23205 }, { "epoch": 0.9817243421609273, "grad_norm": 0.5251598954200745, "learning_rate": 0.001, "loss": 2.6287, "step": 23206 }, { "epoch": 0.9817666469244437, "grad_norm": 0.19425900280475616, "learning_rate": 0.001, "loss": 2.4588, "step": 23207 }, { "epoch": 0.9818089516879601, "grad_norm": 1.0385212898254395, "learning_rate": 0.001, "loss": 1.961, "step": 23208 }, { "epoch": 0.9818512564514764, "grad_norm": 3.3503012657165527, "learning_rate": 0.001, "loss": 2.6355, "step": 23209 }, { "epoch": 0.9818935612149928, "grad_norm": 0.344450980424881, "learning_rate": 0.001, "loss": 3.4627, "step": 23210 }, { "epoch": 0.9819358659785091, "grad_norm": 0.3758525252342224, "learning_rate": 0.001, "loss": 2.3434, "step": 23211 }, { "epoch": 0.9819781707420255, "grad_norm": 0.23000246286392212, "learning_rate": 0.001, "loss": 2.1027, "step": 23212 }, { "epoch": 0.982020475505542, "grad_norm": 0.21098732948303223, "learning_rate": 0.001, "loss": 1.8135, "step": 23213 }, { "epoch": 0.9820627802690582, "grad_norm": 0.3435835540294647, "learning_rate": 0.001, "loss": 2.3019, "step": 23214 }, { "epoch": 0.9821050850325747, "grad_norm": 0.1896451711654663, "learning_rate": 0.001, "loss": 3.6827, "step": 23215 }, { "epoch": 0.9821473897960911, "grad_norm": 0.9407826066017151, "learning_rate": 0.001, "loss": 2.207, "step": 23216 }, { "epoch": 0.9821896945596074, "grad_norm": 0.17004285752773285, "learning_rate": 0.001, "loss": 1.6758, "step": 23217 }, { "epoch": 0.9822319993231238, "grad_norm": 0.21439626812934875, "learning_rate": 0.001, "loss": 2.7161, "step": 23218 }, { "epoch": 0.9822743040866402, "grad_norm": 0.6642925143241882, "learning_rate": 0.001, "loss": 2.7868, "step": 23219 }, { "epoch": 0.9823166088501565, "grad_norm": 0.20337320864200592, "learning_rate": 0.001, "loss": 1.9839, "step": 23220 }, { "epoch": 0.9823589136136729, "grad_norm": 0.15737198293209076, "learning_rate": 0.001, "loss": 2.3604, "step": 23221 }, { "epoch": 0.9824012183771893, "grad_norm": 0.6608482599258423, "learning_rate": 0.001, "loss": 1.6606, "step": 23222 }, { "epoch": 0.9824435231407056, "grad_norm": 0.11500653624534607, "learning_rate": 0.001, "loss": 2.2689, "step": 23223 }, { "epoch": 0.982485827904222, "grad_norm": 0.2476760298013687, "learning_rate": 0.001, "loss": 2.4611, "step": 23224 }, { "epoch": 0.9825281326677384, "grad_norm": 0.20634648203849792, "learning_rate": 0.001, "loss": 3.2298, "step": 23225 }, { "epoch": 0.9825704374312547, "grad_norm": 0.174639493227005, "learning_rate": 0.001, "loss": 2.3492, "step": 23226 }, { "epoch": 0.9826127421947711, "grad_norm": 0.1685517132282257, "learning_rate": 0.001, "loss": 3.3922, "step": 23227 }, { "epoch": 0.9826550469582875, "grad_norm": 0.17105820775032043, "learning_rate": 0.001, "loss": 1.8573, "step": 23228 }, { "epoch": 0.9826973517218038, "grad_norm": 0.1790645271539688, "learning_rate": 0.001, "loss": 3.3843, "step": 23229 }, { "epoch": 0.9827396564853202, "grad_norm": 0.14651429653167725, "learning_rate": 0.001, "loss": 2.4584, "step": 23230 }, { "epoch": 0.9827819612488367, "grad_norm": 0.15420038998126984, "learning_rate": 0.001, "loss": 1.8876, "step": 23231 }, { "epoch": 0.982824266012353, "grad_norm": 0.13720165193080902, "learning_rate": 0.001, "loss": 1.6773, "step": 23232 }, { "epoch": 0.9828665707758694, "grad_norm": 0.3285228908061981, "learning_rate": 0.001, "loss": 2.1622, "step": 23233 }, { "epoch": 0.9829088755393858, "grad_norm": 0.22455190122127533, "learning_rate": 0.001, "loss": 3.7145, "step": 23234 }, { "epoch": 0.9829511803029021, "grad_norm": 0.14361530542373657, "learning_rate": 0.001, "loss": 2.551, "step": 23235 }, { "epoch": 0.9829934850664185, "grad_norm": 0.20710772275924683, "learning_rate": 0.001, "loss": 1.8731, "step": 23236 }, { "epoch": 0.9830357898299349, "grad_norm": 0.15071751177310944, "learning_rate": 0.001, "loss": 1.4985, "step": 23237 }, { "epoch": 0.9830780945934512, "grad_norm": 0.15085220336914062, "learning_rate": 0.001, "loss": 2.0827, "step": 23238 }, { "epoch": 0.9831203993569676, "grad_norm": 0.1446499526500702, "learning_rate": 0.001, "loss": 2.4606, "step": 23239 }, { "epoch": 0.983162704120484, "grad_norm": 0.1713118702173233, "learning_rate": 0.001, "loss": 1.947, "step": 23240 }, { "epoch": 0.9832050088840003, "grad_norm": 0.23264853656291962, "learning_rate": 0.001, "loss": 1.7186, "step": 23241 }, { "epoch": 0.9832473136475167, "grad_norm": 0.15223060548305511, "learning_rate": 0.001, "loss": 2.3135, "step": 23242 }, { "epoch": 0.9832896184110331, "grad_norm": 2.1504344940185547, "learning_rate": 0.001, "loss": 2.345, "step": 23243 }, { "epoch": 0.9833319231745494, "grad_norm": 0.2231014370918274, "learning_rate": 0.001, "loss": 2.4845, "step": 23244 }, { "epoch": 0.9833742279380658, "grad_norm": 30.52190399169922, "learning_rate": 0.001, "loss": 1.5587, "step": 23245 }, { "epoch": 0.9834165327015822, "grad_norm": 0.1924566775560379, "learning_rate": 0.001, "loss": 1.8833, "step": 23246 }, { "epoch": 0.9834588374650985, "grad_norm": 0.17296500504016876, "learning_rate": 0.001, "loss": 1.9576, "step": 23247 }, { "epoch": 0.983501142228615, "grad_norm": 0.17767272889614105, "learning_rate": 0.001, "loss": 2.5095, "step": 23248 }, { "epoch": 0.9835434469921314, "grad_norm": 16.753442764282227, "learning_rate": 0.001, "loss": 2.5697, "step": 23249 }, { "epoch": 0.9835857517556477, "grad_norm": 0.19115328788757324, "learning_rate": 0.001, "loss": 1.6478, "step": 23250 }, { "epoch": 0.9836280565191641, "grad_norm": 22.24527359008789, "learning_rate": 0.001, "loss": 2.6725, "step": 23251 }, { "epoch": 0.9836703612826805, "grad_norm": 2.2926394939422607, "learning_rate": 0.001, "loss": 1.8396, "step": 23252 }, { "epoch": 0.9837126660461968, "grad_norm": 0.22507277131080627, "learning_rate": 0.001, "loss": 2.7259, "step": 23253 }, { "epoch": 0.9837549708097132, "grad_norm": 0.20244504511356354, "learning_rate": 0.001, "loss": 1.8665, "step": 23254 }, { "epoch": 0.9837972755732295, "grad_norm": 0.17822127044200897, "learning_rate": 0.001, "loss": 2.7145, "step": 23255 }, { "epoch": 0.9838395803367459, "grad_norm": 0.21497797966003418, "learning_rate": 0.001, "loss": 1.9941, "step": 23256 }, { "epoch": 0.9838818851002623, "grad_norm": 0.2271234095096588, "learning_rate": 0.001, "loss": 2.2163, "step": 23257 }, { "epoch": 0.9839241898637786, "grad_norm": 1.1759521961212158, "learning_rate": 0.001, "loss": 2.7644, "step": 23258 }, { "epoch": 0.983966494627295, "grad_norm": 0.19246691465377808, "learning_rate": 0.001, "loss": 1.9231, "step": 23259 }, { "epoch": 0.9840087993908114, "grad_norm": 4.506860733032227, "learning_rate": 0.001, "loss": 2.4722, "step": 23260 }, { "epoch": 0.9840511041543277, "grad_norm": 0.20085294544696808, "learning_rate": 0.001, "loss": 3.0157, "step": 23261 }, { "epoch": 0.9840934089178441, "grad_norm": 1.484424352645874, "learning_rate": 0.001, "loss": 2.2361, "step": 23262 }, { "epoch": 0.9841357136813605, "grad_norm": 0.21662937104701996, "learning_rate": 0.001, "loss": 2.2051, "step": 23263 }, { "epoch": 0.9841780184448768, "grad_norm": 0.20913396775722504, "learning_rate": 0.001, "loss": 1.697, "step": 23264 }, { "epoch": 0.9842203232083933, "grad_norm": 0.19714033603668213, "learning_rate": 0.001, "loss": 2.0469, "step": 23265 }, { "epoch": 0.9842626279719097, "grad_norm": 0.18258053064346313, "learning_rate": 0.001, "loss": 2.446, "step": 23266 }, { "epoch": 0.984304932735426, "grad_norm": 0.43898874521255493, "learning_rate": 0.001, "loss": 1.9534, "step": 23267 }, { "epoch": 0.9843472374989424, "grad_norm": 0.16025614738464355, "learning_rate": 0.001, "loss": 2.2371, "step": 23268 }, { "epoch": 0.9843895422624588, "grad_norm": 0.4538215100765228, "learning_rate": 0.001, "loss": 1.9265, "step": 23269 }, { "epoch": 0.9844318470259751, "grad_norm": 0.18556450307369232, "learning_rate": 0.001, "loss": 1.5331, "step": 23270 }, { "epoch": 0.9844741517894915, "grad_norm": 0.15075351297855377, "learning_rate": 0.001, "loss": 3.1292, "step": 23271 }, { "epoch": 0.9845164565530079, "grad_norm": 19.696006774902344, "learning_rate": 0.001, "loss": 2.7711, "step": 23272 }, { "epoch": 0.9845587613165242, "grad_norm": 0.22151979804039001, "learning_rate": 0.001, "loss": 1.6431, "step": 23273 }, { "epoch": 0.9846010660800406, "grad_norm": 0.27550455927848816, "learning_rate": 0.001, "loss": 2.4787, "step": 23274 }, { "epoch": 0.984643370843557, "grad_norm": 0.22198323905467987, "learning_rate": 0.001, "loss": 1.6055, "step": 23275 }, { "epoch": 0.9846856756070733, "grad_norm": 0.20321761071681976, "learning_rate": 0.001, "loss": 2.4743, "step": 23276 }, { "epoch": 0.9847279803705897, "grad_norm": 0.19668048620224, "learning_rate": 0.001, "loss": 1.825, "step": 23277 }, { "epoch": 0.9847702851341061, "grad_norm": 0.7689309120178223, "learning_rate": 0.001, "loss": 3.5899, "step": 23278 }, { "epoch": 0.9848125898976224, "grad_norm": 0.18673300743103027, "learning_rate": 0.001, "loss": 2.8243, "step": 23279 }, { "epoch": 0.9848548946611388, "grad_norm": 1.7281594276428223, "learning_rate": 0.001, "loss": 1.5403, "step": 23280 }, { "epoch": 0.9848971994246553, "grad_norm": 0.15205202996730804, "learning_rate": 0.001, "loss": 3.1236, "step": 23281 }, { "epoch": 0.9849395041881716, "grad_norm": 0.14774078130722046, "learning_rate": 0.001, "loss": 2.1822, "step": 23282 }, { "epoch": 0.984981808951688, "grad_norm": 0.15019498765468597, "learning_rate": 0.001, "loss": 1.8555, "step": 23283 }, { "epoch": 0.9850241137152044, "grad_norm": 0.17441150546073914, "learning_rate": 0.001, "loss": 1.8417, "step": 23284 }, { "epoch": 0.9850664184787207, "grad_norm": 0.16328319907188416, "learning_rate": 0.001, "loss": 1.957, "step": 23285 }, { "epoch": 0.9851087232422371, "grad_norm": 1.1682124137878418, "learning_rate": 0.001, "loss": 1.9435, "step": 23286 }, { "epoch": 0.9851510280057535, "grad_norm": 0.8976200819015503, "learning_rate": 0.001, "loss": 2.9577, "step": 23287 }, { "epoch": 0.9851933327692698, "grad_norm": 0.18787872791290283, "learning_rate": 0.001, "loss": 2.205, "step": 23288 }, { "epoch": 0.9852356375327862, "grad_norm": 0.17400221526622772, "learning_rate": 0.001, "loss": 1.8947, "step": 23289 }, { "epoch": 0.9852779422963026, "grad_norm": 0.16904416680335999, "learning_rate": 0.001, "loss": 1.7336, "step": 23290 }, { "epoch": 0.9853202470598189, "grad_norm": 0.2862572968006134, "learning_rate": 0.001, "loss": 2.0924, "step": 23291 }, { "epoch": 0.9853625518233353, "grad_norm": 0.18694454431533813, "learning_rate": 0.001, "loss": 2.4069, "step": 23292 }, { "epoch": 0.9854048565868517, "grad_norm": 0.17351536452770233, "learning_rate": 0.001, "loss": 2.3785, "step": 23293 }, { "epoch": 0.985447161350368, "grad_norm": 0.16305550932884216, "learning_rate": 0.001, "loss": 2.5295, "step": 23294 }, { "epoch": 0.9854894661138844, "grad_norm": 0.28731977939605713, "learning_rate": 0.001, "loss": 1.5698, "step": 23295 }, { "epoch": 0.9855317708774008, "grad_norm": 0.19942089915275574, "learning_rate": 0.001, "loss": 2.6501, "step": 23296 }, { "epoch": 0.9855740756409171, "grad_norm": 0.15859198570251465, "learning_rate": 0.001, "loss": 1.9334, "step": 23297 }, { "epoch": 0.9856163804044336, "grad_norm": 0.17347994446754456, "learning_rate": 0.001, "loss": 2.281, "step": 23298 }, { "epoch": 0.98565868516795, "grad_norm": 0.1470501720905304, "learning_rate": 0.001, "loss": 2.8596, "step": 23299 }, { "epoch": 0.9857009899314663, "grad_norm": 0.17378562688827515, "learning_rate": 0.001, "loss": 2.6134, "step": 23300 }, { "epoch": 0.9857432946949827, "grad_norm": 2.3224494457244873, "learning_rate": 0.001, "loss": 1.8163, "step": 23301 }, { "epoch": 0.985785599458499, "grad_norm": 0.15434542298316956, "learning_rate": 0.001, "loss": 2.0795, "step": 23302 }, { "epoch": 0.9858279042220154, "grad_norm": 0.17513182759284973, "learning_rate": 0.001, "loss": 1.9282, "step": 23303 }, { "epoch": 0.9858702089855318, "grad_norm": 0.18898020684719086, "learning_rate": 0.001, "loss": 2.8024, "step": 23304 }, { "epoch": 0.9859125137490481, "grad_norm": 0.5918177366256714, "learning_rate": 0.001, "loss": 2.3883, "step": 23305 }, { "epoch": 0.9859548185125645, "grad_norm": 0.13751918077468872, "learning_rate": 0.001, "loss": 1.7054, "step": 23306 }, { "epoch": 0.9859971232760809, "grad_norm": 0.1347641944885254, "learning_rate": 0.001, "loss": 1.615, "step": 23307 }, { "epoch": 0.9860394280395972, "grad_norm": 0.21464622020721436, "learning_rate": 0.001, "loss": 2.3099, "step": 23308 }, { "epoch": 0.9860817328031136, "grad_norm": 0.15518558025360107, "learning_rate": 0.001, "loss": 2.1591, "step": 23309 }, { "epoch": 0.98612403756663, "grad_norm": 0.1407310962677002, "learning_rate": 0.001, "loss": 1.9204, "step": 23310 }, { "epoch": 0.9861663423301463, "grad_norm": 0.11285633593797684, "learning_rate": 0.001, "loss": 1.7241, "step": 23311 }, { "epoch": 0.9862086470936627, "grad_norm": 8.615754127502441, "learning_rate": 0.001, "loss": 2.5709, "step": 23312 }, { "epoch": 0.9862509518571791, "grad_norm": 0.13533753156661987, "learning_rate": 0.001, "loss": 1.6491, "step": 23313 }, { "epoch": 0.9862932566206954, "grad_norm": 0.16837237775325775, "learning_rate": 0.001, "loss": 2.4037, "step": 23314 }, { "epoch": 0.9863355613842119, "grad_norm": 0.14844800531864166, "learning_rate": 0.001, "loss": 1.6873, "step": 23315 }, { "epoch": 0.9863778661477283, "grad_norm": 0.13803304731845856, "learning_rate": 0.001, "loss": 2.2417, "step": 23316 }, { "epoch": 0.9864201709112446, "grad_norm": 0.19187100231647491, "learning_rate": 0.001, "loss": 1.9479, "step": 23317 }, { "epoch": 0.986462475674761, "grad_norm": 1.3388183116912842, "learning_rate": 0.001, "loss": 1.8091, "step": 23318 }, { "epoch": 0.9865047804382774, "grad_norm": 0.18258285522460938, "learning_rate": 0.001, "loss": 2.8915, "step": 23319 }, { "epoch": 0.9865470852017937, "grad_norm": 0.1443546712398529, "learning_rate": 0.001, "loss": 2.0421, "step": 23320 }, { "epoch": 0.9865893899653101, "grad_norm": 0.16136842966079712, "learning_rate": 0.001, "loss": 2.1917, "step": 23321 }, { "epoch": 0.9866316947288265, "grad_norm": 0.13147707283496857, "learning_rate": 0.001, "loss": 1.6761, "step": 23322 }, { "epoch": 0.9866739994923428, "grad_norm": 0.18694503605365753, "learning_rate": 0.001, "loss": 2.0926, "step": 23323 }, { "epoch": 0.9867163042558592, "grad_norm": 0.1893046796321869, "learning_rate": 0.001, "loss": 2.8645, "step": 23324 }, { "epoch": 0.9867586090193756, "grad_norm": 0.2780762314796448, "learning_rate": 0.001, "loss": 2.0847, "step": 23325 }, { "epoch": 0.9868009137828919, "grad_norm": 0.19630302488803864, "learning_rate": 0.001, "loss": 2.0885, "step": 23326 }, { "epoch": 0.9868432185464083, "grad_norm": 0.1403011828660965, "learning_rate": 0.001, "loss": 1.564, "step": 23327 }, { "epoch": 0.9868855233099247, "grad_norm": 0.8801196813583374, "learning_rate": 0.001, "loss": 2.5189, "step": 23328 }, { "epoch": 0.986927828073441, "grad_norm": 0.13722547888755798, "learning_rate": 0.001, "loss": 2.0738, "step": 23329 }, { "epoch": 0.9869701328369574, "grad_norm": 0.42092952132225037, "learning_rate": 0.001, "loss": 1.6655, "step": 23330 }, { "epoch": 0.9870124376004739, "grad_norm": 0.19227460026741028, "learning_rate": 0.001, "loss": 2.1327, "step": 23331 }, { "epoch": 0.9870547423639902, "grad_norm": 0.14953431487083435, "learning_rate": 0.001, "loss": 1.5976, "step": 23332 }, { "epoch": 0.9870970471275066, "grad_norm": 1.6279288530349731, "learning_rate": 0.001, "loss": 2.4999, "step": 23333 }, { "epoch": 0.987139351891023, "grad_norm": 0.15288491547107697, "learning_rate": 0.001, "loss": 1.9023, "step": 23334 }, { "epoch": 0.9871816566545393, "grad_norm": 0.16339105367660522, "learning_rate": 0.001, "loss": 1.5201, "step": 23335 }, { "epoch": 0.9872239614180557, "grad_norm": 0.19303716719150543, "learning_rate": 0.001, "loss": 2.255, "step": 23336 }, { "epoch": 0.9872662661815721, "grad_norm": 0.16261610388755798, "learning_rate": 0.001, "loss": 1.7459, "step": 23337 }, { "epoch": 0.9873085709450884, "grad_norm": 0.2173750400543213, "learning_rate": 0.001, "loss": 2.1332, "step": 23338 }, { "epoch": 0.9873508757086048, "grad_norm": 0.5034084320068359, "learning_rate": 0.001, "loss": 2.0107, "step": 23339 }, { "epoch": 0.9873931804721212, "grad_norm": 0.1737852692604065, "learning_rate": 0.001, "loss": 1.8748, "step": 23340 }, { "epoch": 0.9874354852356375, "grad_norm": 0.19651900231838226, "learning_rate": 0.001, "loss": 2.7285, "step": 23341 }, { "epoch": 0.9874777899991539, "grad_norm": 0.14730535447597504, "learning_rate": 0.001, "loss": 2.8565, "step": 23342 }, { "epoch": 0.9875200947626703, "grad_norm": 0.15681281685829163, "learning_rate": 0.001, "loss": 2.0837, "step": 23343 }, { "epoch": 0.9875623995261866, "grad_norm": 0.17979633808135986, "learning_rate": 0.001, "loss": 2.1084, "step": 23344 }, { "epoch": 0.987604704289703, "grad_norm": 0.29083535075187683, "learning_rate": 0.001, "loss": 2.7915, "step": 23345 }, { "epoch": 0.9876470090532193, "grad_norm": 0.16025808453559875, "learning_rate": 0.001, "loss": 2.1277, "step": 23346 }, { "epoch": 0.9876893138167357, "grad_norm": 0.187299445271492, "learning_rate": 0.001, "loss": 2.0213, "step": 23347 }, { "epoch": 0.9877316185802522, "grad_norm": 0.1659306287765503, "learning_rate": 0.001, "loss": 1.4863, "step": 23348 }, { "epoch": 0.9877739233437685, "grad_norm": 0.13653291761875153, "learning_rate": 0.001, "loss": 2.0724, "step": 23349 }, { "epoch": 0.9878162281072849, "grad_norm": 0.19335125386714935, "learning_rate": 0.001, "loss": 2.5707, "step": 23350 }, { "epoch": 0.9878585328708013, "grad_norm": 0.17489448189735413, "learning_rate": 0.001, "loss": 2.4171, "step": 23351 }, { "epoch": 0.9879008376343176, "grad_norm": 0.15927927196025848, "learning_rate": 0.001, "loss": 1.9707, "step": 23352 }, { "epoch": 0.987943142397834, "grad_norm": 0.13132639229297638, "learning_rate": 0.001, "loss": 1.7196, "step": 23353 }, { "epoch": 0.9879854471613504, "grad_norm": 0.7293382883071899, "learning_rate": 0.001, "loss": 2.0281, "step": 23354 }, { "epoch": 0.9880277519248667, "grad_norm": 0.151032954454422, "learning_rate": 0.001, "loss": 1.6911, "step": 23355 }, { "epoch": 0.9880700566883831, "grad_norm": 0.13139599561691284, "learning_rate": 0.001, "loss": 1.4657, "step": 23356 }, { "epoch": 0.9881123614518995, "grad_norm": 0.14863912761211395, "learning_rate": 0.001, "loss": 1.7769, "step": 23357 }, { "epoch": 0.9881546662154158, "grad_norm": 0.14839519560337067, "learning_rate": 0.001, "loss": 2.1032, "step": 23358 }, { "epoch": 0.9881969709789322, "grad_norm": 0.12207513302564621, "learning_rate": 0.001, "loss": 1.7226, "step": 23359 }, { "epoch": 0.9882392757424486, "grad_norm": 0.18079492449760437, "learning_rate": 0.001, "loss": 1.8141, "step": 23360 }, { "epoch": 0.9882815805059649, "grad_norm": 0.1407337784767151, "learning_rate": 0.001, "loss": 2.2357, "step": 23361 }, { "epoch": 0.9883238852694813, "grad_norm": 0.1415151059627533, "learning_rate": 0.001, "loss": 1.8046, "step": 23362 }, { "epoch": 0.9883661900329977, "grad_norm": 2.5134902000427246, "learning_rate": 0.001, "loss": 2.7073, "step": 23363 }, { "epoch": 0.988408494796514, "grad_norm": 0.16898906230926514, "learning_rate": 0.001, "loss": 3.0829, "step": 23364 }, { "epoch": 0.9884507995600305, "grad_norm": 0.13951623439788818, "learning_rate": 0.001, "loss": 2.0555, "step": 23365 }, { "epoch": 0.9884931043235469, "grad_norm": 0.14504913985729218, "learning_rate": 0.001, "loss": 2.0594, "step": 23366 }, { "epoch": 0.9885354090870632, "grad_norm": 0.5770459771156311, "learning_rate": 0.001, "loss": 2.0616, "step": 23367 }, { "epoch": 0.9885777138505796, "grad_norm": 0.16317375004291534, "learning_rate": 0.001, "loss": 2.1634, "step": 23368 }, { "epoch": 0.988620018614096, "grad_norm": 0.1316281110048294, "learning_rate": 0.001, "loss": 2.3575, "step": 23369 }, { "epoch": 0.9886623233776123, "grad_norm": 0.14289572834968567, "learning_rate": 0.001, "loss": 1.5892, "step": 23370 }, { "epoch": 0.9887046281411287, "grad_norm": 0.16080570220947266, "learning_rate": 0.001, "loss": 2.1795, "step": 23371 }, { "epoch": 0.9887469329046451, "grad_norm": 0.185518279671669, "learning_rate": 0.001, "loss": 3.0211, "step": 23372 }, { "epoch": 0.9887892376681614, "grad_norm": 0.13568751513957977, "learning_rate": 0.001, "loss": 2.4641, "step": 23373 }, { "epoch": 0.9888315424316778, "grad_norm": 0.17552120983600616, "learning_rate": 0.001, "loss": 2.334, "step": 23374 }, { "epoch": 0.9888738471951942, "grad_norm": 0.15184400975704193, "learning_rate": 0.001, "loss": 2.95, "step": 23375 }, { "epoch": 0.9889161519587105, "grad_norm": 0.14975741505622864, "learning_rate": 0.001, "loss": 2.5531, "step": 23376 }, { "epoch": 0.9889584567222269, "grad_norm": 0.12690399587154388, "learning_rate": 0.001, "loss": 1.8152, "step": 23377 }, { "epoch": 0.9890007614857433, "grad_norm": 0.15675854682922363, "learning_rate": 0.001, "loss": 2.5597, "step": 23378 }, { "epoch": 0.9890430662492596, "grad_norm": 0.15196675062179565, "learning_rate": 0.001, "loss": 2.6782, "step": 23379 }, { "epoch": 0.989085371012776, "grad_norm": 0.7298043966293335, "learning_rate": 0.001, "loss": 2.3701, "step": 23380 }, { "epoch": 0.9891276757762925, "grad_norm": 0.14933039247989655, "learning_rate": 0.001, "loss": 1.7975, "step": 23381 }, { "epoch": 0.9891699805398088, "grad_norm": 0.15482641756534576, "learning_rate": 0.001, "loss": 2.4515, "step": 23382 }, { "epoch": 0.9892122853033252, "grad_norm": 1.0311862230300903, "learning_rate": 0.001, "loss": 2.0674, "step": 23383 }, { "epoch": 0.9892545900668416, "grad_norm": 0.13115628063678741, "learning_rate": 0.001, "loss": 2.2563, "step": 23384 }, { "epoch": 0.9892968948303579, "grad_norm": 0.15161246061325073, "learning_rate": 0.001, "loss": 1.3509, "step": 23385 }, { "epoch": 0.9893391995938743, "grad_norm": 0.14778055250644684, "learning_rate": 0.001, "loss": 2.6554, "step": 23386 }, { "epoch": 0.9893815043573907, "grad_norm": 0.1839752495288849, "learning_rate": 0.001, "loss": 2.5877, "step": 23387 }, { "epoch": 0.989423809120907, "grad_norm": 0.5433216094970703, "learning_rate": 0.001, "loss": 2.4707, "step": 23388 }, { "epoch": 0.9894661138844234, "grad_norm": 0.19804272055625916, "learning_rate": 0.001, "loss": 1.9891, "step": 23389 }, { "epoch": 0.9895084186479397, "grad_norm": 0.21847547590732574, "learning_rate": 0.001, "loss": 5.1899, "step": 23390 }, { "epoch": 0.9895507234114561, "grad_norm": 0.8081578612327576, "learning_rate": 0.001, "loss": 2.3353, "step": 23391 }, { "epoch": 0.9895930281749725, "grad_norm": 0.13023246824741364, "learning_rate": 0.001, "loss": 2.8709, "step": 23392 }, { "epoch": 0.9896353329384888, "grad_norm": 0.16377229988574982, "learning_rate": 0.001, "loss": 1.5964, "step": 23393 }, { "epoch": 0.9896776377020052, "grad_norm": 0.17955902218818665, "learning_rate": 0.001, "loss": 1.7785, "step": 23394 }, { "epoch": 0.9897199424655216, "grad_norm": 0.18906940519809723, "learning_rate": 0.001, "loss": 2.842, "step": 23395 }, { "epoch": 0.9897622472290379, "grad_norm": 0.20560045540332794, "learning_rate": 0.001, "loss": 1.9539, "step": 23396 }, { "epoch": 0.9898045519925543, "grad_norm": 0.1924160122871399, "learning_rate": 0.001, "loss": 1.8572, "step": 23397 }, { "epoch": 0.9898468567560708, "grad_norm": 0.1467902511358261, "learning_rate": 0.001, "loss": 1.7799, "step": 23398 }, { "epoch": 0.9898891615195871, "grad_norm": 0.15988442301750183, "learning_rate": 0.001, "loss": 1.7468, "step": 23399 }, { "epoch": 0.9899314662831035, "grad_norm": 0.16045702993869781, "learning_rate": 0.001, "loss": 2.8367, "step": 23400 }, { "epoch": 0.9899737710466199, "grad_norm": 0.16333599388599396, "learning_rate": 0.001, "loss": 1.6981, "step": 23401 }, { "epoch": 0.9900160758101362, "grad_norm": 0.1779310405254364, "learning_rate": 0.001, "loss": 1.8536, "step": 23402 }, { "epoch": 0.9900583805736526, "grad_norm": 0.8571863770484924, "learning_rate": 0.001, "loss": 2.8216, "step": 23403 }, { "epoch": 0.990100685337169, "grad_norm": 0.18227769434452057, "learning_rate": 0.001, "loss": 1.8207, "step": 23404 }, { "epoch": 0.9901429901006853, "grad_norm": 0.1805211752653122, "learning_rate": 0.001, "loss": 2.2027, "step": 23405 }, { "epoch": 0.9901852948642017, "grad_norm": 1.146898865699768, "learning_rate": 0.001, "loss": 2.0096, "step": 23406 }, { "epoch": 0.9902275996277181, "grad_norm": 0.14902471005916595, "learning_rate": 0.001, "loss": 1.337, "step": 23407 }, { "epoch": 0.9902699043912344, "grad_norm": 0.17255893349647522, "learning_rate": 0.001, "loss": 2.4503, "step": 23408 }, { "epoch": 0.9903122091547508, "grad_norm": 0.18396355211734772, "learning_rate": 0.001, "loss": 2.3969, "step": 23409 }, { "epoch": 0.9903545139182672, "grad_norm": 0.17676624655723572, "learning_rate": 0.001, "loss": 1.7923, "step": 23410 }, { "epoch": 0.9903968186817835, "grad_norm": 0.19076810777187347, "learning_rate": 0.001, "loss": 1.9227, "step": 23411 }, { "epoch": 0.9904391234452999, "grad_norm": 0.15760193765163422, "learning_rate": 0.001, "loss": 2.9747, "step": 23412 }, { "epoch": 0.9904814282088164, "grad_norm": 0.20325231552124023, "learning_rate": 0.001, "loss": 1.7755, "step": 23413 }, { "epoch": 0.9905237329723326, "grad_norm": 0.17613425850868225, "learning_rate": 0.001, "loss": 2.0028, "step": 23414 }, { "epoch": 0.9905660377358491, "grad_norm": 0.20244133472442627, "learning_rate": 0.001, "loss": 2.5725, "step": 23415 }, { "epoch": 0.9906083424993655, "grad_norm": 0.18409040570259094, "learning_rate": 0.001, "loss": 3.6709, "step": 23416 }, { "epoch": 0.9906506472628818, "grad_norm": 0.15753605961799622, "learning_rate": 0.001, "loss": 2.0971, "step": 23417 }, { "epoch": 0.9906929520263982, "grad_norm": 0.14398153126239777, "learning_rate": 0.001, "loss": 2.2371, "step": 23418 }, { "epoch": 0.9907352567899146, "grad_norm": 0.1526634693145752, "learning_rate": 0.001, "loss": 2.2506, "step": 23419 }, { "epoch": 0.9907775615534309, "grad_norm": 0.12150632590055466, "learning_rate": 0.001, "loss": 2.0968, "step": 23420 }, { "epoch": 0.9908198663169473, "grad_norm": 0.7939785122871399, "learning_rate": 0.001, "loss": 2.479, "step": 23421 }, { "epoch": 0.9908621710804637, "grad_norm": 0.16594654321670532, "learning_rate": 0.001, "loss": 1.6048, "step": 23422 }, { "epoch": 0.99090447584398, "grad_norm": 0.1510103940963745, "learning_rate": 0.001, "loss": 2.7858, "step": 23423 }, { "epoch": 0.9909467806074964, "grad_norm": 0.23928913474082947, "learning_rate": 0.001, "loss": 1.7646, "step": 23424 }, { "epoch": 0.9909890853710128, "grad_norm": 0.14759401977062225, "learning_rate": 0.001, "loss": 1.6538, "step": 23425 }, { "epoch": 0.9910313901345291, "grad_norm": 0.19283150136470795, "learning_rate": 0.001, "loss": 2.6298, "step": 23426 }, { "epoch": 0.9910736948980455, "grad_norm": 3.1644039154052734, "learning_rate": 0.001, "loss": 2.4429, "step": 23427 }, { "epoch": 0.9911159996615619, "grad_norm": 0.15195727348327637, "learning_rate": 0.001, "loss": 2.3545, "step": 23428 }, { "epoch": 0.9911583044250782, "grad_norm": 0.13520923256874084, "learning_rate": 0.001, "loss": 2.0637, "step": 23429 }, { "epoch": 0.9912006091885947, "grad_norm": 0.1306263655424118, "learning_rate": 0.001, "loss": 1.5021, "step": 23430 }, { "epoch": 0.9912429139521111, "grad_norm": 0.16827377676963806, "learning_rate": 0.001, "loss": 3.3265, "step": 23431 }, { "epoch": 0.9912852187156274, "grad_norm": 0.14732570946216583, "learning_rate": 0.001, "loss": 2.0172, "step": 23432 }, { "epoch": 0.9913275234791438, "grad_norm": 3.2398881912231445, "learning_rate": 0.001, "loss": 2.523, "step": 23433 }, { "epoch": 0.9913698282426602, "grad_norm": 0.14063461124897003, "learning_rate": 0.001, "loss": 1.9463, "step": 23434 }, { "epoch": 0.9914121330061765, "grad_norm": 0.2017151415348053, "learning_rate": 0.001, "loss": 1.9935, "step": 23435 }, { "epoch": 0.9914544377696929, "grad_norm": 0.1547040343284607, "learning_rate": 0.001, "loss": 1.7171, "step": 23436 }, { "epoch": 0.9914967425332092, "grad_norm": 0.1772545576095581, "learning_rate": 0.001, "loss": 2.6763, "step": 23437 }, { "epoch": 0.9915390472967256, "grad_norm": 0.158152773976326, "learning_rate": 0.001, "loss": 1.7772, "step": 23438 }, { "epoch": 0.991581352060242, "grad_norm": 0.17712469398975372, "learning_rate": 0.001, "loss": 1.6187, "step": 23439 }, { "epoch": 0.9916236568237583, "grad_norm": 0.16980746388435364, "learning_rate": 0.001, "loss": 2.7735, "step": 23440 }, { "epoch": 0.9916659615872747, "grad_norm": 1.1700719594955444, "learning_rate": 0.001, "loss": 2.1555, "step": 23441 }, { "epoch": 0.9917082663507911, "grad_norm": 0.1690703183412552, "learning_rate": 0.001, "loss": 1.6741, "step": 23442 }, { "epoch": 0.9917505711143074, "grad_norm": 0.15942858159542084, "learning_rate": 0.001, "loss": 1.7488, "step": 23443 }, { "epoch": 0.9917928758778238, "grad_norm": 0.15770910680294037, "learning_rate": 0.001, "loss": 3.3523, "step": 23444 }, { "epoch": 0.9918351806413402, "grad_norm": 11.168841361999512, "learning_rate": 0.001, "loss": 2.2278, "step": 23445 }, { "epoch": 0.9918774854048565, "grad_norm": 0.173135906457901, "learning_rate": 0.001, "loss": 2.5919, "step": 23446 }, { "epoch": 0.991919790168373, "grad_norm": 0.13830503821372986, "learning_rate": 0.001, "loss": 2.0946, "step": 23447 }, { "epoch": 0.9919620949318894, "grad_norm": 0.1732984036207199, "learning_rate": 0.001, "loss": 1.8609, "step": 23448 }, { "epoch": 0.9920043996954057, "grad_norm": 0.1577487289905548, "learning_rate": 0.001, "loss": 2.0619, "step": 23449 }, { "epoch": 0.9920467044589221, "grad_norm": 0.9779529571533203, "learning_rate": 0.001, "loss": 1.9558, "step": 23450 }, { "epoch": 0.9920890092224385, "grad_norm": 0.21312867105007172, "learning_rate": 0.001, "loss": 2.2838, "step": 23451 }, { "epoch": 0.9921313139859548, "grad_norm": 0.18126481771469116, "learning_rate": 0.001, "loss": 1.9558, "step": 23452 }, { "epoch": 0.9921736187494712, "grad_norm": 0.9032267928123474, "learning_rate": 0.001, "loss": 2.3783, "step": 23453 }, { "epoch": 0.9922159235129876, "grad_norm": 0.13526488840579987, "learning_rate": 0.001, "loss": 2.0193, "step": 23454 }, { "epoch": 0.9922582282765039, "grad_norm": 0.16340215504169464, "learning_rate": 0.001, "loss": 2.604, "step": 23455 }, { "epoch": 0.9923005330400203, "grad_norm": 0.14608405530452728, "learning_rate": 0.001, "loss": 3.0248, "step": 23456 }, { "epoch": 0.9923428378035367, "grad_norm": 0.15734529495239258, "learning_rate": 0.001, "loss": 1.6765, "step": 23457 }, { "epoch": 0.992385142567053, "grad_norm": 0.14332926273345947, "learning_rate": 0.001, "loss": 2.2598, "step": 23458 }, { "epoch": 0.9924274473305694, "grad_norm": 1.3368622064590454, "learning_rate": 0.001, "loss": 3.2208, "step": 23459 }, { "epoch": 0.9924697520940858, "grad_norm": 0.16903036832809448, "learning_rate": 0.001, "loss": 1.5743, "step": 23460 }, { "epoch": 0.9925120568576021, "grad_norm": 0.1450778692960739, "learning_rate": 0.001, "loss": 2.1063, "step": 23461 }, { "epoch": 0.9925543616211185, "grad_norm": 0.24200314283370972, "learning_rate": 0.001, "loss": 2.7203, "step": 23462 }, { "epoch": 0.992596666384635, "grad_norm": 0.1364940106868744, "learning_rate": 0.001, "loss": 2.2594, "step": 23463 }, { "epoch": 0.9926389711481513, "grad_norm": 0.15272387862205505, "learning_rate": 0.001, "loss": 1.5758, "step": 23464 }, { "epoch": 0.9926812759116677, "grad_norm": 0.1937706619501114, "learning_rate": 0.001, "loss": 2.4084, "step": 23465 }, { "epoch": 0.9927235806751841, "grad_norm": 0.1765604466199875, "learning_rate": 0.001, "loss": 1.7184, "step": 23466 }, { "epoch": 0.9927658854387004, "grad_norm": 0.17739205062389374, "learning_rate": 0.001, "loss": 1.9615, "step": 23467 }, { "epoch": 0.9928081902022168, "grad_norm": 1.2467105388641357, "learning_rate": 0.001, "loss": 1.6635, "step": 23468 }, { "epoch": 0.9928504949657332, "grad_norm": 1.0034493207931519, "learning_rate": 0.001, "loss": 2.3672, "step": 23469 }, { "epoch": 0.9928927997292495, "grad_norm": 0.19060944020748138, "learning_rate": 0.001, "loss": 2.5745, "step": 23470 }, { "epoch": 0.9929351044927659, "grad_norm": 0.18545667827129364, "learning_rate": 0.001, "loss": 2.2075, "step": 23471 }, { "epoch": 0.9929774092562823, "grad_norm": 0.1679558902978897, "learning_rate": 0.001, "loss": 1.7822, "step": 23472 }, { "epoch": 0.9930197140197986, "grad_norm": 0.21493469178676605, "learning_rate": 0.001, "loss": 3.6993, "step": 23473 }, { "epoch": 0.993062018783315, "grad_norm": 0.21583551168441772, "learning_rate": 0.001, "loss": 2.2144, "step": 23474 }, { "epoch": 0.9931043235468314, "grad_norm": 0.1545204520225525, "learning_rate": 0.001, "loss": 1.8569, "step": 23475 }, { "epoch": 0.9931466283103477, "grad_norm": 0.17547298967838287, "learning_rate": 0.001, "loss": 1.6119, "step": 23476 }, { "epoch": 0.9931889330738641, "grad_norm": 0.18335486948490143, "learning_rate": 0.001, "loss": 1.8375, "step": 23477 }, { "epoch": 0.9932312378373805, "grad_norm": 0.17604441940784454, "learning_rate": 0.001, "loss": 1.7185, "step": 23478 }, { "epoch": 0.9932735426008968, "grad_norm": 0.1612115502357483, "learning_rate": 0.001, "loss": 1.7173, "step": 23479 }, { "epoch": 0.9933158473644133, "grad_norm": 0.17903564870357513, "learning_rate": 0.001, "loss": 2.2194, "step": 23480 }, { "epoch": 0.9933581521279296, "grad_norm": 0.15683114528656006, "learning_rate": 0.001, "loss": 1.9522, "step": 23481 }, { "epoch": 0.993400456891446, "grad_norm": 0.19979719817638397, "learning_rate": 0.001, "loss": 2.8421, "step": 23482 }, { "epoch": 0.9934427616549624, "grad_norm": 0.16343528032302856, "learning_rate": 0.001, "loss": 2.3331, "step": 23483 }, { "epoch": 0.9934850664184787, "grad_norm": 0.14997512102127075, "learning_rate": 0.001, "loss": 1.9094, "step": 23484 }, { "epoch": 0.9935273711819951, "grad_norm": 0.14654596149921417, "learning_rate": 0.001, "loss": 2.325, "step": 23485 }, { "epoch": 0.9935696759455115, "grad_norm": 0.15524841845035553, "learning_rate": 0.001, "loss": 2.0226, "step": 23486 }, { "epoch": 0.9936119807090278, "grad_norm": 0.1682468056678772, "learning_rate": 0.001, "loss": 1.9113, "step": 23487 }, { "epoch": 0.9936542854725442, "grad_norm": 0.15577398240566254, "learning_rate": 0.001, "loss": 2.6625, "step": 23488 }, { "epoch": 0.9936965902360606, "grad_norm": 0.1386529803276062, "learning_rate": 0.001, "loss": 1.7642, "step": 23489 }, { "epoch": 0.9937388949995769, "grad_norm": 0.1561625450849533, "learning_rate": 0.001, "loss": 2.7162, "step": 23490 }, { "epoch": 0.9937811997630933, "grad_norm": 0.22317662835121155, "learning_rate": 0.001, "loss": 2.0305, "step": 23491 }, { "epoch": 0.9938235045266097, "grad_norm": 0.1401653289794922, "learning_rate": 0.001, "loss": 2.2692, "step": 23492 }, { "epoch": 0.993865809290126, "grad_norm": 0.14323166012763977, "learning_rate": 0.001, "loss": 1.724, "step": 23493 }, { "epoch": 0.9939081140536424, "grad_norm": 0.1282302439212799, "learning_rate": 0.001, "loss": 2.036, "step": 23494 }, { "epoch": 0.9939504188171588, "grad_norm": 0.1263008862733841, "learning_rate": 0.001, "loss": 1.3955, "step": 23495 }, { "epoch": 0.9939927235806751, "grad_norm": 0.13142234086990356, "learning_rate": 0.001, "loss": 2.1095, "step": 23496 }, { "epoch": 0.9940350283441916, "grad_norm": 0.17697520554065704, "learning_rate": 0.001, "loss": 2.3401, "step": 23497 }, { "epoch": 0.994077333107708, "grad_norm": 1.2804735898971558, "learning_rate": 0.001, "loss": 2.5458, "step": 23498 }, { "epoch": 0.9941196378712243, "grad_norm": 0.24874600768089294, "learning_rate": 0.001, "loss": 2.6729, "step": 23499 }, { "epoch": 0.9941619426347407, "grad_norm": 0.2032213658094406, "learning_rate": 0.001, "loss": 2.7131, "step": 23500 }, { "epoch": 0.9942042473982571, "grad_norm": 0.267898827791214, "learning_rate": 0.001, "loss": 2.2249, "step": 23501 }, { "epoch": 0.9942465521617734, "grad_norm": 0.1407955288887024, "learning_rate": 0.001, "loss": 2.842, "step": 23502 }, { "epoch": 0.9942888569252898, "grad_norm": 10.123157501220703, "learning_rate": 0.001, "loss": 1.9959, "step": 23503 }, { "epoch": 0.9943311616888062, "grad_norm": 0.1459282636642456, "learning_rate": 0.001, "loss": 2.1619, "step": 23504 }, { "epoch": 0.9943734664523225, "grad_norm": 0.17329807579517365, "learning_rate": 0.001, "loss": 2.1785, "step": 23505 }, { "epoch": 0.9944157712158389, "grad_norm": 0.11436944454908371, "learning_rate": 0.001, "loss": 2.0212, "step": 23506 }, { "epoch": 0.9944580759793553, "grad_norm": 0.15068282186985016, "learning_rate": 0.001, "loss": 1.7319, "step": 23507 }, { "epoch": 0.9945003807428716, "grad_norm": 0.14463482797145844, "learning_rate": 0.001, "loss": 1.932, "step": 23508 }, { "epoch": 0.994542685506388, "grad_norm": 0.1417880356311798, "learning_rate": 0.001, "loss": 2.3686, "step": 23509 }, { "epoch": 0.9945849902699044, "grad_norm": 2.411515951156616, "learning_rate": 0.001, "loss": 1.9726, "step": 23510 }, { "epoch": 0.9946272950334207, "grad_norm": 0.15016627311706543, "learning_rate": 0.001, "loss": 2.7317, "step": 23511 }, { "epoch": 0.9946695997969371, "grad_norm": 0.16307124495506287, "learning_rate": 0.001, "loss": 2.3945, "step": 23512 }, { "epoch": 0.9947119045604536, "grad_norm": 0.2472067028284073, "learning_rate": 0.001, "loss": 1.323, "step": 23513 }, { "epoch": 0.9947542093239699, "grad_norm": 0.14261025190353394, "learning_rate": 0.001, "loss": 1.6545, "step": 23514 }, { "epoch": 0.9947965140874863, "grad_norm": 0.21985666453838348, "learning_rate": 0.001, "loss": 1.6186, "step": 23515 }, { "epoch": 0.9948388188510027, "grad_norm": 0.18958121538162231, "learning_rate": 0.001, "loss": 3.1224, "step": 23516 }, { "epoch": 0.994881123614519, "grad_norm": 0.18746091425418854, "learning_rate": 0.001, "loss": 1.7352, "step": 23517 }, { "epoch": 0.9949234283780354, "grad_norm": 0.3156006932258606, "learning_rate": 0.001, "loss": 3.373, "step": 23518 }, { "epoch": 0.9949657331415518, "grad_norm": 0.1298127919435501, "learning_rate": 0.001, "loss": 2.0672, "step": 23519 }, { "epoch": 0.9950080379050681, "grad_norm": 0.12908710539340973, "learning_rate": 0.001, "loss": 1.835, "step": 23520 }, { "epoch": 0.9950503426685845, "grad_norm": 0.2133929282426834, "learning_rate": 0.001, "loss": 3.1512, "step": 23521 }, { "epoch": 0.9950926474321009, "grad_norm": 0.145905002951622, "learning_rate": 0.001, "loss": 1.3302, "step": 23522 }, { "epoch": 0.9951349521956172, "grad_norm": 0.19927392899990082, "learning_rate": 0.001, "loss": 3.0666, "step": 23523 }, { "epoch": 0.9951772569591336, "grad_norm": 0.1459832340478897, "learning_rate": 0.001, "loss": 2.3249, "step": 23524 }, { "epoch": 0.99521956172265, "grad_norm": 0.19735519587993622, "learning_rate": 0.001, "loss": 1.8252, "step": 23525 }, { "epoch": 0.9952618664861663, "grad_norm": 0.15133242309093475, "learning_rate": 0.001, "loss": 1.5066, "step": 23526 }, { "epoch": 0.9953041712496827, "grad_norm": 7.522138595581055, "learning_rate": 0.001, "loss": 2.1434, "step": 23527 }, { "epoch": 0.995346476013199, "grad_norm": 0.1376638412475586, "learning_rate": 0.001, "loss": 1.7716, "step": 23528 }, { "epoch": 0.9953887807767154, "grad_norm": 1.5856484174728394, "learning_rate": 0.001, "loss": 1.7964, "step": 23529 }, { "epoch": 0.9954310855402319, "grad_norm": 0.16695480048656464, "learning_rate": 0.001, "loss": 1.9568, "step": 23530 }, { "epoch": 0.9954733903037482, "grad_norm": 0.6410706043243408, "learning_rate": 0.001, "loss": 2.1194, "step": 23531 }, { "epoch": 0.9955156950672646, "grad_norm": 0.31505700945854187, "learning_rate": 0.001, "loss": 1.8234, "step": 23532 }, { "epoch": 0.995557999830781, "grad_norm": 0.1771719753742218, "learning_rate": 0.001, "loss": 2.5342, "step": 23533 }, { "epoch": 0.9956003045942973, "grad_norm": 0.18417403101921082, "learning_rate": 0.001, "loss": 2.5644, "step": 23534 }, { "epoch": 0.9956426093578137, "grad_norm": 0.15871447324752808, "learning_rate": 0.001, "loss": 2.7864, "step": 23535 }, { "epoch": 0.9956849141213301, "grad_norm": 0.2665135860443115, "learning_rate": 0.001, "loss": 2.847, "step": 23536 }, { "epoch": 0.9957272188848464, "grad_norm": 0.7563045620918274, "learning_rate": 0.001, "loss": 3.1899, "step": 23537 }, { "epoch": 0.9957695236483628, "grad_norm": 0.1422766149044037, "learning_rate": 0.001, "loss": 2.0713, "step": 23538 }, { "epoch": 0.9958118284118792, "grad_norm": 0.14153209328651428, "learning_rate": 0.001, "loss": 1.7366, "step": 23539 }, { "epoch": 0.9958541331753955, "grad_norm": 0.1573881059885025, "learning_rate": 0.001, "loss": 1.9069, "step": 23540 }, { "epoch": 0.9958964379389119, "grad_norm": 39.94224166870117, "learning_rate": 0.001, "loss": 2.0561, "step": 23541 }, { "epoch": 0.9959387427024283, "grad_norm": 0.1591683328151703, "learning_rate": 0.001, "loss": 3.2608, "step": 23542 }, { "epoch": 0.9959810474659446, "grad_norm": 0.19343800842761993, "learning_rate": 0.001, "loss": 3.1865, "step": 23543 }, { "epoch": 0.996023352229461, "grad_norm": 0.15812444686889648, "learning_rate": 0.001, "loss": 2.2566, "step": 23544 }, { "epoch": 0.9960656569929774, "grad_norm": 0.3023439049720764, "learning_rate": 0.001, "loss": 2.1273, "step": 23545 }, { "epoch": 0.9961079617564937, "grad_norm": 0.14426670968532562, "learning_rate": 0.001, "loss": 1.5217, "step": 23546 }, { "epoch": 0.9961502665200102, "grad_norm": 0.5261450409889221, "learning_rate": 0.001, "loss": 1.8896, "step": 23547 }, { "epoch": 0.9961925712835266, "grad_norm": 0.1593768447637558, "learning_rate": 0.001, "loss": 1.302, "step": 23548 }, { "epoch": 0.9962348760470429, "grad_norm": 0.19295907020568848, "learning_rate": 0.001, "loss": 2.4438, "step": 23549 }, { "epoch": 0.9962771808105593, "grad_norm": 0.1521875560283661, "learning_rate": 0.001, "loss": 1.3707, "step": 23550 }, { "epoch": 0.9963194855740757, "grad_norm": 0.1482807993888855, "learning_rate": 0.001, "loss": 2.3517, "step": 23551 }, { "epoch": 0.996361790337592, "grad_norm": 0.1328219175338745, "learning_rate": 0.001, "loss": 1.6607, "step": 23552 }, { "epoch": 0.9964040951011084, "grad_norm": 0.13568271696567535, "learning_rate": 0.001, "loss": 2.0808, "step": 23553 }, { "epoch": 0.9964463998646248, "grad_norm": 0.17867201566696167, "learning_rate": 0.001, "loss": 1.8838, "step": 23554 }, { "epoch": 0.9964887046281411, "grad_norm": 0.14420682191848755, "learning_rate": 0.001, "loss": 2.1963, "step": 23555 }, { "epoch": 0.9965310093916575, "grad_norm": 0.19156771898269653, "learning_rate": 0.001, "loss": 2.6953, "step": 23556 }, { "epoch": 0.9965733141551739, "grad_norm": 0.13624778389930725, "learning_rate": 0.001, "loss": 2.076, "step": 23557 }, { "epoch": 0.9966156189186902, "grad_norm": 0.15911436080932617, "learning_rate": 0.001, "loss": 2.2559, "step": 23558 }, { "epoch": 0.9966579236822066, "grad_norm": 0.16328518092632294, "learning_rate": 0.001, "loss": 1.7107, "step": 23559 }, { "epoch": 0.996700228445723, "grad_norm": 0.20304375886917114, "learning_rate": 0.001, "loss": 2.0468, "step": 23560 }, { "epoch": 0.9967425332092393, "grad_norm": 0.16803617775440216, "learning_rate": 0.001, "loss": 2.2345, "step": 23561 }, { "epoch": 0.9967848379727557, "grad_norm": 0.13952644169330597, "learning_rate": 0.001, "loss": 2.2767, "step": 23562 }, { "epoch": 0.9968271427362722, "grad_norm": 0.15917642414569855, "learning_rate": 0.001, "loss": 1.8884, "step": 23563 }, { "epoch": 0.9968694474997885, "grad_norm": 0.1422685831785202, "learning_rate": 0.001, "loss": 1.6789, "step": 23564 }, { "epoch": 0.9969117522633049, "grad_norm": 0.9447866082191467, "learning_rate": 0.001, "loss": 2.6736, "step": 23565 }, { "epoch": 0.9969540570268213, "grad_norm": 0.15833914279937744, "learning_rate": 0.001, "loss": 2.3225, "step": 23566 }, { "epoch": 0.9969963617903376, "grad_norm": 0.15113773941993713, "learning_rate": 0.001, "loss": 2.1877, "step": 23567 }, { "epoch": 0.997038666553854, "grad_norm": 0.17661526799201965, "learning_rate": 0.001, "loss": 2.0605, "step": 23568 }, { "epoch": 0.9970809713173704, "grad_norm": 0.2037663608789444, "learning_rate": 0.001, "loss": 2.095, "step": 23569 }, { "epoch": 0.9971232760808867, "grad_norm": 0.40272435545921326, "learning_rate": 0.001, "loss": 1.8401, "step": 23570 }, { "epoch": 0.9971655808444031, "grad_norm": 1.8662751913070679, "learning_rate": 0.001, "loss": 3.3036, "step": 23571 }, { "epoch": 0.9972078856079194, "grad_norm": 0.14151303470134735, "learning_rate": 0.001, "loss": 2.6741, "step": 23572 }, { "epoch": 0.9972501903714358, "grad_norm": 0.17152297496795654, "learning_rate": 0.001, "loss": 2.5857, "step": 23573 }, { "epoch": 0.9972924951349522, "grad_norm": 1.016983151435852, "learning_rate": 0.001, "loss": 1.5302, "step": 23574 }, { "epoch": 0.9973347998984685, "grad_norm": 0.16691197454929352, "learning_rate": 0.001, "loss": 2.3803, "step": 23575 }, { "epoch": 0.9973771046619849, "grad_norm": 0.16801585257053375, "learning_rate": 0.001, "loss": 1.581, "step": 23576 }, { "epoch": 0.9974194094255013, "grad_norm": 0.20410461723804474, "learning_rate": 0.001, "loss": 1.7048, "step": 23577 }, { "epoch": 0.9974617141890176, "grad_norm": 0.17868375778198242, "learning_rate": 0.001, "loss": 2.3364, "step": 23578 }, { "epoch": 0.997504018952534, "grad_norm": 0.1568222939968109, "learning_rate": 0.001, "loss": 1.8333, "step": 23579 }, { "epoch": 0.9975463237160505, "grad_norm": 0.1552361100912094, "learning_rate": 0.001, "loss": 1.9154, "step": 23580 }, { "epoch": 0.9975886284795668, "grad_norm": 0.15056028962135315, "learning_rate": 0.001, "loss": 2.2219, "step": 23581 }, { "epoch": 0.9976309332430832, "grad_norm": 0.17635402083396912, "learning_rate": 0.001, "loss": 1.6218, "step": 23582 }, { "epoch": 0.9976732380065996, "grad_norm": 0.273888498544693, "learning_rate": 0.001, "loss": 3.4827, "step": 23583 }, { "epoch": 0.9977155427701159, "grad_norm": 0.14739017188549042, "learning_rate": 0.001, "loss": 2.1994, "step": 23584 }, { "epoch": 0.9977578475336323, "grad_norm": 0.18939383327960968, "learning_rate": 0.001, "loss": 2.3165, "step": 23585 }, { "epoch": 0.9978001522971487, "grad_norm": 0.15688124299049377, "learning_rate": 0.001, "loss": 1.771, "step": 23586 }, { "epoch": 0.997842457060665, "grad_norm": 0.15662074089050293, "learning_rate": 0.001, "loss": 2.1358, "step": 23587 }, { "epoch": 0.9978847618241814, "grad_norm": 0.15011928975582123, "learning_rate": 0.001, "loss": 2.084, "step": 23588 }, { "epoch": 0.9979270665876978, "grad_norm": 0.1399097889661789, "learning_rate": 0.001, "loss": 3.6885, "step": 23589 }, { "epoch": 0.9979693713512141, "grad_norm": 0.1603403389453888, "learning_rate": 0.001, "loss": 2.0335, "step": 23590 }, { "epoch": 0.9980116761147305, "grad_norm": 0.14453408122062683, "learning_rate": 0.001, "loss": 2.7993, "step": 23591 }, { "epoch": 0.9980539808782469, "grad_norm": 0.2429407685995102, "learning_rate": 0.001, "loss": 2.0416, "step": 23592 }, { "epoch": 0.9980962856417632, "grad_norm": 0.15473546087741852, "learning_rate": 0.001, "loss": 1.5858, "step": 23593 }, { "epoch": 0.9981385904052796, "grad_norm": 0.13461685180664062, "learning_rate": 0.001, "loss": 2.3416, "step": 23594 }, { "epoch": 0.998180895168796, "grad_norm": 0.8460047841072083, "learning_rate": 0.001, "loss": 2.1287, "step": 23595 }, { "epoch": 0.9982231999323123, "grad_norm": 0.15388406813144684, "learning_rate": 0.001, "loss": 1.5546, "step": 23596 }, { "epoch": 0.9982655046958288, "grad_norm": 0.15559299290180206, "learning_rate": 0.001, "loss": 1.6043, "step": 23597 }, { "epoch": 0.9983078094593452, "grad_norm": 0.14206543564796448, "learning_rate": 0.001, "loss": 1.5113, "step": 23598 }, { "epoch": 0.9983501142228615, "grad_norm": 0.20963889360427856, "learning_rate": 0.001, "loss": 1.5628, "step": 23599 }, { "epoch": 0.9983924189863779, "grad_norm": 0.20047658681869507, "learning_rate": 0.001, "loss": 1.5766, "step": 23600 }, { "epoch": 0.9984347237498943, "grad_norm": 0.1295713633298874, "learning_rate": 0.001, "loss": 1.6966, "step": 23601 }, { "epoch": 0.9984770285134106, "grad_norm": 0.20154249668121338, "learning_rate": 0.001, "loss": 1.6119, "step": 23602 }, { "epoch": 0.998519333276927, "grad_norm": 0.14119143784046173, "learning_rate": 0.001, "loss": 1.534, "step": 23603 }, { "epoch": 0.9985616380404434, "grad_norm": 0.15057438611984253, "learning_rate": 0.001, "loss": 2.1838, "step": 23604 }, { "epoch": 0.9986039428039597, "grad_norm": 0.1763651818037033, "learning_rate": 0.001, "loss": 2.8296, "step": 23605 }, { "epoch": 0.9986462475674761, "grad_norm": 0.20839767158031464, "learning_rate": 0.001, "loss": 2.2355, "step": 23606 }, { "epoch": 0.9986885523309925, "grad_norm": 0.20311135053634644, "learning_rate": 0.001, "loss": 2.5101, "step": 23607 }, { "epoch": 0.9987308570945088, "grad_norm": 0.17892763018608093, "learning_rate": 0.001, "loss": 2.4245, "step": 23608 }, { "epoch": 0.9987731618580252, "grad_norm": 0.15312178432941437, "learning_rate": 0.001, "loss": 2.0084, "step": 23609 }, { "epoch": 0.9988154666215416, "grad_norm": 0.15701162815093994, "learning_rate": 0.001, "loss": 1.6143, "step": 23610 }, { "epoch": 0.9988577713850579, "grad_norm": 0.16364918649196625, "learning_rate": 0.001, "loss": 1.4365, "step": 23611 }, { "epoch": 0.9989000761485743, "grad_norm": 0.4083903729915619, "learning_rate": 0.001, "loss": 1.6857, "step": 23612 }, { "epoch": 0.9989423809120908, "grad_norm": 0.13597403466701508, "learning_rate": 0.001, "loss": 2.4054, "step": 23613 }, { "epoch": 0.998984685675607, "grad_norm": 0.12076670676469803, "learning_rate": 0.001, "loss": 1.7387, "step": 23614 }, { "epoch": 0.9990269904391235, "grad_norm": 0.20430530607700348, "learning_rate": 0.001, "loss": 2.5614, "step": 23615 }, { "epoch": 0.9990692952026398, "grad_norm": 0.1506972312927246, "learning_rate": 0.001, "loss": 2.3331, "step": 23616 }, { "epoch": 0.9991115999661562, "grad_norm": 0.14669986069202423, "learning_rate": 0.001, "loss": 1.5766, "step": 23617 }, { "epoch": 0.9991539047296726, "grad_norm": 0.15908612310886383, "learning_rate": 0.001, "loss": 2.0938, "step": 23618 }, { "epoch": 0.9991962094931889, "grad_norm": 0.1240277886390686, "learning_rate": 0.001, "loss": 2.1356, "step": 23619 }, { "epoch": 0.9992385142567053, "grad_norm": 0.14557062089443207, "learning_rate": 0.001, "loss": 2.1815, "step": 23620 }, { "epoch": 0.9992808190202217, "grad_norm": 0.16171297430992126, "learning_rate": 0.001, "loss": 1.7981, "step": 23621 }, { "epoch": 0.999323123783738, "grad_norm": 0.157927006483078, "learning_rate": 0.001, "loss": 1.6165, "step": 23622 }, { "epoch": 0.9993654285472544, "grad_norm": 0.15406683087348938, "learning_rate": 0.001, "loss": 2.1801, "step": 23623 }, { "epoch": 0.9994077333107708, "grad_norm": 0.19311675429344177, "learning_rate": 0.001, "loss": 1.6993, "step": 23624 }, { "epoch": 0.9994500380742871, "grad_norm": 0.24103492498397827, "learning_rate": 0.001, "loss": 2.6117, "step": 23625 }, { "epoch": 0.9994923428378035, "grad_norm": 0.20797502994537354, "learning_rate": 0.001, "loss": 2.2529, "step": 23626 }, { "epoch": 0.9995346476013199, "grad_norm": 0.3445920944213867, "learning_rate": 0.001, "loss": 2.4338, "step": 23627 }, { "epoch": 0.9995769523648362, "grad_norm": 0.1522158980369568, "learning_rate": 0.001, "loss": 1.6047, "step": 23628 }, { "epoch": 0.9996192571283526, "grad_norm": 1.6727036237716675, "learning_rate": 0.001, "loss": 3.4602, "step": 23629 }, { "epoch": 0.999661561891869, "grad_norm": 0.14390981197357178, "learning_rate": 0.001, "loss": 1.7825, "step": 23630 }, { "epoch": 0.9997038666553854, "grad_norm": 0.16790489852428436, "learning_rate": 0.001, "loss": 2.0287, "step": 23631 }, { "epoch": 0.9997461714189018, "grad_norm": 1.9648386240005493, "learning_rate": 0.001, "loss": 1.9811, "step": 23632 }, { "epoch": 0.9997884761824182, "grad_norm": 0.17755821347236633, "learning_rate": 0.001, "loss": 1.6772, "step": 23633 }, { "epoch": 0.9998307809459345, "grad_norm": 0.1543680876493454, "learning_rate": 0.001, "loss": 2.2835, "step": 23634 }, { "epoch": 0.9998730857094509, "grad_norm": 0.1278516948223114, "learning_rate": 0.001, "loss": 2.8439, "step": 23635 }, { "epoch": 0.9999153904729673, "grad_norm": 0.15573585033416748, "learning_rate": 0.001, "loss": 2.505, "step": 23636 }, { "epoch": 0.9999576952364836, "grad_norm": 0.17682018876075745, "learning_rate": 0.001, "loss": 1.2282, "step": 23637 }, { "epoch": 1.0, "grad_norm": 0.19988803565502167, "learning_rate": 0.001, "loss": 1.9946, "step": 23638 }, { "epoch": 1.0, "step": 23638, "total_flos": 1.3698602096404378e+17, "train_loss": 2.3282218011757556, "train_runtime": 26726.0005, "train_samples_per_second": 7.076, "train_steps_per_second": 0.884 } ], "logging_steps": 1, "max_steps": 23638, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 5910, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.3698602096404378e+17, "train_batch_size": 8, "trial_name": null, "trial_params": null }