{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.21789284798769545, "eval_steps": 500, "global_step": 3400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.40861317610869e-05, "grad_norm": 3.049792280054225, "learning_rate": 1e-06, "loss": 0.3756, "step": 1 }, { "epoch": 0.0001281722635221738, "grad_norm": 2.736097146503981, "learning_rate": 1e-06, "loss": 0.4176, "step": 2 }, { "epoch": 0.00019225839528326071, "grad_norm": 2.696671878484295, "learning_rate": 1e-06, "loss": 0.4815, "step": 3 }, { "epoch": 0.0002563445270443476, "grad_norm": 3.337136551938015, "learning_rate": 1e-06, "loss": 0.4904, "step": 4 }, { "epoch": 0.0003204306588054345, "grad_norm": 2.6647797201470778, "learning_rate": 1e-06, "loss": 0.4185, "step": 5 }, { "epoch": 0.00038451679056652143, "grad_norm": 2.5193486744426967, "learning_rate": 1e-06, "loss": 0.4688, "step": 6 }, { "epoch": 0.0004486029223276083, "grad_norm": 2.66396905228593, "learning_rate": 1e-06, "loss": 0.3592, "step": 7 }, { "epoch": 0.0005126890540886952, "grad_norm": 3.1409024702898307, "learning_rate": 1e-06, "loss": 0.4413, "step": 8 }, { "epoch": 0.0005767751858497821, "grad_norm": 2.6431941146744, "learning_rate": 1e-06, "loss": 0.449, "step": 9 }, { "epoch": 0.000640861317610869, "grad_norm": 2.924241054090828, "learning_rate": 1e-06, "loss": 0.451, "step": 10 }, { "epoch": 0.0007049474493719559, "grad_norm": 2.622132191231163, "learning_rate": 1e-06, "loss": 0.4541, "step": 11 }, { "epoch": 0.0007690335811330429, "grad_norm": 2.6442231709945525, "learning_rate": 1e-06, "loss": 0.3841, "step": 12 }, { "epoch": 0.0008331197128941297, "grad_norm": 2.8066741487653886, "learning_rate": 1e-06, "loss": 0.3987, "step": 13 }, { "epoch": 0.0008972058446552166, "grad_norm": 2.4952793737222794, "learning_rate": 1e-06, "loss": 0.4367, "step": 14 }, { "epoch": 0.0009612919764163035, "grad_norm": 2.736382692932069, "learning_rate": 1e-06, "loss": 0.3844, "step": 15 }, { "epoch": 0.0010253781081773904, "grad_norm": 2.690189041127437, "learning_rate": 1e-06, "loss": 0.4179, "step": 16 }, { "epoch": 0.0010894642399384773, "grad_norm": 2.4809643616320916, "learning_rate": 1e-06, "loss": 0.3971, "step": 17 }, { "epoch": 0.0011535503716995643, "grad_norm": 2.470983130778616, "learning_rate": 1e-06, "loss": 0.4224, "step": 18 }, { "epoch": 0.001217636503460651, "grad_norm": 2.9011921770535833, "learning_rate": 1e-06, "loss": 0.4941, "step": 19 }, { "epoch": 0.001281722635221738, "grad_norm": 2.592260682165995, "learning_rate": 1e-06, "loss": 0.3991, "step": 20 }, { "epoch": 0.001345808766982825, "grad_norm": 2.7190365461237502, "learning_rate": 1e-06, "loss": 0.4386, "step": 21 }, { "epoch": 0.0014098948987439118, "grad_norm": 2.623825797975284, "learning_rate": 1e-06, "loss": 0.4459, "step": 22 }, { "epoch": 0.0014739810305049988, "grad_norm": 4.130313126154051, "learning_rate": 1e-06, "loss": 0.4236, "step": 23 }, { "epoch": 0.0015380671622660857, "grad_norm": 2.6968346941151387, "learning_rate": 1e-06, "loss": 0.4088, "step": 24 }, { "epoch": 0.0016021532940271724, "grad_norm": 2.478154009816993, "learning_rate": 1e-06, "loss": 0.4064, "step": 25 }, { "epoch": 0.0016662394257882594, "grad_norm": 2.878743074857694, "learning_rate": 1e-06, "loss": 0.4746, "step": 26 }, { "epoch": 0.0017303255575493463, "grad_norm": 2.572012522185562, "learning_rate": 1e-06, "loss": 0.4063, "step": 27 }, { "epoch": 0.0017944116893104333, "grad_norm": 2.6480699294244108, "learning_rate": 1e-06, "loss": 0.4893, "step": 28 }, { "epoch": 0.0018584978210715202, "grad_norm": 2.5117617032878536, "learning_rate": 1e-06, "loss": 0.4271, "step": 29 }, { "epoch": 0.001922583952832607, "grad_norm": 2.6045342374650704, "learning_rate": 1e-06, "loss": 0.4077, "step": 30 }, { "epoch": 0.001986670084593694, "grad_norm": 2.598933479100832, "learning_rate": 1e-06, "loss": 0.447, "step": 31 }, { "epoch": 0.002050756216354781, "grad_norm": 2.6774168288479685, "learning_rate": 1e-06, "loss": 0.4337, "step": 32 }, { "epoch": 0.0021148423481158678, "grad_norm": 2.559910547832753, "learning_rate": 1e-06, "loss": 0.3952, "step": 33 }, { "epoch": 0.0021789284798769547, "grad_norm": 2.8131005080313427, "learning_rate": 1e-06, "loss": 0.3904, "step": 34 }, { "epoch": 0.0022430146116380416, "grad_norm": 2.6128742530557196, "learning_rate": 1e-06, "loss": 0.4635, "step": 35 }, { "epoch": 0.0023071007433991286, "grad_norm": 2.790837783591932, "learning_rate": 1e-06, "loss": 0.4692, "step": 36 }, { "epoch": 0.0023711868751602155, "grad_norm": 2.677228029063294, "learning_rate": 1e-06, "loss": 0.4263, "step": 37 }, { "epoch": 0.002435273006921302, "grad_norm": 2.899817329376979, "learning_rate": 1e-06, "loss": 0.4265, "step": 38 }, { "epoch": 0.002499359138682389, "grad_norm": 2.5848276627009055, "learning_rate": 1e-06, "loss": 0.4095, "step": 39 }, { "epoch": 0.002563445270443476, "grad_norm": 2.5591688177632674, "learning_rate": 1e-06, "loss": 0.4879, "step": 40 }, { "epoch": 0.002627531402204563, "grad_norm": 2.628718605922321, "learning_rate": 1e-06, "loss": 0.4264, "step": 41 }, { "epoch": 0.00269161753396565, "grad_norm": 2.5750637451230385, "learning_rate": 1e-06, "loss": 0.3824, "step": 42 }, { "epoch": 0.0027557036657267367, "grad_norm": 2.583335490883839, "learning_rate": 1e-06, "loss": 0.4289, "step": 43 }, { "epoch": 0.0028197897974878237, "grad_norm": 2.5301462397652985, "learning_rate": 1e-06, "loss": 0.4284, "step": 44 }, { "epoch": 0.0028838759292489106, "grad_norm": 2.629254554816327, "learning_rate": 1e-06, "loss": 0.4944, "step": 45 }, { "epoch": 0.0029479620610099976, "grad_norm": 2.3796843494411215, "learning_rate": 1e-06, "loss": 0.4264, "step": 46 }, { "epoch": 0.0030120481927710845, "grad_norm": 2.337285306067337, "learning_rate": 1e-06, "loss": 0.3691, "step": 47 }, { "epoch": 0.0030761343245321714, "grad_norm": 2.4625613624689686, "learning_rate": 1e-06, "loss": 0.435, "step": 48 }, { "epoch": 0.003140220456293258, "grad_norm": 2.6425603802991904, "learning_rate": 1e-06, "loss": 0.4156, "step": 49 }, { "epoch": 0.003204306588054345, "grad_norm": 2.640849648601871, "learning_rate": 1e-06, "loss": 0.4261, "step": 50 }, { "epoch": 0.003268392719815432, "grad_norm": 2.5632636645352895, "learning_rate": 1e-06, "loss": 0.4231, "step": 51 }, { "epoch": 0.0033324788515765188, "grad_norm": 2.576641529620584, "learning_rate": 1e-06, "loss": 0.4591, "step": 52 }, { "epoch": 0.0033965649833376057, "grad_norm": 2.485785384457921, "learning_rate": 1e-06, "loss": 0.436, "step": 53 }, { "epoch": 0.0034606511150986926, "grad_norm": 2.546660745565414, "learning_rate": 1e-06, "loss": 0.4197, "step": 54 }, { "epoch": 0.0035247372468597796, "grad_norm": 2.4421749353029747, "learning_rate": 1e-06, "loss": 0.3905, "step": 55 }, { "epoch": 0.0035888233786208665, "grad_norm": 2.660810200932209, "learning_rate": 1e-06, "loss": 0.3768, "step": 56 }, { "epoch": 0.0036529095103819535, "grad_norm": 2.4907286529936625, "learning_rate": 1e-06, "loss": 0.4203, "step": 57 }, { "epoch": 0.0037169956421430404, "grad_norm": 2.5185462082716965, "learning_rate": 1e-06, "loss": 0.4264, "step": 58 }, { "epoch": 0.0037810817739041274, "grad_norm": 2.5075831979076475, "learning_rate": 1e-06, "loss": 0.4246, "step": 59 }, { "epoch": 0.003845167905665214, "grad_norm": 2.5893984879897434, "learning_rate": 1e-06, "loss": 0.4869, "step": 60 }, { "epoch": 0.003909254037426301, "grad_norm": 2.5513574941347916, "learning_rate": 1e-06, "loss": 0.3985, "step": 61 }, { "epoch": 0.003973340169187388, "grad_norm": 2.3634405046489824, "learning_rate": 1e-06, "loss": 0.4067, "step": 62 }, { "epoch": 0.004037426300948475, "grad_norm": 2.794196161444718, "learning_rate": 1e-06, "loss": 0.474, "step": 63 }, { "epoch": 0.004101512432709562, "grad_norm": 2.7786386772732636, "learning_rate": 1e-06, "loss": 0.4546, "step": 64 }, { "epoch": 0.0041655985644706486, "grad_norm": 2.6427053217076577, "learning_rate": 1e-06, "loss": 0.4149, "step": 65 }, { "epoch": 0.0042296846962317355, "grad_norm": 2.9375865167158963, "learning_rate": 1e-06, "loss": 0.4606, "step": 66 }, { "epoch": 0.0042937708279928224, "grad_norm": 2.3163933176028584, "learning_rate": 1e-06, "loss": 0.4355, "step": 67 }, { "epoch": 0.004357856959753909, "grad_norm": 2.4717258151766455, "learning_rate": 1e-06, "loss": 0.4136, "step": 68 }, { "epoch": 0.004421943091514996, "grad_norm": 2.6985368472703435, "learning_rate": 1e-06, "loss": 0.4447, "step": 69 }, { "epoch": 0.004486029223276083, "grad_norm": 2.6854921516916264, "learning_rate": 1e-06, "loss": 0.4055, "step": 70 }, { "epoch": 0.00455011535503717, "grad_norm": 2.69736848706748, "learning_rate": 1e-06, "loss": 0.4256, "step": 71 }, { "epoch": 0.004614201486798257, "grad_norm": 2.596490884499458, "learning_rate": 1e-06, "loss": 0.4069, "step": 72 }, { "epoch": 0.004678287618559344, "grad_norm": 2.8156615368629216, "learning_rate": 1e-06, "loss": 0.3991, "step": 73 }, { "epoch": 0.004742373750320431, "grad_norm": 2.4653779008574017, "learning_rate": 1e-06, "loss": 0.4834, "step": 74 }, { "epoch": 0.004806459882081518, "grad_norm": 2.619661319531094, "learning_rate": 1e-06, "loss": 0.4507, "step": 75 }, { "epoch": 0.004870546013842604, "grad_norm": 2.611916458285565, "learning_rate": 1e-06, "loss": 0.4103, "step": 76 }, { "epoch": 0.004934632145603691, "grad_norm": 2.5636207784022225, "learning_rate": 1e-06, "loss": 0.4467, "step": 77 }, { "epoch": 0.004998718277364778, "grad_norm": 2.4963493016425935, "learning_rate": 1e-06, "loss": 0.4775, "step": 78 }, { "epoch": 0.005062804409125865, "grad_norm": 2.8481558198252133, "learning_rate": 1e-06, "loss": 0.4645, "step": 79 }, { "epoch": 0.005126890540886952, "grad_norm": 2.724709371989913, "learning_rate": 1e-06, "loss": 0.4768, "step": 80 }, { "epoch": 0.005190976672648039, "grad_norm": 2.474072117134112, "learning_rate": 1e-06, "loss": 0.415, "step": 81 }, { "epoch": 0.005255062804409126, "grad_norm": 2.6671004940790204, "learning_rate": 1e-06, "loss": 0.4297, "step": 82 }, { "epoch": 0.005319148936170213, "grad_norm": 2.636356788264366, "learning_rate": 1e-06, "loss": 0.4816, "step": 83 }, { "epoch": 0.0053832350679313, "grad_norm": 2.5333332281115566, "learning_rate": 1e-06, "loss": 0.4175, "step": 84 }, { "epoch": 0.0054473211996923865, "grad_norm": 2.5815065252078324, "learning_rate": 1e-06, "loss": 0.4338, "step": 85 }, { "epoch": 0.0055114073314534735, "grad_norm": 2.532968445439128, "learning_rate": 1e-06, "loss": 0.4621, "step": 86 }, { "epoch": 0.00557549346321456, "grad_norm": 2.5729492311933275, "learning_rate": 1e-06, "loss": 0.4469, "step": 87 }, { "epoch": 0.005639579594975647, "grad_norm": 2.550980847594244, "learning_rate": 1e-06, "loss": 0.4541, "step": 88 }, { "epoch": 0.005703665726736734, "grad_norm": 2.341442489669966, "learning_rate": 1e-06, "loss": 0.3631, "step": 89 }, { "epoch": 0.005767751858497821, "grad_norm": 2.4357259388266046, "learning_rate": 1e-06, "loss": 0.3696, "step": 90 }, { "epoch": 0.005831837990258908, "grad_norm": 2.8327584495472657, "learning_rate": 1e-06, "loss": 0.4202, "step": 91 }, { "epoch": 0.005895924122019995, "grad_norm": 2.67552409067869, "learning_rate": 1e-06, "loss": 0.4439, "step": 92 }, { "epoch": 0.005960010253781082, "grad_norm": 2.358660039118358, "learning_rate": 1e-06, "loss": 0.4177, "step": 93 }, { "epoch": 0.006024096385542169, "grad_norm": 2.56361346512011, "learning_rate": 1e-06, "loss": 0.369, "step": 94 }, { "epoch": 0.006088182517303256, "grad_norm": 2.5406342261425143, "learning_rate": 1e-06, "loss": 0.4096, "step": 95 }, { "epoch": 0.006152268649064343, "grad_norm": 2.4792750353456876, "learning_rate": 1e-06, "loss": 0.4104, "step": 96 }, { "epoch": 0.006216354780825429, "grad_norm": 2.632504157019107, "learning_rate": 1e-06, "loss": 0.4541, "step": 97 }, { "epoch": 0.006280440912586516, "grad_norm": 2.6260305723231485, "learning_rate": 1e-06, "loss": 0.4319, "step": 98 }, { "epoch": 0.006344527044347603, "grad_norm": 2.6485300234091356, "learning_rate": 1e-06, "loss": 0.3941, "step": 99 }, { "epoch": 0.00640861317610869, "grad_norm": 2.611785234749641, "learning_rate": 1e-06, "loss": 0.4118, "step": 100 }, { "epoch": 0.006472699307869777, "grad_norm": 2.485020096159456, "learning_rate": 1e-06, "loss": 0.4099, "step": 101 }, { "epoch": 0.006536785439630864, "grad_norm": 2.515516066727853, "learning_rate": 1e-06, "loss": 0.4082, "step": 102 }, { "epoch": 0.006600871571391951, "grad_norm": 2.6842572405670575, "learning_rate": 1e-06, "loss": 0.4795, "step": 103 }, { "epoch": 0.0066649577031530375, "grad_norm": 2.7120822334389514, "learning_rate": 1e-06, "loss": 0.4101, "step": 104 }, { "epoch": 0.0067290438349141245, "grad_norm": 2.69713806260704, "learning_rate": 1e-06, "loss": 0.4411, "step": 105 }, { "epoch": 0.006793129966675211, "grad_norm": 2.823995680469878, "learning_rate": 1e-06, "loss": 0.4376, "step": 106 }, { "epoch": 0.006857216098436298, "grad_norm": 2.971152302019369, "learning_rate": 1e-06, "loss": 0.4351, "step": 107 }, { "epoch": 0.006921302230197385, "grad_norm": 2.6318324758558522, "learning_rate": 1e-06, "loss": 0.4086, "step": 108 }, { "epoch": 0.006985388361958472, "grad_norm": 2.663343057511306, "learning_rate": 1e-06, "loss": 0.4143, "step": 109 }, { "epoch": 0.007049474493719559, "grad_norm": 2.5787240892091043, "learning_rate": 1e-06, "loss": 0.4553, "step": 110 }, { "epoch": 0.007113560625480646, "grad_norm": 2.904813690584719, "learning_rate": 1e-06, "loss": 0.4463, "step": 111 }, { "epoch": 0.007177646757241733, "grad_norm": 2.6164388520336543, "learning_rate": 1e-06, "loss": 0.4237, "step": 112 }, { "epoch": 0.00724173288900282, "grad_norm": 3.62468026756726, "learning_rate": 1e-06, "loss": 0.5197, "step": 113 }, { "epoch": 0.007305819020763907, "grad_norm": 2.6283606516515183, "learning_rate": 1e-06, "loss": 0.4176, "step": 114 }, { "epoch": 0.007369905152524994, "grad_norm": 2.733978140448335, "learning_rate": 1e-06, "loss": 0.4945, "step": 115 }, { "epoch": 0.007433991284286081, "grad_norm": 2.6142513762887365, "learning_rate": 1e-06, "loss": 0.3966, "step": 116 }, { "epoch": 0.007498077416047168, "grad_norm": 2.9462705802375964, "learning_rate": 1e-06, "loss": 0.3969, "step": 117 }, { "epoch": 0.007562163547808255, "grad_norm": 2.5077415901431492, "learning_rate": 1e-06, "loss": 0.4053, "step": 118 }, { "epoch": 0.007626249679569341, "grad_norm": 2.484246468433061, "learning_rate": 1e-06, "loss": 0.4683, "step": 119 }, { "epoch": 0.007690335811330428, "grad_norm": 2.544320892555243, "learning_rate": 1e-06, "loss": 0.4388, "step": 120 }, { "epoch": 0.007754421943091515, "grad_norm": 2.4444504338812396, "learning_rate": 1e-06, "loss": 0.4482, "step": 121 }, { "epoch": 0.007818508074852602, "grad_norm": 2.6270003993720485, "learning_rate": 1e-06, "loss": 0.4309, "step": 122 }, { "epoch": 0.00788259420661369, "grad_norm": 2.5560433655850656, "learning_rate": 1e-06, "loss": 0.4204, "step": 123 }, { "epoch": 0.007946680338374775, "grad_norm": 2.583021760048913, "learning_rate": 1e-06, "loss": 0.4224, "step": 124 }, { "epoch": 0.008010766470135863, "grad_norm": 2.516809486753932, "learning_rate": 1e-06, "loss": 0.4302, "step": 125 }, { "epoch": 0.00807485260189695, "grad_norm": 2.7202512962911167, "learning_rate": 1e-06, "loss": 0.4496, "step": 126 }, { "epoch": 0.008138938733658037, "grad_norm": 2.561504469620483, "learning_rate": 1e-06, "loss": 0.3797, "step": 127 }, { "epoch": 0.008203024865419123, "grad_norm": 2.70197541717228, "learning_rate": 1e-06, "loss": 0.4708, "step": 128 }, { "epoch": 0.008267110997180211, "grad_norm": 2.703655222038152, "learning_rate": 1e-06, "loss": 0.4013, "step": 129 }, { "epoch": 0.008331197128941297, "grad_norm": 2.801075229740231, "learning_rate": 1e-06, "loss": 0.4957, "step": 130 }, { "epoch": 0.008395283260702383, "grad_norm": 2.7000542176029283, "learning_rate": 1e-06, "loss": 0.4056, "step": 131 }, { "epoch": 0.008459369392463471, "grad_norm": 2.728367510056538, "learning_rate": 1e-06, "loss": 0.4412, "step": 132 }, { "epoch": 0.008523455524224557, "grad_norm": 2.4926115155921744, "learning_rate": 1e-06, "loss": 0.4143, "step": 133 }, { "epoch": 0.008587541655985645, "grad_norm": 2.5170415635469663, "learning_rate": 1e-06, "loss": 0.4472, "step": 134 }, { "epoch": 0.008651627787746731, "grad_norm": 2.766298946998558, "learning_rate": 1e-06, "loss": 0.3751, "step": 135 }, { "epoch": 0.008715713919507819, "grad_norm": 2.725850116300731, "learning_rate": 1e-06, "loss": 0.4556, "step": 136 }, { "epoch": 0.008779800051268905, "grad_norm": 2.53900556364546, "learning_rate": 1e-06, "loss": 0.4355, "step": 137 }, { "epoch": 0.008843886183029993, "grad_norm": 2.477924972118881, "learning_rate": 1e-06, "loss": 0.4386, "step": 138 }, { "epoch": 0.008907972314791079, "grad_norm": 2.826510511701279, "learning_rate": 1e-06, "loss": 0.4638, "step": 139 }, { "epoch": 0.008972058446552167, "grad_norm": 2.6376643369143764, "learning_rate": 1e-06, "loss": 0.4355, "step": 140 }, { "epoch": 0.009036144578313253, "grad_norm": 2.554145291137539, "learning_rate": 1e-06, "loss": 0.4451, "step": 141 }, { "epoch": 0.00910023071007434, "grad_norm": 2.6743052984416154, "learning_rate": 1e-06, "loss": 0.4498, "step": 142 }, { "epoch": 0.009164316841835426, "grad_norm": 2.56483434081286, "learning_rate": 1e-06, "loss": 0.4436, "step": 143 }, { "epoch": 0.009228402973596514, "grad_norm": 2.4254491231280455, "learning_rate": 1e-06, "loss": 0.42, "step": 144 }, { "epoch": 0.0092924891053576, "grad_norm": 2.558112650901851, "learning_rate": 1e-06, "loss": 0.4531, "step": 145 }, { "epoch": 0.009356575237118688, "grad_norm": 2.405260114602089, "learning_rate": 1e-06, "loss": 0.4223, "step": 146 }, { "epoch": 0.009420661368879774, "grad_norm": 2.60820574354277, "learning_rate": 1e-06, "loss": 0.3979, "step": 147 }, { "epoch": 0.009484747500640862, "grad_norm": 2.6897694601294533, "learning_rate": 1e-06, "loss": 0.478, "step": 148 }, { "epoch": 0.009548833632401948, "grad_norm": 2.579453394198776, "learning_rate": 1e-06, "loss": 0.4296, "step": 149 }, { "epoch": 0.009612919764163036, "grad_norm": 2.7424643174105716, "learning_rate": 1e-06, "loss": 0.3937, "step": 150 }, { "epoch": 0.009677005895924122, "grad_norm": 2.7023142583494875, "learning_rate": 1e-06, "loss": 0.4058, "step": 151 }, { "epoch": 0.009741092027685208, "grad_norm": 2.506484730613173, "learning_rate": 1e-06, "loss": 0.3936, "step": 152 }, { "epoch": 0.009805178159446296, "grad_norm": 2.4749366097633874, "learning_rate": 1e-06, "loss": 0.4255, "step": 153 }, { "epoch": 0.009869264291207382, "grad_norm": 2.3269287496593414, "learning_rate": 1e-06, "loss": 0.3664, "step": 154 }, { "epoch": 0.00993335042296847, "grad_norm": 2.4212911977056604, "learning_rate": 1e-06, "loss": 0.4353, "step": 155 }, { "epoch": 0.009997436554729556, "grad_norm": 2.4468505337849593, "learning_rate": 1e-06, "loss": 0.4622, "step": 156 }, { "epoch": 0.010061522686490644, "grad_norm": 2.5645734340411206, "learning_rate": 1e-06, "loss": 0.3703, "step": 157 }, { "epoch": 0.01012560881825173, "grad_norm": 2.6534084292524764, "learning_rate": 1e-06, "loss": 0.4307, "step": 158 }, { "epoch": 0.010189694950012818, "grad_norm": 2.510221138642233, "learning_rate": 1e-06, "loss": 0.4469, "step": 159 }, { "epoch": 0.010253781081773904, "grad_norm": 2.6653843643185047, "learning_rate": 1e-06, "loss": 0.4187, "step": 160 }, { "epoch": 0.010317867213534991, "grad_norm": 2.647156935213225, "learning_rate": 1e-06, "loss": 0.4281, "step": 161 }, { "epoch": 0.010381953345296078, "grad_norm": 2.4645904023031364, "learning_rate": 1e-06, "loss": 0.4186, "step": 162 }, { "epoch": 0.010446039477057165, "grad_norm": 2.5273636798670447, "learning_rate": 1e-06, "loss": 0.4196, "step": 163 }, { "epoch": 0.010510125608818251, "grad_norm": 2.5759837139960355, "learning_rate": 1e-06, "loss": 0.4219, "step": 164 }, { "epoch": 0.01057421174057934, "grad_norm": 2.7530163451644483, "learning_rate": 1e-06, "loss": 0.4473, "step": 165 }, { "epoch": 0.010638297872340425, "grad_norm": 2.771144588040565, "learning_rate": 1e-06, "loss": 0.3832, "step": 166 }, { "epoch": 0.010702384004101513, "grad_norm": 2.6609737067836194, "learning_rate": 1e-06, "loss": 0.4258, "step": 167 }, { "epoch": 0.0107664701358626, "grad_norm": 2.76214436031771, "learning_rate": 1e-06, "loss": 0.3864, "step": 168 }, { "epoch": 0.010830556267623687, "grad_norm": 2.722402456149691, "learning_rate": 1e-06, "loss": 0.404, "step": 169 }, { "epoch": 0.010894642399384773, "grad_norm": 2.5998788986620696, "learning_rate": 1e-06, "loss": 0.4311, "step": 170 }, { "epoch": 0.01095872853114586, "grad_norm": 2.7559088943343335, "learning_rate": 1e-06, "loss": 0.4491, "step": 171 }, { "epoch": 0.011022814662906947, "grad_norm": 2.7409712519131686, "learning_rate": 1e-06, "loss": 0.4435, "step": 172 }, { "epoch": 0.011086900794668033, "grad_norm": 2.664956072118899, "learning_rate": 1e-06, "loss": 0.4193, "step": 173 }, { "epoch": 0.01115098692642912, "grad_norm": 3.1192465909037774, "learning_rate": 1e-06, "loss": 0.4669, "step": 174 }, { "epoch": 0.011215073058190207, "grad_norm": 2.563621451938816, "learning_rate": 1e-06, "loss": 0.3767, "step": 175 }, { "epoch": 0.011279159189951295, "grad_norm": 2.570045233440838, "learning_rate": 1e-06, "loss": 0.4765, "step": 176 }, { "epoch": 0.01134324532171238, "grad_norm": 2.4916231423662043, "learning_rate": 1e-06, "loss": 0.4392, "step": 177 }, { "epoch": 0.011407331453473469, "grad_norm": 2.7651096653059195, "learning_rate": 1e-06, "loss": 0.4099, "step": 178 }, { "epoch": 0.011471417585234555, "grad_norm": 2.562958698429324, "learning_rate": 1e-06, "loss": 0.4092, "step": 179 }, { "epoch": 0.011535503716995642, "grad_norm": 2.464035920659328, "learning_rate": 1e-06, "loss": 0.3709, "step": 180 }, { "epoch": 0.011599589848756729, "grad_norm": 2.751881851503854, "learning_rate": 1e-06, "loss": 0.3687, "step": 181 }, { "epoch": 0.011663675980517816, "grad_norm": 2.559719631734224, "learning_rate": 1e-06, "loss": 0.378, "step": 182 }, { "epoch": 0.011727762112278902, "grad_norm": 2.4418892922224074, "learning_rate": 1e-06, "loss": 0.4095, "step": 183 }, { "epoch": 0.01179184824403999, "grad_norm": 2.6724559730934545, "learning_rate": 1e-06, "loss": 0.4036, "step": 184 }, { "epoch": 0.011855934375801076, "grad_norm": 2.6322280599536936, "learning_rate": 1e-06, "loss": 0.4517, "step": 185 }, { "epoch": 0.011920020507562164, "grad_norm": 2.605353550494746, "learning_rate": 1e-06, "loss": 0.4107, "step": 186 }, { "epoch": 0.01198410663932325, "grad_norm": 2.564073940099639, "learning_rate": 1e-06, "loss": 0.3855, "step": 187 }, { "epoch": 0.012048192771084338, "grad_norm": 2.535479880548571, "learning_rate": 1e-06, "loss": 0.4454, "step": 188 }, { "epoch": 0.012112278902845424, "grad_norm": 2.566704466588977, "learning_rate": 1e-06, "loss": 0.4025, "step": 189 }, { "epoch": 0.012176365034606512, "grad_norm": 2.5806716178681293, "learning_rate": 1e-06, "loss": 0.3537, "step": 190 }, { "epoch": 0.012240451166367598, "grad_norm": 2.825882756789249, "learning_rate": 1e-06, "loss": 0.4407, "step": 191 }, { "epoch": 0.012304537298128686, "grad_norm": 2.578508321046613, "learning_rate": 1e-06, "loss": 0.4242, "step": 192 }, { "epoch": 0.012368623429889772, "grad_norm": 2.460610078013274, "learning_rate": 1e-06, "loss": 0.3796, "step": 193 }, { "epoch": 0.012432709561650858, "grad_norm": 2.52758953552192, "learning_rate": 1e-06, "loss": 0.3969, "step": 194 }, { "epoch": 0.012496795693411946, "grad_norm": 2.680670213978359, "learning_rate": 1e-06, "loss": 0.4494, "step": 195 }, { "epoch": 0.012560881825173032, "grad_norm": 2.5727240318324345, "learning_rate": 1e-06, "loss": 0.3651, "step": 196 }, { "epoch": 0.01262496795693412, "grad_norm": 2.5479336539567035, "learning_rate": 1e-06, "loss": 0.4283, "step": 197 }, { "epoch": 0.012689054088695206, "grad_norm": 2.5115294399067687, "learning_rate": 1e-06, "loss": 0.4641, "step": 198 }, { "epoch": 0.012753140220456293, "grad_norm": 2.78330697315584, "learning_rate": 1e-06, "loss": 0.4351, "step": 199 }, { "epoch": 0.01281722635221738, "grad_norm": 2.45488310212574, "learning_rate": 1e-06, "loss": 0.414, "step": 200 }, { "epoch": 0.012881312483978467, "grad_norm": 2.3852827309025404, "learning_rate": 1e-06, "loss": 0.3923, "step": 201 }, { "epoch": 0.012945398615739553, "grad_norm": 2.5864768221972354, "learning_rate": 1e-06, "loss": 0.4649, "step": 202 }, { "epoch": 0.013009484747500641, "grad_norm": 2.7974137736304803, "learning_rate": 1e-06, "loss": 0.4582, "step": 203 }, { "epoch": 0.013073570879261727, "grad_norm": 2.406651303043376, "learning_rate": 1e-06, "loss": 0.409, "step": 204 }, { "epoch": 0.013137657011022815, "grad_norm": 2.5980042928944735, "learning_rate": 1e-06, "loss": 0.4234, "step": 205 }, { "epoch": 0.013201743142783901, "grad_norm": 2.5707835336578237, "learning_rate": 1e-06, "loss": 0.4188, "step": 206 }, { "epoch": 0.013265829274544989, "grad_norm": 2.5628325989007665, "learning_rate": 1e-06, "loss": 0.4019, "step": 207 }, { "epoch": 0.013329915406306075, "grad_norm": 2.3178028538939057, "learning_rate": 1e-06, "loss": 0.3841, "step": 208 }, { "epoch": 0.013394001538067163, "grad_norm": 2.4943832148029372, "learning_rate": 1e-06, "loss": 0.4318, "step": 209 }, { "epoch": 0.013458087669828249, "grad_norm": 2.634069670687289, "learning_rate": 1e-06, "loss": 0.4756, "step": 210 }, { "epoch": 0.013522173801589337, "grad_norm": 2.557527545449268, "learning_rate": 1e-06, "loss": 0.4653, "step": 211 }, { "epoch": 0.013586259933350423, "grad_norm": 2.463861613281935, "learning_rate": 1e-06, "loss": 0.3948, "step": 212 }, { "epoch": 0.01365034606511151, "grad_norm": 2.3854742264069397, "learning_rate": 1e-06, "loss": 0.3689, "step": 213 }, { "epoch": 0.013714432196872597, "grad_norm": 2.4260505846618186, "learning_rate": 1e-06, "loss": 0.3644, "step": 214 }, { "epoch": 0.013778518328633685, "grad_norm": 2.5804340331860343, "learning_rate": 1e-06, "loss": 0.4042, "step": 215 }, { "epoch": 0.01384260446039477, "grad_norm": 2.6587083377401455, "learning_rate": 1e-06, "loss": 0.4072, "step": 216 }, { "epoch": 0.013906690592155857, "grad_norm": 2.617929149718754, "learning_rate": 1e-06, "loss": 0.4605, "step": 217 }, { "epoch": 0.013970776723916944, "grad_norm": 2.5803501828834143, "learning_rate": 1e-06, "loss": 0.454, "step": 218 }, { "epoch": 0.01403486285567803, "grad_norm": 2.788641206889281, "learning_rate": 1e-06, "loss": 0.404, "step": 219 }, { "epoch": 0.014098948987439118, "grad_norm": 2.506065493967879, "learning_rate": 1e-06, "loss": 0.3746, "step": 220 }, { "epoch": 0.014163035119200204, "grad_norm": 2.62297260217538, "learning_rate": 1e-06, "loss": 0.4524, "step": 221 }, { "epoch": 0.014227121250961292, "grad_norm": 2.396202673688117, "learning_rate": 1e-06, "loss": 0.423, "step": 222 }, { "epoch": 0.014291207382722378, "grad_norm": 2.5608484595985788, "learning_rate": 1e-06, "loss": 0.394, "step": 223 }, { "epoch": 0.014355293514483466, "grad_norm": 2.7259635787986722, "learning_rate": 1e-06, "loss": 0.4533, "step": 224 }, { "epoch": 0.014419379646244552, "grad_norm": 2.7655406885266967, "learning_rate": 1e-06, "loss": 0.4553, "step": 225 }, { "epoch": 0.01448346577800564, "grad_norm": 2.548446603379672, "learning_rate": 1e-06, "loss": 0.3889, "step": 226 }, { "epoch": 0.014547551909766726, "grad_norm": 2.4670836949298565, "learning_rate": 1e-06, "loss": 0.4037, "step": 227 }, { "epoch": 0.014611638041527814, "grad_norm": 2.4857360759200557, "learning_rate": 1e-06, "loss": 0.3866, "step": 228 }, { "epoch": 0.0146757241732889, "grad_norm": 2.6690124449544754, "learning_rate": 1e-06, "loss": 0.4127, "step": 229 }, { "epoch": 0.014739810305049988, "grad_norm": 2.6132105365625464, "learning_rate": 1e-06, "loss": 0.4843, "step": 230 }, { "epoch": 0.014803896436811074, "grad_norm": 2.7432970666186347, "learning_rate": 1e-06, "loss": 0.4536, "step": 231 }, { "epoch": 0.014867982568572162, "grad_norm": 2.7658731495653672, "learning_rate": 1e-06, "loss": 0.3939, "step": 232 }, { "epoch": 0.014932068700333248, "grad_norm": 2.580362454305093, "learning_rate": 1e-06, "loss": 0.3823, "step": 233 }, { "epoch": 0.014996154832094336, "grad_norm": 2.6052893301698745, "learning_rate": 1e-06, "loss": 0.4735, "step": 234 }, { "epoch": 0.015060240963855422, "grad_norm": 2.48609104419947, "learning_rate": 1e-06, "loss": 0.4368, "step": 235 }, { "epoch": 0.01512432709561651, "grad_norm": 2.4309934849231962, "learning_rate": 1e-06, "loss": 0.3756, "step": 236 }, { "epoch": 0.015188413227377595, "grad_norm": 2.437877921605441, "learning_rate": 1e-06, "loss": 0.4504, "step": 237 }, { "epoch": 0.015252499359138682, "grad_norm": 2.445601223786844, "learning_rate": 1e-06, "loss": 0.3719, "step": 238 }, { "epoch": 0.01531658549089977, "grad_norm": 2.566610647012455, "learning_rate": 1e-06, "loss": 0.4522, "step": 239 }, { "epoch": 0.015380671622660855, "grad_norm": 2.3629967174097883, "learning_rate": 1e-06, "loss": 0.3975, "step": 240 }, { "epoch": 0.015444757754421943, "grad_norm": 2.7502843636448358, "learning_rate": 1e-06, "loss": 0.4292, "step": 241 }, { "epoch": 0.01550884388618303, "grad_norm": 2.708324699769681, "learning_rate": 1e-06, "loss": 0.4187, "step": 242 }, { "epoch": 0.015572930017944117, "grad_norm": 2.444268394765568, "learning_rate": 1e-06, "loss": 0.4329, "step": 243 }, { "epoch": 0.015637016149705203, "grad_norm": 2.3920689319195025, "learning_rate": 1e-06, "loss": 0.3964, "step": 244 }, { "epoch": 0.01570110228146629, "grad_norm": 2.4825639040106156, "learning_rate": 1e-06, "loss": 0.4123, "step": 245 }, { "epoch": 0.01576518841322738, "grad_norm": 2.4814384615199123, "learning_rate": 1e-06, "loss": 0.4215, "step": 246 }, { "epoch": 0.015829274544988465, "grad_norm": 2.724229129070956, "learning_rate": 1e-06, "loss": 0.4317, "step": 247 }, { "epoch": 0.01589336067674955, "grad_norm": 2.7175053474836766, "learning_rate": 1e-06, "loss": 0.3619, "step": 248 }, { "epoch": 0.015957446808510637, "grad_norm": 2.4697050879589275, "learning_rate": 1e-06, "loss": 0.3635, "step": 249 }, { "epoch": 0.016021532940271727, "grad_norm": 2.4595339460868573, "learning_rate": 1e-06, "loss": 0.476, "step": 250 }, { "epoch": 0.016085619072032813, "grad_norm": 2.6445467786638814, "learning_rate": 1e-06, "loss": 0.4999, "step": 251 }, { "epoch": 0.0161497052037939, "grad_norm": 2.4693317351326414, "learning_rate": 1e-06, "loss": 0.4237, "step": 252 }, { "epoch": 0.016213791335554985, "grad_norm": 2.4702693466347125, "learning_rate": 1e-06, "loss": 0.4443, "step": 253 }, { "epoch": 0.016277877467316074, "grad_norm": 2.717654695399045, "learning_rate": 1e-06, "loss": 0.4019, "step": 254 }, { "epoch": 0.01634196359907716, "grad_norm": 2.603236954441173, "learning_rate": 1e-06, "loss": 0.469, "step": 255 }, { "epoch": 0.016406049730838246, "grad_norm": 2.7996949150270867, "learning_rate": 1e-06, "loss": 0.4296, "step": 256 }, { "epoch": 0.016470135862599333, "grad_norm": 2.6193082828705125, "learning_rate": 1e-06, "loss": 0.4333, "step": 257 }, { "epoch": 0.016534221994360422, "grad_norm": 2.473437533047886, "learning_rate": 1e-06, "loss": 0.4774, "step": 258 }, { "epoch": 0.016598308126121508, "grad_norm": 2.564354049838647, "learning_rate": 1e-06, "loss": 0.3997, "step": 259 }, { "epoch": 0.016662394257882594, "grad_norm": 2.547984763105764, "learning_rate": 1e-06, "loss": 0.4982, "step": 260 }, { "epoch": 0.01672648038964368, "grad_norm": 2.693431115018885, "learning_rate": 1e-06, "loss": 0.4535, "step": 261 }, { "epoch": 0.016790566521404766, "grad_norm": 2.5918028116724146, "learning_rate": 1e-06, "loss": 0.4384, "step": 262 }, { "epoch": 0.016854652653165856, "grad_norm": 2.586212163796859, "learning_rate": 1e-06, "loss": 0.4386, "step": 263 }, { "epoch": 0.016918738784926942, "grad_norm": 2.5085076025134962, "learning_rate": 1e-06, "loss": 0.4387, "step": 264 }, { "epoch": 0.016982824916688028, "grad_norm": 2.7284858517103294, "learning_rate": 1e-06, "loss": 0.4369, "step": 265 }, { "epoch": 0.017046911048449114, "grad_norm": 2.698912647175665, "learning_rate": 1e-06, "loss": 0.3981, "step": 266 }, { "epoch": 0.017110997180210204, "grad_norm": 2.433393766997923, "learning_rate": 1e-06, "loss": 0.3888, "step": 267 }, { "epoch": 0.01717508331197129, "grad_norm": 2.6338501838202606, "learning_rate": 1e-06, "loss": 0.4465, "step": 268 }, { "epoch": 0.017239169443732376, "grad_norm": 2.728265238380636, "learning_rate": 1e-06, "loss": 0.471, "step": 269 }, { "epoch": 0.017303255575493462, "grad_norm": 2.588489728565312, "learning_rate": 1e-06, "loss": 0.4759, "step": 270 }, { "epoch": 0.01736734170725455, "grad_norm": 2.611963629217684, "learning_rate": 1e-06, "loss": 0.4743, "step": 271 }, { "epoch": 0.017431427839015638, "grad_norm": 2.633648442017364, "learning_rate": 1e-06, "loss": 0.3899, "step": 272 }, { "epoch": 0.017495513970776724, "grad_norm": 2.9530713121350414, "learning_rate": 1e-06, "loss": 0.3967, "step": 273 }, { "epoch": 0.01755960010253781, "grad_norm": 2.499549993514414, "learning_rate": 1e-06, "loss": 0.4312, "step": 274 }, { "epoch": 0.0176236862342989, "grad_norm": 2.6602932818625926, "learning_rate": 1e-06, "loss": 0.4594, "step": 275 }, { "epoch": 0.017687772366059985, "grad_norm": 2.4729820946013943, "learning_rate": 1e-06, "loss": 0.3503, "step": 276 }, { "epoch": 0.01775185849782107, "grad_norm": 2.568481943130322, "learning_rate": 1e-06, "loss": 0.4095, "step": 277 }, { "epoch": 0.017815944629582157, "grad_norm": 2.5410630389732356, "learning_rate": 1e-06, "loss": 0.4558, "step": 278 }, { "epoch": 0.017880030761343247, "grad_norm": 2.8834562082794633, "learning_rate": 1e-06, "loss": 0.4472, "step": 279 }, { "epoch": 0.017944116893104333, "grad_norm": 2.9142632748077593, "learning_rate": 1e-06, "loss": 0.3867, "step": 280 }, { "epoch": 0.01800820302486542, "grad_norm": 2.685459029147027, "learning_rate": 1e-06, "loss": 0.4265, "step": 281 }, { "epoch": 0.018072289156626505, "grad_norm": 2.8592794071139687, "learning_rate": 1e-06, "loss": 0.4207, "step": 282 }, { "epoch": 0.01813637528838759, "grad_norm": 2.577623379545379, "learning_rate": 1e-06, "loss": 0.3723, "step": 283 }, { "epoch": 0.01820046142014868, "grad_norm": 2.639894455655153, "learning_rate": 1e-06, "loss": 0.4035, "step": 284 }, { "epoch": 0.018264547551909767, "grad_norm": 2.744866333497609, "learning_rate": 1e-06, "loss": 0.4558, "step": 285 }, { "epoch": 0.018328633683670853, "grad_norm": 2.7701088109503855, "learning_rate": 1e-06, "loss": 0.4056, "step": 286 }, { "epoch": 0.01839271981543194, "grad_norm": 2.7164498040000233, "learning_rate": 1e-06, "loss": 0.4093, "step": 287 }, { "epoch": 0.01845680594719303, "grad_norm": 2.8216916740476417, "learning_rate": 1e-06, "loss": 0.4087, "step": 288 }, { "epoch": 0.018520892078954115, "grad_norm": 2.570695596833045, "learning_rate": 1e-06, "loss": 0.3929, "step": 289 }, { "epoch": 0.0185849782107152, "grad_norm": 2.5436410160123093, "learning_rate": 1e-06, "loss": 0.4209, "step": 290 }, { "epoch": 0.018649064342476287, "grad_norm": 2.464564508450358, "learning_rate": 1e-06, "loss": 0.4682, "step": 291 }, { "epoch": 0.018713150474237376, "grad_norm": 2.6414925530399103, "learning_rate": 1e-06, "loss": 0.4286, "step": 292 }, { "epoch": 0.018777236605998462, "grad_norm": 2.612675091607748, "learning_rate": 1e-06, "loss": 0.4331, "step": 293 }, { "epoch": 0.01884132273775955, "grad_norm": 2.443575754509268, "learning_rate": 1e-06, "loss": 0.3982, "step": 294 }, { "epoch": 0.018905408869520635, "grad_norm": 2.580805303286195, "learning_rate": 1e-06, "loss": 0.4145, "step": 295 }, { "epoch": 0.018969495001281724, "grad_norm": 2.5273581888412453, "learning_rate": 1e-06, "loss": 0.3773, "step": 296 }, { "epoch": 0.01903358113304281, "grad_norm": 2.489408203399578, "learning_rate": 1e-06, "loss": 0.4248, "step": 297 }, { "epoch": 0.019097667264803896, "grad_norm": 2.649191810622033, "learning_rate": 1e-06, "loss": 0.4376, "step": 298 }, { "epoch": 0.019161753396564982, "grad_norm": 2.531864638870876, "learning_rate": 1e-06, "loss": 0.4435, "step": 299 }, { "epoch": 0.019225839528326072, "grad_norm": 2.405100794839331, "learning_rate": 1e-06, "loss": 0.4023, "step": 300 }, { "epoch": 0.019289925660087158, "grad_norm": 2.8102035159421064, "learning_rate": 1e-06, "loss": 0.4716, "step": 301 }, { "epoch": 0.019354011791848244, "grad_norm": 2.502247457851022, "learning_rate": 1e-06, "loss": 0.4135, "step": 302 }, { "epoch": 0.01941809792360933, "grad_norm": 2.4034822507856375, "learning_rate": 1e-06, "loss": 0.375, "step": 303 }, { "epoch": 0.019482184055370416, "grad_norm": 2.6665336665510355, "learning_rate": 1e-06, "loss": 0.4282, "step": 304 }, { "epoch": 0.019546270187131506, "grad_norm": 2.6706007873681754, "learning_rate": 1e-06, "loss": 0.4281, "step": 305 }, { "epoch": 0.019610356318892592, "grad_norm": 2.9943586682230596, "learning_rate": 1e-06, "loss": 0.4563, "step": 306 }, { "epoch": 0.019674442450653678, "grad_norm": 2.529442058426431, "learning_rate": 1e-06, "loss": 0.4235, "step": 307 }, { "epoch": 0.019738528582414764, "grad_norm": 2.6846719971532287, "learning_rate": 1e-06, "loss": 0.4301, "step": 308 }, { "epoch": 0.019802614714175854, "grad_norm": 2.656046389414779, "learning_rate": 1e-06, "loss": 0.4214, "step": 309 }, { "epoch": 0.01986670084593694, "grad_norm": 2.3228700793409316, "learning_rate": 1e-06, "loss": 0.4039, "step": 310 }, { "epoch": 0.019930786977698026, "grad_norm": 2.3570092668653313, "learning_rate": 1e-06, "loss": 0.4244, "step": 311 }, { "epoch": 0.01999487310945911, "grad_norm": 2.6047654223851087, "learning_rate": 1e-06, "loss": 0.408, "step": 312 }, { "epoch": 0.0200589592412202, "grad_norm": 2.762796362913787, "learning_rate": 1e-06, "loss": 0.3973, "step": 313 }, { "epoch": 0.020123045372981287, "grad_norm": 2.684177046272746, "learning_rate": 1e-06, "loss": 0.4441, "step": 314 }, { "epoch": 0.020187131504742373, "grad_norm": 2.7783820778054875, "learning_rate": 1e-06, "loss": 0.4093, "step": 315 }, { "epoch": 0.02025121763650346, "grad_norm": 2.6167852095843864, "learning_rate": 1e-06, "loss": 0.4247, "step": 316 }, { "epoch": 0.02031530376826455, "grad_norm": 2.6845860637575756, "learning_rate": 1e-06, "loss": 0.4023, "step": 317 }, { "epoch": 0.020379389900025635, "grad_norm": 2.602696481988515, "learning_rate": 1e-06, "loss": 0.4382, "step": 318 }, { "epoch": 0.02044347603178672, "grad_norm": 2.5342579150615006, "learning_rate": 1e-06, "loss": 0.4055, "step": 319 }, { "epoch": 0.020507562163547807, "grad_norm": 2.5854239688820377, "learning_rate": 1e-06, "loss": 0.465, "step": 320 }, { "epoch": 0.020571648295308897, "grad_norm": 2.792877790090399, "learning_rate": 1e-06, "loss": 0.4172, "step": 321 }, { "epoch": 0.020635734427069983, "grad_norm": 2.6879878145297913, "learning_rate": 1e-06, "loss": 0.3912, "step": 322 }, { "epoch": 0.02069982055883107, "grad_norm": 2.5764792723219703, "learning_rate": 1e-06, "loss": 0.4293, "step": 323 }, { "epoch": 0.020763906690592155, "grad_norm": 2.584799346003916, "learning_rate": 1e-06, "loss": 0.4235, "step": 324 }, { "epoch": 0.02082799282235324, "grad_norm": 2.52310368174129, "learning_rate": 1e-06, "loss": 0.4061, "step": 325 }, { "epoch": 0.02089207895411433, "grad_norm": 2.631813901191913, "learning_rate": 1e-06, "loss": 0.3927, "step": 326 }, { "epoch": 0.020956165085875417, "grad_norm": 2.640467081871137, "learning_rate": 1e-06, "loss": 0.4428, "step": 327 }, { "epoch": 0.021020251217636503, "grad_norm": 2.9492291344272874, "learning_rate": 1e-06, "loss": 0.4127, "step": 328 }, { "epoch": 0.02108433734939759, "grad_norm": 2.5242390714272114, "learning_rate": 1e-06, "loss": 0.5015, "step": 329 }, { "epoch": 0.02114842348115868, "grad_norm": 2.6479950311376954, "learning_rate": 1e-06, "loss": 0.4574, "step": 330 }, { "epoch": 0.021212509612919764, "grad_norm": 2.5907035344735116, "learning_rate": 1e-06, "loss": 0.4418, "step": 331 }, { "epoch": 0.02127659574468085, "grad_norm": 2.4904752366096203, "learning_rate": 1e-06, "loss": 0.4231, "step": 332 }, { "epoch": 0.021340681876441937, "grad_norm": 2.762482929644451, "learning_rate": 1e-06, "loss": 0.4319, "step": 333 }, { "epoch": 0.021404768008203026, "grad_norm": 2.257106119148726, "learning_rate": 1e-06, "loss": 0.4214, "step": 334 }, { "epoch": 0.021468854139964112, "grad_norm": 2.687648586593707, "learning_rate": 1e-06, "loss": 0.4059, "step": 335 }, { "epoch": 0.0215329402717252, "grad_norm": 2.6657368936212715, "learning_rate": 1e-06, "loss": 0.3806, "step": 336 }, { "epoch": 0.021597026403486284, "grad_norm": 2.6482827833947997, "learning_rate": 1e-06, "loss": 0.4097, "step": 337 }, { "epoch": 0.021661112535247374, "grad_norm": 2.7093340741541425, "learning_rate": 1e-06, "loss": 0.4485, "step": 338 }, { "epoch": 0.02172519866700846, "grad_norm": 2.3762505723651057, "learning_rate": 1e-06, "loss": 0.428, "step": 339 }, { "epoch": 0.021789284798769546, "grad_norm": 2.423159148830173, "learning_rate": 1e-06, "loss": 0.3828, "step": 340 }, { "epoch": 0.021853370930530632, "grad_norm": 2.716362646966935, "learning_rate": 1e-06, "loss": 0.4466, "step": 341 }, { "epoch": 0.02191745706229172, "grad_norm": 2.6724315541317494, "learning_rate": 1e-06, "loss": 0.4175, "step": 342 }, { "epoch": 0.021981543194052808, "grad_norm": 2.622452376572196, "learning_rate": 1e-06, "loss": 0.4326, "step": 343 }, { "epoch": 0.022045629325813894, "grad_norm": 2.6641408148600485, "learning_rate": 1e-06, "loss": 0.4213, "step": 344 }, { "epoch": 0.02210971545757498, "grad_norm": 2.587246078788654, "learning_rate": 1e-06, "loss": 0.4081, "step": 345 }, { "epoch": 0.022173801589336066, "grad_norm": 2.684412921798805, "learning_rate": 1e-06, "loss": 0.4255, "step": 346 }, { "epoch": 0.022237887721097156, "grad_norm": 2.4607146739904007, "learning_rate": 1e-06, "loss": 0.4439, "step": 347 }, { "epoch": 0.02230197385285824, "grad_norm": 2.7249177701726817, "learning_rate": 1e-06, "loss": 0.4347, "step": 348 }, { "epoch": 0.022366059984619328, "grad_norm": 2.7097559627929053, "learning_rate": 1e-06, "loss": 0.4123, "step": 349 }, { "epoch": 0.022430146116380414, "grad_norm": 2.6075455207333387, "learning_rate": 1e-06, "loss": 0.3605, "step": 350 }, { "epoch": 0.022494232248141503, "grad_norm": 2.526788415971298, "learning_rate": 1e-06, "loss": 0.4505, "step": 351 }, { "epoch": 0.02255831837990259, "grad_norm": 2.6078284647598204, "learning_rate": 1e-06, "loss": 0.4309, "step": 352 }, { "epoch": 0.022622404511663675, "grad_norm": 2.443089947367771, "learning_rate": 1e-06, "loss": 0.394, "step": 353 }, { "epoch": 0.02268649064342476, "grad_norm": 2.694646337485449, "learning_rate": 1e-06, "loss": 0.4094, "step": 354 }, { "epoch": 0.02275057677518585, "grad_norm": 2.539210140474977, "learning_rate": 1e-06, "loss": 0.4183, "step": 355 }, { "epoch": 0.022814662906946937, "grad_norm": 2.5494243341233602, "learning_rate": 1e-06, "loss": 0.3934, "step": 356 }, { "epoch": 0.022878749038708023, "grad_norm": 2.518198059218748, "learning_rate": 1e-06, "loss": 0.3786, "step": 357 }, { "epoch": 0.02294283517046911, "grad_norm": 2.61819759245241, "learning_rate": 1e-06, "loss": 0.4574, "step": 358 }, { "epoch": 0.0230069213022302, "grad_norm": 2.5581859112297005, "learning_rate": 1e-06, "loss": 0.3627, "step": 359 }, { "epoch": 0.023071007433991285, "grad_norm": 2.587465107279955, "learning_rate": 1e-06, "loss": 0.3798, "step": 360 }, { "epoch": 0.02313509356575237, "grad_norm": 2.648677113740983, "learning_rate": 1e-06, "loss": 0.4366, "step": 361 }, { "epoch": 0.023199179697513457, "grad_norm": 2.7540181767323664, "learning_rate": 1e-06, "loss": 0.3658, "step": 362 }, { "epoch": 0.023263265829274547, "grad_norm": 2.755584999096141, "learning_rate": 1e-06, "loss": 0.4769, "step": 363 }, { "epoch": 0.023327351961035633, "grad_norm": 2.6396695649199176, "learning_rate": 1e-06, "loss": 0.4281, "step": 364 }, { "epoch": 0.02339143809279672, "grad_norm": 2.6761169006385845, "learning_rate": 1e-06, "loss": 0.4838, "step": 365 }, { "epoch": 0.023455524224557805, "grad_norm": 2.6889950878228004, "learning_rate": 1e-06, "loss": 0.4501, "step": 366 }, { "epoch": 0.02351961035631889, "grad_norm": 2.7548073384891003, "learning_rate": 1e-06, "loss": 0.4148, "step": 367 }, { "epoch": 0.02358369648807998, "grad_norm": 2.542233508453795, "learning_rate": 1e-06, "loss": 0.4433, "step": 368 }, { "epoch": 0.023647782619841066, "grad_norm": 2.5380547510477487, "learning_rate": 1e-06, "loss": 0.4246, "step": 369 }, { "epoch": 0.023711868751602153, "grad_norm": 2.5495340569743252, "learning_rate": 1e-06, "loss": 0.3672, "step": 370 }, { "epoch": 0.02377595488336324, "grad_norm": 2.775600728413592, "learning_rate": 1e-06, "loss": 0.3908, "step": 371 }, { "epoch": 0.023840041015124328, "grad_norm": 2.60490757100387, "learning_rate": 1e-06, "loss": 0.3787, "step": 372 }, { "epoch": 0.023904127146885414, "grad_norm": 2.6266435496694682, "learning_rate": 1e-06, "loss": 0.4148, "step": 373 }, { "epoch": 0.0239682132786465, "grad_norm": 2.3238570132425744, "learning_rate": 1e-06, "loss": 0.4079, "step": 374 }, { "epoch": 0.024032299410407586, "grad_norm": 2.4584692833391917, "learning_rate": 1e-06, "loss": 0.4871, "step": 375 }, { "epoch": 0.024096385542168676, "grad_norm": 2.6874135978719065, "learning_rate": 1e-06, "loss": 0.3946, "step": 376 }, { "epoch": 0.024160471673929762, "grad_norm": 2.5538834092647704, "learning_rate": 1e-06, "loss": 0.3631, "step": 377 }, { "epoch": 0.024224557805690848, "grad_norm": 2.8327606606394724, "learning_rate": 1e-06, "loss": 0.3849, "step": 378 }, { "epoch": 0.024288643937451934, "grad_norm": 2.514808684223404, "learning_rate": 1e-06, "loss": 0.4206, "step": 379 }, { "epoch": 0.024352730069213024, "grad_norm": 2.6324540798174185, "learning_rate": 1e-06, "loss": 0.4487, "step": 380 }, { "epoch": 0.02441681620097411, "grad_norm": 2.6056342440608526, "learning_rate": 1e-06, "loss": 0.3951, "step": 381 }, { "epoch": 0.024480902332735196, "grad_norm": 2.675148296855455, "learning_rate": 1e-06, "loss": 0.4448, "step": 382 }, { "epoch": 0.024544988464496282, "grad_norm": 2.5831013701900947, "learning_rate": 1e-06, "loss": 0.4318, "step": 383 }, { "epoch": 0.02460907459625737, "grad_norm": 2.5554733350090575, "learning_rate": 1e-06, "loss": 0.4105, "step": 384 }, { "epoch": 0.024673160728018458, "grad_norm": 2.825553455546965, "learning_rate": 1e-06, "loss": 0.4427, "step": 385 }, { "epoch": 0.024737246859779544, "grad_norm": 2.598203447299543, "learning_rate": 1e-06, "loss": 0.3917, "step": 386 }, { "epoch": 0.02480133299154063, "grad_norm": 2.7532512685061796, "learning_rate": 1e-06, "loss": 0.3665, "step": 387 }, { "epoch": 0.024865419123301716, "grad_norm": 2.7759105997753264, "learning_rate": 1e-06, "loss": 0.4646, "step": 388 }, { "epoch": 0.024929505255062805, "grad_norm": 2.486755526528995, "learning_rate": 1e-06, "loss": 0.4424, "step": 389 }, { "epoch": 0.02499359138682389, "grad_norm": 2.5947708705458394, "learning_rate": 1e-06, "loss": 0.3967, "step": 390 }, { "epoch": 0.025057677518584977, "grad_norm": 2.4297539299884683, "learning_rate": 1e-06, "loss": 0.4141, "step": 391 }, { "epoch": 0.025121763650346064, "grad_norm": 2.434233908402395, "learning_rate": 1e-06, "loss": 0.4528, "step": 392 }, { "epoch": 0.025185849782107153, "grad_norm": 2.757448942774155, "learning_rate": 1e-06, "loss": 0.4467, "step": 393 }, { "epoch": 0.02524993591386824, "grad_norm": 2.4998807548143014, "learning_rate": 1e-06, "loss": 0.4334, "step": 394 }, { "epoch": 0.025314022045629325, "grad_norm": 2.419465872584267, "learning_rate": 1e-06, "loss": 0.3803, "step": 395 }, { "epoch": 0.02537810817739041, "grad_norm": 2.679576216317354, "learning_rate": 1e-06, "loss": 0.4507, "step": 396 }, { "epoch": 0.0254421943091515, "grad_norm": 2.5869533374512335, "learning_rate": 1e-06, "loss": 0.464, "step": 397 }, { "epoch": 0.025506280440912587, "grad_norm": 2.8162022718685678, "learning_rate": 1e-06, "loss": 0.4536, "step": 398 }, { "epoch": 0.025570366572673673, "grad_norm": 2.49440453274208, "learning_rate": 1e-06, "loss": 0.4472, "step": 399 }, { "epoch": 0.02563445270443476, "grad_norm": 2.7267037535153813, "learning_rate": 1e-06, "loss": 0.3788, "step": 400 }, { "epoch": 0.02569853883619585, "grad_norm": 2.6275422608283123, "learning_rate": 1e-06, "loss": 0.455, "step": 401 }, { "epoch": 0.025762624967956935, "grad_norm": 2.468485363505419, "learning_rate": 1e-06, "loss": 0.4348, "step": 402 }, { "epoch": 0.02582671109971802, "grad_norm": 2.6322373705642015, "learning_rate": 1e-06, "loss": 0.4474, "step": 403 }, { "epoch": 0.025890797231479107, "grad_norm": 2.535796436272985, "learning_rate": 1e-06, "loss": 0.4392, "step": 404 }, { "epoch": 0.025954883363240196, "grad_norm": 2.5836972859728915, "learning_rate": 1e-06, "loss": 0.429, "step": 405 }, { "epoch": 0.026018969495001282, "grad_norm": 2.7316052338513432, "learning_rate": 1e-06, "loss": 0.396, "step": 406 }, { "epoch": 0.02608305562676237, "grad_norm": 2.736398307464729, "learning_rate": 1e-06, "loss": 0.4456, "step": 407 }, { "epoch": 0.026147141758523455, "grad_norm": 2.7440451094469394, "learning_rate": 1e-06, "loss": 0.4556, "step": 408 }, { "epoch": 0.026211227890284544, "grad_norm": 2.4414167181146635, "learning_rate": 1e-06, "loss": 0.4086, "step": 409 }, { "epoch": 0.02627531402204563, "grad_norm": 2.7337025666681787, "learning_rate": 1e-06, "loss": 0.4728, "step": 410 }, { "epoch": 0.026339400153806716, "grad_norm": 2.6555366248003076, "learning_rate": 1e-06, "loss": 0.4541, "step": 411 }, { "epoch": 0.026403486285567802, "grad_norm": 2.5538593566825405, "learning_rate": 1e-06, "loss": 0.4807, "step": 412 }, { "epoch": 0.02646757241732889, "grad_norm": 2.5362557233108833, "learning_rate": 1e-06, "loss": 0.3934, "step": 413 }, { "epoch": 0.026531658549089978, "grad_norm": 2.8275479404432775, "learning_rate": 1e-06, "loss": 0.4276, "step": 414 }, { "epoch": 0.026595744680851064, "grad_norm": 2.8543582707538655, "learning_rate": 1e-06, "loss": 0.4315, "step": 415 }, { "epoch": 0.02665983081261215, "grad_norm": 2.950759907375976, "learning_rate": 1e-06, "loss": 0.5034, "step": 416 }, { "epoch": 0.026723916944373236, "grad_norm": 2.6064488094160705, "learning_rate": 1e-06, "loss": 0.4089, "step": 417 }, { "epoch": 0.026788003076134326, "grad_norm": 2.7096530877343534, "learning_rate": 1e-06, "loss": 0.4349, "step": 418 }, { "epoch": 0.026852089207895412, "grad_norm": 2.563785025605755, "learning_rate": 1e-06, "loss": 0.4283, "step": 419 }, { "epoch": 0.026916175339656498, "grad_norm": 2.5884296422116213, "learning_rate": 1e-06, "loss": 0.3884, "step": 420 }, { "epoch": 0.026980261471417584, "grad_norm": 2.652177567014019, "learning_rate": 1e-06, "loss": 0.3815, "step": 421 }, { "epoch": 0.027044347603178674, "grad_norm": 2.8145378730635477, "learning_rate": 1e-06, "loss": 0.469, "step": 422 }, { "epoch": 0.02710843373493976, "grad_norm": 2.6413005099038918, "learning_rate": 1e-06, "loss": 0.4117, "step": 423 }, { "epoch": 0.027172519866700846, "grad_norm": 2.6259835527792803, "learning_rate": 1e-06, "loss": 0.4584, "step": 424 }, { "epoch": 0.02723660599846193, "grad_norm": 2.59772191403426, "learning_rate": 1e-06, "loss": 0.4332, "step": 425 }, { "epoch": 0.02730069213022302, "grad_norm": 2.6447993885707106, "learning_rate": 1e-06, "loss": 0.4098, "step": 426 }, { "epoch": 0.027364778261984107, "grad_norm": 2.8823068371516865, "learning_rate": 1e-06, "loss": 0.4728, "step": 427 }, { "epoch": 0.027428864393745193, "grad_norm": 2.6895802156764996, "learning_rate": 1e-06, "loss": 0.4462, "step": 428 }, { "epoch": 0.02749295052550628, "grad_norm": 2.775049320708902, "learning_rate": 1e-06, "loss": 0.4015, "step": 429 }, { "epoch": 0.02755703665726737, "grad_norm": 2.4854036703312783, "learning_rate": 1e-06, "loss": 0.4104, "step": 430 }, { "epoch": 0.027621122789028455, "grad_norm": 2.628854745968563, "learning_rate": 1e-06, "loss": 0.4176, "step": 431 }, { "epoch": 0.02768520892078954, "grad_norm": 2.543581473195899, "learning_rate": 1e-06, "loss": 0.4403, "step": 432 }, { "epoch": 0.027749295052550627, "grad_norm": 2.5870776263158115, "learning_rate": 1e-06, "loss": 0.4503, "step": 433 }, { "epoch": 0.027813381184311713, "grad_norm": 2.5550230493211266, "learning_rate": 1e-06, "loss": 0.4454, "step": 434 }, { "epoch": 0.027877467316072803, "grad_norm": 2.517836595844116, "learning_rate": 1e-06, "loss": 0.3876, "step": 435 }, { "epoch": 0.02794155344783389, "grad_norm": 2.532564498900615, "learning_rate": 1e-06, "loss": 0.3899, "step": 436 }, { "epoch": 0.028005639579594975, "grad_norm": 2.617217239557506, "learning_rate": 1e-06, "loss": 0.4032, "step": 437 }, { "epoch": 0.02806972571135606, "grad_norm": 2.4932234326126985, "learning_rate": 1e-06, "loss": 0.4112, "step": 438 }, { "epoch": 0.02813381184311715, "grad_norm": 2.632681267604381, "learning_rate": 1e-06, "loss": 0.4631, "step": 439 }, { "epoch": 0.028197897974878237, "grad_norm": 2.700129131791681, "learning_rate": 1e-06, "loss": 0.3841, "step": 440 }, { "epoch": 0.028261984106639323, "grad_norm": 2.3125223839321034, "learning_rate": 1e-06, "loss": 0.4322, "step": 441 }, { "epoch": 0.02832607023840041, "grad_norm": 2.6480507139738187, "learning_rate": 1e-06, "loss": 0.4028, "step": 442 }, { "epoch": 0.0283901563701615, "grad_norm": 2.5860040775843087, "learning_rate": 1e-06, "loss": 0.441, "step": 443 }, { "epoch": 0.028454242501922584, "grad_norm": 2.623689520293853, "learning_rate": 1e-06, "loss": 0.4302, "step": 444 }, { "epoch": 0.02851832863368367, "grad_norm": 2.558893067356012, "learning_rate": 1e-06, "loss": 0.4472, "step": 445 }, { "epoch": 0.028582414765444757, "grad_norm": 2.827213781887415, "learning_rate": 1e-06, "loss": 0.4805, "step": 446 }, { "epoch": 0.028646500897205846, "grad_norm": 2.626705801630092, "learning_rate": 1e-06, "loss": 0.4232, "step": 447 }, { "epoch": 0.028710587028966932, "grad_norm": 2.3921237896610315, "learning_rate": 1e-06, "loss": 0.4341, "step": 448 }, { "epoch": 0.02877467316072802, "grad_norm": 2.594901028723936, "learning_rate": 1e-06, "loss": 0.4329, "step": 449 }, { "epoch": 0.028838759292489104, "grad_norm": 2.6928586153085465, "learning_rate": 1e-06, "loss": 0.4559, "step": 450 }, { "epoch": 0.028902845424250194, "grad_norm": 2.5095686668184713, "learning_rate": 1e-06, "loss": 0.4052, "step": 451 }, { "epoch": 0.02896693155601128, "grad_norm": 2.7755150350769773, "learning_rate": 1e-06, "loss": 0.443, "step": 452 }, { "epoch": 0.029031017687772366, "grad_norm": 2.3618305061019202, "learning_rate": 1e-06, "loss": 0.4055, "step": 453 }, { "epoch": 0.029095103819533452, "grad_norm": 2.5548014934465026, "learning_rate": 1e-06, "loss": 0.4431, "step": 454 }, { "epoch": 0.029159189951294538, "grad_norm": 2.565871109083848, "learning_rate": 1e-06, "loss": 0.4101, "step": 455 }, { "epoch": 0.029223276083055628, "grad_norm": 2.653111195337013, "learning_rate": 1e-06, "loss": 0.4435, "step": 456 }, { "epoch": 0.029287362214816714, "grad_norm": 2.4368979780190245, "learning_rate": 1e-06, "loss": 0.367, "step": 457 }, { "epoch": 0.0293514483465778, "grad_norm": 2.7911192897610047, "learning_rate": 1e-06, "loss": 0.3813, "step": 458 }, { "epoch": 0.029415534478338886, "grad_norm": 2.4492006758826106, "learning_rate": 1e-06, "loss": 0.4112, "step": 459 }, { "epoch": 0.029479620610099976, "grad_norm": 2.6617405331446555, "learning_rate": 1e-06, "loss": 0.4823, "step": 460 }, { "epoch": 0.02954370674186106, "grad_norm": 2.43862787048992, "learning_rate": 1e-06, "loss": 0.4443, "step": 461 }, { "epoch": 0.029607792873622148, "grad_norm": 2.4792268303893783, "learning_rate": 1e-06, "loss": 0.4563, "step": 462 }, { "epoch": 0.029671879005383234, "grad_norm": 2.4850255441260267, "learning_rate": 1e-06, "loss": 0.4743, "step": 463 }, { "epoch": 0.029735965137144323, "grad_norm": 2.5140153994819427, "learning_rate": 1e-06, "loss": 0.4696, "step": 464 }, { "epoch": 0.02980005126890541, "grad_norm": 2.6261266820771585, "learning_rate": 1e-06, "loss": 0.3756, "step": 465 }, { "epoch": 0.029864137400666495, "grad_norm": 2.6310050025757508, "learning_rate": 1e-06, "loss": 0.3847, "step": 466 }, { "epoch": 0.02992822353242758, "grad_norm": 2.7082985977439176, "learning_rate": 1e-06, "loss": 0.3926, "step": 467 }, { "epoch": 0.02999230966418867, "grad_norm": 2.553112208833126, "learning_rate": 1e-06, "loss": 0.396, "step": 468 }, { "epoch": 0.030056395795949757, "grad_norm": 2.612291745367972, "learning_rate": 1e-06, "loss": 0.371, "step": 469 }, { "epoch": 0.030120481927710843, "grad_norm": 2.621969421745666, "learning_rate": 1e-06, "loss": 0.4045, "step": 470 }, { "epoch": 0.03018456805947193, "grad_norm": 2.5381238717470183, "learning_rate": 1e-06, "loss": 0.4543, "step": 471 }, { "epoch": 0.03024865419123302, "grad_norm": 2.665916627779648, "learning_rate": 1e-06, "loss": 0.4564, "step": 472 }, { "epoch": 0.030312740322994105, "grad_norm": 2.4444266781182664, "learning_rate": 1e-06, "loss": 0.4394, "step": 473 }, { "epoch": 0.03037682645475519, "grad_norm": 2.669252093062087, "learning_rate": 1e-06, "loss": 0.4722, "step": 474 }, { "epoch": 0.030440912586516277, "grad_norm": 2.5986644470297136, "learning_rate": 1e-06, "loss": 0.4343, "step": 475 }, { "epoch": 0.030504998718277363, "grad_norm": 2.516452232958566, "learning_rate": 1e-06, "loss": 0.453, "step": 476 }, { "epoch": 0.030569084850038453, "grad_norm": 2.4799852114064214, "learning_rate": 1e-06, "loss": 0.4186, "step": 477 }, { "epoch": 0.03063317098179954, "grad_norm": 2.691271105524432, "learning_rate": 1e-06, "loss": 0.4461, "step": 478 }, { "epoch": 0.030697257113560625, "grad_norm": 2.6177226276670886, "learning_rate": 1e-06, "loss": 0.3892, "step": 479 }, { "epoch": 0.03076134324532171, "grad_norm": 2.6112474010731055, "learning_rate": 1e-06, "loss": 0.4304, "step": 480 }, { "epoch": 0.0308254293770828, "grad_norm": 2.43910372990363, "learning_rate": 1e-06, "loss": 0.4058, "step": 481 }, { "epoch": 0.030889515508843886, "grad_norm": 2.560767071357324, "learning_rate": 1e-06, "loss": 0.4067, "step": 482 }, { "epoch": 0.030953601640604973, "grad_norm": 2.545682557199924, "learning_rate": 1e-06, "loss": 0.4691, "step": 483 }, { "epoch": 0.03101768777236606, "grad_norm": 2.575507981220968, "learning_rate": 1e-06, "loss": 0.419, "step": 484 }, { "epoch": 0.031081773904127148, "grad_norm": 2.398778610972209, "learning_rate": 1e-06, "loss": 0.4322, "step": 485 }, { "epoch": 0.031145860035888234, "grad_norm": 2.468832999943894, "learning_rate": 1e-06, "loss": 0.4069, "step": 486 }, { "epoch": 0.03120994616764932, "grad_norm": 2.6723453958820045, "learning_rate": 1e-06, "loss": 0.4476, "step": 487 }, { "epoch": 0.031274032299410406, "grad_norm": 3.007937745124945, "learning_rate": 1e-06, "loss": 0.4408, "step": 488 }, { "epoch": 0.03133811843117149, "grad_norm": 2.5372976061475625, "learning_rate": 1e-06, "loss": 0.4811, "step": 489 }, { "epoch": 0.03140220456293258, "grad_norm": 2.4099062889186245, "learning_rate": 1e-06, "loss": 0.4239, "step": 490 }, { "epoch": 0.03146629069469367, "grad_norm": 2.561389579372859, "learning_rate": 1e-06, "loss": 0.4182, "step": 491 }, { "epoch": 0.03153037682645476, "grad_norm": 2.7289712094507492, "learning_rate": 1e-06, "loss": 0.3899, "step": 492 }, { "epoch": 0.031594462958215844, "grad_norm": 2.8885100719999253, "learning_rate": 1e-06, "loss": 0.4456, "step": 493 }, { "epoch": 0.03165854908997693, "grad_norm": 2.5064397467754573, "learning_rate": 1e-06, "loss": 0.407, "step": 494 }, { "epoch": 0.031722635221738016, "grad_norm": 2.412203696247204, "learning_rate": 1e-06, "loss": 0.3731, "step": 495 }, { "epoch": 0.0317867213534991, "grad_norm": 2.629095888173398, "learning_rate": 1e-06, "loss": 0.3977, "step": 496 }, { "epoch": 0.03185080748526019, "grad_norm": 2.4280514452263406, "learning_rate": 1e-06, "loss": 0.4447, "step": 497 }, { "epoch": 0.031914893617021274, "grad_norm": 2.5575542980194443, "learning_rate": 1e-06, "loss": 0.3789, "step": 498 }, { "epoch": 0.03197897974878236, "grad_norm": 2.5872529006915412, "learning_rate": 1e-06, "loss": 0.4307, "step": 499 }, { "epoch": 0.03204306588054345, "grad_norm": 2.5440251604452992, "learning_rate": 1e-06, "loss": 0.3903, "step": 500 }, { "epoch": 0.03210715201230454, "grad_norm": 2.7137592808907884, "learning_rate": 1e-06, "loss": 0.4266, "step": 501 }, { "epoch": 0.032171238144065625, "grad_norm": 2.6186486459780167, "learning_rate": 1e-06, "loss": 0.434, "step": 502 }, { "epoch": 0.03223532427582671, "grad_norm": 2.570526490487541, "learning_rate": 1e-06, "loss": 0.3787, "step": 503 }, { "epoch": 0.0322994104075878, "grad_norm": 2.60892271445792, "learning_rate": 1e-06, "loss": 0.3674, "step": 504 }, { "epoch": 0.032363496539348884, "grad_norm": 2.8058791368375835, "learning_rate": 1e-06, "loss": 0.371, "step": 505 }, { "epoch": 0.03242758267110997, "grad_norm": 2.3428317112470345, "learning_rate": 1e-06, "loss": 0.4225, "step": 506 }, { "epoch": 0.032491668802871056, "grad_norm": 2.532222954783407, "learning_rate": 1e-06, "loss": 0.4619, "step": 507 }, { "epoch": 0.03255575493463215, "grad_norm": 2.7351971936980672, "learning_rate": 1e-06, "loss": 0.3857, "step": 508 }, { "epoch": 0.032619841066393235, "grad_norm": 2.4604615768666416, "learning_rate": 1e-06, "loss": 0.4439, "step": 509 }, { "epoch": 0.03268392719815432, "grad_norm": 2.831748431791256, "learning_rate": 1e-06, "loss": 0.4475, "step": 510 }, { "epoch": 0.03274801332991541, "grad_norm": 2.441688126278881, "learning_rate": 1e-06, "loss": 0.4578, "step": 511 }, { "epoch": 0.03281209946167649, "grad_norm": 2.5457154513847606, "learning_rate": 1e-06, "loss": 0.3945, "step": 512 }, { "epoch": 0.03287618559343758, "grad_norm": 2.6596542207786333, "learning_rate": 1e-06, "loss": 0.4234, "step": 513 }, { "epoch": 0.032940271725198665, "grad_norm": 2.577103093592158, "learning_rate": 1e-06, "loss": 0.4362, "step": 514 }, { "epoch": 0.03300435785695975, "grad_norm": 2.6865205065165476, "learning_rate": 1e-06, "loss": 0.457, "step": 515 }, { "epoch": 0.033068443988720844, "grad_norm": 2.7935603445979593, "learning_rate": 1e-06, "loss": 0.3538, "step": 516 }, { "epoch": 0.03313253012048193, "grad_norm": 2.5709612528638646, "learning_rate": 1e-06, "loss": 0.4058, "step": 517 }, { "epoch": 0.033196616252243016, "grad_norm": 2.679906178087004, "learning_rate": 1e-06, "loss": 0.419, "step": 518 }, { "epoch": 0.0332607023840041, "grad_norm": 2.631332537538208, "learning_rate": 1e-06, "loss": 0.3987, "step": 519 }, { "epoch": 0.03332478851576519, "grad_norm": 2.742087588652849, "learning_rate": 1e-06, "loss": 0.3902, "step": 520 }, { "epoch": 0.033388874647526275, "grad_norm": 2.607913480915616, "learning_rate": 1e-06, "loss": 0.3595, "step": 521 }, { "epoch": 0.03345296077928736, "grad_norm": 2.6446962621851564, "learning_rate": 1e-06, "loss": 0.3932, "step": 522 }, { "epoch": 0.03351704691104845, "grad_norm": 2.7401141685722337, "learning_rate": 1e-06, "loss": 0.4399, "step": 523 }, { "epoch": 0.03358113304280953, "grad_norm": 2.505928983276003, "learning_rate": 1e-06, "loss": 0.501, "step": 524 }, { "epoch": 0.033645219174570626, "grad_norm": 2.7600474954065444, "learning_rate": 1e-06, "loss": 0.4258, "step": 525 }, { "epoch": 0.03370930530633171, "grad_norm": 2.7364210341460997, "learning_rate": 1e-06, "loss": 0.4107, "step": 526 }, { "epoch": 0.0337733914380928, "grad_norm": 2.750607243162528, "learning_rate": 1e-06, "loss": 0.4495, "step": 527 }, { "epoch": 0.033837477569853884, "grad_norm": 2.8190619687628273, "learning_rate": 1e-06, "loss": 0.3647, "step": 528 }, { "epoch": 0.03390156370161497, "grad_norm": 2.7488605163437243, "learning_rate": 1e-06, "loss": 0.3953, "step": 529 }, { "epoch": 0.033965649833376056, "grad_norm": 2.4591430760247475, "learning_rate": 1e-06, "loss": 0.4333, "step": 530 }, { "epoch": 0.03402973596513714, "grad_norm": 2.456421101011619, "learning_rate": 1e-06, "loss": 0.3921, "step": 531 }, { "epoch": 0.03409382209689823, "grad_norm": 2.6154427035143994, "learning_rate": 1e-06, "loss": 0.3879, "step": 532 }, { "epoch": 0.03415790822865932, "grad_norm": 2.6735778265914716, "learning_rate": 1e-06, "loss": 0.3936, "step": 533 }, { "epoch": 0.03422199436042041, "grad_norm": 2.890439024855884, "learning_rate": 1e-06, "loss": 0.4686, "step": 534 }, { "epoch": 0.034286080492181494, "grad_norm": 2.4630446860653006, "learning_rate": 1e-06, "loss": 0.427, "step": 535 }, { "epoch": 0.03435016662394258, "grad_norm": 2.6268371157846073, "learning_rate": 1e-06, "loss": 0.4102, "step": 536 }, { "epoch": 0.034414252755703666, "grad_norm": 2.610285178020935, "learning_rate": 1e-06, "loss": 0.4184, "step": 537 }, { "epoch": 0.03447833888746475, "grad_norm": 2.6839716680166577, "learning_rate": 1e-06, "loss": 0.463, "step": 538 }, { "epoch": 0.03454242501922584, "grad_norm": 2.749663653273078, "learning_rate": 1e-06, "loss": 0.4618, "step": 539 }, { "epoch": 0.034606511150986924, "grad_norm": 2.7732262749372962, "learning_rate": 1e-06, "loss": 0.4329, "step": 540 }, { "epoch": 0.03467059728274801, "grad_norm": 2.4709968867303482, "learning_rate": 1e-06, "loss": 0.4149, "step": 541 }, { "epoch": 0.0347346834145091, "grad_norm": 2.636652286257393, "learning_rate": 1e-06, "loss": 0.4409, "step": 542 }, { "epoch": 0.03479876954627019, "grad_norm": 2.6364517480767207, "learning_rate": 1e-06, "loss": 0.3699, "step": 543 }, { "epoch": 0.034862855678031275, "grad_norm": 2.602465187056353, "learning_rate": 1e-06, "loss": 0.4382, "step": 544 }, { "epoch": 0.03492694180979236, "grad_norm": 2.5206229660480135, "learning_rate": 1e-06, "loss": 0.4329, "step": 545 }, { "epoch": 0.03499102794155345, "grad_norm": 2.404100546779136, "learning_rate": 1e-06, "loss": 0.4556, "step": 546 }, { "epoch": 0.03505511407331453, "grad_norm": 2.5357402525324373, "learning_rate": 1e-06, "loss": 0.4263, "step": 547 }, { "epoch": 0.03511920020507562, "grad_norm": 2.759792501919909, "learning_rate": 1e-06, "loss": 0.4423, "step": 548 }, { "epoch": 0.035183286336836705, "grad_norm": 2.6433173572474207, "learning_rate": 1e-06, "loss": 0.4271, "step": 549 }, { "epoch": 0.0352473724685978, "grad_norm": 2.4955771953873986, "learning_rate": 1e-06, "loss": 0.4189, "step": 550 }, { "epoch": 0.035311458600358885, "grad_norm": 2.6502029354718526, "learning_rate": 1e-06, "loss": 0.4253, "step": 551 }, { "epoch": 0.03537554473211997, "grad_norm": 2.5023269519541556, "learning_rate": 1e-06, "loss": 0.4171, "step": 552 }, { "epoch": 0.03543963086388106, "grad_norm": 2.427919137521944, "learning_rate": 1e-06, "loss": 0.3839, "step": 553 }, { "epoch": 0.03550371699564214, "grad_norm": 2.547181435232563, "learning_rate": 1e-06, "loss": 0.4268, "step": 554 }, { "epoch": 0.03556780312740323, "grad_norm": 2.8027164655802412, "learning_rate": 1e-06, "loss": 0.4133, "step": 555 }, { "epoch": 0.035631889259164315, "grad_norm": 2.6901481129495024, "learning_rate": 1e-06, "loss": 0.4793, "step": 556 }, { "epoch": 0.0356959753909254, "grad_norm": 2.5079034785333008, "learning_rate": 1e-06, "loss": 0.4461, "step": 557 }, { "epoch": 0.035760061522686494, "grad_norm": 2.403696079239236, "learning_rate": 1e-06, "loss": 0.444, "step": 558 }, { "epoch": 0.03582414765444758, "grad_norm": 2.569305452424107, "learning_rate": 1e-06, "loss": 0.3475, "step": 559 }, { "epoch": 0.035888233786208666, "grad_norm": 2.6545444286825983, "learning_rate": 1e-06, "loss": 0.3869, "step": 560 }, { "epoch": 0.03595231991796975, "grad_norm": 2.440955279891903, "learning_rate": 1e-06, "loss": 0.3581, "step": 561 }, { "epoch": 0.03601640604973084, "grad_norm": 2.847512451653497, "learning_rate": 1e-06, "loss": 0.4296, "step": 562 }, { "epoch": 0.036080492181491924, "grad_norm": 2.6466369305096666, "learning_rate": 1e-06, "loss": 0.4061, "step": 563 }, { "epoch": 0.03614457831325301, "grad_norm": 2.6832876331361475, "learning_rate": 1e-06, "loss": 0.4232, "step": 564 }, { "epoch": 0.0362086644450141, "grad_norm": 2.5616868912458157, "learning_rate": 1e-06, "loss": 0.4081, "step": 565 }, { "epoch": 0.03627275057677518, "grad_norm": 2.4878609095378854, "learning_rate": 1e-06, "loss": 0.455, "step": 566 }, { "epoch": 0.036336836708536276, "grad_norm": 2.46531966648379, "learning_rate": 1e-06, "loss": 0.4627, "step": 567 }, { "epoch": 0.03640092284029736, "grad_norm": 2.516195232483696, "learning_rate": 1e-06, "loss": 0.4258, "step": 568 }, { "epoch": 0.03646500897205845, "grad_norm": 2.6044742204218174, "learning_rate": 1e-06, "loss": 0.4174, "step": 569 }, { "epoch": 0.036529095103819534, "grad_norm": 2.5714531773465312, "learning_rate": 1e-06, "loss": 0.4185, "step": 570 }, { "epoch": 0.03659318123558062, "grad_norm": 2.5553692530565004, "learning_rate": 1e-06, "loss": 0.4518, "step": 571 }, { "epoch": 0.036657267367341706, "grad_norm": 2.5353351960631363, "learning_rate": 1e-06, "loss": 0.4417, "step": 572 }, { "epoch": 0.03672135349910279, "grad_norm": 2.690225025998638, "learning_rate": 1e-06, "loss": 0.4141, "step": 573 }, { "epoch": 0.03678543963086388, "grad_norm": 2.709566881886002, "learning_rate": 1e-06, "loss": 0.4066, "step": 574 }, { "epoch": 0.03684952576262497, "grad_norm": 2.755681344115124, "learning_rate": 1e-06, "loss": 0.4367, "step": 575 }, { "epoch": 0.03691361189438606, "grad_norm": 2.5693249686778685, "learning_rate": 1e-06, "loss": 0.4407, "step": 576 }, { "epoch": 0.03697769802614714, "grad_norm": 2.4624250441741364, "learning_rate": 1e-06, "loss": 0.3905, "step": 577 }, { "epoch": 0.03704178415790823, "grad_norm": 2.73004795352225, "learning_rate": 1e-06, "loss": 0.456, "step": 578 }, { "epoch": 0.037105870289669315, "grad_norm": 2.7527083634276592, "learning_rate": 1e-06, "loss": 0.3965, "step": 579 }, { "epoch": 0.0371699564214304, "grad_norm": 2.8028124217258004, "learning_rate": 1e-06, "loss": 0.471, "step": 580 }, { "epoch": 0.03723404255319149, "grad_norm": 2.5880344409030807, "learning_rate": 1e-06, "loss": 0.3902, "step": 581 }, { "epoch": 0.037298128684952574, "grad_norm": 2.4467098820143263, "learning_rate": 1e-06, "loss": 0.4309, "step": 582 }, { "epoch": 0.03736221481671366, "grad_norm": 2.6909263668214414, "learning_rate": 1e-06, "loss": 0.4424, "step": 583 }, { "epoch": 0.03742630094847475, "grad_norm": 2.5028705246054064, "learning_rate": 1e-06, "loss": 0.4534, "step": 584 }, { "epoch": 0.03749038708023584, "grad_norm": 2.632149240813846, "learning_rate": 1e-06, "loss": 0.4429, "step": 585 }, { "epoch": 0.037554473211996925, "grad_norm": 2.676547497619598, "learning_rate": 1e-06, "loss": 0.4092, "step": 586 }, { "epoch": 0.03761855934375801, "grad_norm": 2.593412911659521, "learning_rate": 1e-06, "loss": 0.3827, "step": 587 }, { "epoch": 0.0376826454755191, "grad_norm": 2.5646155672189868, "learning_rate": 1e-06, "loss": 0.4445, "step": 588 }, { "epoch": 0.03774673160728018, "grad_norm": 2.6958374982600857, "learning_rate": 1e-06, "loss": 0.4088, "step": 589 }, { "epoch": 0.03781081773904127, "grad_norm": 2.944875349581748, "learning_rate": 1e-06, "loss": 0.4577, "step": 590 }, { "epoch": 0.037874903870802355, "grad_norm": 2.4909466851587285, "learning_rate": 1e-06, "loss": 0.3851, "step": 591 }, { "epoch": 0.03793899000256345, "grad_norm": 2.6354951431246474, "learning_rate": 1e-06, "loss": 0.4192, "step": 592 }, { "epoch": 0.038003076134324534, "grad_norm": 2.5659179494331514, "learning_rate": 1e-06, "loss": 0.3952, "step": 593 }, { "epoch": 0.03806716226608562, "grad_norm": 2.7030130238756387, "learning_rate": 1e-06, "loss": 0.4631, "step": 594 }, { "epoch": 0.038131248397846706, "grad_norm": 2.5036363687595413, "learning_rate": 1e-06, "loss": 0.3573, "step": 595 }, { "epoch": 0.03819533452960779, "grad_norm": 2.568713565345006, "learning_rate": 1e-06, "loss": 0.4051, "step": 596 }, { "epoch": 0.03825942066136888, "grad_norm": 2.7356219550249543, "learning_rate": 1e-06, "loss": 0.3943, "step": 597 }, { "epoch": 0.038323506793129965, "grad_norm": 2.589753760976193, "learning_rate": 1e-06, "loss": 0.4184, "step": 598 }, { "epoch": 0.03838759292489105, "grad_norm": 2.5004031202976598, "learning_rate": 1e-06, "loss": 0.4061, "step": 599 }, { "epoch": 0.038451679056652144, "grad_norm": 2.640214926702102, "learning_rate": 1e-06, "loss": 0.4696, "step": 600 }, { "epoch": 0.03851576518841323, "grad_norm": 2.4476421037580227, "learning_rate": 1e-06, "loss": 0.3422, "step": 601 }, { "epoch": 0.038579851320174316, "grad_norm": 2.6129179653473673, "learning_rate": 1e-06, "loss": 0.385, "step": 602 }, { "epoch": 0.0386439374519354, "grad_norm": 2.8065814805198643, "learning_rate": 1e-06, "loss": 0.4985, "step": 603 }, { "epoch": 0.03870802358369649, "grad_norm": 2.7506065468121665, "learning_rate": 1e-06, "loss": 0.38, "step": 604 }, { "epoch": 0.038772109715457574, "grad_norm": 2.743913537942316, "learning_rate": 1e-06, "loss": 0.3987, "step": 605 }, { "epoch": 0.03883619584721866, "grad_norm": 2.7130495638219982, "learning_rate": 1e-06, "loss": 0.4095, "step": 606 }, { "epoch": 0.038900281978979746, "grad_norm": 2.5914175128432153, "learning_rate": 1e-06, "loss": 0.4005, "step": 607 }, { "epoch": 0.03896436811074083, "grad_norm": 2.5482424585290366, "learning_rate": 1e-06, "loss": 0.4562, "step": 608 }, { "epoch": 0.039028454242501925, "grad_norm": 2.7290427011123395, "learning_rate": 1e-06, "loss": 0.4013, "step": 609 }, { "epoch": 0.03909254037426301, "grad_norm": 2.5427635742774117, "learning_rate": 1e-06, "loss": 0.3788, "step": 610 }, { "epoch": 0.0391566265060241, "grad_norm": 2.561653737428255, "learning_rate": 1e-06, "loss": 0.4245, "step": 611 }, { "epoch": 0.039220712637785184, "grad_norm": 2.5955070887416323, "learning_rate": 1e-06, "loss": 0.4266, "step": 612 }, { "epoch": 0.03928479876954627, "grad_norm": 2.54265029522293, "learning_rate": 1e-06, "loss": 0.4145, "step": 613 }, { "epoch": 0.039348884901307356, "grad_norm": 2.606212536478632, "learning_rate": 1e-06, "loss": 0.43, "step": 614 }, { "epoch": 0.03941297103306844, "grad_norm": 2.6840178730682682, "learning_rate": 1e-06, "loss": 0.4871, "step": 615 }, { "epoch": 0.03947705716482953, "grad_norm": 2.650414922571063, "learning_rate": 1e-06, "loss": 0.4061, "step": 616 }, { "epoch": 0.03954114329659062, "grad_norm": 2.673526302945202, "learning_rate": 1e-06, "loss": 0.5218, "step": 617 }, { "epoch": 0.03960522942835171, "grad_norm": 2.553206108448424, "learning_rate": 1e-06, "loss": 0.4068, "step": 618 }, { "epoch": 0.03966931556011279, "grad_norm": 3.8623068130329914, "learning_rate": 1e-06, "loss": 0.4685, "step": 619 }, { "epoch": 0.03973340169187388, "grad_norm": 2.611209530336014, "learning_rate": 1e-06, "loss": 0.4692, "step": 620 }, { "epoch": 0.039797487823634965, "grad_norm": 2.612983189021595, "learning_rate": 1e-06, "loss": 0.3927, "step": 621 }, { "epoch": 0.03986157395539605, "grad_norm": 2.642479051481998, "learning_rate": 1e-06, "loss": 0.4703, "step": 622 }, { "epoch": 0.03992566008715714, "grad_norm": 2.7796089353924867, "learning_rate": 1e-06, "loss": 0.4151, "step": 623 }, { "epoch": 0.03998974621891822, "grad_norm": 2.58513886175909, "learning_rate": 1e-06, "loss": 0.3998, "step": 624 }, { "epoch": 0.040053832350679316, "grad_norm": 2.5219815352139916, "learning_rate": 1e-06, "loss": 0.4732, "step": 625 }, { "epoch": 0.0401179184824404, "grad_norm": 2.427279598308452, "learning_rate": 1e-06, "loss": 0.4312, "step": 626 }, { "epoch": 0.04018200461420149, "grad_norm": 2.7451148950328763, "learning_rate": 1e-06, "loss": 0.451, "step": 627 }, { "epoch": 0.040246090745962575, "grad_norm": 2.935612939952673, "learning_rate": 1e-06, "loss": 0.408, "step": 628 }, { "epoch": 0.04031017687772366, "grad_norm": 2.315351642452188, "learning_rate": 1e-06, "loss": 0.42, "step": 629 }, { "epoch": 0.04037426300948475, "grad_norm": 2.616335080998465, "learning_rate": 1e-06, "loss": 0.459, "step": 630 }, { "epoch": 0.04043834914124583, "grad_norm": 2.7994650340660527, "learning_rate": 1e-06, "loss": 0.4556, "step": 631 }, { "epoch": 0.04050243527300692, "grad_norm": 2.7990589665063315, "learning_rate": 1e-06, "loss": 0.433, "step": 632 }, { "epoch": 0.040566521404768005, "grad_norm": 2.726625812194557, "learning_rate": 1e-06, "loss": 0.4934, "step": 633 }, { "epoch": 0.0406306075365291, "grad_norm": 2.6595974411965004, "learning_rate": 1e-06, "loss": 0.3912, "step": 634 }, { "epoch": 0.040694693668290184, "grad_norm": 2.6317170132064054, "learning_rate": 1e-06, "loss": 0.4233, "step": 635 }, { "epoch": 0.04075877980005127, "grad_norm": 2.7113262836328507, "learning_rate": 1e-06, "loss": 0.3809, "step": 636 }, { "epoch": 0.040822865931812356, "grad_norm": 2.7276916836582963, "learning_rate": 1e-06, "loss": 0.4244, "step": 637 }, { "epoch": 0.04088695206357344, "grad_norm": 2.511155076141722, "learning_rate": 1e-06, "loss": 0.3905, "step": 638 }, { "epoch": 0.04095103819533453, "grad_norm": 2.6231514302817867, "learning_rate": 1e-06, "loss": 0.4993, "step": 639 }, { "epoch": 0.041015124327095615, "grad_norm": 2.8738210559312836, "learning_rate": 1e-06, "loss": 0.4589, "step": 640 }, { "epoch": 0.0410792104588567, "grad_norm": 2.630954950788824, "learning_rate": 1e-06, "loss": 0.4045, "step": 641 }, { "epoch": 0.041143296590617794, "grad_norm": 2.3813698177760885, "learning_rate": 1e-06, "loss": 0.3638, "step": 642 }, { "epoch": 0.04120738272237888, "grad_norm": 2.559089835922891, "learning_rate": 1e-06, "loss": 0.4153, "step": 643 }, { "epoch": 0.041271468854139966, "grad_norm": 2.6301614669286764, "learning_rate": 1e-06, "loss": 0.3787, "step": 644 }, { "epoch": 0.04133555498590105, "grad_norm": 2.826724753336134, "learning_rate": 1e-06, "loss": 0.4785, "step": 645 }, { "epoch": 0.04139964111766214, "grad_norm": 2.5852268129152094, "learning_rate": 1e-06, "loss": 0.4127, "step": 646 }, { "epoch": 0.041463727249423224, "grad_norm": 2.5517838435167337, "learning_rate": 1e-06, "loss": 0.4187, "step": 647 }, { "epoch": 0.04152781338118431, "grad_norm": 2.5003343630514485, "learning_rate": 1e-06, "loss": 0.4664, "step": 648 }, { "epoch": 0.041591899512945396, "grad_norm": 2.660162354251772, "learning_rate": 1e-06, "loss": 0.4587, "step": 649 }, { "epoch": 0.04165598564470648, "grad_norm": 2.519788649872128, "learning_rate": 1e-06, "loss": 0.4124, "step": 650 }, { "epoch": 0.041720071776467575, "grad_norm": 2.6811482996986684, "learning_rate": 1e-06, "loss": 0.4979, "step": 651 }, { "epoch": 0.04178415790822866, "grad_norm": 2.617520724416409, "learning_rate": 1e-06, "loss": 0.4485, "step": 652 }, { "epoch": 0.04184824403998975, "grad_norm": 2.488909344232931, "learning_rate": 1e-06, "loss": 0.4211, "step": 653 }, { "epoch": 0.04191233017175083, "grad_norm": 2.6149410343930644, "learning_rate": 1e-06, "loss": 0.3825, "step": 654 }, { "epoch": 0.04197641630351192, "grad_norm": 2.5128514869435525, "learning_rate": 1e-06, "loss": 0.4126, "step": 655 }, { "epoch": 0.042040502435273006, "grad_norm": 2.677894898570438, "learning_rate": 1e-06, "loss": 0.3868, "step": 656 }, { "epoch": 0.04210458856703409, "grad_norm": 2.5073846226183494, "learning_rate": 1e-06, "loss": 0.4001, "step": 657 }, { "epoch": 0.04216867469879518, "grad_norm": 2.5959443672213887, "learning_rate": 1e-06, "loss": 0.3518, "step": 658 }, { "epoch": 0.04223276083055627, "grad_norm": 2.6503459543932113, "learning_rate": 1e-06, "loss": 0.4283, "step": 659 }, { "epoch": 0.04229684696231736, "grad_norm": 2.43063879482731, "learning_rate": 1e-06, "loss": 0.4251, "step": 660 }, { "epoch": 0.04236093309407844, "grad_norm": 2.349576722965158, "learning_rate": 1e-06, "loss": 0.3834, "step": 661 }, { "epoch": 0.04242501922583953, "grad_norm": 2.537089027277767, "learning_rate": 1e-06, "loss": 0.3797, "step": 662 }, { "epoch": 0.042489105357600615, "grad_norm": 2.7224838638581756, "learning_rate": 1e-06, "loss": 0.4099, "step": 663 }, { "epoch": 0.0425531914893617, "grad_norm": 2.7403703661711303, "learning_rate": 1e-06, "loss": 0.4155, "step": 664 }, { "epoch": 0.04261727762112279, "grad_norm": 2.481829066148711, "learning_rate": 1e-06, "loss": 0.4395, "step": 665 }, { "epoch": 0.04268136375288387, "grad_norm": 2.7058959104350286, "learning_rate": 1e-06, "loss": 0.3839, "step": 666 }, { "epoch": 0.042745449884644966, "grad_norm": 2.664470985436932, "learning_rate": 1e-06, "loss": 0.4535, "step": 667 }, { "epoch": 0.04280953601640605, "grad_norm": 2.989381219322504, "learning_rate": 1e-06, "loss": 0.4469, "step": 668 }, { "epoch": 0.04287362214816714, "grad_norm": 2.456391983663052, "learning_rate": 1e-06, "loss": 0.4497, "step": 669 }, { "epoch": 0.042937708279928224, "grad_norm": 2.5686791944245355, "learning_rate": 1e-06, "loss": 0.3879, "step": 670 }, { "epoch": 0.04300179441168931, "grad_norm": 2.5171257630542496, "learning_rate": 1e-06, "loss": 0.3855, "step": 671 }, { "epoch": 0.0430658805434504, "grad_norm": 2.7514044064489958, "learning_rate": 1e-06, "loss": 0.4274, "step": 672 }, { "epoch": 0.04312996667521148, "grad_norm": 2.588593452159588, "learning_rate": 1e-06, "loss": 0.4361, "step": 673 }, { "epoch": 0.04319405280697257, "grad_norm": 2.5337204798422133, "learning_rate": 1e-06, "loss": 0.4651, "step": 674 }, { "epoch": 0.043258138938733655, "grad_norm": 2.80848924235811, "learning_rate": 1e-06, "loss": 0.3884, "step": 675 }, { "epoch": 0.04332222507049475, "grad_norm": 2.4781318443632703, "learning_rate": 1e-06, "loss": 0.4071, "step": 676 }, { "epoch": 0.043386311202255834, "grad_norm": 2.6636080398686017, "learning_rate": 1e-06, "loss": 0.4434, "step": 677 }, { "epoch": 0.04345039733401692, "grad_norm": 2.6875653127583643, "learning_rate": 1e-06, "loss": 0.4095, "step": 678 }, { "epoch": 0.043514483465778006, "grad_norm": 2.593545387753299, "learning_rate": 1e-06, "loss": 0.4077, "step": 679 }, { "epoch": 0.04357856959753909, "grad_norm": 2.6927729543543393, "learning_rate": 1e-06, "loss": 0.4338, "step": 680 }, { "epoch": 0.04364265572930018, "grad_norm": 2.683321727952175, "learning_rate": 1e-06, "loss": 0.4527, "step": 681 }, { "epoch": 0.043706741861061264, "grad_norm": 2.4442159701665838, "learning_rate": 1e-06, "loss": 0.3911, "step": 682 }, { "epoch": 0.04377082799282235, "grad_norm": 2.6443057440007722, "learning_rate": 1e-06, "loss": 0.4226, "step": 683 }, { "epoch": 0.04383491412458344, "grad_norm": 2.771577021353034, "learning_rate": 1e-06, "loss": 0.4209, "step": 684 }, { "epoch": 0.04389900025634453, "grad_norm": 2.5830604191286577, "learning_rate": 1e-06, "loss": 0.4699, "step": 685 }, { "epoch": 0.043963086388105616, "grad_norm": 2.7145392457704367, "learning_rate": 1e-06, "loss": 0.3852, "step": 686 }, { "epoch": 0.0440271725198667, "grad_norm": 2.6292966393223773, "learning_rate": 1e-06, "loss": 0.4188, "step": 687 }, { "epoch": 0.04409125865162779, "grad_norm": 2.549423161227808, "learning_rate": 1e-06, "loss": 0.4607, "step": 688 }, { "epoch": 0.044155344783388874, "grad_norm": 2.8191967007014416, "learning_rate": 1e-06, "loss": 0.4822, "step": 689 }, { "epoch": 0.04421943091514996, "grad_norm": 2.7238873369280596, "learning_rate": 1e-06, "loss": 0.4639, "step": 690 }, { "epoch": 0.044283517046911046, "grad_norm": 2.633151381982928, "learning_rate": 1e-06, "loss": 0.4557, "step": 691 }, { "epoch": 0.04434760317867213, "grad_norm": 2.7228749845037745, "learning_rate": 1e-06, "loss": 0.4778, "step": 692 }, { "epoch": 0.044411689310433225, "grad_norm": 2.7679898994678944, "learning_rate": 1e-06, "loss": 0.4181, "step": 693 }, { "epoch": 0.04447577544219431, "grad_norm": 2.589789446094907, "learning_rate": 1e-06, "loss": 0.4236, "step": 694 }, { "epoch": 0.0445398615739554, "grad_norm": 2.511018179407139, "learning_rate": 1e-06, "loss": 0.4469, "step": 695 }, { "epoch": 0.04460394770571648, "grad_norm": 2.7885072030309206, "learning_rate": 1e-06, "loss": 0.4553, "step": 696 }, { "epoch": 0.04466803383747757, "grad_norm": 2.7021041650758506, "learning_rate": 1e-06, "loss": 0.3864, "step": 697 }, { "epoch": 0.044732119969238655, "grad_norm": 2.788293327898669, "learning_rate": 1e-06, "loss": 0.4676, "step": 698 }, { "epoch": 0.04479620610099974, "grad_norm": 2.5530815918507215, "learning_rate": 1e-06, "loss": 0.4304, "step": 699 }, { "epoch": 0.04486029223276083, "grad_norm": 2.3701294923623166, "learning_rate": 1e-06, "loss": 0.3835, "step": 700 }, { "epoch": 0.04492437836452192, "grad_norm": 2.521705987774555, "learning_rate": 1e-06, "loss": 0.4147, "step": 701 }, { "epoch": 0.04498846449628301, "grad_norm": 2.7337919356294273, "learning_rate": 1e-06, "loss": 0.4036, "step": 702 }, { "epoch": 0.04505255062804409, "grad_norm": 2.7334466833635713, "learning_rate": 1e-06, "loss": 0.4737, "step": 703 }, { "epoch": 0.04511663675980518, "grad_norm": 2.412574306067936, "learning_rate": 1e-06, "loss": 0.4092, "step": 704 }, { "epoch": 0.045180722891566265, "grad_norm": 2.6647181788543675, "learning_rate": 1e-06, "loss": 0.4644, "step": 705 }, { "epoch": 0.04524480902332735, "grad_norm": 2.8176686628649867, "learning_rate": 1e-06, "loss": 0.3859, "step": 706 }, { "epoch": 0.04530889515508844, "grad_norm": 3.5376406758684555, "learning_rate": 1e-06, "loss": 0.4637, "step": 707 }, { "epoch": 0.04537298128684952, "grad_norm": 2.648046979088884, "learning_rate": 1e-06, "loss": 0.436, "step": 708 }, { "epoch": 0.045437067418610616, "grad_norm": 2.714391240179539, "learning_rate": 1e-06, "loss": 0.4007, "step": 709 }, { "epoch": 0.0455011535503717, "grad_norm": 2.4256539477484993, "learning_rate": 1e-06, "loss": 0.4697, "step": 710 }, { "epoch": 0.04556523968213279, "grad_norm": 2.53217824501919, "learning_rate": 1e-06, "loss": 0.4386, "step": 711 }, { "epoch": 0.045629325813893874, "grad_norm": 2.627890030554265, "learning_rate": 1e-06, "loss": 0.3706, "step": 712 }, { "epoch": 0.04569341194565496, "grad_norm": 2.518196492721495, "learning_rate": 1e-06, "loss": 0.4087, "step": 713 }, { "epoch": 0.045757498077416046, "grad_norm": 2.496806126024874, "learning_rate": 1e-06, "loss": 0.4553, "step": 714 }, { "epoch": 0.04582158420917713, "grad_norm": 2.5758802594573242, "learning_rate": 1e-06, "loss": 0.3946, "step": 715 }, { "epoch": 0.04588567034093822, "grad_norm": 2.6327983887455875, "learning_rate": 1e-06, "loss": 0.4266, "step": 716 }, { "epoch": 0.045949756472699305, "grad_norm": 2.5940640401988104, "learning_rate": 1e-06, "loss": 0.4717, "step": 717 }, { "epoch": 0.0460138426044604, "grad_norm": 2.6292851190752056, "learning_rate": 1e-06, "loss": 0.4645, "step": 718 }, { "epoch": 0.046077928736221484, "grad_norm": 2.5407643752820706, "learning_rate": 1e-06, "loss": 0.4496, "step": 719 }, { "epoch": 0.04614201486798257, "grad_norm": 2.7968598288395667, "learning_rate": 1e-06, "loss": 0.4301, "step": 720 }, { "epoch": 0.046206100999743656, "grad_norm": 2.5396197101295246, "learning_rate": 1e-06, "loss": 0.4702, "step": 721 }, { "epoch": 0.04627018713150474, "grad_norm": 2.6880034453423662, "learning_rate": 1e-06, "loss": 0.4089, "step": 722 }, { "epoch": 0.04633427326326583, "grad_norm": 2.534639229971012, "learning_rate": 1e-06, "loss": 0.4418, "step": 723 }, { "epoch": 0.046398359395026914, "grad_norm": 2.7035380317990705, "learning_rate": 1e-06, "loss": 0.4194, "step": 724 }, { "epoch": 0.046462445526788, "grad_norm": 2.5100605403625913, "learning_rate": 1e-06, "loss": 0.4387, "step": 725 }, { "epoch": 0.04652653165854909, "grad_norm": 2.787377804865803, "learning_rate": 1e-06, "loss": 0.4391, "step": 726 }, { "epoch": 0.04659061779031018, "grad_norm": 2.758137307249647, "learning_rate": 1e-06, "loss": 0.4297, "step": 727 }, { "epoch": 0.046654703922071265, "grad_norm": 2.6893335848609987, "learning_rate": 1e-06, "loss": 0.4442, "step": 728 }, { "epoch": 0.04671879005383235, "grad_norm": 2.596806970800409, "learning_rate": 1e-06, "loss": 0.4434, "step": 729 }, { "epoch": 0.04678287618559344, "grad_norm": 2.607569512934613, "learning_rate": 1e-06, "loss": 0.3982, "step": 730 }, { "epoch": 0.046846962317354524, "grad_norm": 2.5297392747277994, "learning_rate": 1e-06, "loss": 0.4196, "step": 731 }, { "epoch": 0.04691104844911561, "grad_norm": 2.6521936744466195, "learning_rate": 1e-06, "loss": 0.4147, "step": 732 }, { "epoch": 0.046975134580876696, "grad_norm": 2.876213805294052, "learning_rate": 1e-06, "loss": 0.395, "step": 733 }, { "epoch": 0.04703922071263778, "grad_norm": 2.4043885041918243, "learning_rate": 1e-06, "loss": 0.4338, "step": 734 }, { "epoch": 0.047103306844398875, "grad_norm": 2.5867612136864055, "learning_rate": 1e-06, "loss": 0.4444, "step": 735 }, { "epoch": 0.04716739297615996, "grad_norm": 2.5203294696988134, "learning_rate": 1e-06, "loss": 0.437, "step": 736 }, { "epoch": 0.04723147910792105, "grad_norm": 2.6451605123098587, "learning_rate": 1e-06, "loss": 0.4449, "step": 737 }, { "epoch": 0.04729556523968213, "grad_norm": 2.6941293120299137, "learning_rate": 1e-06, "loss": 0.4292, "step": 738 }, { "epoch": 0.04735965137144322, "grad_norm": 2.6302676678079546, "learning_rate": 1e-06, "loss": 0.4505, "step": 739 }, { "epoch": 0.047423737503204305, "grad_norm": 2.592261969149947, "learning_rate": 1e-06, "loss": 0.4215, "step": 740 }, { "epoch": 0.04748782363496539, "grad_norm": 2.682832075298025, "learning_rate": 1e-06, "loss": 0.4149, "step": 741 }, { "epoch": 0.04755190976672648, "grad_norm": 2.5907747282023887, "learning_rate": 1e-06, "loss": 0.4235, "step": 742 }, { "epoch": 0.04761599589848757, "grad_norm": 2.6833032775341943, "learning_rate": 1e-06, "loss": 0.399, "step": 743 }, { "epoch": 0.047680082030248656, "grad_norm": 2.518606400690256, "learning_rate": 1e-06, "loss": 0.3945, "step": 744 }, { "epoch": 0.04774416816200974, "grad_norm": 2.479270755919287, "learning_rate": 1e-06, "loss": 0.4386, "step": 745 }, { "epoch": 0.04780825429377083, "grad_norm": 2.72515135116294, "learning_rate": 1e-06, "loss": 0.4247, "step": 746 }, { "epoch": 0.047872340425531915, "grad_norm": 2.4749722434919716, "learning_rate": 1e-06, "loss": 0.4729, "step": 747 }, { "epoch": 0.047936426557293, "grad_norm": 2.5106869846945323, "learning_rate": 1e-06, "loss": 0.4118, "step": 748 }, { "epoch": 0.04800051268905409, "grad_norm": 2.665593207338953, "learning_rate": 1e-06, "loss": 0.4037, "step": 749 }, { "epoch": 0.04806459882081517, "grad_norm": 2.7037283823905405, "learning_rate": 1e-06, "loss": 0.451, "step": 750 }, { "epoch": 0.048128684952576266, "grad_norm": 2.433991765145583, "learning_rate": 1e-06, "loss": 0.4518, "step": 751 }, { "epoch": 0.04819277108433735, "grad_norm": 2.6227479993797833, "learning_rate": 1e-06, "loss": 0.327, "step": 752 }, { "epoch": 0.04825685721609844, "grad_norm": 2.5651412483654323, "learning_rate": 1e-06, "loss": 0.4565, "step": 753 }, { "epoch": 0.048320943347859524, "grad_norm": 2.4244933108312683, "learning_rate": 1e-06, "loss": 0.4195, "step": 754 }, { "epoch": 0.04838502947962061, "grad_norm": 2.41146156297117, "learning_rate": 1e-06, "loss": 0.4178, "step": 755 }, { "epoch": 0.048449115611381696, "grad_norm": 2.8872723239228097, "learning_rate": 1e-06, "loss": 0.4369, "step": 756 }, { "epoch": 0.04851320174314278, "grad_norm": 2.762725037885391, "learning_rate": 1e-06, "loss": 0.3781, "step": 757 }, { "epoch": 0.04857728787490387, "grad_norm": 3.836723679916666, "learning_rate": 1e-06, "loss": 0.4037, "step": 758 }, { "epoch": 0.048641374006664954, "grad_norm": 2.8109148560486727, "learning_rate": 1e-06, "loss": 0.4655, "step": 759 }, { "epoch": 0.04870546013842605, "grad_norm": 2.577080298605142, "learning_rate": 1e-06, "loss": 0.4606, "step": 760 }, { "epoch": 0.048769546270187134, "grad_norm": 2.65526854422781, "learning_rate": 1e-06, "loss": 0.5098, "step": 761 }, { "epoch": 0.04883363240194822, "grad_norm": 2.525793932433678, "learning_rate": 1e-06, "loss": 0.3975, "step": 762 }, { "epoch": 0.048897718533709306, "grad_norm": 2.7031533973927915, "learning_rate": 1e-06, "loss": 0.4672, "step": 763 }, { "epoch": 0.04896180466547039, "grad_norm": 2.772546979327886, "learning_rate": 1e-06, "loss": 0.4592, "step": 764 }, { "epoch": 0.04902589079723148, "grad_norm": 2.7047272161441316, "learning_rate": 1e-06, "loss": 0.4611, "step": 765 }, { "epoch": 0.049089976928992564, "grad_norm": 2.6979587125081963, "learning_rate": 1e-06, "loss": 0.4094, "step": 766 }, { "epoch": 0.04915406306075365, "grad_norm": 2.525668570583122, "learning_rate": 1e-06, "loss": 0.38, "step": 767 }, { "epoch": 0.04921814919251474, "grad_norm": 2.538200424294564, "learning_rate": 1e-06, "loss": 0.3992, "step": 768 }, { "epoch": 0.04928223532427583, "grad_norm": 2.4589512816952372, "learning_rate": 1e-06, "loss": 0.3886, "step": 769 }, { "epoch": 0.049346321456036915, "grad_norm": 2.74365547311966, "learning_rate": 1e-06, "loss": 0.4395, "step": 770 }, { "epoch": 0.049410407587798, "grad_norm": 2.8613608191226456, "learning_rate": 1e-06, "loss": 0.417, "step": 771 }, { "epoch": 0.04947449371955909, "grad_norm": 2.4818805818190293, "learning_rate": 1e-06, "loss": 0.4414, "step": 772 }, { "epoch": 0.04953857985132017, "grad_norm": 3.073554752078375, "learning_rate": 1e-06, "loss": 0.3623, "step": 773 }, { "epoch": 0.04960266598308126, "grad_norm": 2.6000204539317986, "learning_rate": 1e-06, "loss": 0.4448, "step": 774 }, { "epoch": 0.049666752114842345, "grad_norm": 2.619867444073265, "learning_rate": 1e-06, "loss": 0.4846, "step": 775 }, { "epoch": 0.04973083824660343, "grad_norm": 2.670953914332981, "learning_rate": 1e-06, "loss": 0.4124, "step": 776 }, { "epoch": 0.049794924378364525, "grad_norm": 2.6431772566079537, "learning_rate": 1e-06, "loss": 0.4111, "step": 777 }, { "epoch": 0.04985901051012561, "grad_norm": 2.379782074934585, "learning_rate": 1e-06, "loss": 0.4399, "step": 778 }, { "epoch": 0.0499230966418867, "grad_norm": 2.5242383740225645, "learning_rate": 1e-06, "loss": 0.3977, "step": 779 }, { "epoch": 0.04998718277364778, "grad_norm": 2.4585612977491214, "learning_rate": 1e-06, "loss": 0.4342, "step": 780 }, { "epoch": 0.05005126890540887, "grad_norm": 2.5353463721233833, "learning_rate": 1e-06, "loss": 0.409, "step": 781 }, { "epoch": 0.050115355037169955, "grad_norm": 2.771798641700129, "learning_rate": 1e-06, "loss": 0.4163, "step": 782 }, { "epoch": 0.05017944116893104, "grad_norm": 2.4567227275972785, "learning_rate": 1e-06, "loss": 0.4343, "step": 783 }, { "epoch": 0.05024352730069213, "grad_norm": 2.776241471884275, "learning_rate": 1e-06, "loss": 0.3772, "step": 784 }, { "epoch": 0.05030761343245322, "grad_norm": 2.73917716665947, "learning_rate": 1e-06, "loss": 0.421, "step": 785 }, { "epoch": 0.050371699564214306, "grad_norm": 2.4608060384184696, "learning_rate": 1e-06, "loss": 0.4078, "step": 786 }, { "epoch": 0.05043578569597539, "grad_norm": 2.7782795289847333, "learning_rate": 1e-06, "loss": 0.4286, "step": 787 }, { "epoch": 0.05049987182773648, "grad_norm": 2.4978989115524106, "learning_rate": 1e-06, "loss": 0.3732, "step": 788 }, { "epoch": 0.050563957959497564, "grad_norm": 2.5533215903850977, "learning_rate": 1e-06, "loss": 0.3687, "step": 789 }, { "epoch": 0.05062804409125865, "grad_norm": 2.7587787615557113, "learning_rate": 1e-06, "loss": 0.427, "step": 790 }, { "epoch": 0.05069213022301974, "grad_norm": 2.6589908486758125, "learning_rate": 1e-06, "loss": 0.4661, "step": 791 }, { "epoch": 0.05075621635478082, "grad_norm": 2.574238405368089, "learning_rate": 1e-06, "loss": 0.4779, "step": 792 }, { "epoch": 0.050820302486541916, "grad_norm": 2.6274966108889255, "learning_rate": 1e-06, "loss": 0.4217, "step": 793 }, { "epoch": 0.050884388618303, "grad_norm": 3.2801557437132556, "learning_rate": 1e-06, "loss": 0.3728, "step": 794 }, { "epoch": 0.05094847475006409, "grad_norm": 2.6941030070682874, "learning_rate": 1e-06, "loss": 0.466, "step": 795 }, { "epoch": 0.051012560881825174, "grad_norm": 2.5176002839833918, "learning_rate": 1e-06, "loss": 0.4492, "step": 796 }, { "epoch": 0.05107664701358626, "grad_norm": 2.7530193209358567, "learning_rate": 1e-06, "loss": 0.4408, "step": 797 }, { "epoch": 0.051140733145347346, "grad_norm": 2.565968695255342, "learning_rate": 1e-06, "loss": 0.3498, "step": 798 }, { "epoch": 0.05120481927710843, "grad_norm": 2.6259720811960445, "learning_rate": 1e-06, "loss": 0.3989, "step": 799 }, { "epoch": 0.05126890540886952, "grad_norm": 2.6154860713109773, "learning_rate": 1e-06, "loss": 0.4164, "step": 800 }, { "epoch": 0.051332991540630604, "grad_norm": 2.7563490613331054, "learning_rate": 1e-06, "loss": 0.4331, "step": 801 }, { "epoch": 0.0513970776723917, "grad_norm": 2.588060868381382, "learning_rate": 1e-06, "loss": 0.4109, "step": 802 }, { "epoch": 0.05146116380415278, "grad_norm": 2.4579680369870234, "learning_rate": 1e-06, "loss": 0.4286, "step": 803 }, { "epoch": 0.05152524993591387, "grad_norm": 2.4116736631822464, "learning_rate": 1e-06, "loss": 0.4438, "step": 804 }, { "epoch": 0.051589336067674955, "grad_norm": 2.695563644208034, "learning_rate": 1e-06, "loss": 0.4688, "step": 805 }, { "epoch": 0.05165342219943604, "grad_norm": 2.675047438176051, "learning_rate": 1e-06, "loss": 0.4056, "step": 806 }, { "epoch": 0.05171750833119713, "grad_norm": 2.748181109672208, "learning_rate": 1e-06, "loss": 0.4171, "step": 807 }, { "epoch": 0.051781594462958214, "grad_norm": 2.3059289383795263, "learning_rate": 1e-06, "loss": 0.3945, "step": 808 }, { "epoch": 0.0518456805947193, "grad_norm": 2.5384132760352434, "learning_rate": 1e-06, "loss": 0.4316, "step": 809 }, { "epoch": 0.05190976672648039, "grad_norm": 2.647356877858427, "learning_rate": 1e-06, "loss": 0.4583, "step": 810 }, { "epoch": 0.05197385285824148, "grad_norm": 2.6468297347585636, "learning_rate": 1e-06, "loss": 0.4099, "step": 811 }, { "epoch": 0.052037938990002565, "grad_norm": 2.694772877787142, "learning_rate": 1e-06, "loss": 0.4127, "step": 812 }, { "epoch": 0.05210202512176365, "grad_norm": 2.7739794514088914, "learning_rate": 1e-06, "loss": 0.4089, "step": 813 }, { "epoch": 0.05216611125352474, "grad_norm": 2.7432106258958444, "learning_rate": 1e-06, "loss": 0.4478, "step": 814 }, { "epoch": 0.05223019738528582, "grad_norm": 2.59447934250964, "learning_rate": 1e-06, "loss": 0.4175, "step": 815 }, { "epoch": 0.05229428351704691, "grad_norm": 2.482449882094235, "learning_rate": 1e-06, "loss": 0.3899, "step": 816 }, { "epoch": 0.052358369648807995, "grad_norm": 2.6200358287917687, "learning_rate": 1e-06, "loss": 0.4187, "step": 817 }, { "epoch": 0.05242245578056909, "grad_norm": 2.6787235723782645, "learning_rate": 1e-06, "loss": 0.4276, "step": 818 }, { "epoch": 0.052486541912330174, "grad_norm": 2.857976316647533, "learning_rate": 1e-06, "loss": 0.3971, "step": 819 }, { "epoch": 0.05255062804409126, "grad_norm": 2.4674037209798287, "learning_rate": 1e-06, "loss": 0.4056, "step": 820 }, { "epoch": 0.052614714175852347, "grad_norm": 2.581328829253118, "learning_rate": 1e-06, "loss": 0.4306, "step": 821 }, { "epoch": 0.05267880030761343, "grad_norm": 2.5713295173776984, "learning_rate": 1e-06, "loss": 0.4197, "step": 822 }, { "epoch": 0.05274288643937452, "grad_norm": 2.6617855766435903, "learning_rate": 1e-06, "loss": 0.453, "step": 823 }, { "epoch": 0.052806972571135605, "grad_norm": 2.5223890207481925, "learning_rate": 1e-06, "loss": 0.3927, "step": 824 }, { "epoch": 0.05287105870289669, "grad_norm": 2.532919291158444, "learning_rate": 1e-06, "loss": 0.4401, "step": 825 }, { "epoch": 0.05293514483465778, "grad_norm": 2.6565628872172256, "learning_rate": 1e-06, "loss": 0.4112, "step": 826 }, { "epoch": 0.05299923096641887, "grad_norm": 2.593028943009522, "learning_rate": 1e-06, "loss": 0.429, "step": 827 }, { "epoch": 0.053063317098179956, "grad_norm": 2.713800694208457, "learning_rate": 1e-06, "loss": 0.4659, "step": 828 }, { "epoch": 0.05312740322994104, "grad_norm": 2.4576869289326706, "learning_rate": 1e-06, "loss": 0.4199, "step": 829 }, { "epoch": 0.05319148936170213, "grad_norm": 2.4801827666076552, "learning_rate": 1e-06, "loss": 0.4272, "step": 830 }, { "epoch": 0.053255575493463214, "grad_norm": 2.582789118414582, "learning_rate": 1e-06, "loss": 0.4289, "step": 831 }, { "epoch": 0.0533196616252243, "grad_norm": 2.48821093605307, "learning_rate": 1e-06, "loss": 0.4121, "step": 832 }, { "epoch": 0.053383747756985386, "grad_norm": 2.6186323943996435, "learning_rate": 1e-06, "loss": 0.441, "step": 833 }, { "epoch": 0.05344783388874647, "grad_norm": 2.5517032828506334, "learning_rate": 1e-06, "loss": 0.421, "step": 834 }, { "epoch": 0.053511920020507565, "grad_norm": 2.5356349325994647, "learning_rate": 1e-06, "loss": 0.4274, "step": 835 }, { "epoch": 0.05357600615226865, "grad_norm": 2.428198485601343, "learning_rate": 1e-06, "loss": 0.4706, "step": 836 }, { "epoch": 0.05364009228402974, "grad_norm": 2.587909226071667, "learning_rate": 1e-06, "loss": 0.4062, "step": 837 }, { "epoch": 0.053704178415790824, "grad_norm": 2.309073113032567, "learning_rate": 1e-06, "loss": 0.4032, "step": 838 }, { "epoch": 0.05376826454755191, "grad_norm": 2.6703159072154077, "learning_rate": 1e-06, "loss": 0.419, "step": 839 }, { "epoch": 0.053832350679312996, "grad_norm": 2.71350739697573, "learning_rate": 1e-06, "loss": 0.4495, "step": 840 }, { "epoch": 0.05389643681107408, "grad_norm": 2.434892539079085, "learning_rate": 1e-06, "loss": 0.4535, "step": 841 }, { "epoch": 0.05396052294283517, "grad_norm": 2.5963354526268425, "learning_rate": 1e-06, "loss": 0.425, "step": 842 }, { "epoch": 0.054024609074596254, "grad_norm": 2.483702809890466, "learning_rate": 1e-06, "loss": 0.4452, "step": 843 }, { "epoch": 0.05408869520635735, "grad_norm": 2.5440750960584957, "learning_rate": 1e-06, "loss": 0.3545, "step": 844 }, { "epoch": 0.05415278133811843, "grad_norm": 2.483060350154876, "learning_rate": 1e-06, "loss": 0.4014, "step": 845 }, { "epoch": 0.05421686746987952, "grad_norm": 2.65872885985031, "learning_rate": 1e-06, "loss": 0.3978, "step": 846 }, { "epoch": 0.054280953601640605, "grad_norm": 2.5818989112102173, "learning_rate": 1e-06, "loss": 0.3821, "step": 847 }, { "epoch": 0.05434503973340169, "grad_norm": 2.4076980150335734, "learning_rate": 1e-06, "loss": 0.3354, "step": 848 }, { "epoch": 0.05440912586516278, "grad_norm": 2.698007185792458, "learning_rate": 1e-06, "loss": 0.4506, "step": 849 }, { "epoch": 0.05447321199692386, "grad_norm": 2.519075483872261, "learning_rate": 1e-06, "loss": 0.4133, "step": 850 }, { "epoch": 0.05453729812868495, "grad_norm": 2.717885074101232, "learning_rate": 1e-06, "loss": 0.4175, "step": 851 }, { "epoch": 0.05460138426044604, "grad_norm": 2.5856521915485646, "learning_rate": 1e-06, "loss": 0.4253, "step": 852 }, { "epoch": 0.05466547039220713, "grad_norm": 2.4556651119087127, "learning_rate": 1e-06, "loss": 0.3957, "step": 853 }, { "epoch": 0.054729556523968215, "grad_norm": 2.7721277540478146, "learning_rate": 1e-06, "loss": 0.3671, "step": 854 }, { "epoch": 0.0547936426557293, "grad_norm": 2.657352492338648, "learning_rate": 1e-06, "loss": 0.4178, "step": 855 }, { "epoch": 0.05485772878749039, "grad_norm": 2.7006089581535075, "learning_rate": 1e-06, "loss": 0.4183, "step": 856 }, { "epoch": 0.05492181491925147, "grad_norm": 2.5670709358295647, "learning_rate": 1e-06, "loss": 0.4558, "step": 857 }, { "epoch": 0.05498590105101256, "grad_norm": 2.628637826655462, "learning_rate": 1e-06, "loss": 0.4069, "step": 858 }, { "epoch": 0.055049987182773645, "grad_norm": 2.6148005379775783, "learning_rate": 1e-06, "loss": 0.4136, "step": 859 }, { "epoch": 0.05511407331453474, "grad_norm": 2.591000403813115, "learning_rate": 1e-06, "loss": 0.3866, "step": 860 }, { "epoch": 0.055178159446295824, "grad_norm": 2.507601296484238, "learning_rate": 1e-06, "loss": 0.3998, "step": 861 }, { "epoch": 0.05524224557805691, "grad_norm": 2.787771022744685, "learning_rate": 1e-06, "loss": 0.4026, "step": 862 }, { "epoch": 0.055306331709817996, "grad_norm": 2.518082388993556, "learning_rate": 1e-06, "loss": 0.4215, "step": 863 }, { "epoch": 0.05537041784157908, "grad_norm": 2.4587915178077777, "learning_rate": 1e-06, "loss": 0.4266, "step": 864 }, { "epoch": 0.05543450397334017, "grad_norm": 2.615686357034752, "learning_rate": 1e-06, "loss": 0.368, "step": 865 }, { "epoch": 0.055498590105101255, "grad_norm": 2.761059961241006, "learning_rate": 1e-06, "loss": 0.5015, "step": 866 }, { "epoch": 0.05556267623686234, "grad_norm": 2.5272432083229535, "learning_rate": 1e-06, "loss": 0.4705, "step": 867 }, { "epoch": 0.05562676236862343, "grad_norm": 2.59284819194525, "learning_rate": 1e-06, "loss": 0.403, "step": 868 }, { "epoch": 0.05569084850038452, "grad_norm": 2.533049808944894, "learning_rate": 1e-06, "loss": 0.394, "step": 869 }, { "epoch": 0.055754934632145606, "grad_norm": 2.504421255278961, "learning_rate": 1e-06, "loss": 0.3689, "step": 870 }, { "epoch": 0.05581902076390669, "grad_norm": 2.771424162674612, "learning_rate": 1e-06, "loss": 0.4824, "step": 871 }, { "epoch": 0.05588310689566778, "grad_norm": 2.5494936421830285, "learning_rate": 1e-06, "loss": 0.4088, "step": 872 }, { "epoch": 0.055947193027428864, "grad_norm": 2.626180064034078, "learning_rate": 1e-06, "loss": 0.4615, "step": 873 }, { "epoch": 0.05601127915918995, "grad_norm": 2.845992108008082, "learning_rate": 1e-06, "loss": 0.4442, "step": 874 }, { "epoch": 0.056075365290951036, "grad_norm": 2.741059913668609, "learning_rate": 1e-06, "loss": 0.4047, "step": 875 }, { "epoch": 0.05613945142271212, "grad_norm": 2.870070922473511, "learning_rate": 1e-06, "loss": 0.4648, "step": 876 }, { "epoch": 0.056203537554473215, "grad_norm": 2.5058746300769434, "learning_rate": 1e-06, "loss": 0.3809, "step": 877 }, { "epoch": 0.0562676236862343, "grad_norm": 2.560616702011279, "learning_rate": 1e-06, "loss": 0.4123, "step": 878 }, { "epoch": 0.05633170981799539, "grad_norm": 2.6594905071835955, "learning_rate": 1e-06, "loss": 0.4145, "step": 879 }, { "epoch": 0.05639579594975647, "grad_norm": 2.5214292041437827, "learning_rate": 1e-06, "loss": 0.4427, "step": 880 }, { "epoch": 0.05645988208151756, "grad_norm": 2.5077576684919407, "learning_rate": 1e-06, "loss": 0.4656, "step": 881 }, { "epoch": 0.056523968213278646, "grad_norm": 2.5092725959002546, "learning_rate": 1e-06, "loss": 0.39, "step": 882 }, { "epoch": 0.05658805434503973, "grad_norm": 2.580445517000057, "learning_rate": 1e-06, "loss": 0.4093, "step": 883 }, { "epoch": 0.05665214047680082, "grad_norm": 2.5735197772804193, "learning_rate": 1e-06, "loss": 0.4221, "step": 884 }, { "epoch": 0.056716226608561904, "grad_norm": 2.574408375918761, "learning_rate": 1e-06, "loss": 0.4423, "step": 885 }, { "epoch": 0.056780312740323, "grad_norm": 2.780282548148286, "learning_rate": 1e-06, "loss": 0.4947, "step": 886 }, { "epoch": 0.05684439887208408, "grad_norm": 2.4928626518173274, "learning_rate": 1e-06, "loss": 0.3504, "step": 887 }, { "epoch": 0.05690848500384517, "grad_norm": 2.553051950988801, "learning_rate": 1e-06, "loss": 0.3975, "step": 888 }, { "epoch": 0.056972571135606255, "grad_norm": 2.658416930515607, "learning_rate": 1e-06, "loss": 0.4394, "step": 889 }, { "epoch": 0.05703665726736734, "grad_norm": 2.485961423782349, "learning_rate": 1e-06, "loss": 0.4279, "step": 890 }, { "epoch": 0.05710074339912843, "grad_norm": 2.667713489712823, "learning_rate": 1e-06, "loss": 0.4705, "step": 891 }, { "epoch": 0.05716482953088951, "grad_norm": 2.534564282196064, "learning_rate": 1e-06, "loss": 0.3889, "step": 892 }, { "epoch": 0.0572289156626506, "grad_norm": 2.4356599669017496, "learning_rate": 1e-06, "loss": 0.3985, "step": 893 }, { "epoch": 0.05729300179441169, "grad_norm": 2.7300363917836066, "learning_rate": 1e-06, "loss": 0.4208, "step": 894 }, { "epoch": 0.05735708792617278, "grad_norm": 2.4426263028281503, "learning_rate": 1e-06, "loss": 0.4138, "step": 895 }, { "epoch": 0.057421174057933864, "grad_norm": 2.4273359063929116, "learning_rate": 1e-06, "loss": 0.3869, "step": 896 }, { "epoch": 0.05748526018969495, "grad_norm": 2.683961557261951, "learning_rate": 1e-06, "loss": 0.4253, "step": 897 }, { "epoch": 0.05754934632145604, "grad_norm": 2.716449807419154, "learning_rate": 1e-06, "loss": 0.4144, "step": 898 }, { "epoch": 0.05761343245321712, "grad_norm": 2.489474578355706, "learning_rate": 1e-06, "loss": 0.4242, "step": 899 }, { "epoch": 0.05767751858497821, "grad_norm": 2.621299699094432, "learning_rate": 1e-06, "loss": 0.3961, "step": 900 }, { "epoch": 0.057741604716739295, "grad_norm": 2.7042519805441545, "learning_rate": 1e-06, "loss": 0.4085, "step": 901 }, { "epoch": 0.05780569084850039, "grad_norm": 2.8709190595594793, "learning_rate": 1e-06, "loss": 0.4351, "step": 902 }, { "epoch": 0.057869776980261474, "grad_norm": 2.7239595338843543, "learning_rate": 1e-06, "loss": 0.4457, "step": 903 }, { "epoch": 0.05793386311202256, "grad_norm": 2.6942314635143743, "learning_rate": 1e-06, "loss": 0.437, "step": 904 }, { "epoch": 0.057997949243783646, "grad_norm": 2.561892748218017, "learning_rate": 1e-06, "loss": 0.4445, "step": 905 }, { "epoch": 0.05806203537554473, "grad_norm": 2.6940166517257977, "learning_rate": 1e-06, "loss": 0.4407, "step": 906 }, { "epoch": 0.05812612150730582, "grad_norm": 2.8794761385227083, "learning_rate": 1e-06, "loss": 0.4117, "step": 907 }, { "epoch": 0.058190207639066904, "grad_norm": 2.6035218967013662, "learning_rate": 1e-06, "loss": 0.4566, "step": 908 }, { "epoch": 0.05825429377082799, "grad_norm": 2.658548866501046, "learning_rate": 1e-06, "loss": 0.4506, "step": 909 }, { "epoch": 0.058318379902589076, "grad_norm": 2.7266152577743723, "learning_rate": 1e-06, "loss": 0.502, "step": 910 }, { "epoch": 0.05838246603435017, "grad_norm": 2.480477229574691, "learning_rate": 1e-06, "loss": 0.4381, "step": 911 }, { "epoch": 0.058446552166111256, "grad_norm": 2.571579618617921, "learning_rate": 1e-06, "loss": 0.4126, "step": 912 }, { "epoch": 0.05851063829787234, "grad_norm": 2.673325790509034, "learning_rate": 1e-06, "loss": 0.4085, "step": 913 }, { "epoch": 0.05857472442963343, "grad_norm": 2.585655375519541, "learning_rate": 1e-06, "loss": 0.4323, "step": 914 }, { "epoch": 0.058638810561394514, "grad_norm": 2.5704842362508846, "learning_rate": 1e-06, "loss": 0.4449, "step": 915 }, { "epoch": 0.0587028966931556, "grad_norm": 2.6248859605653276, "learning_rate": 1e-06, "loss": 0.4606, "step": 916 }, { "epoch": 0.058766982824916686, "grad_norm": 2.5772275270301703, "learning_rate": 1e-06, "loss": 0.4135, "step": 917 }, { "epoch": 0.05883106895667777, "grad_norm": 2.524083865479341, "learning_rate": 1e-06, "loss": 0.4281, "step": 918 }, { "epoch": 0.058895155088438865, "grad_norm": 2.5691457666125697, "learning_rate": 1e-06, "loss": 0.4609, "step": 919 }, { "epoch": 0.05895924122019995, "grad_norm": 2.613129783593128, "learning_rate": 1e-06, "loss": 0.4304, "step": 920 }, { "epoch": 0.05902332735196104, "grad_norm": 2.645453363847247, "learning_rate": 1e-06, "loss": 0.4506, "step": 921 }, { "epoch": 0.05908741348372212, "grad_norm": 2.5056096531333516, "learning_rate": 1e-06, "loss": 0.3821, "step": 922 }, { "epoch": 0.05915149961548321, "grad_norm": 2.8282030249135, "learning_rate": 1e-06, "loss": 0.4979, "step": 923 }, { "epoch": 0.059215585747244295, "grad_norm": 2.545655906976193, "learning_rate": 1e-06, "loss": 0.4205, "step": 924 }, { "epoch": 0.05927967187900538, "grad_norm": 2.4742283942662486, "learning_rate": 1e-06, "loss": 0.3709, "step": 925 }, { "epoch": 0.05934375801076647, "grad_norm": 2.609486423567062, "learning_rate": 1e-06, "loss": 0.3883, "step": 926 }, { "epoch": 0.059407844142527554, "grad_norm": 2.433383851079032, "learning_rate": 1e-06, "loss": 0.465, "step": 927 }, { "epoch": 0.05947193027428865, "grad_norm": 2.4780835152841405, "learning_rate": 1e-06, "loss": 0.4182, "step": 928 }, { "epoch": 0.05953601640604973, "grad_norm": 2.5995419876722883, "learning_rate": 1e-06, "loss": 0.4444, "step": 929 }, { "epoch": 0.05960010253781082, "grad_norm": 2.802476257110543, "learning_rate": 1e-06, "loss": 0.4203, "step": 930 }, { "epoch": 0.059664188669571905, "grad_norm": 2.696579952644756, "learning_rate": 1e-06, "loss": 0.369, "step": 931 }, { "epoch": 0.05972827480133299, "grad_norm": 2.764140417821217, "learning_rate": 1e-06, "loss": 0.4162, "step": 932 }, { "epoch": 0.05979236093309408, "grad_norm": 2.7153933671973727, "learning_rate": 1e-06, "loss": 0.4039, "step": 933 }, { "epoch": 0.05985644706485516, "grad_norm": 2.5802340494659806, "learning_rate": 1e-06, "loss": 0.423, "step": 934 }, { "epoch": 0.05992053319661625, "grad_norm": 2.8596854328037984, "learning_rate": 1e-06, "loss": 0.4285, "step": 935 }, { "epoch": 0.05998461932837734, "grad_norm": 2.4558109539962976, "learning_rate": 1e-06, "loss": 0.3674, "step": 936 }, { "epoch": 0.06004870546013843, "grad_norm": 2.565179500088762, "learning_rate": 1e-06, "loss": 0.4408, "step": 937 }, { "epoch": 0.060112791591899514, "grad_norm": 2.8418067743587625, "learning_rate": 1e-06, "loss": 0.3897, "step": 938 }, { "epoch": 0.0601768777236606, "grad_norm": 2.664804134655138, "learning_rate": 1e-06, "loss": 0.4296, "step": 939 }, { "epoch": 0.060240963855421686, "grad_norm": 2.4571750352285804, "learning_rate": 1e-06, "loss": 0.3878, "step": 940 }, { "epoch": 0.06030504998718277, "grad_norm": 2.61630303904412, "learning_rate": 1e-06, "loss": 0.4347, "step": 941 }, { "epoch": 0.06036913611894386, "grad_norm": 2.7027575415190896, "learning_rate": 1e-06, "loss": 0.3921, "step": 942 }, { "epoch": 0.060433222250704945, "grad_norm": 3.010427817021502, "learning_rate": 1e-06, "loss": 0.3863, "step": 943 }, { "epoch": 0.06049730838246604, "grad_norm": 2.7087312174774065, "learning_rate": 1e-06, "loss": 0.3995, "step": 944 }, { "epoch": 0.060561394514227124, "grad_norm": 2.4384517149103226, "learning_rate": 1e-06, "loss": 0.3957, "step": 945 }, { "epoch": 0.06062548064598821, "grad_norm": 2.3482061954090616, "learning_rate": 1e-06, "loss": 0.3846, "step": 946 }, { "epoch": 0.060689566777749296, "grad_norm": 2.5136646304971033, "learning_rate": 1e-06, "loss": 0.4334, "step": 947 }, { "epoch": 0.06075365290951038, "grad_norm": 2.5457038683619015, "learning_rate": 1e-06, "loss": 0.3798, "step": 948 }, { "epoch": 0.06081773904127147, "grad_norm": 2.420555660582623, "learning_rate": 1e-06, "loss": 0.3932, "step": 949 }, { "epoch": 0.060881825173032554, "grad_norm": 2.6412791172215524, "learning_rate": 1e-06, "loss": 0.4662, "step": 950 }, { "epoch": 0.06094591130479364, "grad_norm": 2.9291322019172688, "learning_rate": 1e-06, "loss": 0.4281, "step": 951 }, { "epoch": 0.061009997436554726, "grad_norm": 2.7145651389710177, "learning_rate": 1e-06, "loss": 0.4171, "step": 952 }, { "epoch": 0.06107408356831582, "grad_norm": 2.6986578195358666, "learning_rate": 1e-06, "loss": 0.3915, "step": 953 }, { "epoch": 0.061138169700076905, "grad_norm": 2.554566833482712, "learning_rate": 1e-06, "loss": 0.4024, "step": 954 }, { "epoch": 0.06120225583183799, "grad_norm": 2.476875377384491, "learning_rate": 1e-06, "loss": 0.398, "step": 955 }, { "epoch": 0.06126634196359908, "grad_norm": 2.587063107108279, "learning_rate": 1e-06, "loss": 0.3987, "step": 956 }, { "epoch": 0.061330428095360164, "grad_norm": 2.5604522601800404, "learning_rate": 1e-06, "loss": 0.4748, "step": 957 }, { "epoch": 0.06139451422712125, "grad_norm": 2.5258207322773614, "learning_rate": 1e-06, "loss": 0.44, "step": 958 }, { "epoch": 0.061458600358882336, "grad_norm": 2.6373864349591107, "learning_rate": 1e-06, "loss": 0.4521, "step": 959 }, { "epoch": 0.06152268649064342, "grad_norm": 2.702180991893202, "learning_rate": 1e-06, "loss": 0.424, "step": 960 }, { "epoch": 0.061586772622404515, "grad_norm": 2.4921759193243074, "learning_rate": 1e-06, "loss": 0.3869, "step": 961 }, { "epoch": 0.0616508587541656, "grad_norm": 2.450635064392266, "learning_rate": 1e-06, "loss": 0.407, "step": 962 }, { "epoch": 0.06171494488592669, "grad_norm": 2.489839839860473, "learning_rate": 1e-06, "loss": 0.4472, "step": 963 }, { "epoch": 0.06177903101768777, "grad_norm": 2.563181821436447, "learning_rate": 1e-06, "loss": 0.4138, "step": 964 }, { "epoch": 0.06184311714944886, "grad_norm": 2.7120831137952637, "learning_rate": 1e-06, "loss": 0.3688, "step": 965 }, { "epoch": 0.061907203281209945, "grad_norm": 2.623528912470007, "learning_rate": 1e-06, "loss": 0.4902, "step": 966 }, { "epoch": 0.06197128941297103, "grad_norm": 2.70653288950328, "learning_rate": 1e-06, "loss": 0.3872, "step": 967 }, { "epoch": 0.06203537554473212, "grad_norm": 2.589210582203363, "learning_rate": 1e-06, "loss": 0.4239, "step": 968 }, { "epoch": 0.06209946167649321, "grad_norm": 2.5398221503803544, "learning_rate": 1e-06, "loss": 0.4281, "step": 969 }, { "epoch": 0.062163547808254296, "grad_norm": 2.454628695755026, "learning_rate": 1e-06, "loss": 0.4235, "step": 970 }, { "epoch": 0.06222763394001538, "grad_norm": 2.848700306726633, "learning_rate": 1e-06, "loss": 0.4546, "step": 971 }, { "epoch": 0.06229172007177647, "grad_norm": 2.6158789298430816, "learning_rate": 1e-06, "loss": 0.4367, "step": 972 }, { "epoch": 0.062355806203537555, "grad_norm": 2.4948748651670978, "learning_rate": 1e-06, "loss": 0.4011, "step": 973 }, { "epoch": 0.06241989233529864, "grad_norm": 2.559300985421126, "learning_rate": 1e-06, "loss": 0.3945, "step": 974 }, { "epoch": 0.06248397846705973, "grad_norm": 2.506236058687631, "learning_rate": 1e-06, "loss": 0.3754, "step": 975 }, { "epoch": 0.06254806459882081, "grad_norm": 2.802408713917196, "learning_rate": 1e-06, "loss": 0.4177, "step": 976 }, { "epoch": 0.0626121507305819, "grad_norm": 2.5662771461384297, "learning_rate": 1e-06, "loss": 0.4466, "step": 977 }, { "epoch": 0.06267623686234298, "grad_norm": 2.4273852123129736, "learning_rate": 1e-06, "loss": 0.431, "step": 978 }, { "epoch": 0.06274032299410408, "grad_norm": 2.695506665685708, "learning_rate": 1e-06, "loss": 0.4042, "step": 979 }, { "epoch": 0.06280440912586516, "grad_norm": 2.5089248203469117, "learning_rate": 1e-06, "loss": 0.4118, "step": 980 }, { "epoch": 0.06286849525762625, "grad_norm": 2.610225075497294, "learning_rate": 1e-06, "loss": 0.425, "step": 981 }, { "epoch": 0.06293258138938734, "grad_norm": 2.6515675578565365, "learning_rate": 1e-06, "loss": 0.4858, "step": 982 }, { "epoch": 0.06299666752114842, "grad_norm": 2.662942034843004, "learning_rate": 1e-06, "loss": 0.4146, "step": 983 }, { "epoch": 0.06306075365290952, "grad_norm": 2.483294863504189, "learning_rate": 1e-06, "loss": 0.4229, "step": 984 }, { "epoch": 0.0631248397846706, "grad_norm": 2.774408912609533, "learning_rate": 1e-06, "loss": 0.4477, "step": 985 }, { "epoch": 0.06318892591643169, "grad_norm": 2.6992735362714746, "learning_rate": 1e-06, "loss": 0.3877, "step": 986 }, { "epoch": 0.06325301204819277, "grad_norm": 2.574806326857814, "learning_rate": 1e-06, "loss": 0.4137, "step": 987 }, { "epoch": 0.06331709817995386, "grad_norm": 2.6815055430989316, "learning_rate": 1e-06, "loss": 0.4214, "step": 988 }, { "epoch": 0.06338118431171494, "grad_norm": 2.519521321534601, "learning_rate": 1e-06, "loss": 0.389, "step": 989 }, { "epoch": 0.06344527044347603, "grad_norm": 2.33602785029866, "learning_rate": 1e-06, "loss": 0.4372, "step": 990 }, { "epoch": 0.06350935657523712, "grad_norm": 2.71707391218128, "learning_rate": 1e-06, "loss": 0.4465, "step": 991 }, { "epoch": 0.0635734427069982, "grad_norm": 2.636719039925872, "learning_rate": 1e-06, "loss": 0.3749, "step": 992 }, { "epoch": 0.0636375288387593, "grad_norm": 2.4058618312611983, "learning_rate": 1e-06, "loss": 0.3711, "step": 993 }, { "epoch": 0.06370161497052038, "grad_norm": 2.6341360576740214, "learning_rate": 1e-06, "loss": 0.4046, "step": 994 }, { "epoch": 0.06376570110228147, "grad_norm": 2.6734271373551226, "learning_rate": 1e-06, "loss": 0.4286, "step": 995 }, { "epoch": 0.06382978723404255, "grad_norm": 2.448615128413396, "learning_rate": 1e-06, "loss": 0.3743, "step": 996 }, { "epoch": 0.06389387336580364, "grad_norm": 2.465073956868142, "learning_rate": 1e-06, "loss": 0.4479, "step": 997 }, { "epoch": 0.06395795949756472, "grad_norm": 2.740786732279467, "learning_rate": 1e-06, "loss": 0.4407, "step": 998 }, { "epoch": 0.06402204562932581, "grad_norm": 2.9571363244427626, "learning_rate": 1e-06, "loss": 0.4127, "step": 999 }, { "epoch": 0.0640861317610869, "grad_norm": 2.6266858312799912, "learning_rate": 1e-06, "loss": 0.4022, "step": 1000 }, { "epoch": 0.06415021789284799, "grad_norm": 2.566332751578703, "learning_rate": 1e-06, "loss": 0.4415, "step": 1001 }, { "epoch": 0.06421430402460908, "grad_norm": 2.8348967920960946, "learning_rate": 1e-06, "loss": 0.4631, "step": 1002 }, { "epoch": 0.06427839015637016, "grad_norm": 2.783573396035872, "learning_rate": 1e-06, "loss": 0.4117, "step": 1003 }, { "epoch": 0.06434247628813125, "grad_norm": 2.573686094710799, "learning_rate": 1e-06, "loss": 0.4196, "step": 1004 }, { "epoch": 0.06440656241989233, "grad_norm": 2.4367691385400554, "learning_rate": 1e-06, "loss": 0.4291, "step": 1005 }, { "epoch": 0.06447064855165342, "grad_norm": 2.5403039658130417, "learning_rate": 1e-06, "loss": 0.4099, "step": 1006 }, { "epoch": 0.06453473468341452, "grad_norm": 2.6028748942628352, "learning_rate": 1e-06, "loss": 0.4034, "step": 1007 }, { "epoch": 0.0645988208151756, "grad_norm": 2.4662519444773148, "learning_rate": 1e-06, "loss": 0.399, "step": 1008 }, { "epoch": 0.06466290694693669, "grad_norm": 2.51916140654751, "learning_rate": 1e-06, "loss": 0.4602, "step": 1009 }, { "epoch": 0.06472699307869777, "grad_norm": 2.5823634639004185, "learning_rate": 1e-06, "loss": 0.4225, "step": 1010 }, { "epoch": 0.06479107921045886, "grad_norm": 2.778381438547188, "learning_rate": 1e-06, "loss": 0.4606, "step": 1011 }, { "epoch": 0.06485516534221994, "grad_norm": 2.556163096331897, "learning_rate": 1e-06, "loss": 0.3915, "step": 1012 }, { "epoch": 0.06491925147398103, "grad_norm": 2.759435920952366, "learning_rate": 1e-06, "loss": 0.4893, "step": 1013 }, { "epoch": 0.06498333760574211, "grad_norm": 2.6059925655979033, "learning_rate": 1e-06, "loss": 0.4138, "step": 1014 }, { "epoch": 0.0650474237375032, "grad_norm": 2.6958934341111997, "learning_rate": 1e-06, "loss": 0.4431, "step": 1015 }, { "epoch": 0.0651115098692643, "grad_norm": 2.5129883965336983, "learning_rate": 1e-06, "loss": 0.3692, "step": 1016 }, { "epoch": 0.06517559600102538, "grad_norm": 2.617160055089755, "learning_rate": 1e-06, "loss": 0.3773, "step": 1017 }, { "epoch": 0.06523968213278647, "grad_norm": 2.596654082237727, "learning_rate": 1e-06, "loss": 0.3705, "step": 1018 }, { "epoch": 0.06530376826454755, "grad_norm": 2.6853485295779964, "learning_rate": 1e-06, "loss": 0.4032, "step": 1019 }, { "epoch": 0.06536785439630864, "grad_norm": 2.731951426496971, "learning_rate": 1e-06, "loss": 0.4325, "step": 1020 }, { "epoch": 0.06543194052806972, "grad_norm": 2.615306745896242, "learning_rate": 1e-06, "loss": 0.4863, "step": 1021 }, { "epoch": 0.06549602665983081, "grad_norm": 2.7173205806268603, "learning_rate": 1e-06, "loss": 0.4243, "step": 1022 }, { "epoch": 0.06556011279159189, "grad_norm": 2.6805449715216567, "learning_rate": 1e-06, "loss": 0.394, "step": 1023 }, { "epoch": 0.06562419892335299, "grad_norm": 2.580930131918419, "learning_rate": 1e-06, "loss": 0.4065, "step": 1024 }, { "epoch": 0.06568828505511408, "grad_norm": 2.597849794416895, "learning_rate": 1e-06, "loss": 0.4519, "step": 1025 }, { "epoch": 0.06575237118687516, "grad_norm": 2.599092164234108, "learning_rate": 1e-06, "loss": 0.4076, "step": 1026 }, { "epoch": 0.06581645731863625, "grad_norm": 2.496092273953171, "learning_rate": 1e-06, "loss": 0.3984, "step": 1027 }, { "epoch": 0.06588054345039733, "grad_norm": 2.5031105951259547, "learning_rate": 1e-06, "loss": 0.3922, "step": 1028 }, { "epoch": 0.06594462958215842, "grad_norm": 2.580869432971234, "learning_rate": 1e-06, "loss": 0.4078, "step": 1029 }, { "epoch": 0.0660087157139195, "grad_norm": 2.3408972954509957, "learning_rate": 1e-06, "loss": 0.3927, "step": 1030 }, { "epoch": 0.0660728018456806, "grad_norm": 2.404906585724594, "learning_rate": 1e-06, "loss": 0.4575, "step": 1031 }, { "epoch": 0.06613688797744169, "grad_norm": 2.635559029994182, "learning_rate": 1e-06, "loss": 0.431, "step": 1032 }, { "epoch": 0.06620097410920277, "grad_norm": 2.7759398385370195, "learning_rate": 1e-06, "loss": 0.4244, "step": 1033 }, { "epoch": 0.06626506024096386, "grad_norm": 2.7080679996277444, "learning_rate": 1e-06, "loss": 0.4226, "step": 1034 }, { "epoch": 0.06632914637272494, "grad_norm": 2.5479868488751345, "learning_rate": 1e-06, "loss": 0.4295, "step": 1035 }, { "epoch": 0.06639323250448603, "grad_norm": 2.5285529800347284, "learning_rate": 1e-06, "loss": 0.4695, "step": 1036 }, { "epoch": 0.06645731863624711, "grad_norm": 2.6519770537147407, "learning_rate": 1e-06, "loss": 0.4483, "step": 1037 }, { "epoch": 0.0665214047680082, "grad_norm": 2.533724568813007, "learning_rate": 1e-06, "loss": 0.4122, "step": 1038 }, { "epoch": 0.06658549089976928, "grad_norm": 2.521873472809296, "learning_rate": 1e-06, "loss": 0.3602, "step": 1039 }, { "epoch": 0.06664957703153038, "grad_norm": 2.8267483275910443, "learning_rate": 1e-06, "loss": 0.4167, "step": 1040 }, { "epoch": 0.06671366316329147, "grad_norm": 2.467528084282326, "learning_rate": 1e-06, "loss": 0.4311, "step": 1041 }, { "epoch": 0.06677774929505255, "grad_norm": 2.561755755376321, "learning_rate": 1e-06, "loss": 0.4501, "step": 1042 }, { "epoch": 0.06684183542681364, "grad_norm": 2.6711046370183373, "learning_rate": 1e-06, "loss": 0.3674, "step": 1043 }, { "epoch": 0.06690592155857472, "grad_norm": 2.7762462243085735, "learning_rate": 1e-06, "loss": 0.434, "step": 1044 }, { "epoch": 0.06697000769033581, "grad_norm": 2.5653129293074213, "learning_rate": 1e-06, "loss": 0.372, "step": 1045 }, { "epoch": 0.0670340938220969, "grad_norm": 2.706282450560898, "learning_rate": 1e-06, "loss": 0.444, "step": 1046 }, { "epoch": 0.06709817995385799, "grad_norm": 2.622565934269703, "learning_rate": 1e-06, "loss": 0.4176, "step": 1047 }, { "epoch": 0.06716226608561907, "grad_norm": 2.781376720798491, "learning_rate": 1e-06, "loss": 0.4173, "step": 1048 }, { "epoch": 0.06722635221738016, "grad_norm": 2.8092295996044228, "learning_rate": 1e-06, "loss": 0.4043, "step": 1049 }, { "epoch": 0.06729043834914125, "grad_norm": 2.4394636221770556, "learning_rate": 1e-06, "loss": 0.5048, "step": 1050 }, { "epoch": 0.06735452448090233, "grad_norm": 2.452372374028152, "learning_rate": 1e-06, "loss": 0.4492, "step": 1051 }, { "epoch": 0.06741861061266342, "grad_norm": 2.3619178075691307, "learning_rate": 1e-06, "loss": 0.3886, "step": 1052 }, { "epoch": 0.0674826967444245, "grad_norm": 2.800387735902425, "learning_rate": 1e-06, "loss": 0.4693, "step": 1053 }, { "epoch": 0.0675467828761856, "grad_norm": 2.6335374299550316, "learning_rate": 1e-06, "loss": 0.386, "step": 1054 }, { "epoch": 0.06761086900794668, "grad_norm": 2.6774069184367177, "learning_rate": 1e-06, "loss": 0.3891, "step": 1055 }, { "epoch": 0.06767495513970777, "grad_norm": 2.7446624322855886, "learning_rate": 1e-06, "loss": 0.4485, "step": 1056 }, { "epoch": 0.06773904127146885, "grad_norm": 2.5979310178492585, "learning_rate": 1e-06, "loss": 0.3936, "step": 1057 }, { "epoch": 0.06780312740322994, "grad_norm": 2.565876902003627, "learning_rate": 1e-06, "loss": 0.4408, "step": 1058 }, { "epoch": 0.06786721353499103, "grad_norm": 2.670634426279264, "learning_rate": 1e-06, "loss": 0.4304, "step": 1059 }, { "epoch": 0.06793129966675211, "grad_norm": 2.6113141898048293, "learning_rate": 1e-06, "loss": 0.3744, "step": 1060 }, { "epoch": 0.0679953857985132, "grad_norm": 2.7067225739696212, "learning_rate": 1e-06, "loss": 0.4545, "step": 1061 }, { "epoch": 0.06805947193027428, "grad_norm": 2.793453856124571, "learning_rate": 1e-06, "loss": 0.4007, "step": 1062 }, { "epoch": 0.06812355806203538, "grad_norm": 2.756931561264504, "learning_rate": 1e-06, "loss": 0.4316, "step": 1063 }, { "epoch": 0.06818764419379646, "grad_norm": 2.597642965053177, "learning_rate": 1e-06, "loss": 0.3865, "step": 1064 }, { "epoch": 0.06825173032555755, "grad_norm": 2.6663999203185385, "learning_rate": 1e-06, "loss": 0.4348, "step": 1065 }, { "epoch": 0.06831581645731864, "grad_norm": 2.8609809542202487, "learning_rate": 1e-06, "loss": 0.3609, "step": 1066 }, { "epoch": 0.06837990258907972, "grad_norm": 2.652883468791596, "learning_rate": 1e-06, "loss": 0.4343, "step": 1067 }, { "epoch": 0.06844398872084081, "grad_norm": 2.9494281680723646, "learning_rate": 1e-06, "loss": 0.3445, "step": 1068 }, { "epoch": 0.0685080748526019, "grad_norm": 2.6494659758233983, "learning_rate": 1e-06, "loss": 0.4532, "step": 1069 }, { "epoch": 0.06857216098436299, "grad_norm": 2.687343190573424, "learning_rate": 1e-06, "loss": 0.4343, "step": 1070 }, { "epoch": 0.06863624711612407, "grad_norm": 2.522789534611682, "learning_rate": 1e-06, "loss": 0.4329, "step": 1071 }, { "epoch": 0.06870033324788516, "grad_norm": 2.668827458426054, "learning_rate": 1e-06, "loss": 0.3978, "step": 1072 }, { "epoch": 0.06876441937964624, "grad_norm": 2.5380058649439676, "learning_rate": 1e-06, "loss": 0.3881, "step": 1073 }, { "epoch": 0.06882850551140733, "grad_norm": 2.5193129973843287, "learning_rate": 1e-06, "loss": 0.3855, "step": 1074 }, { "epoch": 0.06889259164316842, "grad_norm": 2.6544689506543473, "learning_rate": 1e-06, "loss": 0.4046, "step": 1075 }, { "epoch": 0.0689566777749295, "grad_norm": 2.752467894455635, "learning_rate": 1e-06, "loss": 0.4065, "step": 1076 }, { "epoch": 0.0690207639066906, "grad_norm": 2.6254575219907803, "learning_rate": 1e-06, "loss": 0.3959, "step": 1077 }, { "epoch": 0.06908485003845168, "grad_norm": 2.5639116654438254, "learning_rate": 1e-06, "loss": 0.4176, "step": 1078 }, { "epoch": 0.06914893617021277, "grad_norm": 2.816930112267722, "learning_rate": 1e-06, "loss": 0.4839, "step": 1079 }, { "epoch": 0.06921302230197385, "grad_norm": 2.6424585821266313, "learning_rate": 1e-06, "loss": 0.4152, "step": 1080 }, { "epoch": 0.06927710843373494, "grad_norm": 2.598199833909251, "learning_rate": 1e-06, "loss": 0.4057, "step": 1081 }, { "epoch": 0.06934119456549602, "grad_norm": 2.6818415968968385, "learning_rate": 1e-06, "loss": 0.3833, "step": 1082 }, { "epoch": 0.06940528069725711, "grad_norm": 2.7587247856867463, "learning_rate": 1e-06, "loss": 0.4621, "step": 1083 }, { "epoch": 0.0694693668290182, "grad_norm": 2.669134371598925, "learning_rate": 1e-06, "loss": 0.4269, "step": 1084 }, { "epoch": 0.06953345296077929, "grad_norm": 2.506076752346152, "learning_rate": 1e-06, "loss": 0.4204, "step": 1085 }, { "epoch": 0.06959753909254038, "grad_norm": 2.5858512706812475, "learning_rate": 1e-06, "loss": 0.4303, "step": 1086 }, { "epoch": 0.06966162522430146, "grad_norm": 2.661423397910268, "learning_rate": 1e-06, "loss": 0.4027, "step": 1087 }, { "epoch": 0.06972571135606255, "grad_norm": 2.5027730869065947, "learning_rate": 1e-06, "loss": 0.369, "step": 1088 }, { "epoch": 0.06978979748782363, "grad_norm": 2.5945275163137262, "learning_rate": 1e-06, "loss": 0.4553, "step": 1089 }, { "epoch": 0.06985388361958472, "grad_norm": 2.847369043442018, "learning_rate": 1e-06, "loss": 0.4604, "step": 1090 }, { "epoch": 0.06991796975134582, "grad_norm": 2.513424892957305, "learning_rate": 1e-06, "loss": 0.3673, "step": 1091 }, { "epoch": 0.0699820558831069, "grad_norm": 2.7317286397718914, "learning_rate": 1e-06, "loss": 0.4119, "step": 1092 }, { "epoch": 0.07004614201486799, "grad_norm": 2.4113150816480986, "learning_rate": 1e-06, "loss": 0.415, "step": 1093 }, { "epoch": 0.07011022814662907, "grad_norm": 2.6688521108098375, "learning_rate": 1e-06, "loss": 0.4192, "step": 1094 }, { "epoch": 0.07017431427839016, "grad_norm": 2.5120122115920633, "learning_rate": 1e-06, "loss": 0.4205, "step": 1095 }, { "epoch": 0.07023840041015124, "grad_norm": 2.65132626428691, "learning_rate": 1e-06, "loss": 0.4336, "step": 1096 }, { "epoch": 0.07030248654191233, "grad_norm": 2.6400689871000056, "learning_rate": 1e-06, "loss": 0.4401, "step": 1097 }, { "epoch": 0.07036657267367341, "grad_norm": 2.467297818144308, "learning_rate": 1e-06, "loss": 0.4335, "step": 1098 }, { "epoch": 0.0704306588054345, "grad_norm": 2.478710251122422, "learning_rate": 1e-06, "loss": 0.4625, "step": 1099 }, { "epoch": 0.0704947449371956, "grad_norm": 2.506389963438959, "learning_rate": 1e-06, "loss": 0.4214, "step": 1100 }, { "epoch": 0.07055883106895668, "grad_norm": 2.571801825624722, "learning_rate": 1e-06, "loss": 0.3919, "step": 1101 }, { "epoch": 0.07062291720071777, "grad_norm": 2.419710867732795, "learning_rate": 1e-06, "loss": 0.3828, "step": 1102 }, { "epoch": 0.07068700333247885, "grad_norm": 2.599190425523394, "learning_rate": 1e-06, "loss": 0.3828, "step": 1103 }, { "epoch": 0.07075108946423994, "grad_norm": 2.607254163899496, "learning_rate": 1e-06, "loss": 0.4243, "step": 1104 }, { "epoch": 0.07081517559600102, "grad_norm": 2.358621192505561, "learning_rate": 1e-06, "loss": 0.3716, "step": 1105 }, { "epoch": 0.07087926172776211, "grad_norm": 2.539685863939085, "learning_rate": 1e-06, "loss": 0.4441, "step": 1106 }, { "epoch": 0.07094334785952319, "grad_norm": 2.4938855723828772, "learning_rate": 1e-06, "loss": 0.3537, "step": 1107 }, { "epoch": 0.07100743399128429, "grad_norm": 2.5238087008493304, "learning_rate": 1e-06, "loss": 0.45, "step": 1108 }, { "epoch": 0.07107152012304538, "grad_norm": 2.7442192669650094, "learning_rate": 1e-06, "loss": 0.3776, "step": 1109 }, { "epoch": 0.07113560625480646, "grad_norm": 2.5839741345460627, "learning_rate": 1e-06, "loss": 0.5042, "step": 1110 }, { "epoch": 0.07119969238656755, "grad_norm": 2.6663297246069355, "learning_rate": 1e-06, "loss": 0.3484, "step": 1111 }, { "epoch": 0.07126377851832863, "grad_norm": 2.598906923733249, "learning_rate": 1e-06, "loss": 0.431, "step": 1112 }, { "epoch": 0.07132786465008972, "grad_norm": 2.6202715077828085, "learning_rate": 1e-06, "loss": 0.388, "step": 1113 }, { "epoch": 0.0713919507818508, "grad_norm": 2.4825489453871974, "learning_rate": 1e-06, "loss": 0.4029, "step": 1114 }, { "epoch": 0.0714560369136119, "grad_norm": 3.6753116517960454, "learning_rate": 1e-06, "loss": 0.4002, "step": 1115 }, { "epoch": 0.07152012304537299, "grad_norm": 2.6693428456376393, "learning_rate": 1e-06, "loss": 0.4982, "step": 1116 }, { "epoch": 0.07158420917713407, "grad_norm": 2.3868609496718136, "learning_rate": 1e-06, "loss": 0.462, "step": 1117 }, { "epoch": 0.07164829530889516, "grad_norm": 2.7746202720611635, "learning_rate": 1e-06, "loss": 0.3875, "step": 1118 }, { "epoch": 0.07171238144065624, "grad_norm": 2.4389606051674395, "learning_rate": 1e-06, "loss": 0.427, "step": 1119 }, { "epoch": 0.07177646757241733, "grad_norm": 2.743220624439705, "learning_rate": 1e-06, "loss": 0.4451, "step": 1120 }, { "epoch": 0.07184055370417841, "grad_norm": 2.430870254174703, "learning_rate": 1e-06, "loss": 0.3746, "step": 1121 }, { "epoch": 0.0719046398359395, "grad_norm": 2.534393202382834, "learning_rate": 1e-06, "loss": 0.371, "step": 1122 }, { "epoch": 0.07196872596770058, "grad_norm": 2.7817519211332393, "learning_rate": 1e-06, "loss": 0.4502, "step": 1123 }, { "epoch": 0.07203281209946168, "grad_norm": 2.9100386240706517, "learning_rate": 1e-06, "loss": 0.3884, "step": 1124 }, { "epoch": 0.07209689823122277, "grad_norm": 2.826915075639032, "learning_rate": 1e-06, "loss": 0.4718, "step": 1125 }, { "epoch": 0.07216098436298385, "grad_norm": 2.6944257999670826, "learning_rate": 1e-06, "loss": 0.4484, "step": 1126 }, { "epoch": 0.07222507049474494, "grad_norm": 2.4687223760052435, "learning_rate": 1e-06, "loss": 0.4122, "step": 1127 }, { "epoch": 0.07228915662650602, "grad_norm": 2.4178966947676646, "learning_rate": 1e-06, "loss": 0.3825, "step": 1128 }, { "epoch": 0.07235324275826711, "grad_norm": 2.752834047078145, "learning_rate": 1e-06, "loss": 0.4372, "step": 1129 }, { "epoch": 0.0724173288900282, "grad_norm": 2.6488729911222206, "learning_rate": 1e-06, "loss": 0.42, "step": 1130 }, { "epoch": 0.07248141502178929, "grad_norm": 2.501959418170512, "learning_rate": 1e-06, "loss": 0.3996, "step": 1131 }, { "epoch": 0.07254550115355037, "grad_norm": 2.596725048264129, "learning_rate": 1e-06, "loss": 0.401, "step": 1132 }, { "epoch": 0.07260958728531146, "grad_norm": 2.787403116588525, "learning_rate": 1e-06, "loss": 0.4004, "step": 1133 }, { "epoch": 0.07267367341707255, "grad_norm": 2.658465773541618, "learning_rate": 1e-06, "loss": 0.4132, "step": 1134 }, { "epoch": 0.07273775954883363, "grad_norm": 2.482592801042877, "learning_rate": 1e-06, "loss": 0.4216, "step": 1135 }, { "epoch": 0.07280184568059472, "grad_norm": 2.6974112554570926, "learning_rate": 1e-06, "loss": 0.4661, "step": 1136 }, { "epoch": 0.0728659318123558, "grad_norm": 2.7703393520834614, "learning_rate": 1e-06, "loss": 0.4758, "step": 1137 }, { "epoch": 0.0729300179441169, "grad_norm": 2.52696180165321, "learning_rate": 1e-06, "loss": 0.4251, "step": 1138 }, { "epoch": 0.07299410407587797, "grad_norm": 2.543291457166555, "learning_rate": 1e-06, "loss": 0.412, "step": 1139 }, { "epoch": 0.07305819020763907, "grad_norm": 2.518067877477988, "learning_rate": 1e-06, "loss": 0.4186, "step": 1140 }, { "epoch": 0.07312227633940016, "grad_norm": 2.649865343001732, "learning_rate": 1e-06, "loss": 0.4216, "step": 1141 }, { "epoch": 0.07318636247116124, "grad_norm": 2.6317013476687454, "learning_rate": 1e-06, "loss": 0.4166, "step": 1142 }, { "epoch": 0.07325044860292233, "grad_norm": 2.5713731021703294, "learning_rate": 1e-06, "loss": 0.3974, "step": 1143 }, { "epoch": 0.07331453473468341, "grad_norm": 2.7931203073361788, "learning_rate": 1e-06, "loss": 0.4184, "step": 1144 }, { "epoch": 0.0733786208664445, "grad_norm": 2.599003814107677, "learning_rate": 1e-06, "loss": 0.4507, "step": 1145 }, { "epoch": 0.07344270699820558, "grad_norm": 2.7684915380392354, "learning_rate": 1e-06, "loss": 0.4471, "step": 1146 }, { "epoch": 0.07350679312996668, "grad_norm": 2.4796130862109735, "learning_rate": 1e-06, "loss": 0.4646, "step": 1147 }, { "epoch": 0.07357087926172776, "grad_norm": 2.691420190189633, "learning_rate": 1e-06, "loss": 0.4762, "step": 1148 }, { "epoch": 0.07363496539348885, "grad_norm": 2.7664381558741065, "learning_rate": 1e-06, "loss": 0.448, "step": 1149 }, { "epoch": 0.07369905152524994, "grad_norm": 2.5162422680138814, "learning_rate": 1e-06, "loss": 0.4012, "step": 1150 }, { "epoch": 0.07376313765701102, "grad_norm": 2.5781952395234633, "learning_rate": 1e-06, "loss": 0.3675, "step": 1151 }, { "epoch": 0.07382722378877211, "grad_norm": 2.6625898353464157, "learning_rate": 1e-06, "loss": 0.379, "step": 1152 }, { "epoch": 0.0738913099205332, "grad_norm": 2.5901429074988553, "learning_rate": 1e-06, "loss": 0.4186, "step": 1153 }, { "epoch": 0.07395539605229429, "grad_norm": 2.7942392683449846, "learning_rate": 1e-06, "loss": 0.3987, "step": 1154 }, { "epoch": 0.07401948218405537, "grad_norm": 2.6654460748878286, "learning_rate": 1e-06, "loss": 0.409, "step": 1155 }, { "epoch": 0.07408356831581646, "grad_norm": 2.5476306648846294, "learning_rate": 1e-06, "loss": 0.4411, "step": 1156 }, { "epoch": 0.07414765444757754, "grad_norm": 2.591611279936759, "learning_rate": 1e-06, "loss": 0.4648, "step": 1157 }, { "epoch": 0.07421174057933863, "grad_norm": 2.6704534006186087, "learning_rate": 1e-06, "loss": 0.4079, "step": 1158 }, { "epoch": 0.07427582671109972, "grad_norm": 2.671768868673718, "learning_rate": 1e-06, "loss": 0.4416, "step": 1159 }, { "epoch": 0.0743399128428608, "grad_norm": 2.4873744200996404, "learning_rate": 1e-06, "loss": 0.4139, "step": 1160 }, { "epoch": 0.0744039989746219, "grad_norm": 2.4953535418400232, "learning_rate": 1e-06, "loss": 0.4331, "step": 1161 }, { "epoch": 0.07446808510638298, "grad_norm": 2.526056293976957, "learning_rate": 1e-06, "loss": 0.4408, "step": 1162 }, { "epoch": 0.07453217123814407, "grad_norm": 2.588818835559356, "learning_rate": 1e-06, "loss": 0.4776, "step": 1163 }, { "epoch": 0.07459625736990515, "grad_norm": 2.666576479172023, "learning_rate": 1e-06, "loss": 0.4057, "step": 1164 }, { "epoch": 0.07466034350166624, "grad_norm": 2.961651092072556, "learning_rate": 1e-06, "loss": 0.5216, "step": 1165 }, { "epoch": 0.07472442963342732, "grad_norm": 2.520589368360706, "learning_rate": 1e-06, "loss": 0.4646, "step": 1166 }, { "epoch": 0.07478851576518841, "grad_norm": 2.5775375985686435, "learning_rate": 1e-06, "loss": 0.4166, "step": 1167 }, { "epoch": 0.0748526018969495, "grad_norm": 2.618379504403705, "learning_rate": 1e-06, "loss": 0.3859, "step": 1168 }, { "epoch": 0.07491668802871058, "grad_norm": 2.701174665426732, "learning_rate": 1e-06, "loss": 0.4412, "step": 1169 }, { "epoch": 0.07498077416047168, "grad_norm": 2.879711459534598, "learning_rate": 1e-06, "loss": 0.4384, "step": 1170 }, { "epoch": 0.07504486029223276, "grad_norm": 2.437675394493719, "learning_rate": 1e-06, "loss": 0.4061, "step": 1171 }, { "epoch": 0.07510894642399385, "grad_norm": 2.378217037869276, "learning_rate": 1e-06, "loss": 0.4029, "step": 1172 }, { "epoch": 0.07517303255575493, "grad_norm": 2.6864533867239104, "learning_rate": 1e-06, "loss": 0.4255, "step": 1173 }, { "epoch": 0.07523711868751602, "grad_norm": 2.9070178703797755, "learning_rate": 1e-06, "loss": 0.4432, "step": 1174 }, { "epoch": 0.07530120481927711, "grad_norm": 2.394244267169891, "learning_rate": 1e-06, "loss": 0.3855, "step": 1175 }, { "epoch": 0.0753652909510382, "grad_norm": 2.613237904551317, "learning_rate": 1e-06, "loss": 0.3646, "step": 1176 }, { "epoch": 0.07542937708279929, "grad_norm": 2.5704274998773995, "learning_rate": 1e-06, "loss": 0.4264, "step": 1177 }, { "epoch": 0.07549346321456037, "grad_norm": 2.647086455310922, "learning_rate": 1e-06, "loss": 0.4598, "step": 1178 }, { "epoch": 0.07555754934632146, "grad_norm": 2.4851338144046866, "learning_rate": 1e-06, "loss": 0.3922, "step": 1179 }, { "epoch": 0.07562163547808254, "grad_norm": 2.6992112402066373, "learning_rate": 1e-06, "loss": 0.4165, "step": 1180 }, { "epoch": 0.07568572160984363, "grad_norm": 2.443187282946552, "learning_rate": 1e-06, "loss": 0.421, "step": 1181 }, { "epoch": 0.07574980774160471, "grad_norm": 2.5382215760682336, "learning_rate": 1e-06, "loss": 0.3895, "step": 1182 }, { "epoch": 0.0758138938733658, "grad_norm": 2.6296100830741005, "learning_rate": 1e-06, "loss": 0.427, "step": 1183 }, { "epoch": 0.0758779800051269, "grad_norm": 2.437492691585318, "learning_rate": 1e-06, "loss": 0.4015, "step": 1184 }, { "epoch": 0.07594206613688798, "grad_norm": 2.5072485842248082, "learning_rate": 1e-06, "loss": 0.3903, "step": 1185 }, { "epoch": 0.07600615226864907, "grad_norm": 2.6751785189294455, "learning_rate": 1e-06, "loss": 0.4258, "step": 1186 }, { "epoch": 0.07607023840041015, "grad_norm": 2.603241674792852, "learning_rate": 1e-06, "loss": 0.3732, "step": 1187 }, { "epoch": 0.07613432453217124, "grad_norm": 2.4459489689703724, "learning_rate": 1e-06, "loss": 0.4164, "step": 1188 }, { "epoch": 0.07619841066393232, "grad_norm": 2.936377164469067, "learning_rate": 1e-06, "loss": 0.4382, "step": 1189 }, { "epoch": 0.07626249679569341, "grad_norm": 2.581801369589373, "learning_rate": 1e-06, "loss": 0.4646, "step": 1190 }, { "epoch": 0.07632658292745449, "grad_norm": 2.6802303045344513, "learning_rate": 1e-06, "loss": 0.3503, "step": 1191 }, { "epoch": 0.07639066905921559, "grad_norm": 2.6291026196838994, "learning_rate": 1e-06, "loss": 0.3919, "step": 1192 }, { "epoch": 0.07645475519097668, "grad_norm": 2.5858158792263684, "learning_rate": 1e-06, "loss": 0.4262, "step": 1193 }, { "epoch": 0.07651884132273776, "grad_norm": 2.658961870365221, "learning_rate": 1e-06, "loss": 0.4472, "step": 1194 }, { "epoch": 0.07658292745449885, "grad_norm": 2.6170446639780445, "learning_rate": 1e-06, "loss": 0.3768, "step": 1195 }, { "epoch": 0.07664701358625993, "grad_norm": 2.449050801281128, "learning_rate": 1e-06, "loss": 0.4166, "step": 1196 }, { "epoch": 0.07671109971802102, "grad_norm": 2.510377162453613, "learning_rate": 1e-06, "loss": 0.4748, "step": 1197 }, { "epoch": 0.0767751858497821, "grad_norm": 2.408176741926398, "learning_rate": 1e-06, "loss": 0.3704, "step": 1198 }, { "epoch": 0.0768392719815432, "grad_norm": 2.510120365800847, "learning_rate": 1e-06, "loss": 0.4427, "step": 1199 }, { "epoch": 0.07690335811330429, "grad_norm": 2.570673514283029, "learning_rate": 1e-06, "loss": 0.3971, "step": 1200 }, { "epoch": 0.07696744424506537, "grad_norm": 2.560138302433304, "learning_rate": 1e-06, "loss": 0.4463, "step": 1201 }, { "epoch": 0.07703153037682646, "grad_norm": 2.6974205413109953, "learning_rate": 1e-06, "loss": 0.4467, "step": 1202 }, { "epoch": 0.07709561650858754, "grad_norm": 2.6092456235211183, "learning_rate": 1e-06, "loss": 0.3956, "step": 1203 }, { "epoch": 0.07715970264034863, "grad_norm": 2.64955554722162, "learning_rate": 1e-06, "loss": 0.4856, "step": 1204 }, { "epoch": 0.07722378877210971, "grad_norm": 2.8128688811623337, "learning_rate": 1e-06, "loss": 0.4009, "step": 1205 }, { "epoch": 0.0772878749038708, "grad_norm": 2.5192936294423074, "learning_rate": 1e-06, "loss": 0.4242, "step": 1206 }, { "epoch": 0.07735196103563188, "grad_norm": 2.68278659957716, "learning_rate": 1e-06, "loss": 0.4704, "step": 1207 }, { "epoch": 0.07741604716739298, "grad_norm": 2.6168633523606064, "learning_rate": 1e-06, "loss": 0.3387, "step": 1208 }, { "epoch": 0.07748013329915407, "grad_norm": 2.6114164138499887, "learning_rate": 1e-06, "loss": 0.4408, "step": 1209 }, { "epoch": 0.07754421943091515, "grad_norm": 2.6161792038015053, "learning_rate": 1e-06, "loss": 0.406, "step": 1210 }, { "epoch": 0.07760830556267624, "grad_norm": 2.7520314159563033, "learning_rate": 1e-06, "loss": 0.3833, "step": 1211 }, { "epoch": 0.07767239169443732, "grad_norm": 2.774588610799305, "learning_rate": 1e-06, "loss": 0.5036, "step": 1212 }, { "epoch": 0.07773647782619841, "grad_norm": 2.5033422537991994, "learning_rate": 1e-06, "loss": 0.4058, "step": 1213 }, { "epoch": 0.07780056395795949, "grad_norm": 2.73481240056627, "learning_rate": 1e-06, "loss": 0.4345, "step": 1214 }, { "epoch": 0.07786465008972059, "grad_norm": 2.6851998419349026, "learning_rate": 1e-06, "loss": 0.4647, "step": 1215 }, { "epoch": 0.07792873622148166, "grad_norm": 2.7237868971195596, "learning_rate": 1e-06, "loss": 0.4302, "step": 1216 }, { "epoch": 0.07799282235324276, "grad_norm": 2.541489190154758, "learning_rate": 1e-06, "loss": 0.3638, "step": 1217 }, { "epoch": 0.07805690848500385, "grad_norm": 2.464482316323183, "learning_rate": 1e-06, "loss": 0.4373, "step": 1218 }, { "epoch": 0.07812099461676493, "grad_norm": 2.572853991725539, "learning_rate": 1e-06, "loss": 0.4129, "step": 1219 }, { "epoch": 0.07818508074852602, "grad_norm": 2.6668459095154216, "learning_rate": 1e-06, "loss": 0.4249, "step": 1220 }, { "epoch": 0.0782491668802871, "grad_norm": 2.782296312797821, "learning_rate": 1e-06, "loss": 0.4296, "step": 1221 }, { "epoch": 0.0783132530120482, "grad_norm": 2.6235991335311937, "learning_rate": 1e-06, "loss": 0.4165, "step": 1222 }, { "epoch": 0.07837733914380927, "grad_norm": 2.7714978455242028, "learning_rate": 1e-06, "loss": 0.364, "step": 1223 }, { "epoch": 0.07844142527557037, "grad_norm": 2.3752856980488266, "learning_rate": 1e-06, "loss": 0.3848, "step": 1224 }, { "epoch": 0.07850551140733146, "grad_norm": 2.7992009765527515, "learning_rate": 1e-06, "loss": 0.3977, "step": 1225 }, { "epoch": 0.07856959753909254, "grad_norm": 2.633436222450441, "learning_rate": 1e-06, "loss": 0.4539, "step": 1226 }, { "epoch": 0.07863368367085363, "grad_norm": 2.577156182418178, "learning_rate": 1e-06, "loss": 0.4118, "step": 1227 }, { "epoch": 0.07869776980261471, "grad_norm": 2.720243320628882, "learning_rate": 1e-06, "loss": 0.4416, "step": 1228 }, { "epoch": 0.0787618559343758, "grad_norm": 2.7155698373395354, "learning_rate": 1e-06, "loss": 0.4508, "step": 1229 }, { "epoch": 0.07882594206613688, "grad_norm": 2.343026422126699, "learning_rate": 1e-06, "loss": 0.4059, "step": 1230 }, { "epoch": 0.07889002819789798, "grad_norm": 2.5559831262150574, "learning_rate": 1e-06, "loss": 0.4686, "step": 1231 }, { "epoch": 0.07895411432965906, "grad_norm": 2.493013880324132, "learning_rate": 1e-06, "loss": 0.4157, "step": 1232 }, { "epoch": 0.07901820046142015, "grad_norm": 2.5095068573703463, "learning_rate": 1e-06, "loss": 0.4078, "step": 1233 }, { "epoch": 0.07908228659318124, "grad_norm": 2.6581528818814975, "learning_rate": 1e-06, "loss": 0.4285, "step": 1234 }, { "epoch": 0.07914637272494232, "grad_norm": 2.7654271167249793, "learning_rate": 1e-06, "loss": 0.47, "step": 1235 }, { "epoch": 0.07921045885670341, "grad_norm": 2.6893412954625853, "learning_rate": 1e-06, "loss": 0.4038, "step": 1236 }, { "epoch": 0.0792745449884645, "grad_norm": 2.456340889110768, "learning_rate": 1e-06, "loss": 0.4103, "step": 1237 }, { "epoch": 0.07933863112022559, "grad_norm": 2.556312174382247, "learning_rate": 1e-06, "loss": 0.4516, "step": 1238 }, { "epoch": 0.07940271725198667, "grad_norm": 2.6753669316919417, "learning_rate": 1e-06, "loss": 0.4232, "step": 1239 }, { "epoch": 0.07946680338374776, "grad_norm": 2.6035452011928903, "learning_rate": 1e-06, "loss": 0.3405, "step": 1240 }, { "epoch": 0.07953088951550884, "grad_norm": 2.4434288383339946, "learning_rate": 1e-06, "loss": 0.4881, "step": 1241 }, { "epoch": 0.07959497564726993, "grad_norm": 2.662120824414917, "learning_rate": 1e-06, "loss": 0.382, "step": 1242 }, { "epoch": 0.07965906177903102, "grad_norm": 2.6177518200932, "learning_rate": 1e-06, "loss": 0.4306, "step": 1243 }, { "epoch": 0.0797231479107921, "grad_norm": 2.4114846966775767, "learning_rate": 1e-06, "loss": 0.3994, "step": 1244 }, { "epoch": 0.0797872340425532, "grad_norm": 2.6835316926141863, "learning_rate": 1e-06, "loss": 0.4706, "step": 1245 }, { "epoch": 0.07985132017431427, "grad_norm": 2.5651819030486496, "learning_rate": 1e-06, "loss": 0.4262, "step": 1246 }, { "epoch": 0.07991540630607537, "grad_norm": 2.3934181889049126, "learning_rate": 1e-06, "loss": 0.4224, "step": 1247 }, { "epoch": 0.07997949243783645, "grad_norm": 2.643579877871764, "learning_rate": 1e-06, "loss": 0.4524, "step": 1248 }, { "epoch": 0.08004357856959754, "grad_norm": 2.5313599900999337, "learning_rate": 1e-06, "loss": 0.364, "step": 1249 }, { "epoch": 0.08010766470135863, "grad_norm": 2.619558211523681, "learning_rate": 1e-06, "loss": 0.3862, "step": 1250 }, { "epoch": 0.08017175083311971, "grad_norm": 2.5146257629070097, "learning_rate": 1e-06, "loss": 0.4005, "step": 1251 }, { "epoch": 0.0802358369648808, "grad_norm": 2.591349223638374, "learning_rate": 1e-06, "loss": 0.4145, "step": 1252 }, { "epoch": 0.08029992309664188, "grad_norm": 2.59587465957244, "learning_rate": 1e-06, "loss": 0.437, "step": 1253 }, { "epoch": 0.08036400922840298, "grad_norm": 2.9832966117856667, "learning_rate": 1e-06, "loss": 0.4246, "step": 1254 }, { "epoch": 0.08042809536016406, "grad_norm": 2.6040546777081657, "learning_rate": 1e-06, "loss": 0.3766, "step": 1255 }, { "epoch": 0.08049218149192515, "grad_norm": 2.496979332905621, "learning_rate": 1e-06, "loss": 0.4219, "step": 1256 }, { "epoch": 0.08055626762368623, "grad_norm": 2.629851175282855, "learning_rate": 1e-06, "loss": 0.3876, "step": 1257 }, { "epoch": 0.08062035375544732, "grad_norm": 2.5674825630257896, "learning_rate": 1e-06, "loss": 0.4325, "step": 1258 }, { "epoch": 0.08068443988720841, "grad_norm": 2.940799171634024, "learning_rate": 1e-06, "loss": 0.468, "step": 1259 }, { "epoch": 0.0807485260189695, "grad_norm": 2.650948845848417, "learning_rate": 1e-06, "loss": 0.4211, "step": 1260 }, { "epoch": 0.08081261215073059, "grad_norm": 2.679500087359039, "learning_rate": 1e-06, "loss": 0.4538, "step": 1261 }, { "epoch": 0.08087669828249167, "grad_norm": 2.390614137526815, "learning_rate": 1e-06, "loss": 0.3643, "step": 1262 }, { "epoch": 0.08094078441425276, "grad_norm": 2.6276093482133867, "learning_rate": 1e-06, "loss": 0.4292, "step": 1263 }, { "epoch": 0.08100487054601384, "grad_norm": 2.61781590271726, "learning_rate": 1e-06, "loss": 0.4178, "step": 1264 }, { "epoch": 0.08106895667777493, "grad_norm": 2.779503761768041, "learning_rate": 1e-06, "loss": 0.4117, "step": 1265 }, { "epoch": 0.08113304280953601, "grad_norm": 2.561366232789244, "learning_rate": 1e-06, "loss": 0.4184, "step": 1266 }, { "epoch": 0.0811971289412971, "grad_norm": 2.8717807574636174, "learning_rate": 1e-06, "loss": 0.4089, "step": 1267 }, { "epoch": 0.0812612150730582, "grad_norm": 2.7921707046886977, "learning_rate": 1e-06, "loss": 0.4527, "step": 1268 }, { "epoch": 0.08132530120481928, "grad_norm": 2.5928313310418565, "learning_rate": 1e-06, "loss": 0.4103, "step": 1269 }, { "epoch": 0.08138938733658037, "grad_norm": 2.9418199515370085, "learning_rate": 1e-06, "loss": 0.4114, "step": 1270 }, { "epoch": 0.08145347346834145, "grad_norm": 2.474771537185245, "learning_rate": 1e-06, "loss": 0.4398, "step": 1271 }, { "epoch": 0.08151755960010254, "grad_norm": 2.4288233134888104, "learning_rate": 1e-06, "loss": 0.4071, "step": 1272 }, { "epoch": 0.08158164573186362, "grad_norm": 2.728783148369517, "learning_rate": 1e-06, "loss": 0.3944, "step": 1273 }, { "epoch": 0.08164573186362471, "grad_norm": 2.595463299855329, "learning_rate": 1e-06, "loss": 0.3961, "step": 1274 }, { "epoch": 0.08170981799538579, "grad_norm": 2.4186223539565472, "learning_rate": 1e-06, "loss": 0.3815, "step": 1275 }, { "epoch": 0.08177390412714688, "grad_norm": 2.5691674213334412, "learning_rate": 1e-06, "loss": 0.4347, "step": 1276 }, { "epoch": 0.08183799025890798, "grad_norm": 2.772988911794295, "learning_rate": 1e-06, "loss": 0.4498, "step": 1277 }, { "epoch": 0.08190207639066906, "grad_norm": 2.5770033358021176, "learning_rate": 1e-06, "loss": 0.4306, "step": 1278 }, { "epoch": 0.08196616252243015, "grad_norm": 2.7479896736468534, "learning_rate": 1e-06, "loss": 0.4102, "step": 1279 }, { "epoch": 0.08203024865419123, "grad_norm": 2.463361879732375, "learning_rate": 1e-06, "loss": 0.4234, "step": 1280 }, { "epoch": 0.08209433478595232, "grad_norm": 2.5773689957088974, "learning_rate": 1e-06, "loss": 0.4061, "step": 1281 }, { "epoch": 0.0821584209177134, "grad_norm": 2.7031134347973205, "learning_rate": 1e-06, "loss": 0.3718, "step": 1282 }, { "epoch": 0.0822225070494745, "grad_norm": 2.3685374130275436, "learning_rate": 1e-06, "loss": 0.4034, "step": 1283 }, { "epoch": 0.08228659318123559, "grad_norm": 2.5898467699040904, "learning_rate": 1e-06, "loss": 0.5035, "step": 1284 }, { "epoch": 0.08235067931299667, "grad_norm": 2.566905337207484, "learning_rate": 1e-06, "loss": 0.4253, "step": 1285 }, { "epoch": 0.08241476544475776, "grad_norm": 2.7563322922109172, "learning_rate": 1e-06, "loss": 0.3726, "step": 1286 }, { "epoch": 0.08247885157651884, "grad_norm": 2.491496676280438, "learning_rate": 1e-06, "loss": 0.37, "step": 1287 }, { "epoch": 0.08254293770827993, "grad_norm": 2.6730510178537608, "learning_rate": 1e-06, "loss": 0.384, "step": 1288 }, { "epoch": 0.08260702384004101, "grad_norm": 2.607473514155551, "learning_rate": 1e-06, "loss": 0.4547, "step": 1289 }, { "epoch": 0.0826711099718021, "grad_norm": 2.664953352790864, "learning_rate": 1e-06, "loss": 0.4457, "step": 1290 }, { "epoch": 0.08273519610356318, "grad_norm": 2.9065089288452675, "learning_rate": 1e-06, "loss": 0.4133, "step": 1291 }, { "epoch": 0.08279928223532428, "grad_norm": 2.761268347894189, "learning_rate": 1e-06, "loss": 0.4013, "step": 1292 }, { "epoch": 0.08286336836708537, "grad_norm": 2.53874487571015, "learning_rate": 1e-06, "loss": 0.4745, "step": 1293 }, { "epoch": 0.08292745449884645, "grad_norm": 2.5396881198425603, "learning_rate": 1e-06, "loss": 0.4437, "step": 1294 }, { "epoch": 0.08299154063060754, "grad_norm": 2.547768008887249, "learning_rate": 1e-06, "loss": 0.3651, "step": 1295 }, { "epoch": 0.08305562676236862, "grad_norm": 2.499185905675141, "learning_rate": 1e-06, "loss": 0.4153, "step": 1296 }, { "epoch": 0.08311971289412971, "grad_norm": 2.4836012038697457, "learning_rate": 1e-06, "loss": 0.4259, "step": 1297 }, { "epoch": 0.08318379902589079, "grad_norm": 2.621155242220239, "learning_rate": 1e-06, "loss": 0.4255, "step": 1298 }, { "epoch": 0.08324788515765189, "grad_norm": 2.592565931013178, "learning_rate": 1e-06, "loss": 0.3688, "step": 1299 }, { "epoch": 0.08331197128941296, "grad_norm": 2.5498345680795924, "learning_rate": 1e-06, "loss": 0.3821, "step": 1300 }, { "epoch": 0.08337605742117406, "grad_norm": 2.4929209233901295, "learning_rate": 1e-06, "loss": 0.4552, "step": 1301 }, { "epoch": 0.08344014355293515, "grad_norm": 2.6882977211963905, "learning_rate": 1e-06, "loss": 0.4062, "step": 1302 }, { "epoch": 0.08350422968469623, "grad_norm": 2.473705037392068, "learning_rate": 1e-06, "loss": 0.3817, "step": 1303 }, { "epoch": 0.08356831581645732, "grad_norm": 2.5317476990261465, "learning_rate": 1e-06, "loss": 0.3704, "step": 1304 }, { "epoch": 0.0836324019482184, "grad_norm": 2.5664114423982074, "learning_rate": 1e-06, "loss": 0.4016, "step": 1305 }, { "epoch": 0.0836964880799795, "grad_norm": 2.5912791482520943, "learning_rate": 1e-06, "loss": 0.392, "step": 1306 }, { "epoch": 0.08376057421174057, "grad_norm": 2.9901280232724976, "learning_rate": 1e-06, "loss": 0.4434, "step": 1307 }, { "epoch": 0.08382466034350167, "grad_norm": 2.5967756023713995, "learning_rate": 1e-06, "loss": 0.3927, "step": 1308 }, { "epoch": 0.08388874647526276, "grad_norm": 2.73521407336545, "learning_rate": 1e-06, "loss": 0.4241, "step": 1309 }, { "epoch": 0.08395283260702384, "grad_norm": 2.919428684494416, "learning_rate": 1e-06, "loss": 0.4205, "step": 1310 }, { "epoch": 0.08401691873878493, "grad_norm": 2.7758438993320484, "learning_rate": 1e-06, "loss": 0.4519, "step": 1311 }, { "epoch": 0.08408100487054601, "grad_norm": 2.715976314244008, "learning_rate": 1e-06, "loss": 0.3993, "step": 1312 }, { "epoch": 0.0841450910023071, "grad_norm": 2.5924326736887213, "learning_rate": 1e-06, "loss": 0.4025, "step": 1313 }, { "epoch": 0.08420917713406818, "grad_norm": 2.7752283664217017, "learning_rate": 1e-06, "loss": 0.3956, "step": 1314 }, { "epoch": 0.08427326326582928, "grad_norm": 2.4160129671872337, "learning_rate": 1e-06, "loss": 0.4106, "step": 1315 }, { "epoch": 0.08433734939759036, "grad_norm": 2.5407463488739053, "learning_rate": 1e-06, "loss": 0.4163, "step": 1316 }, { "epoch": 0.08440143552935145, "grad_norm": 2.801380710310921, "learning_rate": 1e-06, "loss": 0.3871, "step": 1317 }, { "epoch": 0.08446552166111254, "grad_norm": 2.6269565817549925, "learning_rate": 1e-06, "loss": 0.4203, "step": 1318 }, { "epoch": 0.08452960779287362, "grad_norm": 2.5243065786237944, "learning_rate": 1e-06, "loss": 0.373, "step": 1319 }, { "epoch": 0.08459369392463471, "grad_norm": 2.8116644364078986, "learning_rate": 1e-06, "loss": 0.4163, "step": 1320 }, { "epoch": 0.08465778005639579, "grad_norm": 2.575799701620765, "learning_rate": 1e-06, "loss": 0.4277, "step": 1321 }, { "epoch": 0.08472186618815689, "grad_norm": 2.75865288811887, "learning_rate": 1e-06, "loss": 0.4687, "step": 1322 }, { "epoch": 0.08478595231991796, "grad_norm": 2.7042258614953467, "learning_rate": 1e-06, "loss": 0.4051, "step": 1323 }, { "epoch": 0.08485003845167906, "grad_norm": 2.6044252860407213, "learning_rate": 1e-06, "loss": 0.4243, "step": 1324 }, { "epoch": 0.08491412458344014, "grad_norm": 2.655140452465128, "learning_rate": 1e-06, "loss": 0.4242, "step": 1325 }, { "epoch": 0.08497821071520123, "grad_norm": 2.6694681152353072, "learning_rate": 1e-06, "loss": 0.4693, "step": 1326 }, { "epoch": 0.08504229684696232, "grad_norm": 2.6321438736595364, "learning_rate": 1e-06, "loss": 0.447, "step": 1327 }, { "epoch": 0.0851063829787234, "grad_norm": 2.4939365988778572, "learning_rate": 1e-06, "loss": 0.384, "step": 1328 }, { "epoch": 0.0851704691104845, "grad_norm": 2.7686831102984186, "learning_rate": 1e-06, "loss": 0.4096, "step": 1329 }, { "epoch": 0.08523455524224557, "grad_norm": 2.4734804109635533, "learning_rate": 1e-06, "loss": 0.4172, "step": 1330 }, { "epoch": 0.08529864137400667, "grad_norm": 2.3310389802867224, "learning_rate": 1e-06, "loss": 0.4499, "step": 1331 }, { "epoch": 0.08536272750576775, "grad_norm": 2.5427992914352733, "learning_rate": 1e-06, "loss": 0.5163, "step": 1332 }, { "epoch": 0.08542681363752884, "grad_norm": 2.550742214022169, "learning_rate": 1e-06, "loss": 0.3351, "step": 1333 }, { "epoch": 0.08549089976928993, "grad_norm": 2.3491321424112823, "learning_rate": 1e-06, "loss": 0.3927, "step": 1334 }, { "epoch": 0.08555498590105101, "grad_norm": 7.183934088342243, "learning_rate": 1e-06, "loss": 0.4585, "step": 1335 }, { "epoch": 0.0856190720328121, "grad_norm": 2.727510164110277, "learning_rate": 1e-06, "loss": 0.403, "step": 1336 }, { "epoch": 0.08568315816457318, "grad_norm": 2.463122951016014, "learning_rate": 1e-06, "loss": 0.3738, "step": 1337 }, { "epoch": 0.08574724429633428, "grad_norm": 2.4246794004524856, "learning_rate": 1e-06, "loss": 0.4131, "step": 1338 }, { "epoch": 0.08581133042809536, "grad_norm": 2.505910282661219, "learning_rate": 1e-06, "loss": 0.3954, "step": 1339 }, { "epoch": 0.08587541655985645, "grad_norm": 2.7449797274573906, "learning_rate": 1e-06, "loss": 0.4332, "step": 1340 }, { "epoch": 0.08593950269161753, "grad_norm": 2.602286871420609, "learning_rate": 1e-06, "loss": 0.4568, "step": 1341 }, { "epoch": 0.08600358882337862, "grad_norm": 2.4878615903294303, "learning_rate": 1e-06, "loss": 0.3549, "step": 1342 }, { "epoch": 0.08606767495513971, "grad_norm": 2.377510719482693, "learning_rate": 1e-06, "loss": 0.4109, "step": 1343 }, { "epoch": 0.0861317610869008, "grad_norm": 2.5420518872600377, "learning_rate": 1e-06, "loss": 0.402, "step": 1344 }, { "epoch": 0.08619584721866189, "grad_norm": 2.619739587147752, "learning_rate": 1e-06, "loss": 0.431, "step": 1345 }, { "epoch": 0.08625993335042297, "grad_norm": 2.5009317514537237, "learning_rate": 1e-06, "loss": 0.433, "step": 1346 }, { "epoch": 0.08632401948218406, "grad_norm": 3.129107498488318, "learning_rate": 1e-06, "loss": 0.3725, "step": 1347 }, { "epoch": 0.08638810561394514, "grad_norm": 2.475758470143281, "learning_rate": 1e-06, "loss": 0.45, "step": 1348 }, { "epoch": 0.08645219174570623, "grad_norm": 2.181341026400154, "learning_rate": 1e-06, "loss": 0.3971, "step": 1349 }, { "epoch": 0.08651627787746731, "grad_norm": 2.4954104610238024, "learning_rate": 1e-06, "loss": 0.4054, "step": 1350 }, { "epoch": 0.0865803640092284, "grad_norm": 2.6628673858751255, "learning_rate": 1e-06, "loss": 0.4717, "step": 1351 }, { "epoch": 0.0866444501409895, "grad_norm": 2.6090665119674967, "learning_rate": 1e-06, "loss": 0.4544, "step": 1352 }, { "epoch": 0.08670853627275057, "grad_norm": 2.6018719333061986, "learning_rate": 1e-06, "loss": 0.4399, "step": 1353 }, { "epoch": 0.08677262240451167, "grad_norm": 2.4434199739785827, "learning_rate": 1e-06, "loss": 0.415, "step": 1354 }, { "epoch": 0.08683670853627275, "grad_norm": 2.769828363674665, "learning_rate": 1e-06, "loss": 0.4179, "step": 1355 }, { "epoch": 0.08690079466803384, "grad_norm": 2.7397769472949247, "learning_rate": 1e-06, "loss": 0.4056, "step": 1356 }, { "epoch": 0.08696488079979492, "grad_norm": 2.6172378352859726, "learning_rate": 1e-06, "loss": 0.4165, "step": 1357 }, { "epoch": 0.08702896693155601, "grad_norm": 2.539813707503197, "learning_rate": 1e-06, "loss": 0.4665, "step": 1358 }, { "epoch": 0.08709305306331709, "grad_norm": 2.6327431750979695, "learning_rate": 1e-06, "loss": 0.4685, "step": 1359 }, { "epoch": 0.08715713919507818, "grad_norm": 2.7709842385532455, "learning_rate": 1e-06, "loss": 0.495, "step": 1360 }, { "epoch": 0.08722122532683928, "grad_norm": 2.5015074677105753, "learning_rate": 1e-06, "loss": 0.4131, "step": 1361 }, { "epoch": 0.08728531145860036, "grad_norm": 2.5041650372229305, "learning_rate": 1e-06, "loss": 0.4544, "step": 1362 }, { "epoch": 0.08734939759036145, "grad_norm": 2.71401356392254, "learning_rate": 1e-06, "loss": 0.4458, "step": 1363 }, { "epoch": 0.08741348372212253, "grad_norm": 2.531157702107666, "learning_rate": 1e-06, "loss": 0.3734, "step": 1364 }, { "epoch": 0.08747756985388362, "grad_norm": 2.626679235090447, "learning_rate": 1e-06, "loss": 0.4067, "step": 1365 }, { "epoch": 0.0875416559856447, "grad_norm": 2.5951080452068704, "learning_rate": 1e-06, "loss": 0.3876, "step": 1366 }, { "epoch": 0.0876057421174058, "grad_norm": 2.646236280137177, "learning_rate": 1e-06, "loss": 0.4023, "step": 1367 }, { "epoch": 0.08766982824916689, "grad_norm": 2.565685965978931, "learning_rate": 1e-06, "loss": 0.3883, "step": 1368 }, { "epoch": 0.08773391438092797, "grad_norm": 2.566570629622735, "learning_rate": 1e-06, "loss": 0.406, "step": 1369 }, { "epoch": 0.08779800051268906, "grad_norm": 2.558331006355462, "learning_rate": 1e-06, "loss": 0.4179, "step": 1370 }, { "epoch": 0.08786208664445014, "grad_norm": 2.812026841029743, "learning_rate": 1e-06, "loss": 0.4102, "step": 1371 }, { "epoch": 0.08792617277621123, "grad_norm": 2.593914945874754, "learning_rate": 1e-06, "loss": 0.4115, "step": 1372 }, { "epoch": 0.08799025890797231, "grad_norm": 2.673191899027595, "learning_rate": 1e-06, "loss": 0.4581, "step": 1373 }, { "epoch": 0.0880543450397334, "grad_norm": 2.6943890741457452, "learning_rate": 1e-06, "loss": 0.3679, "step": 1374 }, { "epoch": 0.08811843117149448, "grad_norm": 2.7105087759870248, "learning_rate": 1e-06, "loss": 0.3861, "step": 1375 }, { "epoch": 0.08818251730325558, "grad_norm": 2.6588773058390567, "learning_rate": 1e-06, "loss": 0.4262, "step": 1376 }, { "epoch": 0.08824660343501667, "grad_norm": 2.624668121030694, "learning_rate": 1e-06, "loss": 0.4408, "step": 1377 }, { "epoch": 0.08831068956677775, "grad_norm": 2.567696520761241, "learning_rate": 1e-06, "loss": 0.3944, "step": 1378 }, { "epoch": 0.08837477569853884, "grad_norm": 2.525791516217746, "learning_rate": 1e-06, "loss": 0.3737, "step": 1379 }, { "epoch": 0.08843886183029992, "grad_norm": 2.5589736672483916, "learning_rate": 1e-06, "loss": 0.376, "step": 1380 }, { "epoch": 0.08850294796206101, "grad_norm": 2.8070743006208843, "learning_rate": 1e-06, "loss": 0.456, "step": 1381 }, { "epoch": 0.08856703409382209, "grad_norm": 2.5993874207613006, "learning_rate": 1e-06, "loss": 0.4197, "step": 1382 }, { "epoch": 0.08863112022558318, "grad_norm": 2.5051066249655825, "learning_rate": 1e-06, "loss": 0.385, "step": 1383 }, { "epoch": 0.08869520635734426, "grad_norm": 2.405748485125088, "learning_rate": 1e-06, "loss": 0.4049, "step": 1384 }, { "epoch": 0.08875929248910536, "grad_norm": 2.6145829424746965, "learning_rate": 1e-06, "loss": 0.4247, "step": 1385 }, { "epoch": 0.08882337862086645, "grad_norm": 2.6173900351223964, "learning_rate": 1e-06, "loss": 0.4221, "step": 1386 }, { "epoch": 0.08888746475262753, "grad_norm": 2.6806113776153246, "learning_rate": 1e-06, "loss": 0.4292, "step": 1387 }, { "epoch": 0.08895155088438862, "grad_norm": 2.796773269226567, "learning_rate": 1e-06, "loss": 0.4554, "step": 1388 }, { "epoch": 0.0890156370161497, "grad_norm": 2.570155349023824, "learning_rate": 1e-06, "loss": 0.4609, "step": 1389 }, { "epoch": 0.0890797231479108, "grad_norm": 2.651717663982742, "learning_rate": 1e-06, "loss": 0.3871, "step": 1390 }, { "epoch": 0.08914380927967187, "grad_norm": 2.9890601911555574, "learning_rate": 1e-06, "loss": 0.4246, "step": 1391 }, { "epoch": 0.08920789541143297, "grad_norm": 2.816018482470761, "learning_rate": 1e-06, "loss": 0.4105, "step": 1392 }, { "epoch": 0.08927198154319406, "grad_norm": 2.806504598338548, "learning_rate": 1e-06, "loss": 0.4205, "step": 1393 }, { "epoch": 0.08933606767495514, "grad_norm": 2.773138983571822, "learning_rate": 1e-06, "loss": 0.439, "step": 1394 }, { "epoch": 0.08940015380671623, "grad_norm": 2.685056882343485, "learning_rate": 1e-06, "loss": 0.4444, "step": 1395 }, { "epoch": 0.08946423993847731, "grad_norm": 2.9257692535431485, "learning_rate": 1e-06, "loss": 0.4641, "step": 1396 }, { "epoch": 0.0895283260702384, "grad_norm": 2.4710812395258905, "learning_rate": 1e-06, "loss": 0.4463, "step": 1397 }, { "epoch": 0.08959241220199948, "grad_norm": 2.70288826605684, "learning_rate": 1e-06, "loss": 0.4057, "step": 1398 }, { "epoch": 0.08965649833376058, "grad_norm": 2.4331622087280045, "learning_rate": 1e-06, "loss": 0.452, "step": 1399 }, { "epoch": 0.08972058446552166, "grad_norm": 2.324146163478173, "learning_rate": 1e-06, "loss": 0.4225, "step": 1400 }, { "epoch": 0.08978467059728275, "grad_norm": 2.6508507688510754, "learning_rate": 1e-06, "loss": 0.4437, "step": 1401 }, { "epoch": 0.08984875672904384, "grad_norm": 2.6863035227529344, "learning_rate": 1e-06, "loss": 0.4067, "step": 1402 }, { "epoch": 0.08991284286080492, "grad_norm": 2.5644169132895493, "learning_rate": 1e-06, "loss": 0.431, "step": 1403 }, { "epoch": 0.08997692899256601, "grad_norm": 2.4924959261581043, "learning_rate": 1e-06, "loss": 0.4128, "step": 1404 }, { "epoch": 0.09004101512432709, "grad_norm": 2.680924691244528, "learning_rate": 1e-06, "loss": 0.4216, "step": 1405 }, { "epoch": 0.09010510125608819, "grad_norm": 2.5648825557723898, "learning_rate": 1e-06, "loss": 0.3999, "step": 1406 }, { "epoch": 0.09016918738784926, "grad_norm": 2.6750967093694578, "learning_rate": 1e-06, "loss": 0.3778, "step": 1407 }, { "epoch": 0.09023327351961036, "grad_norm": 2.6244024961560157, "learning_rate": 1e-06, "loss": 0.4582, "step": 1408 }, { "epoch": 0.09029735965137144, "grad_norm": 2.632057969960539, "learning_rate": 1e-06, "loss": 0.4357, "step": 1409 }, { "epoch": 0.09036144578313253, "grad_norm": 2.6751906186648946, "learning_rate": 1e-06, "loss": 0.413, "step": 1410 }, { "epoch": 0.09042553191489362, "grad_norm": 2.561674070651734, "learning_rate": 1e-06, "loss": 0.4063, "step": 1411 }, { "epoch": 0.0904896180466547, "grad_norm": 2.6877044774483827, "learning_rate": 1e-06, "loss": 0.4689, "step": 1412 }, { "epoch": 0.0905537041784158, "grad_norm": 2.5896953889358607, "learning_rate": 1e-06, "loss": 0.4043, "step": 1413 }, { "epoch": 0.09061779031017687, "grad_norm": 2.421302707798909, "learning_rate": 1e-06, "loss": 0.3714, "step": 1414 }, { "epoch": 0.09068187644193797, "grad_norm": 2.7539329450540277, "learning_rate": 1e-06, "loss": 0.3832, "step": 1415 }, { "epoch": 0.09074596257369905, "grad_norm": 2.6356933313255277, "learning_rate": 1e-06, "loss": 0.3381, "step": 1416 }, { "epoch": 0.09081004870546014, "grad_norm": 2.36289210209112, "learning_rate": 1e-06, "loss": 0.4371, "step": 1417 }, { "epoch": 0.09087413483722123, "grad_norm": 2.6024801679680905, "learning_rate": 1e-06, "loss": 0.4301, "step": 1418 }, { "epoch": 0.09093822096898231, "grad_norm": 2.5452100945357117, "learning_rate": 1e-06, "loss": 0.4905, "step": 1419 }, { "epoch": 0.0910023071007434, "grad_norm": 2.471533347199074, "learning_rate": 1e-06, "loss": 0.3574, "step": 1420 }, { "epoch": 0.09106639323250448, "grad_norm": 2.684599051779771, "learning_rate": 1e-06, "loss": 0.4455, "step": 1421 }, { "epoch": 0.09113047936426558, "grad_norm": 2.6706289104425345, "learning_rate": 1e-06, "loss": 0.4298, "step": 1422 }, { "epoch": 0.09119456549602666, "grad_norm": 2.7372861347057706, "learning_rate": 1e-06, "loss": 0.4044, "step": 1423 }, { "epoch": 0.09125865162778775, "grad_norm": 2.6311831051474974, "learning_rate": 1e-06, "loss": 0.4472, "step": 1424 }, { "epoch": 0.09132273775954883, "grad_norm": 2.505239288524563, "learning_rate": 1e-06, "loss": 0.4608, "step": 1425 }, { "epoch": 0.09138682389130992, "grad_norm": 2.455646364875542, "learning_rate": 1e-06, "loss": 0.4488, "step": 1426 }, { "epoch": 0.09145091002307101, "grad_norm": 2.732934220116128, "learning_rate": 1e-06, "loss": 0.4206, "step": 1427 }, { "epoch": 0.09151499615483209, "grad_norm": 2.74323981935251, "learning_rate": 1e-06, "loss": 0.4234, "step": 1428 }, { "epoch": 0.09157908228659319, "grad_norm": 2.7158136731219646, "learning_rate": 1e-06, "loss": 0.4209, "step": 1429 }, { "epoch": 0.09164316841835426, "grad_norm": 2.6247631977279595, "learning_rate": 1e-06, "loss": 0.4421, "step": 1430 }, { "epoch": 0.09170725455011536, "grad_norm": 2.637304136296822, "learning_rate": 1e-06, "loss": 0.484, "step": 1431 }, { "epoch": 0.09177134068187644, "grad_norm": 2.852998156294522, "learning_rate": 1e-06, "loss": 0.4155, "step": 1432 }, { "epoch": 0.09183542681363753, "grad_norm": 2.781417431625871, "learning_rate": 1e-06, "loss": 0.4231, "step": 1433 }, { "epoch": 0.09189951294539861, "grad_norm": 2.628138673747872, "learning_rate": 1e-06, "loss": 0.3949, "step": 1434 }, { "epoch": 0.0919635990771597, "grad_norm": 2.667218051474725, "learning_rate": 1e-06, "loss": 0.3949, "step": 1435 }, { "epoch": 0.0920276852089208, "grad_norm": 2.786734610663218, "learning_rate": 1e-06, "loss": 0.4462, "step": 1436 }, { "epoch": 0.09209177134068187, "grad_norm": 2.5652996360198013, "learning_rate": 1e-06, "loss": 0.4285, "step": 1437 }, { "epoch": 0.09215585747244297, "grad_norm": 2.614618685141593, "learning_rate": 1e-06, "loss": 0.4204, "step": 1438 }, { "epoch": 0.09221994360420405, "grad_norm": 2.60209536356399, "learning_rate": 1e-06, "loss": 0.4186, "step": 1439 }, { "epoch": 0.09228402973596514, "grad_norm": 2.669408041556916, "learning_rate": 1e-06, "loss": 0.4278, "step": 1440 }, { "epoch": 0.09234811586772622, "grad_norm": 2.5842411481731635, "learning_rate": 1e-06, "loss": 0.3888, "step": 1441 }, { "epoch": 0.09241220199948731, "grad_norm": 2.4305355503889077, "learning_rate": 1e-06, "loss": 0.4215, "step": 1442 }, { "epoch": 0.0924762881312484, "grad_norm": 2.6165578444721995, "learning_rate": 1e-06, "loss": 0.4132, "step": 1443 }, { "epoch": 0.09254037426300948, "grad_norm": 2.559076650522681, "learning_rate": 1e-06, "loss": 0.468, "step": 1444 }, { "epoch": 0.09260446039477058, "grad_norm": 2.789845249548872, "learning_rate": 1e-06, "loss": 0.4552, "step": 1445 }, { "epoch": 0.09266854652653166, "grad_norm": 2.602140727223901, "learning_rate": 1e-06, "loss": 0.3931, "step": 1446 }, { "epoch": 0.09273263265829275, "grad_norm": 2.7240550984204632, "learning_rate": 1e-06, "loss": 0.431, "step": 1447 }, { "epoch": 0.09279671879005383, "grad_norm": 2.5188910583037294, "learning_rate": 1e-06, "loss": 0.4161, "step": 1448 }, { "epoch": 0.09286080492181492, "grad_norm": 2.5994000439724054, "learning_rate": 1e-06, "loss": 0.4029, "step": 1449 }, { "epoch": 0.092924891053576, "grad_norm": 2.6047446760108954, "learning_rate": 1e-06, "loss": 0.4495, "step": 1450 }, { "epoch": 0.0929889771853371, "grad_norm": 2.537159921941756, "learning_rate": 1e-06, "loss": 0.4281, "step": 1451 }, { "epoch": 0.09305306331709819, "grad_norm": 2.7029522825334475, "learning_rate": 1e-06, "loss": 0.4348, "step": 1452 }, { "epoch": 0.09311714944885927, "grad_norm": 2.5101563201833286, "learning_rate": 1e-06, "loss": 0.3314, "step": 1453 }, { "epoch": 0.09318123558062036, "grad_norm": 2.5375880368645523, "learning_rate": 1e-06, "loss": 0.3821, "step": 1454 }, { "epoch": 0.09324532171238144, "grad_norm": 2.630162183349999, "learning_rate": 1e-06, "loss": 0.4482, "step": 1455 }, { "epoch": 0.09330940784414253, "grad_norm": 2.770034028918006, "learning_rate": 1e-06, "loss": 0.4052, "step": 1456 }, { "epoch": 0.09337349397590361, "grad_norm": 2.5993245566678507, "learning_rate": 1e-06, "loss": 0.3816, "step": 1457 }, { "epoch": 0.0934375801076647, "grad_norm": 2.64778391952047, "learning_rate": 1e-06, "loss": 0.4181, "step": 1458 }, { "epoch": 0.09350166623942578, "grad_norm": 2.7001931669814008, "learning_rate": 1e-06, "loss": 0.3884, "step": 1459 }, { "epoch": 0.09356575237118687, "grad_norm": 2.7310231499002313, "learning_rate": 1e-06, "loss": 0.4263, "step": 1460 }, { "epoch": 0.09362983850294797, "grad_norm": 2.5333368747726444, "learning_rate": 1e-06, "loss": 0.4217, "step": 1461 }, { "epoch": 0.09369392463470905, "grad_norm": 2.573504068195423, "learning_rate": 1e-06, "loss": 0.4123, "step": 1462 }, { "epoch": 0.09375801076647014, "grad_norm": 2.586567658193003, "learning_rate": 1e-06, "loss": 0.4025, "step": 1463 }, { "epoch": 0.09382209689823122, "grad_norm": 2.561777711659588, "learning_rate": 1e-06, "loss": 0.4438, "step": 1464 }, { "epoch": 0.09388618302999231, "grad_norm": 2.8983923631902617, "learning_rate": 1e-06, "loss": 0.4852, "step": 1465 }, { "epoch": 0.09395026916175339, "grad_norm": 2.6375367410550017, "learning_rate": 1e-06, "loss": 0.39, "step": 1466 }, { "epoch": 0.09401435529351448, "grad_norm": 2.8361863431121375, "learning_rate": 1e-06, "loss": 0.3983, "step": 1467 }, { "epoch": 0.09407844142527556, "grad_norm": 2.6342268308154058, "learning_rate": 1e-06, "loss": 0.4052, "step": 1468 }, { "epoch": 0.09414252755703666, "grad_norm": 2.608017548825659, "learning_rate": 1e-06, "loss": 0.4211, "step": 1469 }, { "epoch": 0.09420661368879775, "grad_norm": 2.449627254791894, "learning_rate": 1e-06, "loss": 0.4113, "step": 1470 }, { "epoch": 0.09427069982055883, "grad_norm": 2.4811525375719556, "learning_rate": 1e-06, "loss": 0.377, "step": 1471 }, { "epoch": 0.09433478595231992, "grad_norm": 2.6371030366074617, "learning_rate": 1e-06, "loss": 0.389, "step": 1472 }, { "epoch": 0.094398872084081, "grad_norm": 2.499025005200448, "learning_rate": 1e-06, "loss": 0.4178, "step": 1473 }, { "epoch": 0.0944629582158421, "grad_norm": 2.449095256001988, "learning_rate": 1e-06, "loss": 0.4191, "step": 1474 }, { "epoch": 0.09452704434760317, "grad_norm": 2.5906968749902073, "learning_rate": 1e-06, "loss": 0.4099, "step": 1475 }, { "epoch": 0.09459113047936427, "grad_norm": 2.7404480397879336, "learning_rate": 1e-06, "loss": 0.456, "step": 1476 }, { "epoch": 0.09465521661112536, "grad_norm": 2.72078552266344, "learning_rate": 1e-06, "loss": 0.4425, "step": 1477 }, { "epoch": 0.09471930274288644, "grad_norm": 2.6857231786650315, "learning_rate": 1e-06, "loss": 0.4032, "step": 1478 }, { "epoch": 0.09478338887464753, "grad_norm": 2.788718665825192, "learning_rate": 1e-06, "loss": 0.4309, "step": 1479 }, { "epoch": 0.09484747500640861, "grad_norm": 2.5435811015181735, "learning_rate": 1e-06, "loss": 0.4508, "step": 1480 }, { "epoch": 0.0949115611381697, "grad_norm": 2.6178528404278567, "learning_rate": 1e-06, "loss": 0.375, "step": 1481 }, { "epoch": 0.09497564726993078, "grad_norm": 2.690631217688418, "learning_rate": 1e-06, "loss": 0.4573, "step": 1482 }, { "epoch": 0.09503973340169188, "grad_norm": 2.8404450272707704, "learning_rate": 1e-06, "loss": 0.4252, "step": 1483 }, { "epoch": 0.09510381953345295, "grad_norm": 2.546535336347818, "learning_rate": 1e-06, "loss": 0.3952, "step": 1484 }, { "epoch": 0.09516790566521405, "grad_norm": 2.825421925183025, "learning_rate": 1e-06, "loss": 0.4806, "step": 1485 }, { "epoch": 0.09523199179697514, "grad_norm": 2.6961291272224654, "learning_rate": 1e-06, "loss": 0.3879, "step": 1486 }, { "epoch": 0.09529607792873622, "grad_norm": 2.7748192803256577, "learning_rate": 1e-06, "loss": 0.4424, "step": 1487 }, { "epoch": 0.09536016406049731, "grad_norm": 2.598558063958087, "learning_rate": 1e-06, "loss": 0.483, "step": 1488 }, { "epoch": 0.09542425019225839, "grad_norm": 2.5230579044355554, "learning_rate": 1e-06, "loss": 0.4463, "step": 1489 }, { "epoch": 0.09548833632401948, "grad_norm": 2.7678832307087347, "learning_rate": 1e-06, "loss": 0.3969, "step": 1490 }, { "epoch": 0.09555242245578056, "grad_norm": 2.6987595821256365, "learning_rate": 1e-06, "loss": 0.4439, "step": 1491 }, { "epoch": 0.09561650858754166, "grad_norm": 2.5431991351880656, "learning_rate": 1e-06, "loss": 0.4091, "step": 1492 }, { "epoch": 0.09568059471930274, "grad_norm": 2.684245256638835, "learning_rate": 1e-06, "loss": 0.4585, "step": 1493 }, { "epoch": 0.09574468085106383, "grad_norm": 2.552815133009136, "learning_rate": 1e-06, "loss": 0.4242, "step": 1494 }, { "epoch": 0.09580876698282492, "grad_norm": 2.547549931917095, "learning_rate": 1e-06, "loss": 0.3841, "step": 1495 }, { "epoch": 0.095872853114586, "grad_norm": 2.583837338026024, "learning_rate": 1e-06, "loss": 0.4405, "step": 1496 }, { "epoch": 0.0959369392463471, "grad_norm": 2.6677862575379168, "learning_rate": 1e-06, "loss": 0.473, "step": 1497 }, { "epoch": 0.09600102537810817, "grad_norm": 2.9204657147492794, "learning_rate": 1e-06, "loss": 0.5309, "step": 1498 }, { "epoch": 0.09606511150986927, "grad_norm": 2.677637058756192, "learning_rate": 1e-06, "loss": 0.3905, "step": 1499 }, { "epoch": 0.09612919764163035, "grad_norm": 2.7688834587766435, "learning_rate": 1e-06, "loss": 0.4399, "step": 1500 }, { "epoch": 0.09619328377339144, "grad_norm": 2.740388435781337, "learning_rate": 1e-06, "loss": 0.4204, "step": 1501 }, { "epoch": 0.09625736990515253, "grad_norm": 2.3972218665471514, "learning_rate": 1e-06, "loss": 0.4034, "step": 1502 }, { "epoch": 0.09632145603691361, "grad_norm": 2.6633730067165526, "learning_rate": 1e-06, "loss": 0.4054, "step": 1503 }, { "epoch": 0.0963855421686747, "grad_norm": 2.456909671813905, "learning_rate": 1e-06, "loss": 0.4143, "step": 1504 }, { "epoch": 0.09644962830043578, "grad_norm": 2.6087008055470235, "learning_rate": 1e-06, "loss": 0.4382, "step": 1505 }, { "epoch": 0.09651371443219688, "grad_norm": 2.934760294530396, "learning_rate": 1e-06, "loss": 0.4752, "step": 1506 }, { "epoch": 0.09657780056395796, "grad_norm": 2.7777807926127185, "learning_rate": 1e-06, "loss": 0.3972, "step": 1507 }, { "epoch": 0.09664188669571905, "grad_norm": 2.5245228852446124, "learning_rate": 1e-06, "loss": 0.4838, "step": 1508 }, { "epoch": 0.09670597282748013, "grad_norm": 2.556699635104518, "learning_rate": 1e-06, "loss": 0.3796, "step": 1509 }, { "epoch": 0.09677005895924122, "grad_norm": 2.6734250594176294, "learning_rate": 1e-06, "loss": 0.3988, "step": 1510 }, { "epoch": 0.09683414509100231, "grad_norm": 2.779651783350114, "learning_rate": 1e-06, "loss": 0.4375, "step": 1511 }, { "epoch": 0.09689823122276339, "grad_norm": 2.7072349412684886, "learning_rate": 1e-06, "loss": 0.4093, "step": 1512 }, { "epoch": 0.09696231735452449, "grad_norm": 2.9895637028649746, "learning_rate": 1e-06, "loss": 0.4021, "step": 1513 }, { "epoch": 0.09702640348628556, "grad_norm": 2.6235342457993416, "learning_rate": 1e-06, "loss": 0.4582, "step": 1514 }, { "epoch": 0.09709048961804666, "grad_norm": 2.6685946420553273, "learning_rate": 1e-06, "loss": 0.491, "step": 1515 }, { "epoch": 0.09715457574980774, "grad_norm": 2.6912226523276086, "learning_rate": 1e-06, "loss": 0.3626, "step": 1516 }, { "epoch": 0.09721866188156883, "grad_norm": 2.5055739358842346, "learning_rate": 1e-06, "loss": 0.4461, "step": 1517 }, { "epoch": 0.09728274801332991, "grad_norm": 3.0575599131857327, "learning_rate": 1e-06, "loss": 0.3804, "step": 1518 }, { "epoch": 0.097346834145091, "grad_norm": 2.4144353166108634, "learning_rate": 1e-06, "loss": 0.383, "step": 1519 }, { "epoch": 0.0974109202768521, "grad_norm": 2.485527122413705, "learning_rate": 1e-06, "loss": 0.4088, "step": 1520 }, { "epoch": 0.09747500640861317, "grad_norm": 2.727634679983602, "learning_rate": 1e-06, "loss": 0.391, "step": 1521 }, { "epoch": 0.09753909254037427, "grad_norm": 2.688199231816162, "learning_rate": 1e-06, "loss": 0.4088, "step": 1522 }, { "epoch": 0.09760317867213535, "grad_norm": 2.597138289171914, "learning_rate": 1e-06, "loss": 0.436, "step": 1523 }, { "epoch": 0.09766726480389644, "grad_norm": 2.4989362972544966, "learning_rate": 1e-06, "loss": 0.3662, "step": 1524 }, { "epoch": 0.09773135093565752, "grad_norm": 2.6467220685385926, "learning_rate": 1e-06, "loss": 0.4828, "step": 1525 }, { "epoch": 0.09779543706741861, "grad_norm": 2.570899529787848, "learning_rate": 1e-06, "loss": 0.4307, "step": 1526 }, { "epoch": 0.0978595231991797, "grad_norm": 2.5504651035630803, "learning_rate": 1e-06, "loss": 0.4346, "step": 1527 }, { "epoch": 0.09792360933094078, "grad_norm": 2.658388218479122, "learning_rate": 1e-06, "loss": 0.3835, "step": 1528 }, { "epoch": 0.09798769546270188, "grad_norm": 2.5854205257184955, "learning_rate": 1e-06, "loss": 0.4067, "step": 1529 }, { "epoch": 0.09805178159446296, "grad_norm": 2.549513145186835, "learning_rate": 1e-06, "loss": 0.4431, "step": 1530 }, { "epoch": 0.09811586772622405, "grad_norm": 2.7201381698108715, "learning_rate": 1e-06, "loss": 0.4101, "step": 1531 }, { "epoch": 0.09817995385798513, "grad_norm": 2.5040689871867934, "learning_rate": 1e-06, "loss": 0.3734, "step": 1532 }, { "epoch": 0.09824403998974622, "grad_norm": 2.714762788781154, "learning_rate": 1e-06, "loss": 0.4831, "step": 1533 }, { "epoch": 0.0983081261215073, "grad_norm": 2.552541268474821, "learning_rate": 1e-06, "loss": 0.3674, "step": 1534 }, { "epoch": 0.09837221225326839, "grad_norm": 2.57284676055446, "learning_rate": 1e-06, "loss": 0.3585, "step": 1535 }, { "epoch": 0.09843629838502949, "grad_norm": 2.721429265477203, "learning_rate": 1e-06, "loss": 0.4301, "step": 1536 }, { "epoch": 0.09850038451679057, "grad_norm": 2.774050751464011, "learning_rate": 1e-06, "loss": 0.4887, "step": 1537 }, { "epoch": 0.09856447064855166, "grad_norm": 2.7744727611174964, "learning_rate": 1e-06, "loss": 0.3775, "step": 1538 }, { "epoch": 0.09862855678031274, "grad_norm": 2.510003290063987, "learning_rate": 1e-06, "loss": 0.3722, "step": 1539 }, { "epoch": 0.09869264291207383, "grad_norm": 2.535552620114136, "learning_rate": 1e-06, "loss": 0.4426, "step": 1540 }, { "epoch": 0.09875672904383491, "grad_norm": 2.564367168182366, "learning_rate": 1e-06, "loss": 0.4083, "step": 1541 }, { "epoch": 0.098820815175596, "grad_norm": 2.7394137242469947, "learning_rate": 1e-06, "loss": 0.4465, "step": 1542 }, { "epoch": 0.09888490130735708, "grad_norm": 2.545975890654039, "learning_rate": 1e-06, "loss": 0.3485, "step": 1543 }, { "epoch": 0.09894898743911817, "grad_norm": 2.774458084017365, "learning_rate": 1e-06, "loss": 0.4045, "step": 1544 }, { "epoch": 0.09901307357087927, "grad_norm": 2.5574985197779805, "learning_rate": 1e-06, "loss": 0.4363, "step": 1545 }, { "epoch": 0.09907715970264035, "grad_norm": 2.6854709411400104, "learning_rate": 1e-06, "loss": 0.4384, "step": 1546 }, { "epoch": 0.09914124583440144, "grad_norm": 2.596155628657547, "learning_rate": 1e-06, "loss": 0.3991, "step": 1547 }, { "epoch": 0.09920533196616252, "grad_norm": 2.725319331359241, "learning_rate": 1e-06, "loss": 0.4757, "step": 1548 }, { "epoch": 0.09926941809792361, "grad_norm": 2.4312408844513835, "learning_rate": 1e-06, "loss": 0.3759, "step": 1549 }, { "epoch": 0.09933350422968469, "grad_norm": 3.0975479055506665, "learning_rate": 1e-06, "loss": 0.405, "step": 1550 }, { "epoch": 0.09939759036144578, "grad_norm": 2.6897850500620626, "learning_rate": 1e-06, "loss": 0.478, "step": 1551 }, { "epoch": 0.09946167649320686, "grad_norm": 2.4014922518328414, "learning_rate": 1e-06, "loss": 0.3817, "step": 1552 }, { "epoch": 0.09952576262496796, "grad_norm": 2.5781390912257507, "learning_rate": 1e-06, "loss": 0.3929, "step": 1553 }, { "epoch": 0.09958984875672905, "grad_norm": 2.6814168690887734, "learning_rate": 1e-06, "loss": 0.4167, "step": 1554 }, { "epoch": 0.09965393488849013, "grad_norm": 2.8229320999460366, "learning_rate": 1e-06, "loss": 0.4424, "step": 1555 }, { "epoch": 0.09971802102025122, "grad_norm": 2.671523799350974, "learning_rate": 1e-06, "loss": 0.417, "step": 1556 }, { "epoch": 0.0997821071520123, "grad_norm": 2.816317550333446, "learning_rate": 1e-06, "loss": 0.4308, "step": 1557 }, { "epoch": 0.0998461932837734, "grad_norm": 2.8263116326882476, "learning_rate": 1e-06, "loss": 0.461, "step": 1558 }, { "epoch": 0.09991027941553447, "grad_norm": 2.773664845095712, "learning_rate": 1e-06, "loss": 0.3831, "step": 1559 }, { "epoch": 0.09997436554729557, "grad_norm": 2.465271500495268, "learning_rate": 1e-06, "loss": 0.4111, "step": 1560 }, { "epoch": 0.10003845167905666, "grad_norm": 2.5630490824452696, "learning_rate": 1e-06, "loss": 0.3999, "step": 1561 }, { "epoch": 0.10010253781081774, "grad_norm": 2.443896329725649, "learning_rate": 1e-06, "loss": 0.3384, "step": 1562 }, { "epoch": 0.10016662394257883, "grad_norm": 2.679277621913989, "learning_rate": 1e-06, "loss": 0.4006, "step": 1563 }, { "epoch": 0.10023071007433991, "grad_norm": 2.687935697149264, "learning_rate": 1e-06, "loss": 0.3993, "step": 1564 }, { "epoch": 0.100294796206101, "grad_norm": 2.672827135397584, "learning_rate": 1e-06, "loss": 0.4441, "step": 1565 }, { "epoch": 0.10035888233786208, "grad_norm": 2.5859584068225305, "learning_rate": 1e-06, "loss": 0.3911, "step": 1566 }, { "epoch": 0.10042296846962318, "grad_norm": 2.6623095578704845, "learning_rate": 1e-06, "loss": 0.4869, "step": 1567 }, { "epoch": 0.10048705460138425, "grad_norm": 2.751166514445602, "learning_rate": 1e-06, "loss": 0.3948, "step": 1568 }, { "epoch": 0.10055114073314535, "grad_norm": 2.5789596779897996, "learning_rate": 1e-06, "loss": 0.4196, "step": 1569 }, { "epoch": 0.10061522686490644, "grad_norm": 2.733584791811764, "learning_rate": 1e-06, "loss": 0.4257, "step": 1570 }, { "epoch": 0.10067931299666752, "grad_norm": 2.771740197510259, "learning_rate": 1e-06, "loss": 0.4188, "step": 1571 }, { "epoch": 0.10074339912842861, "grad_norm": 2.8752303657524387, "learning_rate": 1e-06, "loss": 0.4678, "step": 1572 }, { "epoch": 0.10080748526018969, "grad_norm": 2.558260771757404, "learning_rate": 1e-06, "loss": 0.3632, "step": 1573 }, { "epoch": 0.10087157139195078, "grad_norm": 2.5885122104751193, "learning_rate": 1e-06, "loss": 0.3969, "step": 1574 }, { "epoch": 0.10093565752371186, "grad_norm": 2.535083774597389, "learning_rate": 1e-06, "loss": 0.413, "step": 1575 }, { "epoch": 0.10099974365547296, "grad_norm": 2.4010853501994966, "learning_rate": 1e-06, "loss": 0.451, "step": 1576 }, { "epoch": 0.10106382978723404, "grad_norm": 2.649504940578061, "learning_rate": 1e-06, "loss": 0.4474, "step": 1577 }, { "epoch": 0.10112791591899513, "grad_norm": 2.410664702100842, "learning_rate": 1e-06, "loss": 0.3643, "step": 1578 }, { "epoch": 0.10119200205075622, "grad_norm": 2.614304760449613, "learning_rate": 1e-06, "loss": 0.4721, "step": 1579 }, { "epoch": 0.1012560881825173, "grad_norm": 2.5811032100103417, "learning_rate": 1e-06, "loss": 0.4441, "step": 1580 }, { "epoch": 0.1013201743142784, "grad_norm": 2.479567803200828, "learning_rate": 1e-06, "loss": 0.4105, "step": 1581 }, { "epoch": 0.10138426044603947, "grad_norm": 2.822694534916483, "learning_rate": 1e-06, "loss": 0.4662, "step": 1582 }, { "epoch": 0.10144834657780057, "grad_norm": 2.7523444736975406, "learning_rate": 1e-06, "loss": 0.4016, "step": 1583 }, { "epoch": 0.10151243270956165, "grad_norm": 2.7855545296692408, "learning_rate": 1e-06, "loss": 0.4851, "step": 1584 }, { "epoch": 0.10157651884132274, "grad_norm": 2.69264141081618, "learning_rate": 1e-06, "loss": 0.4044, "step": 1585 }, { "epoch": 0.10164060497308383, "grad_norm": 2.5649058822118764, "learning_rate": 1e-06, "loss": 0.3722, "step": 1586 }, { "epoch": 0.10170469110484491, "grad_norm": 2.5866879553520246, "learning_rate": 1e-06, "loss": 0.4265, "step": 1587 }, { "epoch": 0.101768777236606, "grad_norm": 2.4330295355820577, "learning_rate": 1e-06, "loss": 0.4152, "step": 1588 }, { "epoch": 0.10183286336836708, "grad_norm": 2.570725714010572, "learning_rate": 1e-06, "loss": 0.3802, "step": 1589 }, { "epoch": 0.10189694950012818, "grad_norm": 2.6275641529435845, "learning_rate": 1e-06, "loss": 0.3901, "step": 1590 }, { "epoch": 0.10196103563188925, "grad_norm": 2.7067442146798943, "learning_rate": 1e-06, "loss": 0.4823, "step": 1591 }, { "epoch": 0.10202512176365035, "grad_norm": 2.4594883517967885, "learning_rate": 1e-06, "loss": 0.3733, "step": 1592 }, { "epoch": 0.10208920789541143, "grad_norm": 2.588977297029969, "learning_rate": 1e-06, "loss": 0.4596, "step": 1593 }, { "epoch": 0.10215329402717252, "grad_norm": 2.6727644456390935, "learning_rate": 1e-06, "loss": 0.4501, "step": 1594 }, { "epoch": 0.10221738015893361, "grad_norm": 2.5011109727208813, "learning_rate": 1e-06, "loss": 0.3772, "step": 1595 }, { "epoch": 0.10228146629069469, "grad_norm": 2.729101743448196, "learning_rate": 1e-06, "loss": 0.3365, "step": 1596 }, { "epoch": 0.10234555242245578, "grad_norm": 2.7459192458327633, "learning_rate": 1e-06, "loss": 0.4438, "step": 1597 }, { "epoch": 0.10240963855421686, "grad_norm": 2.9193827751910133, "learning_rate": 1e-06, "loss": 0.4325, "step": 1598 }, { "epoch": 0.10247372468597796, "grad_norm": 2.5884629470491247, "learning_rate": 1e-06, "loss": 0.4163, "step": 1599 }, { "epoch": 0.10253781081773904, "grad_norm": 2.6180046140478512, "learning_rate": 1e-06, "loss": 0.4598, "step": 1600 }, { "epoch": 0.10260189694950013, "grad_norm": 2.6215411725840774, "learning_rate": 1e-06, "loss": 0.4109, "step": 1601 }, { "epoch": 0.10266598308126121, "grad_norm": 2.7502317600306654, "learning_rate": 1e-06, "loss": 0.4366, "step": 1602 }, { "epoch": 0.1027300692130223, "grad_norm": 2.473642725106791, "learning_rate": 1e-06, "loss": 0.3513, "step": 1603 }, { "epoch": 0.1027941553447834, "grad_norm": 3.027137495649638, "learning_rate": 1e-06, "loss": 0.4232, "step": 1604 }, { "epoch": 0.10285824147654447, "grad_norm": 2.589351540135811, "learning_rate": 1e-06, "loss": 0.3928, "step": 1605 }, { "epoch": 0.10292232760830557, "grad_norm": 2.605232656127217, "learning_rate": 1e-06, "loss": 0.3833, "step": 1606 }, { "epoch": 0.10298641374006665, "grad_norm": 2.5160966475689373, "learning_rate": 1e-06, "loss": 0.4443, "step": 1607 }, { "epoch": 0.10305049987182774, "grad_norm": 2.9291249601398945, "learning_rate": 1e-06, "loss": 0.3807, "step": 1608 }, { "epoch": 0.10311458600358882, "grad_norm": 2.5567951767508417, "learning_rate": 1e-06, "loss": 0.4061, "step": 1609 }, { "epoch": 0.10317867213534991, "grad_norm": 2.73170975100072, "learning_rate": 1e-06, "loss": 0.4131, "step": 1610 }, { "epoch": 0.103242758267111, "grad_norm": 2.635090816171602, "learning_rate": 1e-06, "loss": 0.3814, "step": 1611 }, { "epoch": 0.10330684439887208, "grad_norm": 2.461341775873606, "learning_rate": 1e-06, "loss": 0.4498, "step": 1612 }, { "epoch": 0.10337093053063318, "grad_norm": 2.62340434297987, "learning_rate": 1e-06, "loss": 0.4303, "step": 1613 }, { "epoch": 0.10343501666239426, "grad_norm": 2.418377756799505, "learning_rate": 1e-06, "loss": 0.423, "step": 1614 }, { "epoch": 0.10349910279415535, "grad_norm": 2.480451698537808, "learning_rate": 1e-06, "loss": 0.4039, "step": 1615 }, { "epoch": 0.10356318892591643, "grad_norm": 2.5930294009627506, "learning_rate": 1e-06, "loss": 0.4249, "step": 1616 }, { "epoch": 0.10362727505767752, "grad_norm": 2.5585904418749568, "learning_rate": 1e-06, "loss": 0.4271, "step": 1617 }, { "epoch": 0.1036913611894386, "grad_norm": 2.8192930639241442, "learning_rate": 1e-06, "loss": 0.4281, "step": 1618 }, { "epoch": 0.10375544732119969, "grad_norm": 2.6233847466921216, "learning_rate": 1e-06, "loss": 0.4337, "step": 1619 }, { "epoch": 0.10381953345296079, "grad_norm": 2.476019776497116, "learning_rate": 1e-06, "loss": 0.3443, "step": 1620 }, { "epoch": 0.10388361958472186, "grad_norm": 2.65079762175131, "learning_rate": 1e-06, "loss": 0.4388, "step": 1621 }, { "epoch": 0.10394770571648296, "grad_norm": 2.617383658663692, "learning_rate": 1e-06, "loss": 0.4207, "step": 1622 }, { "epoch": 0.10401179184824404, "grad_norm": 2.6226899184245305, "learning_rate": 1e-06, "loss": 0.4373, "step": 1623 }, { "epoch": 0.10407587798000513, "grad_norm": 2.6357591101547806, "learning_rate": 1e-06, "loss": 0.4235, "step": 1624 }, { "epoch": 0.10413996411176621, "grad_norm": 2.6520449138547604, "learning_rate": 1e-06, "loss": 0.4487, "step": 1625 }, { "epoch": 0.1042040502435273, "grad_norm": 2.6677348499168803, "learning_rate": 1e-06, "loss": 0.3955, "step": 1626 }, { "epoch": 0.10426813637528838, "grad_norm": 2.485785549204256, "learning_rate": 1e-06, "loss": 0.4121, "step": 1627 }, { "epoch": 0.10433222250704947, "grad_norm": 2.6469670405473655, "learning_rate": 1e-06, "loss": 0.4526, "step": 1628 }, { "epoch": 0.10439630863881057, "grad_norm": 2.6217926498103394, "learning_rate": 1e-06, "loss": 0.4237, "step": 1629 }, { "epoch": 0.10446039477057165, "grad_norm": 2.5329404663917767, "learning_rate": 1e-06, "loss": 0.4267, "step": 1630 }, { "epoch": 0.10452448090233274, "grad_norm": 2.799258811339821, "learning_rate": 1e-06, "loss": 0.3584, "step": 1631 }, { "epoch": 0.10458856703409382, "grad_norm": 2.6785670772725885, "learning_rate": 1e-06, "loss": 0.4077, "step": 1632 }, { "epoch": 0.10465265316585491, "grad_norm": 2.8053785359525754, "learning_rate": 1e-06, "loss": 0.4357, "step": 1633 }, { "epoch": 0.10471673929761599, "grad_norm": 2.617661148969151, "learning_rate": 1e-06, "loss": 0.4612, "step": 1634 }, { "epoch": 0.10478082542937708, "grad_norm": 2.7631606730879077, "learning_rate": 1e-06, "loss": 0.4398, "step": 1635 }, { "epoch": 0.10484491156113818, "grad_norm": 2.6745121058036068, "learning_rate": 1e-06, "loss": 0.3933, "step": 1636 }, { "epoch": 0.10490899769289926, "grad_norm": 2.6261615730588677, "learning_rate": 1e-06, "loss": 0.4276, "step": 1637 }, { "epoch": 0.10497308382466035, "grad_norm": 2.4662550009165747, "learning_rate": 1e-06, "loss": 0.3995, "step": 1638 }, { "epoch": 0.10503716995642143, "grad_norm": 2.586298212854455, "learning_rate": 1e-06, "loss": 0.3858, "step": 1639 }, { "epoch": 0.10510125608818252, "grad_norm": 2.6534127259916374, "learning_rate": 1e-06, "loss": 0.4182, "step": 1640 }, { "epoch": 0.1051653422199436, "grad_norm": 2.795573062823909, "learning_rate": 1e-06, "loss": 0.4032, "step": 1641 }, { "epoch": 0.10522942835170469, "grad_norm": 2.7539546932567767, "learning_rate": 1e-06, "loss": 0.4529, "step": 1642 }, { "epoch": 0.10529351448346577, "grad_norm": 2.6383536806239123, "learning_rate": 1e-06, "loss": 0.4243, "step": 1643 }, { "epoch": 0.10535760061522687, "grad_norm": 2.5524878339737436, "learning_rate": 1e-06, "loss": 0.4276, "step": 1644 }, { "epoch": 0.10542168674698796, "grad_norm": 2.365115184404631, "learning_rate": 1e-06, "loss": 0.4012, "step": 1645 }, { "epoch": 0.10548577287874904, "grad_norm": 2.562456499015141, "learning_rate": 1e-06, "loss": 0.4182, "step": 1646 }, { "epoch": 0.10554985901051013, "grad_norm": 2.540218757528349, "learning_rate": 1e-06, "loss": 0.3861, "step": 1647 }, { "epoch": 0.10561394514227121, "grad_norm": 2.7507708144988645, "learning_rate": 1e-06, "loss": 0.4476, "step": 1648 }, { "epoch": 0.1056780312740323, "grad_norm": 2.569575329697874, "learning_rate": 1e-06, "loss": 0.4077, "step": 1649 }, { "epoch": 0.10574211740579338, "grad_norm": 2.820273901570955, "learning_rate": 1e-06, "loss": 0.3955, "step": 1650 }, { "epoch": 0.10580620353755447, "grad_norm": 2.5636949127298423, "learning_rate": 1e-06, "loss": 0.4195, "step": 1651 }, { "epoch": 0.10587028966931555, "grad_norm": 2.4593346132223326, "learning_rate": 1e-06, "loss": 0.4317, "step": 1652 }, { "epoch": 0.10593437580107665, "grad_norm": 2.4987644523028387, "learning_rate": 1e-06, "loss": 0.3765, "step": 1653 }, { "epoch": 0.10599846193283774, "grad_norm": 2.658818194022743, "learning_rate": 1e-06, "loss": 0.4174, "step": 1654 }, { "epoch": 0.10606254806459882, "grad_norm": 2.5958535933853213, "learning_rate": 1e-06, "loss": 0.3846, "step": 1655 }, { "epoch": 0.10612663419635991, "grad_norm": 2.664277085039511, "learning_rate": 1e-06, "loss": 0.4129, "step": 1656 }, { "epoch": 0.10619072032812099, "grad_norm": 2.6513772408963665, "learning_rate": 1e-06, "loss": 0.4107, "step": 1657 }, { "epoch": 0.10625480645988208, "grad_norm": 2.7469582827871566, "learning_rate": 1e-06, "loss": 0.4148, "step": 1658 }, { "epoch": 0.10631889259164316, "grad_norm": 2.7045725526549576, "learning_rate": 1e-06, "loss": 0.4139, "step": 1659 }, { "epoch": 0.10638297872340426, "grad_norm": 2.623229446283425, "learning_rate": 1e-06, "loss": 0.4439, "step": 1660 }, { "epoch": 0.10644706485516534, "grad_norm": 2.586774207872217, "learning_rate": 1e-06, "loss": 0.4244, "step": 1661 }, { "epoch": 0.10651115098692643, "grad_norm": 2.953381892885236, "learning_rate": 1e-06, "loss": 0.4256, "step": 1662 }, { "epoch": 0.10657523711868752, "grad_norm": 2.769340797223471, "learning_rate": 1e-06, "loss": 0.4559, "step": 1663 }, { "epoch": 0.1066393232504486, "grad_norm": 2.638792643534101, "learning_rate": 1e-06, "loss": 0.4563, "step": 1664 }, { "epoch": 0.1067034093822097, "grad_norm": 2.780562883204113, "learning_rate": 1e-06, "loss": 0.4674, "step": 1665 }, { "epoch": 0.10676749551397077, "grad_norm": 2.6954405632484923, "learning_rate": 1e-06, "loss": 0.4098, "step": 1666 }, { "epoch": 0.10683158164573187, "grad_norm": 2.7618401225967273, "learning_rate": 1e-06, "loss": 0.4703, "step": 1667 }, { "epoch": 0.10689566777749294, "grad_norm": 2.5574016056264837, "learning_rate": 1e-06, "loss": 0.4101, "step": 1668 }, { "epoch": 0.10695975390925404, "grad_norm": 2.8001123266252077, "learning_rate": 1e-06, "loss": 0.3912, "step": 1669 }, { "epoch": 0.10702384004101513, "grad_norm": 2.378615768916347, "learning_rate": 1e-06, "loss": 0.3575, "step": 1670 }, { "epoch": 0.10708792617277621, "grad_norm": 2.8999182844615885, "learning_rate": 1e-06, "loss": 0.4575, "step": 1671 }, { "epoch": 0.1071520123045373, "grad_norm": 2.698293857397677, "learning_rate": 1e-06, "loss": 0.4647, "step": 1672 }, { "epoch": 0.10721609843629838, "grad_norm": 2.7334643214109278, "learning_rate": 1e-06, "loss": 0.4029, "step": 1673 }, { "epoch": 0.10728018456805948, "grad_norm": 2.711296438189429, "learning_rate": 1e-06, "loss": 0.3844, "step": 1674 }, { "epoch": 0.10734427069982055, "grad_norm": 2.3686553393233916, "learning_rate": 1e-06, "loss": 0.404, "step": 1675 }, { "epoch": 0.10740835683158165, "grad_norm": 2.4640210766920694, "learning_rate": 1e-06, "loss": 0.4225, "step": 1676 }, { "epoch": 0.10747244296334273, "grad_norm": 2.694372214956423, "learning_rate": 1e-06, "loss": 0.3985, "step": 1677 }, { "epoch": 0.10753652909510382, "grad_norm": 2.629781628214181, "learning_rate": 1e-06, "loss": 0.464, "step": 1678 }, { "epoch": 0.10760061522686491, "grad_norm": 2.491325092259068, "learning_rate": 1e-06, "loss": 0.4328, "step": 1679 }, { "epoch": 0.10766470135862599, "grad_norm": 2.788493606737418, "learning_rate": 1e-06, "loss": 0.3886, "step": 1680 }, { "epoch": 0.10772878749038708, "grad_norm": 2.916286988190873, "learning_rate": 1e-06, "loss": 0.4411, "step": 1681 }, { "epoch": 0.10779287362214816, "grad_norm": 2.583616470716141, "learning_rate": 1e-06, "loss": 0.4642, "step": 1682 }, { "epoch": 0.10785695975390926, "grad_norm": 2.4721597951784906, "learning_rate": 1e-06, "loss": 0.3899, "step": 1683 }, { "epoch": 0.10792104588567034, "grad_norm": 2.67527569790069, "learning_rate": 1e-06, "loss": 0.4089, "step": 1684 }, { "epoch": 0.10798513201743143, "grad_norm": 2.5895651711542, "learning_rate": 1e-06, "loss": 0.3923, "step": 1685 }, { "epoch": 0.10804921814919251, "grad_norm": 2.5846016622522403, "learning_rate": 1e-06, "loss": 0.4352, "step": 1686 }, { "epoch": 0.1081133042809536, "grad_norm": 2.777517387535592, "learning_rate": 1e-06, "loss": 0.3758, "step": 1687 }, { "epoch": 0.1081773904127147, "grad_norm": 2.6384771823414557, "learning_rate": 1e-06, "loss": 0.395, "step": 1688 }, { "epoch": 0.10824147654447577, "grad_norm": 2.3574131829286817, "learning_rate": 1e-06, "loss": 0.455, "step": 1689 }, { "epoch": 0.10830556267623687, "grad_norm": 2.6567719560903567, "learning_rate": 1e-06, "loss": 0.3988, "step": 1690 }, { "epoch": 0.10836964880799795, "grad_norm": 2.5692755223481507, "learning_rate": 1e-06, "loss": 0.4512, "step": 1691 }, { "epoch": 0.10843373493975904, "grad_norm": 2.499408417636477, "learning_rate": 1e-06, "loss": 0.4029, "step": 1692 }, { "epoch": 0.10849782107152012, "grad_norm": 2.7581677889012046, "learning_rate": 1e-06, "loss": 0.4781, "step": 1693 }, { "epoch": 0.10856190720328121, "grad_norm": 2.7449113590760703, "learning_rate": 1e-06, "loss": 0.3819, "step": 1694 }, { "epoch": 0.1086259933350423, "grad_norm": 2.4758292398159507, "learning_rate": 1e-06, "loss": 0.4012, "step": 1695 }, { "epoch": 0.10869007946680338, "grad_norm": 2.776264244021826, "learning_rate": 1e-06, "loss": 0.4143, "step": 1696 }, { "epoch": 0.10875416559856448, "grad_norm": 2.638478281868802, "learning_rate": 1e-06, "loss": 0.4193, "step": 1697 }, { "epoch": 0.10881825173032555, "grad_norm": 2.481263674878327, "learning_rate": 1e-06, "loss": 0.4411, "step": 1698 }, { "epoch": 0.10888233786208665, "grad_norm": 2.552295863623659, "learning_rate": 1e-06, "loss": 0.433, "step": 1699 }, { "epoch": 0.10894642399384773, "grad_norm": 2.52105190848856, "learning_rate": 1e-06, "loss": 0.3908, "step": 1700 }, { "epoch": 0.10901051012560882, "grad_norm": 2.518675874240785, "learning_rate": 1e-06, "loss": 0.4043, "step": 1701 }, { "epoch": 0.1090745962573699, "grad_norm": 2.614317095507154, "learning_rate": 1e-06, "loss": 0.387, "step": 1702 }, { "epoch": 0.10913868238913099, "grad_norm": 2.670338340620114, "learning_rate": 1e-06, "loss": 0.4327, "step": 1703 }, { "epoch": 0.10920276852089209, "grad_norm": 2.679878055563821, "learning_rate": 1e-06, "loss": 0.4729, "step": 1704 }, { "epoch": 0.10926685465265316, "grad_norm": 2.493372948578064, "learning_rate": 1e-06, "loss": 0.4325, "step": 1705 }, { "epoch": 0.10933094078441426, "grad_norm": 2.5642512118942187, "learning_rate": 1e-06, "loss": 0.4222, "step": 1706 }, { "epoch": 0.10939502691617534, "grad_norm": 2.5169199510593927, "learning_rate": 1e-06, "loss": 0.426, "step": 1707 }, { "epoch": 0.10945911304793643, "grad_norm": 2.4887182192261665, "learning_rate": 1e-06, "loss": 0.409, "step": 1708 }, { "epoch": 0.10952319917969751, "grad_norm": 2.7289202425480417, "learning_rate": 1e-06, "loss": 0.383, "step": 1709 }, { "epoch": 0.1095872853114586, "grad_norm": 2.61721341106042, "learning_rate": 1e-06, "loss": 0.4173, "step": 1710 }, { "epoch": 0.10965137144321968, "grad_norm": 2.72774723157587, "learning_rate": 1e-06, "loss": 0.4001, "step": 1711 }, { "epoch": 0.10971545757498077, "grad_norm": 2.413345581192089, "learning_rate": 1e-06, "loss": 0.3807, "step": 1712 }, { "epoch": 0.10977954370674187, "grad_norm": 2.7211350379570955, "learning_rate": 1e-06, "loss": 0.3684, "step": 1713 }, { "epoch": 0.10984362983850295, "grad_norm": 2.737851682076553, "learning_rate": 1e-06, "loss": 0.4509, "step": 1714 }, { "epoch": 0.10990771597026404, "grad_norm": 2.5769530061758275, "learning_rate": 1e-06, "loss": 0.3692, "step": 1715 }, { "epoch": 0.10997180210202512, "grad_norm": 2.570743845028058, "learning_rate": 1e-06, "loss": 0.4236, "step": 1716 }, { "epoch": 0.11003588823378621, "grad_norm": 2.756598918829375, "learning_rate": 1e-06, "loss": 0.4437, "step": 1717 }, { "epoch": 0.11009997436554729, "grad_norm": 2.57919783563559, "learning_rate": 1e-06, "loss": 0.3837, "step": 1718 }, { "epoch": 0.11016406049730838, "grad_norm": 2.5580986528574887, "learning_rate": 1e-06, "loss": 0.4406, "step": 1719 }, { "epoch": 0.11022814662906948, "grad_norm": 2.668776644544978, "learning_rate": 1e-06, "loss": 0.451, "step": 1720 }, { "epoch": 0.11029223276083056, "grad_norm": 2.523514496339625, "learning_rate": 1e-06, "loss": 0.41, "step": 1721 }, { "epoch": 0.11035631889259165, "grad_norm": 2.603462247414295, "learning_rate": 1e-06, "loss": 0.4276, "step": 1722 }, { "epoch": 0.11042040502435273, "grad_norm": 2.855953751352866, "learning_rate": 1e-06, "loss": 0.4488, "step": 1723 }, { "epoch": 0.11048449115611382, "grad_norm": 2.459019889378647, "learning_rate": 1e-06, "loss": 0.3524, "step": 1724 }, { "epoch": 0.1105485772878749, "grad_norm": 2.6543139169949668, "learning_rate": 1e-06, "loss": 0.3758, "step": 1725 }, { "epoch": 0.11061266341963599, "grad_norm": 2.732939934915263, "learning_rate": 1e-06, "loss": 0.4017, "step": 1726 }, { "epoch": 0.11067674955139707, "grad_norm": 2.5573541578391663, "learning_rate": 1e-06, "loss": 0.4032, "step": 1727 }, { "epoch": 0.11074083568315816, "grad_norm": 2.48054846948909, "learning_rate": 1e-06, "loss": 0.4306, "step": 1728 }, { "epoch": 0.11080492181491926, "grad_norm": 2.521020872733791, "learning_rate": 1e-06, "loss": 0.435, "step": 1729 }, { "epoch": 0.11086900794668034, "grad_norm": 2.401450393189567, "learning_rate": 1e-06, "loss": 0.3783, "step": 1730 }, { "epoch": 0.11093309407844143, "grad_norm": 2.617773165324972, "learning_rate": 1e-06, "loss": 0.4264, "step": 1731 }, { "epoch": 0.11099718021020251, "grad_norm": 2.6692631322553035, "learning_rate": 1e-06, "loss": 0.442, "step": 1732 }, { "epoch": 0.1110612663419636, "grad_norm": 2.6413046769270627, "learning_rate": 1e-06, "loss": 0.4591, "step": 1733 }, { "epoch": 0.11112535247372468, "grad_norm": 2.5232036530580353, "learning_rate": 1e-06, "loss": 0.408, "step": 1734 }, { "epoch": 0.11118943860548577, "grad_norm": 2.598170322205431, "learning_rate": 1e-06, "loss": 0.3985, "step": 1735 }, { "epoch": 0.11125352473724685, "grad_norm": 2.5204863843861713, "learning_rate": 1e-06, "loss": 0.3859, "step": 1736 }, { "epoch": 0.11131761086900795, "grad_norm": 2.531925371710578, "learning_rate": 1e-06, "loss": 0.4006, "step": 1737 }, { "epoch": 0.11138169700076904, "grad_norm": 2.5551159173093456, "learning_rate": 1e-06, "loss": 0.382, "step": 1738 }, { "epoch": 0.11144578313253012, "grad_norm": 2.76432440791053, "learning_rate": 1e-06, "loss": 0.4738, "step": 1739 }, { "epoch": 0.11150986926429121, "grad_norm": 2.7599097775430863, "learning_rate": 1e-06, "loss": 0.4049, "step": 1740 }, { "epoch": 0.11157395539605229, "grad_norm": 2.727649566997873, "learning_rate": 1e-06, "loss": 0.4498, "step": 1741 }, { "epoch": 0.11163804152781338, "grad_norm": 2.548484134262224, "learning_rate": 1e-06, "loss": 0.3875, "step": 1742 }, { "epoch": 0.11170212765957446, "grad_norm": 2.4432877629217225, "learning_rate": 1e-06, "loss": 0.3908, "step": 1743 }, { "epoch": 0.11176621379133556, "grad_norm": 3.1019322307948913, "learning_rate": 1e-06, "loss": 0.4498, "step": 1744 }, { "epoch": 0.11183029992309665, "grad_norm": 3.0096044794329613, "learning_rate": 1e-06, "loss": 0.4709, "step": 1745 }, { "epoch": 0.11189438605485773, "grad_norm": 2.469796239735023, "learning_rate": 1e-06, "loss": 0.4759, "step": 1746 }, { "epoch": 0.11195847218661882, "grad_norm": 2.5208667157610196, "learning_rate": 1e-06, "loss": 0.4266, "step": 1747 }, { "epoch": 0.1120225583183799, "grad_norm": 2.5523207697871175, "learning_rate": 1e-06, "loss": 0.4218, "step": 1748 }, { "epoch": 0.112086644450141, "grad_norm": 2.517061409866147, "learning_rate": 1e-06, "loss": 0.3933, "step": 1749 }, { "epoch": 0.11215073058190207, "grad_norm": 2.5726688003570772, "learning_rate": 1e-06, "loss": 0.4698, "step": 1750 }, { "epoch": 0.11221481671366317, "grad_norm": 2.660408633103373, "learning_rate": 1e-06, "loss": 0.4223, "step": 1751 }, { "epoch": 0.11227890284542424, "grad_norm": 2.4592087263046505, "learning_rate": 1e-06, "loss": 0.3798, "step": 1752 }, { "epoch": 0.11234298897718534, "grad_norm": 2.583126620448884, "learning_rate": 1e-06, "loss": 0.444, "step": 1753 }, { "epoch": 0.11240707510894643, "grad_norm": 2.385137220467907, "learning_rate": 1e-06, "loss": 0.3733, "step": 1754 }, { "epoch": 0.11247116124070751, "grad_norm": 2.583408504897648, "learning_rate": 1e-06, "loss": 0.3652, "step": 1755 }, { "epoch": 0.1125352473724686, "grad_norm": 2.528928465202496, "learning_rate": 1e-06, "loss": 0.4839, "step": 1756 }, { "epoch": 0.11259933350422968, "grad_norm": 2.4844004028727578, "learning_rate": 1e-06, "loss": 0.3589, "step": 1757 }, { "epoch": 0.11266341963599077, "grad_norm": 2.6836038257816552, "learning_rate": 1e-06, "loss": 0.3876, "step": 1758 }, { "epoch": 0.11272750576775185, "grad_norm": 2.5384485116106017, "learning_rate": 1e-06, "loss": 0.4044, "step": 1759 }, { "epoch": 0.11279159189951295, "grad_norm": 2.753583038091332, "learning_rate": 1e-06, "loss": 0.4455, "step": 1760 }, { "epoch": 0.11285567803127403, "grad_norm": 2.9345543598136827, "learning_rate": 1e-06, "loss": 0.4116, "step": 1761 }, { "epoch": 0.11291976416303512, "grad_norm": 2.7326619237198067, "learning_rate": 1e-06, "loss": 0.3968, "step": 1762 }, { "epoch": 0.11298385029479621, "grad_norm": 2.64669411763611, "learning_rate": 1e-06, "loss": 0.3625, "step": 1763 }, { "epoch": 0.11304793642655729, "grad_norm": 2.552350284148369, "learning_rate": 1e-06, "loss": 0.3777, "step": 1764 }, { "epoch": 0.11311202255831838, "grad_norm": 2.8295097832904132, "learning_rate": 1e-06, "loss": 0.4645, "step": 1765 }, { "epoch": 0.11317610869007946, "grad_norm": 2.77247592560161, "learning_rate": 1e-06, "loss": 0.4338, "step": 1766 }, { "epoch": 0.11324019482184056, "grad_norm": 2.5665589634227968, "learning_rate": 1e-06, "loss": 0.4078, "step": 1767 }, { "epoch": 0.11330428095360164, "grad_norm": 2.5795506039181593, "learning_rate": 1e-06, "loss": 0.4332, "step": 1768 }, { "epoch": 0.11336836708536273, "grad_norm": 2.509627569113794, "learning_rate": 1e-06, "loss": 0.3402, "step": 1769 }, { "epoch": 0.11343245321712381, "grad_norm": 2.8115985620573087, "learning_rate": 1e-06, "loss": 0.4901, "step": 1770 }, { "epoch": 0.1134965393488849, "grad_norm": 2.839001751369005, "learning_rate": 1e-06, "loss": 0.4355, "step": 1771 }, { "epoch": 0.113560625480646, "grad_norm": 2.565272919502216, "learning_rate": 1e-06, "loss": 0.378, "step": 1772 }, { "epoch": 0.11362471161240707, "grad_norm": 2.5032995137142797, "learning_rate": 1e-06, "loss": 0.371, "step": 1773 }, { "epoch": 0.11368879774416817, "grad_norm": 2.5599129867744015, "learning_rate": 1e-06, "loss": 0.4237, "step": 1774 }, { "epoch": 0.11375288387592924, "grad_norm": 2.8282808159804227, "learning_rate": 1e-06, "loss": 0.397, "step": 1775 }, { "epoch": 0.11381697000769034, "grad_norm": 2.435898564425463, "learning_rate": 1e-06, "loss": 0.4122, "step": 1776 }, { "epoch": 0.11388105613945142, "grad_norm": 2.590842457237392, "learning_rate": 1e-06, "loss": 0.4441, "step": 1777 }, { "epoch": 0.11394514227121251, "grad_norm": 2.78652717011761, "learning_rate": 1e-06, "loss": 0.4343, "step": 1778 }, { "epoch": 0.1140092284029736, "grad_norm": 2.8160506518694404, "learning_rate": 1e-06, "loss": 0.4206, "step": 1779 }, { "epoch": 0.11407331453473468, "grad_norm": 2.7610789996086043, "learning_rate": 1e-06, "loss": 0.4261, "step": 1780 }, { "epoch": 0.11413740066649578, "grad_norm": 2.6617844651355846, "learning_rate": 1e-06, "loss": 0.3811, "step": 1781 }, { "epoch": 0.11420148679825685, "grad_norm": 2.496233276933666, "learning_rate": 1e-06, "loss": 0.4673, "step": 1782 }, { "epoch": 0.11426557293001795, "grad_norm": 2.455381531806013, "learning_rate": 1e-06, "loss": 0.3537, "step": 1783 }, { "epoch": 0.11432965906177903, "grad_norm": 2.7512371176528867, "learning_rate": 1e-06, "loss": 0.3862, "step": 1784 }, { "epoch": 0.11439374519354012, "grad_norm": 2.455432753073087, "learning_rate": 1e-06, "loss": 0.3869, "step": 1785 }, { "epoch": 0.1144578313253012, "grad_norm": 2.5204235042326544, "learning_rate": 1e-06, "loss": 0.3941, "step": 1786 }, { "epoch": 0.11452191745706229, "grad_norm": 2.8503009414302665, "learning_rate": 1e-06, "loss": 0.3999, "step": 1787 }, { "epoch": 0.11458600358882338, "grad_norm": 2.6189598564105228, "learning_rate": 1e-06, "loss": 0.4742, "step": 1788 }, { "epoch": 0.11465008972058446, "grad_norm": 2.605962177702438, "learning_rate": 1e-06, "loss": 0.4198, "step": 1789 }, { "epoch": 0.11471417585234556, "grad_norm": 2.6260862237551996, "learning_rate": 1e-06, "loss": 0.4471, "step": 1790 }, { "epoch": 0.11477826198410664, "grad_norm": 2.9144544195926447, "learning_rate": 1e-06, "loss": 0.4141, "step": 1791 }, { "epoch": 0.11484234811586773, "grad_norm": 2.7848484757662604, "learning_rate": 1e-06, "loss": 0.4851, "step": 1792 }, { "epoch": 0.11490643424762881, "grad_norm": 2.6432933600668926, "learning_rate": 1e-06, "loss": 0.4358, "step": 1793 }, { "epoch": 0.1149705203793899, "grad_norm": 2.5969710533282697, "learning_rate": 1e-06, "loss": 0.3901, "step": 1794 }, { "epoch": 0.11503460651115098, "grad_norm": 2.6038648664893076, "learning_rate": 1e-06, "loss": 0.3698, "step": 1795 }, { "epoch": 0.11509869264291207, "grad_norm": 2.664492510496455, "learning_rate": 1e-06, "loss": 0.4344, "step": 1796 }, { "epoch": 0.11516277877467317, "grad_norm": 2.4865877780180248, "learning_rate": 1e-06, "loss": 0.4152, "step": 1797 }, { "epoch": 0.11522686490643425, "grad_norm": 2.6980174755024864, "learning_rate": 1e-06, "loss": 0.4049, "step": 1798 }, { "epoch": 0.11529095103819534, "grad_norm": 2.7132888219218447, "learning_rate": 1e-06, "loss": 0.4101, "step": 1799 }, { "epoch": 0.11535503716995642, "grad_norm": 2.6163019786785755, "learning_rate": 1e-06, "loss": 0.3957, "step": 1800 }, { "epoch": 0.11541912330171751, "grad_norm": 2.438070830380359, "learning_rate": 1e-06, "loss": 0.3896, "step": 1801 }, { "epoch": 0.11548320943347859, "grad_norm": 2.777361514104172, "learning_rate": 1e-06, "loss": 0.4361, "step": 1802 }, { "epoch": 0.11554729556523968, "grad_norm": 2.4615952090613153, "learning_rate": 1e-06, "loss": 0.4725, "step": 1803 }, { "epoch": 0.11561138169700078, "grad_norm": 2.786019550812383, "learning_rate": 1e-06, "loss": 0.4232, "step": 1804 }, { "epoch": 0.11567546782876185, "grad_norm": 2.582580806012161, "learning_rate": 1e-06, "loss": 0.4174, "step": 1805 }, { "epoch": 0.11573955396052295, "grad_norm": 2.6551798788814307, "learning_rate": 1e-06, "loss": 0.4031, "step": 1806 }, { "epoch": 0.11580364009228403, "grad_norm": 2.628264393086419, "learning_rate": 1e-06, "loss": 0.4183, "step": 1807 }, { "epoch": 0.11586772622404512, "grad_norm": 2.554298651583823, "learning_rate": 1e-06, "loss": 0.4543, "step": 1808 }, { "epoch": 0.1159318123558062, "grad_norm": 2.5870692588572477, "learning_rate": 1e-06, "loss": 0.4007, "step": 1809 }, { "epoch": 0.11599589848756729, "grad_norm": 2.536498615958139, "learning_rate": 1e-06, "loss": 0.4527, "step": 1810 }, { "epoch": 0.11605998461932837, "grad_norm": 2.7350417193936605, "learning_rate": 1e-06, "loss": 0.4948, "step": 1811 }, { "epoch": 0.11612407075108946, "grad_norm": 2.8962172722341446, "learning_rate": 1e-06, "loss": 0.4139, "step": 1812 }, { "epoch": 0.11618815688285056, "grad_norm": 2.447551642523489, "learning_rate": 1e-06, "loss": 0.4313, "step": 1813 }, { "epoch": 0.11625224301461164, "grad_norm": 2.7581062937740324, "learning_rate": 1e-06, "loss": 0.4046, "step": 1814 }, { "epoch": 0.11631632914637273, "grad_norm": 2.6613780988530387, "learning_rate": 1e-06, "loss": 0.4378, "step": 1815 }, { "epoch": 0.11638041527813381, "grad_norm": 2.7024783765058067, "learning_rate": 1e-06, "loss": 0.3638, "step": 1816 }, { "epoch": 0.1164445014098949, "grad_norm": 2.610733885000389, "learning_rate": 1e-06, "loss": 0.4581, "step": 1817 }, { "epoch": 0.11650858754165598, "grad_norm": 2.7818604058811367, "learning_rate": 1e-06, "loss": 0.3369, "step": 1818 }, { "epoch": 0.11657267367341707, "grad_norm": 2.852704189136639, "learning_rate": 1e-06, "loss": 0.4626, "step": 1819 }, { "epoch": 0.11663675980517815, "grad_norm": 2.6925423286197496, "learning_rate": 1e-06, "loss": 0.4053, "step": 1820 }, { "epoch": 0.11670084593693925, "grad_norm": 2.8192484239080056, "learning_rate": 1e-06, "loss": 0.4099, "step": 1821 }, { "epoch": 0.11676493206870034, "grad_norm": 2.8485436665584536, "learning_rate": 1e-06, "loss": 0.4731, "step": 1822 }, { "epoch": 0.11682901820046142, "grad_norm": 2.6475121457375645, "learning_rate": 1e-06, "loss": 0.4149, "step": 1823 }, { "epoch": 0.11689310433222251, "grad_norm": 2.8733685444969903, "learning_rate": 1e-06, "loss": 0.3954, "step": 1824 }, { "epoch": 0.11695719046398359, "grad_norm": 2.6062579224564595, "learning_rate": 1e-06, "loss": 0.386, "step": 1825 }, { "epoch": 0.11702127659574468, "grad_norm": 2.6613936407149485, "learning_rate": 1e-06, "loss": 0.3948, "step": 1826 }, { "epoch": 0.11708536272750576, "grad_norm": 2.607041472301362, "learning_rate": 1e-06, "loss": 0.4213, "step": 1827 }, { "epoch": 0.11714944885926686, "grad_norm": 2.595697100885704, "learning_rate": 1e-06, "loss": 0.3763, "step": 1828 }, { "epoch": 0.11721353499102795, "grad_norm": 2.627079136159903, "learning_rate": 1e-06, "loss": 0.4152, "step": 1829 }, { "epoch": 0.11727762112278903, "grad_norm": 2.8799355704445695, "learning_rate": 1e-06, "loss": 0.4191, "step": 1830 }, { "epoch": 0.11734170725455012, "grad_norm": 2.66793046478117, "learning_rate": 1e-06, "loss": 0.4221, "step": 1831 }, { "epoch": 0.1174057933863112, "grad_norm": 2.5435157561676003, "learning_rate": 1e-06, "loss": 0.4753, "step": 1832 }, { "epoch": 0.11746987951807229, "grad_norm": 2.6227753499816684, "learning_rate": 1e-06, "loss": 0.4436, "step": 1833 }, { "epoch": 0.11753396564983337, "grad_norm": 2.6787886244585413, "learning_rate": 1e-06, "loss": 0.3946, "step": 1834 }, { "epoch": 0.11759805178159446, "grad_norm": 2.6989424714818435, "learning_rate": 1e-06, "loss": 0.3964, "step": 1835 }, { "epoch": 0.11766213791335554, "grad_norm": 2.693869459395887, "learning_rate": 1e-06, "loss": 0.3903, "step": 1836 }, { "epoch": 0.11772622404511664, "grad_norm": 2.5534714081744165, "learning_rate": 1e-06, "loss": 0.4123, "step": 1837 }, { "epoch": 0.11779031017687773, "grad_norm": 2.5396904316255298, "learning_rate": 1e-06, "loss": 0.4625, "step": 1838 }, { "epoch": 0.11785439630863881, "grad_norm": 2.6864914728899834, "learning_rate": 1e-06, "loss": 0.4628, "step": 1839 }, { "epoch": 0.1179184824403999, "grad_norm": 2.4031481570736, "learning_rate": 1e-06, "loss": 0.4412, "step": 1840 }, { "epoch": 0.11798256857216098, "grad_norm": 2.8510808627208566, "learning_rate": 1e-06, "loss": 0.4295, "step": 1841 }, { "epoch": 0.11804665470392207, "grad_norm": 2.625121495397035, "learning_rate": 1e-06, "loss": 0.4293, "step": 1842 }, { "epoch": 0.11811074083568315, "grad_norm": 2.379041716484206, "learning_rate": 1e-06, "loss": 0.3987, "step": 1843 }, { "epoch": 0.11817482696744425, "grad_norm": 2.941854719930437, "learning_rate": 1e-06, "loss": 0.4159, "step": 1844 }, { "epoch": 0.11823891309920533, "grad_norm": 2.659123817605973, "learning_rate": 1e-06, "loss": 0.3979, "step": 1845 }, { "epoch": 0.11830299923096642, "grad_norm": 2.664461683615228, "learning_rate": 1e-06, "loss": 0.372, "step": 1846 }, { "epoch": 0.11836708536272751, "grad_norm": 2.4454388383128145, "learning_rate": 1e-06, "loss": 0.3694, "step": 1847 }, { "epoch": 0.11843117149448859, "grad_norm": 2.9853732892969007, "learning_rate": 1e-06, "loss": 0.4965, "step": 1848 }, { "epoch": 0.11849525762624968, "grad_norm": 2.5862583091122637, "learning_rate": 1e-06, "loss": 0.429, "step": 1849 }, { "epoch": 0.11855934375801076, "grad_norm": 2.650195995580348, "learning_rate": 1e-06, "loss": 0.4274, "step": 1850 }, { "epoch": 0.11862342988977186, "grad_norm": 2.4840409762343776, "learning_rate": 1e-06, "loss": 0.4511, "step": 1851 }, { "epoch": 0.11868751602153294, "grad_norm": 2.8313952437027763, "learning_rate": 1e-06, "loss": 0.3909, "step": 1852 }, { "epoch": 0.11875160215329403, "grad_norm": 2.598131250280352, "learning_rate": 1e-06, "loss": 0.4383, "step": 1853 }, { "epoch": 0.11881568828505511, "grad_norm": 2.5956430213471644, "learning_rate": 1e-06, "loss": 0.3624, "step": 1854 }, { "epoch": 0.1188797744168162, "grad_norm": 2.819100294041442, "learning_rate": 1e-06, "loss": 0.4846, "step": 1855 }, { "epoch": 0.1189438605485773, "grad_norm": 2.659323824569532, "learning_rate": 1e-06, "loss": 0.4342, "step": 1856 }, { "epoch": 0.11900794668033837, "grad_norm": 2.5236617424640784, "learning_rate": 1e-06, "loss": 0.4015, "step": 1857 }, { "epoch": 0.11907203281209947, "grad_norm": 2.4090615191739526, "learning_rate": 1e-06, "loss": 0.4327, "step": 1858 }, { "epoch": 0.11913611894386054, "grad_norm": 2.769880388165618, "learning_rate": 1e-06, "loss": 0.381, "step": 1859 }, { "epoch": 0.11920020507562164, "grad_norm": 2.819628662213885, "learning_rate": 1e-06, "loss": 0.4547, "step": 1860 }, { "epoch": 0.11926429120738272, "grad_norm": 2.5754514969457873, "learning_rate": 1e-06, "loss": 0.4594, "step": 1861 }, { "epoch": 0.11932837733914381, "grad_norm": 2.5022880249203325, "learning_rate": 1e-06, "loss": 0.4144, "step": 1862 }, { "epoch": 0.1193924634709049, "grad_norm": 2.4699745018799404, "learning_rate": 1e-06, "loss": 0.3806, "step": 1863 }, { "epoch": 0.11945654960266598, "grad_norm": 2.6456565415557374, "learning_rate": 1e-06, "loss": 0.4065, "step": 1864 }, { "epoch": 0.11952063573442707, "grad_norm": 2.4005550753618015, "learning_rate": 1e-06, "loss": 0.4158, "step": 1865 }, { "epoch": 0.11958472186618815, "grad_norm": 2.5060801713618983, "learning_rate": 1e-06, "loss": 0.4542, "step": 1866 }, { "epoch": 0.11964880799794925, "grad_norm": 2.4513719291485585, "learning_rate": 1e-06, "loss": 0.4339, "step": 1867 }, { "epoch": 0.11971289412971033, "grad_norm": 2.6769921439010753, "learning_rate": 1e-06, "loss": 0.4052, "step": 1868 }, { "epoch": 0.11977698026147142, "grad_norm": 2.6840492380158834, "learning_rate": 1e-06, "loss": 0.4007, "step": 1869 }, { "epoch": 0.1198410663932325, "grad_norm": 2.803156210251872, "learning_rate": 1e-06, "loss": 0.39, "step": 1870 }, { "epoch": 0.11990515252499359, "grad_norm": 2.6987854999821597, "learning_rate": 1e-06, "loss": 0.4034, "step": 1871 }, { "epoch": 0.11996923865675468, "grad_norm": 2.665721870443163, "learning_rate": 1e-06, "loss": 0.4221, "step": 1872 }, { "epoch": 0.12003332478851576, "grad_norm": 2.6560564490065093, "learning_rate": 1e-06, "loss": 0.3591, "step": 1873 }, { "epoch": 0.12009741092027686, "grad_norm": 2.7021179621334004, "learning_rate": 1e-06, "loss": 0.4218, "step": 1874 }, { "epoch": 0.12016149705203794, "grad_norm": 2.6055942460970827, "learning_rate": 1e-06, "loss": 0.4245, "step": 1875 }, { "epoch": 0.12022558318379903, "grad_norm": 2.416483571969232, "learning_rate": 1e-06, "loss": 0.4176, "step": 1876 }, { "epoch": 0.12028966931556011, "grad_norm": 2.8828531541836506, "learning_rate": 1e-06, "loss": 0.4091, "step": 1877 }, { "epoch": 0.1203537554473212, "grad_norm": 2.6443922479248787, "learning_rate": 1e-06, "loss": 0.4548, "step": 1878 }, { "epoch": 0.12041784157908228, "grad_norm": 2.688894934206754, "learning_rate": 1e-06, "loss": 0.4015, "step": 1879 }, { "epoch": 0.12048192771084337, "grad_norm": 2.633924101866232, "learning_rate": 1e-06, "loss": 0.3938, "step": 1880 }, { "epoch": 0.12054601384260447, "grad_norm": 2.807137697859837, "learning_rate": 1e-06, "loss": 0.413, "step": 1881 }, { "epoch": 0.12061009997436554, "grad_norm": 2.552820173004839, "learning_rate": 1e-06, "loss": 0.4118, "step": 1882 }, { "epoch": 0.12067418610612664, "grad_norm": 2.6265690322793978, "learning_rate": 1e-06, "loss": 0.3926, "step": 1883 }, { "epoch": 0.12073827223788772, "grad_norm": 2.678479232293037, "learning_rate": 1e-06, "loss": 0.4639, "step": 1884 }, { "epoch": 0.12080235836964881, "grad_norm": 2.6050744958084264, "learning_rate": 1e-06, "loss": 0.4349, "step": 1885 }, { "epoch": 0.12086644450140989, "grad_norm": 2.7082918851876427, "learning_rate": 1e-06, "loss": 0.4765, "step": 1886 }, { "epoch": 0.12093053063317098, "grad_norm": 2.526845919082137, "learning_rate": 1e-06, "loss": 0.3662, "step": 1887 }, { "epoch": 0.12099461676493208, "grad_norm": 2.6302833083000703, "learning_rate": 1e-06, "loss": 0.366, "step": 1888 }, { "epoch": 0.12105870289669315, "grad_norm": 2.553849692507201, "learning_rate": 1e-06, "loss": 0.423, "step": 1889 }, { "epoch": 0.12112278902845425, "grad_norm": 2.683491080670172, "learning_rate": 1e-06, "loss": 0.4514, "step": 1890 }, { "epoch": 0.12118687516021533, "grad_norm": 2.6364753658326125, "learning_rate": 1e-06, "loss": 0.4189, "step": 1891 }, { "epoch": 0.12125096129197642, "grad_norm": 2.7165916312599423, "learning_rate": 1e-06, "loss": 0.3922, "step": 1892 }, { "epoch": 0.1213150474237375, "grad_norm": 2.549170313734264, "learning_rate": 1e-06, "loss": 0.4106, "step": 1893 }, { "epoch": 0.12137913355549859, "grad_norm": 2.6838182220230906, "learning_rate": 1e-06, "loss": 0.3677, "step": 1894 }, { "epoch": 0.12144321968725967, "grad_norm": 2.6414043011658865, "learning_rate": 1e-06, "loss": 0.3912, "step": 1895 }, { "epoch": 0.12150730581902076, "grad_norm": 2.5632629923340775, "learning_rate": 1e-06, "loss": 0.4224, "step": 1896 }, { "epoch": 0.12157139195078186, "grad_norm": 2.544984397970424, "learning_rate": 1e-06, "loss": 0.4227, "step": 1897 }, { "epoch": 0.12163547808254294, "grad_norm": 2.5847715613939455, "learning_rate": 1e-06, "loss": 0.4267, "step": 1898 }, { "epoch": 0.12169956421430403, "grad_norm": 2.65803391386428, "learning_rate": 1e-06, "loss": 0.3746, "step": 1899 }, { "epoch": 0.12176365034606511, "grad_norm": 2.6897903682954847, "learning_rate": 1e-06, "loss": 0.4652, "step": 1900 }, { "epoch": 0.1218277364778262, "grad_norm": 2.56277118816797, "learning_rate": 1e-06, "loss": 0.4529, "step": 1901 }, { "epoch": 0.12189182260958728, "grad_norm": 2.5517501695285016, "learning_rate": 1e-06, "loss": 0.465, "step": 1902 }, { "epoch": 0.12195590874134837, "grad_norm": 2.648252581171726, "learning_rate": 1e-06, "loss": 0.455, "step": 1903 }, { "epoch": 0.12201999487310945, "grad_norm": 2.6367007538455938, "learning_rate": 1e-06, "loss": 0.4412, "step": 1904 }, { "epoch": 0.12208408100487055, "grad_norm": 2.6880014376963044, "learning_rate": 1e-06, "loss": 0.4533, "step": 1905 }, { "epoch": 0.12214816713663164, "grad_norm": 2.6551564910990058, "learning_rate": 1e-06, "loss": 0.3949, "step": 1906 }, { "epoch": 0.12221225326839272, "grad_norm": 2.814098153913346, "learning_rate": 1e-06, "loss": 0.37, "step": 1907 }, { "epoch": 0.12227633940015381, "grad_norm": 2.5032472998077466, "learning_rate": 1e-06, "loss": 0.3843, "step": 1908 }, { "epoch": 0.12234042553191489, "grad_norm": 2.8306387124319192, "learning_rate": 1e-06, "loss": 0.4515, "step": 1909 }, { "epoch": 0.12240451166367598, "grad_norm": 2.42122372122656, "learning_rate": 1e-06, "loss": 0.4164, "step": 1910 }, { "epoch": 0.12246859779543706, "grad_norm": 2.537249205277831, "learning_rate": 1e-06, "loss": 0.4379, "step": 1911 }, { "epoch": 0.12253268392719815, "grad_norm": 2.6442746159357924, "learning_rate": 1e-06, "loss": 0.4344, "step": 1912 }, { "epoch": 0.12259677005895925, "grad_norm": 2.656606308028338, "learning_rate": 1e-06, "loss": 0.379, "step": 1913 }, { "epoch": 0.12266085619072033, "grad_norm": 2.8088794752870014, "learning_rate": 1e-06, "loss": 0.4047, "step": 1914 }, { "epoch": 0.12272494232248142, "grad_norm": 2.582575451213974, "learning_rate": 1e-06, "loss": 0.4024, "step": 1915 }, { "epoch": 0.1227890284542425, "grad_norm": 2.710660924430504, "learning_rate": 1e-06, "loss": 0.4155, "step": 1916 }, { "epoch": 0.12285311458600359, "grad_norm": 2.42215293270404, "learning_rate": 1e-06, "loss": 0.4298, "step": 1917 }, { "epoch": 0.12291720071776467, "grad_norm": 2.5921801959218316, "learning_rate": 1e-06, "loss": 0.3942, "step": 1918 }, { "epoch": 0.12298128684952576, "grad_norm": 2.5278890165586057, "learning_rate": 1e-06, "loss": 0.3819, "step": 1919 }, { "epoch": 0.12304537298128684, "grad_norm": 2.524488936361231, "learning_rate": 1e-06, "loss": 0.4046, "step": 1920 }, { "epoch": 0.12310945911304794, "grad_norm": 2.4328966559195675, "learning_rate": 1e-06, "loss": 0.4195, "step": 1921 }, { "epoch": 0.12317354524480903, "grad_norm": 2.774360894158552, "learning_rate": 1e-06, "loss": 0.4204, "step": 1922 }, { "epoch": 0.12323763137657011, "grad_norm": 2.518717450808227, "learning_rate": 1e-06, "loss": 0.4391, "step": 1923 }, { "epoch": 0.1233017175083312, "grad_norm": 2.832159155238884, "learning_rate": 1e-06, "loss": 0.4229, "step": 1924 }, { "epoch": 0.12336580364009228, "grad_norm": 2.511577878519737, "learning_rate": 1e-06, "loss": 0.4165, "step": 1925 }, { "epoch": 0.12342988977185337, "grad_norm": 3.5208364019882885, "learning_rate": 1e-06, "loss": 0.3873, "step": 1926 }, { "epoch": 0.12349397590361445, "grad_norm": 2.427688682125614, "learning_rate": 1e-06, "loss": 0.377, "step": 1927 }, { "epoch": 0.12355806203537555, "grad_norm": 2.6404501242098837, "learning_rate": 1e-06, "loss": 0.454, "step": 1928 }, { "epoch": 0.12362214816713663, "grad_norm": 2.441849984145885, "learning_rate": 1e-06, "loss": 0.3963, "step": 1929 }, { "epoch": 0.12368623429889772, "grad_norm": 2.443122503103709, "learning_rate": 1e-06, "loss": 0.4631, "step": 1930 }, { "epoch": 0.12375032043065881, "grad_norm": 2.8171252209762474, "learning_rate": 1e-06, "loss": 0.4887, "step": 1931 }, { "epoch": 0.12381440656241989, "grad_norm": 2.4480320743078887, "learning_rate": 1e-06, "loss": 0.3477, "step": 1932 }, { "epoch": 0.12387849269418098, "grad_norm": 2.6932808980425107, "learning_rate": 1e-06, "loss": 0.4569, "step": 1933 }, { "epoch": 0.12394257882594206, "grad_norm": 2.820935235598579, "learning_rate": 1e-06, "loss": 0.4038, "step": 1934 }, { "epoch": 0.12400666495770316, "grad_norm": 2.6341412685655983, "learning_rate": 1e-06, "loss": 0.4254, "step": 1935 }, { "epoch": 0.12407075108946423, "grad_norm": 2.6019885732357326, "learning_rate": 1e-06, "loss": 0.4036, "step": 1936 }, { "epoch": 0.12413483722122533, "grad_norm": 2.6751182734908965, "learning_rate": 1e-06, "loss": 0.4484, "step": 1937 }, { "epoch": 0.12419892335298642, "grad_norm": 2.5870567275864325, "learning_rate": 1e-06, "loss": 0.4145, "step": 1938 }, { "epoch": 0.1242630094847475, "grad_norm": 2.5353320352240525, "learning_rate": 1e-06, "loss": 0.4034, "step": 1939 }, { "epoch": 0.12432709561650859, "grad_norm": 2.598518940629256, "learning_rate": 1e-06, "loss": 0.3844, "step": 1940 }, { "epoch": 0.12439118174826967, "grad_norm": 2.562879451548365, "learning_rate": 1e-06, "loss": 0.3962, "step": 1941 }, { "epoch": 0.12445526788003076, "grad_norm": 2.645786720650379, "learning_rate": 1e-06, "loss": 0.4838, "step": 1942 }, { "epoch": 0.12451935401179184, "grad_norm": 2.6403502102882808, "learning_rate": 1e-06, "loss": 0.4711, "step": 1943 }, { "epoch": 0.12458344014355294, "grad_norm": 2.876774406457867, "learning_rate": 1e-06, "loss": 0.4092, "step": 1944 }, { "epoch": 0.12464752627531402, "grad_norm": 2.751100020052121, "learning_rate": 1e-06, "loss": 0.4066, "step": 1945 }, { "epoch": 0.12471161240707511, "grad_norm": 2.721494548343405, "learning_rate": 1e-06, "loss": 0.4333, "step": 1946 }, { "epoch": 0.1247756985388362, "grad_norm": 3.0707931228709318, "learning_rate": 1e-06, "loss": 0.4444, "step": 1947 }, { "epoch": 0.12483978467059728, "grad_norm": 2.5536278780711874, "learning_rate": 1e-06, "loss": 0.4432, "step": 1948 }, { "epoch": 0.12490387080235837, "grad_norm": 2.5952295096194327, "learning_rate": 1e-06, "loss": 0.4064, "step": 1949 }, { "epoch": 0.12496795693411945, "grad_norm": 2.804966831875095, "learning_rate": 1e-06, "loss": 0.4377, "step": 1950 }, { "epoch": 0.12503204306588053, "grad_norm": 2.6254645986466016, "learning_rate": 1e-06, "loss": 0.3889, "step": 1951 }, { "epoch": 0.12509612919764163, "grad_norm": 2.8575547875752734, "learning_rate": 1e-06, "loss": 0.4404, "step": 1952 }, { "epoch": 0.12516021532940272, "grad_norm": 2.890946909114489, "learning_rate": 1e-06, "loss": 0.3916, "step": 1953 }, { "epoch": 0.1252243014611638, "grad_norm": 2.4936005818287588, "learning_rate": 1e-06, "loss": 0.4075, "step": 1954 }, { "epoch": 0.1252883875929249, "grad_norm": 2.6488281056040917, "learning_rate": 1e-06, "loss": 0.4452, "step": 1955 }, { "epoch": 0.12535247372468597, "grad_norm": 2.5861471919909875, "learning_rate": 1e-06, "loss": 0.3942, "step": 1956 }, { "epoch": 0.12541655985644706, "grad_norm": 3.5318637594090085, "learning_rate": 1e-06, "loss": 0.3976, "step": 1957 }, { "epoch": 0.12548064598820816, "grad_norm": 2.399050718582829, "learning_rate": 1e-06, "loss": 0.3801, "step": 1958 }, { "epoch": 0.12554473211996925, "grad_norm": 2.597714681002882, "learning_rate": 1e-06, "loss": 0.3961, "step": 1959 }, { "epoch": 0.12560881825173031, "grad_norm": 2.4663048518140362, "learning_rate": 1e-06, "loss": 0.4151, "step": 1960 }, { "epoch": 0.1256729043834914, "grad_norm": 2.5624663402691126, "learning_rate": 1e-06, "loss": 0.4148, "step": 1961 }, { "epoch": 0.1257369905152525, "grad_norm": 3.140524750218864, "learning_rate": 1e-06, "loss": 0.4058, "step": 1962 }, { "epoch": 0.1258010766470136, "grad_norm": 2.810127821809344, "learning_rate": 1e-06, "loss": 0.4366, "step": 1963 }, { "epoch": 0.1258651627787747, "grad_norm": 2.5209202909604, "learning_rate": 1e-06, "loss": 0.4337, "step": 1964 }, { "epoch": 0.12592924891053575, "grad_norm": 2.651740149209284, "learning_rate": 1e-06, "loss": 0.4091, "step": 1965 }, { "epoch": 0.12599333504229684, "grad_norm": 2.6638550091846, "learning_rate": 1e-06, "loss": 0.3951, "step": 1966 }, { "epoch": 0.12605742117405794, "grad_norm": 2.7390919328674106, "learning_rate": 1e-06, "loss": 0.4224, "step": 1967 }, { "epoch": 0.12612150730581903, "grad_norm": 2.568954836564736, "learning_rate": 1e-06, "loss": 0.4179, "step": 1968 }, { "epoch": 0.1261855934375801, "grad_norm": 2.623192224449949, "learning_rate": 1e-06, "loss": 0.4082, "step": 1969 }, { "epoch": 0.1262496795693412, "grad_norm": 2.772705656163098, "learning_rate": 1e-06, "loss": 0.4291, "step": 1970 }, { "epoch": 0.12631376570110228, "grad_norm": 3.0653029335133293, "learning_rate": 1e-06, "loss": 0.4196, "step": 1971 }, { "epoch": 0.12637785183286337, "grad_norm": 2.71401827712792, "learning_rate": 1e-06, "loss": 0.4095, "step": 1972 }, { "epoch": 0.12644193796462447, "grad_norm": 2.3069147395316767, "learning_rate": 1e-06, "loss": 0.4296, "step": 1973 }, { "epoch": 0.12650602409638553, "grad_norm": 2.944709378967964, "learning_rate": 1e-06, "loss": 0.4009, "step": 1974 }, { "epoch": 0.12657011022814663, "grad_norm": 2.6377150914839675, "learning_rate": 1e-06, "loss": 0.4023, "step": 1975 }, { "epoch": 0.12663419635990772, "grad_norm": 2.8380873293773226, "learning_rate": 1e-06, "loss": 0.4077, "step": 1976 }, { "epoch": 0.1266982824916688, "grad_norm": 2.677246943205837, "learning_rate": 1e-06, "loss": 0.4423, "step": 1977 }, { "epoch": 0.12676236862342988, "grad_norm": 2.5127787900075402, "learning_rate": 1e-06, "loss": 0.3851, "step": 1978 }, { "epoch": 0.12682645475519097, "grad_norm": 2.4144112563671793, "learning_rate": 1e-06, "loss": 0.4408, "step": 1979 }, { "epoch": 0.12689054088695206, "grad_norm": 2.592467710784995, "learning_rate": 1e-06, "loss": 0.4076, "step": 1980 }, { "epoch": 0.12695462701871316, "grad_norm": 2.702943687034177, "learning_rate": 1e-06, "loss": 0.4539, "step": 1981 }, { "epoch": 0.12701871315047425, "grad_norm": 2.555904697910308, "learning_rate": 1e-06, "loss": 0.3962, "step": 1982 }, { "epoch": 0.12708279928223531, "grad_norm": 2.6617695924556775, "learning_rate": 1e-06, "loss": 0.4315, "step": 1983 }, { "epoch": 0.1271468854139964, "grad_norm": 2.5865121829306155, "learning_rate": 1e-06, "loss": 0.378, "step": 1984 }, { "epoch": 0.1272109715457575, "grad_norm": 2.4733075250800067, "learning_rate": 1e-06, "loss": 0.4381, "step": 1985 }, { "epoch": 0.1272750576775186, "grad_norm": 2.7177984913035558, "learning_rate": 1e-06, "loss": 0.4293, "step": 1986 }, { "epoch": 0.12733914380927966, "grad_norm": 2.68776734487849, "learning_rate": 1e-06, "loss": 0.3889, "step": 1987 }, { "epoch": 0.12740322994104075, "grad_norm": 2.619007004179668, "learning_rate": 1e-06, "loss": 0.4024, "step": 1988 }, { "epoch": 0.12746731607280185, "grad_norm": 2.65050880017839, "learning_rate": 1e-06, "loss": 0.4148, "step": 1989 }, { "epoch": 0.12753140220456294, "grad_norm": 2.6588622202293077, "learning_rate": 1e-06, "loss": 0.4207, "step": 1990 }, { "epoch": 0.12759548833632403, "grad_norm": 2.708910525725563, "learning_rate": 1e-06, "loss": 0.4158, "step": 1991 }, { "epoch": 0.1276595744680851, "grad_norm": 2.5687777946786285, "learning_rate": 1e-06, "loss": 0.3429, "step": 1992 }, { "epoch": 0.1277236605998462, "grad_norm": 2.8445974840088857, "learning_rate": 1e-06, "loss": 0.3966, "step": 1993 }, { "epoch": 0.12778774673160728, "grad_norm": 2.4812860715534737, "learning_rate": 1e-06, "loss": 0.4086, "step": 1994 }, { "epoch": 0.12785183286336838, "grad_norm": 2.5067894831823274, "learning_rate": 1e-06, "loss": 0.3674, "step": 1995 }, { "epoch": 0.12791591899512944, "grad_norm": 3.120477088832285, "learning_rate": 1e-06, "loss": 0.4126, "step": 1996 }, { "epoch": 0.12798000512689053, "grad_norm": 2.68728983577007, "learning_rate": 1e-06, "loss": 0.3998, "step": 1997 }, { "epoch": 0.12804409125865163, "grad_norm": 2.5708000190381006, "learning_rate": 1e-06, "loss": 0.3876, "step": 1998 }, { "epoch": 0.12810817739041272, "grad_norm": 2.816934859878245, "learning_rate": 1e-06, "loss": 0.4359, "step": 1999 }, { "epoch": 0.1281722635221738, "grad_norm": 2.7079638576190903, "learning_rate": 1e-06, "loss": 0.4471, "step": 2000 }, { "epoch": 0.12823634965393488, "grad_norm": 2.5800674482671635, "learning_rate": 1e-06, "loss": 0.4317, "step": 2001 }, { "epoch": 0.12830043578569597, "grad_norm": 2.4591428885818507, "learning_rate": 1e-06, "loss": 0.421, "step": 2002 }, { "epoch": 0.12836452191745706, "grad_norm": 2.6827533850410337, "learning_rate": 1e-06, "loss": 0.3498, "step": 2003 }, { "epoch": 0.12842860804921816, "grad_norm": 2.709188331291668, "learning_rate": 1e-06, "loss": 0.4272, "step": 2004 }, { "epoch": 0.12849269418097922, "grad_norm": 2.499208486274247, "learning_rate": 1e-06, "loss": 0.4053, "step": 2005 }, { "epoch": 0.12855678031274032, "grad_norm": 2.6909496268940214, "learning_rate": 1e-06, "loss": 0.4524, "step": 2006 }, { "epoch": 0.1286208664445014, "grad_norm": 2.802587101271318, "learning_rate": 1e-06, "loss": 0.4081, "step": 2007 }, { "epoch": 0.1286849525762625, "grad_norm": 2.4601661186060673, "learning_rate": 1e-06, "loss": 0.3981, "step": 2008 }, { "epoch": 0.1287490387080236, "grad_norm": 2.654138037986819, "learning_rate": 1e-06, "loss": 0.3885, "step": 2009 }, { "epoch": 0.12881312483978466, "grad_norm": 2.8370521642356876, "learning_rate": 1e-06, "loss": 0.434, "step": 2010 }, { "epoch": 0.12887721097154575, "grad_norm": 2.5252795408324964, "learning_rate": 1e-06, "loss": 0.4169, "step": 2011 }, { "epoch": 0.12894129710330685, "grad_norm": 2.4799437101808866, "learning_rate": 1e-06, "loss": 0.3828, "step": 2012 }, { "epoch": 0.12900538323506794, "grad_norm": 2.7494549099164614, "learning_rate": 1e-06, "loss": 0.4594, "step": 2013 }, { "epoch": 0.12906946936682903, "grad_norm": 2.4795137457784926, "learning_rate": 1e-06, "loss": 0.4005, "step": 2014 }, { "epoch": 0.1291335554985901, "grad_norm": 2.7520680465218175, "learning_rate": 1e-06, "loss": 0.4466, "step": 2015 }, { "epoch": 0.1291976416303512, "grad_norm": 2.722284636249325, "learning_rate": 1e-06, "loss": 0.3849, "step": 2016 }, { "epoch": 0.12926172776211228, "grad_norm": 2.6524413526819237, "learning_rate": 1e-06, "loss": 0.4324, "step": 2017 }, { "epoch": 0.12932581389387338, "grad_norm": 2.66292985459627, "learning_rate": 1e-06, "loss": 0.4615, "step": 2018 }, { "epoch": 0.12938990002563444, "grad_norm": 2.6159139687609576, "learning_rate": 1e-06, "loss": 0.4115, "step": 2019 }, { "epoch": 0.12945398615739553, "grad_norm": 2.7173243701716823, "learning_rate": 1e-06, "loss": 0.4108, "step": 2020 }, { "epoch": 0.12951807228915663, "grad_norm": 2.7996012785431357, "learning_rate": 1e-06, "loss": 0.418, "step": 2021 }, { "epoch": 0.12958215842091772, "grad_norm": 2.6421635527383063, "learning_rate": 1e-06, "loss": 0.5005, "step": 2022 }, { "epoch": 0.1296462445526788, "grad_norm": 2.701234561198063, "learning_rate": 1e-06, "loss": 0.4144, "step": 2023 }, { "epoch": 0.12971033068443988, "grad_norm": 2.5982870193962793, "learning_rate": 1e-06, "loss": 0.4336, "step": 2024 }, { "epoch": 0.12977441681620097, "grad_norm": 3.0523581053313165, "learning_rate": 1e-06, "loss": 0.4677, "step": 2025 }, { "epoch": 0.12983850294796206, "grad_norm": 2.498398555961267, "learning_rate": 1e-06, "loss": 0.4339, "step": 2026 }, { "epoch": 0.12990258907972316, "grad_norm": 2.6602278770071623, "learning_rate": 1e-06, "loss": 0.4301, "step": 2027 }, { "epoch": 0.12996667521148422, "grad_norm": 2.4848270044860645, "learning_rate": 1e-06, "loss": 0.4618, "step": 2028 }, { "epoch": 0.13003076134324532, "grad_norm": 2.6426560908155805, "learning_rate": 1e-06, "loss": 0.4431, "step": 2029 }, { "epoch": 0.1300948474750064, "grad_norm": 2.685119281293971, "learning_rate": 1e-06, "loss": 0.4239, "step": 2030 }, { "epoch": 0.1301589336067675, "grad_norm": 2.653893626434556, "learning_rate": 1e-06, "loss": 0.4626, "step": 2031 }, { "epoch": 0.1302230197385286, "grad_norm": 2.693815977647124, "learning_rate": 1e-06, "loss": 0.4073, "step": 2032 }, { "epoch": 0.13028710587028966, "grad_norm": 2.7742930167368316, "learning_rate": 1e-06, "loss": 0.4358, "step": 2033 }, { "epoch": 0.13035119200205075, "grad_norm": 2.4521871241228532, "learning_rate": 1e-06, "loss": 0.3665, "step": 2034 }, { "epoch": 0.13041527813381185, "grad_norm": 2.5778926021918656, "learning_rate": 1e-06, "loss": 0.4419, "step": 2035 }, { "epoch": 0.13047936426557294, "grad_norm": 2.5233030916242307, "learning_rate": 1e-06, "loss": 0.4635, "step": 2036 }, { "epoch": 0.130543450397334, "grad_norm": 2.7769656735415227, "learning_rate": 1e-06, "loss": 0.4503, "step": 2037 }, { "epoch": 0.1306075365290951, "grad_norm": 2.6061311223083528, "learning_rate": 1e-06, "loss": 0.4037, "step": 2038 }, { "epoch": 0.1306716226608562, "grad_norm": 2.6441536424858416, "learning_rate": 1e-06, "loss": 0.4084, "step": 2039 }, { "epoch": 0.13073570879261728, "grad_norm": 2.646813268165125, "learning_rate": 1e-06, "loss": 0.404, "step": 2040 }, { "epoch": 0.13079979492437838, "grad_norm": 2.718568711377987, "learning_rate": 1e-06, "loss": 0.3933, "step": 2041 }, { "epoch": 0.13086388105613944, "grad_norm": 2.4331819490330076, "learning_rate": 1e-06, "loss": 0.4277, "step": 2042 }, { "epoch": 0.13092796718790053, "grad_norm": 2.5525577204591787, "learning_rate": 1e-06, "loss": 0.4228, "step": 2043 }, { "epoch": 0.13099205331966163, "grad_norm": 2.591680461417647, "learning_rate": 1e-06, "loss": 0.3663, "step": 2044 }, { "epoch": 0.13105613945142272, "grad_norm": 2.7473732680920464, "learning_rate": 1e-06, "loss": 0.4217, "step": 2045 }, { "epoch": 0.13112022558318379, "grad_norm": 2.512421236575192, "learning_rate": 1e-06, "loss": 0.4137, "step": 2046 }, { "epoch": 0.13118431171494488, "grad_norm": 2.745025052977934, "learning_rate": 1e-06, "loss": 0.4289, "step": 2047 }, { "epoch": 0.13124839784670597, "grad_norm": 2.6990060791654327, "learning_rate": 1e-06, "loss": 0.4014, "step": 2048 }, { "epoch": 0.13131248397846706, "grad_norm": 2.603256256896644, "learning_rate": 1e-06, "loss": 0.4655, "step": 2049 }, { "epoch": 0.13137657011022816, "grad_norm": 2.6494921431521687, "learning_rate": 1e-06, "loss": 0.3774, "step": 2050 }, { "epoch": 0.13144065624198922, "grad_norm": 2.8543194864950965, "learning_rate": 1e-06, "loss": 0.4587, "step": 2051 }, { "epoch": 0.13150474237375032, "grad_norm": 2.7739591966852166, "learning_rate": 1e-06, "loss": 0.3972, "step": 2052 }, { "epoch": 0.1315688285055114, "grad_norm": 2.4982817995739444, "learning_rate": 1e-06, "loss": 0.3863, "step": 2053 }, { "epoch": 0.1316329146372725, "grad_norm": 2.6312308133622286, "learning_rate": 1e-06, "loss": 0.4148, "step": 2054 }, { "epoch": 0.13169700076903357, "grad_norm": 3.0636204926026425, "learning_rate": 1e-06, "loss": 0.4028, "step": 2055 }, { "epoch": 0.13176108690079466, "grad_norm": 2.7556518659223626, "learning_rate": 1e-06, "loss": 0.4212, "step": 2056 }, { "epoch": 0.13182517303255575, "grad_norm": 2.7385712365433936, "learning_rate": 1e-06, "loss": 0.4222, "step": 2057 }, { "epoch": 0.13188925916431685, "grad_norm": 2.533880989808742, "learning_rate": 1e-06, "loss": 0.4315, "step": 2058 }, { "epoch": 0.13195334529607794, "grad_norm": 2.426764055890877, "learning_rate": 1e-06, "loss": 0.4003, "step": 2059 }, { "epoch": 0.132017431427839, "grad_norm": 2.5769365252329246, "learning_rate": 1e-06, "loss": 0.4141, "step": 2060 }, { "epoch": 0.1320815175596001, "grad_norm": 2.5672902578483447, "learning_rate": 1e-06, "loss": 0.3979, "step": 2061 }, { "epoch": 0.1321456036913612, "grad_norm": 2.727113752369044, "learning_rate": 1e-06, "loss": 0.407, "step": 2062 }, { "epoch": 0.13220968982312228, "grad_norm": 2.598448088421263, "learning_rate": 1e-06, "loss": 0.3973, "step": 2063 }, { "epoch": 0.13227377595488338, "grad_norm": 2.808346442547483, "learning_rate": 1e-06, "loss": 0.3907, "step": 2064 }, { "epoch": 0.13233786208664444, "grad_norm": 2.5537562254410817, "learning_rate": 1e-06, "loss": 0.3879, "step": 2065 }, { "epoch": 0.13240194821840554, "grad_norm": 2.507844085434063, "learning_rate": 1e-06, "loss": 0.3912, "step": 2066 }, { "epoch": 0.13246603435016663, "grad_norm": 2.6236845142825556, "learning_rate": 1e-06, "loss": 0.3805, "step": 2067 }, { "epoch": 0.13253012048192772, "grad_norm": 2.5809839489336666, "learning_rate": 1e-06, "loss": 0.391, "step": 2068 }, { "epoch": 0.1325942066136888, "grad_norm": 2.5456272047937096, "learning_rate": 1e-06, "loss": 0.3996, "step": 2069 }, { "epoch": 0.13265829274544988, "grad_norm": 2.7029859623452235, "learning_rate": 1e-06, "loss": 0.3932, "step": 2070 }, { "epoch": 0.13272237887721097, "grad_norm": 3.113893501887302, "learning_rate": 1e-06, "loss": 0.4189, "step": 2071 }, { "epoch": 0.13278646500897207, "grad_norm": 2.895251806197545, "learning_rate": 1e-06, "loss": 0.4129, "step": 2072 }, { "epoch": 0.13285055114073316, "grad_norm": 2.692808938525751, "learning_rate": 1e-06, "loss": 0.3822, "step": 2073 }, { "epoch": 0.13291463727249422, "grad_norm": 2.666108543162362, "learning_rate": 1e-06, "loss": 0.4093, "step": 2074 }, { "epoch": 0.13297872340425532, "grad_norm": 2.628539864622391, "learning_rate": 1e-06, "loss": 0.4195, "step": 2075 }, { "epoch": 0.1330428095360164, "grad_norm": 2.6457888313754854, "learning_rate": 1e-06, "loss": 0.3665, "step": 2076 }, { "epoch": 0.1331068956677775, "grad_norm": 2.73962603052359, "learning_rate": 1e-06, "loss": 0.4013, "step": 2077 }, { "epoch": 0.13317098179953857, "grad_norm": 2.73176395407253, "learning_rate": 1e-06, "loss": 0.4025, "step": 2078 }, { "epoch": 0.13323506793129966, "grad_norm": 2.698908843828074, "learning_rate": 1e-06, "loss": 0.4199, "step": 2079 }, { "epoch": 0.13329915406306075, "grad_norm": 2.717511171326012, "learning_rate": 1e-06, "loss": 0.4055, "step": 2080 }, { "epoch": 0.13336324019482185, "grad_norm": 2.663032680662114, "learning_rate": 1e-06, "loss": 0.4314, "step": 2081 }, { "epoch": 0.13342732632658294, "grad_norm": 2.67227144538236, "learning_rate": 1e-06, "loss": 0.3949, "step": 2082 }, { "epoch": 0.133491412458344, "grad_norm": 2.7192184119233347, "learning_rate": 1e-06, "loss": 0.3971, "step": 2083 }, { "epoch": 0.1335554985901051, "grad_norm": 2.80607421933027, "learning_rate": 1e-06, "loss": 0.442, "step": 2084 }, { "epoch": 0.1336195847218662, "grad_norm": 2.4282833747159365, "learning_rate": 1e-06, "loss": 0.3403, "step": 2085 }, { "epoch": 0.13368367085362728, "grad_norm": 2.779501854115342, "learning_rate": 1e-06, "loss": 0.3793, "step": 2086 }, { "epoch": 0.13374775698538835, "grad_norm": 2.605891767733823, "learning_rate": 1e-06, "loss": 0.4258, "step": 2087 }, { "epoch": 0.13381184311714944, "grad_norm": 2.4367002776811315, "learning_rate": 1e-06, "loss": 0.4342, "step": 2088 }, { "epoch": 0.13387592924891054, "grad_norm": 2.462742753214077, "learning_rate": 1e-06, "loss": 0.408, "step": 2089 }, { "epoch": 0.13394001538067163, "grad_norm": 2.717385353303187, "learning_rate": 1e-06, "loss": 0.3728, "step": 2090 }, { "epoch": 0.13400410151243272, "grad_norm": 2.5506542547042583, "learning_rate": 1e-06, "loss": 0.3751, "step": 2091 }, { "epoch": 0.1340681876441938, "grad_norm": 2.48170579062675, "learning_rate": 1e-06, "loss": 0.3592, "step": 2092 }, { "epoch": 0.13413227377595488, "grad_norm": 2.5466247727125646, "learning_rate": 1e-06, "loss": 0.3816, "step": 2093 }, { "epoch": 0.13419635990771597, "grad_norm": 2.792497217280625, "learning_rate": 1e-06, "loss": 0.4217, "step": 2094 }, { "epoch": 0.13426044603947707, "grad_norm": 2.8605300019220246, "learning_rate": 1e-06, "loss": 0.4964, "step": 2095 }, { "epoch": 0.13432453217123813, "grad_norm": 2.5038991233158363, "learning_rate": 1e-06, "loss": 0.4279, "step": 2096 }, { "epoch": 0.13438861830299922, "grad_norm": 2.868839229735299, "learning_rate": 1e-06, "loss": 0.4488, "step": 2097 }, { "epoch": 0.13445270443476032, "grad_norm": 2.545144883407565, "learning_rate": 1e-06, "loss": 0.4652, "step": 2098 }, { "epoch": 0.1345167905665214, "grad_norm": 2.5425577896010565, "learning_rate": 1e-06, "loss": 0.3585, "step": 2099 }, { "epoch": 0.1345808766982825, "grad_norm": 2.6384130176202962, "learning_rate": 1e-06, "loss": 0.4988, "step": 2100 }, { "epoch": 0.13464496283004357, "grad_norm": 2.6027499158121215, "learning_rate": 1e-06, "loss": 0.4604, "step": 2101 }, { "epoch": 0.13470904896180466, "grad_norm": 2.6048158509628614, "learning_rate": 1e-06, "loss": 0.3841, "step": 2102 }, { "epoch": 0.13477313509356575, "grad_norm": 2.8293034564567248, "learning_rate": 1e-06, "loss": 0.408, "step": 2103 }, { "epoch": 0.13483722122532685, "grad_norm": 2.592521713422449, "learning_rate": 1e-06, "loss": 0.4517, "step": 2104 }, { "epoch": 0.1349013073570879, "grad_norm": 2.6693886764853407, "learning_rate": 1e-06, "loss": 0.3943, "step": 2105 }, { "epoch": 0.134965393488849, "grad_norm": 2.504808703738117, "learning_rate": 1e-06, "loss": 0.3935, "step": 2106 }, { "epoch": 0.1350294796206101, "grad_norm": 2.907032906917195, "learning_rate": 1e-06, "loss": 0.4508, "step": 2107 }, { "epoch": 0.1350935657523712, "grad_norm": 2.679610851400339, "learning_rate": 1e-06, "loss": 0.3949, "step": 2108 }, { "epoch": 0.13515765188413228, "grad_norm": 2.748289961872713, "learning_rate": 1e-06, "loss": 0.4391, "step": 2109 }, { "epoch": 0.13522173801589335, "grad_norm": 2.7234870239625493, "learning_rate": 1e-06, "loss": 0.4582, "step": 2110 }, { "epoch": 0.13528582414765444, "grad_norm": 2.7140754867383023, "learning_rate": 1e-06, "loss": 0.4414, "step": 2111 }, { "epoch": 0.13534991027941554, "grad_norm": 2.6496771127358527, "learning_rate": 1e-06, "loss": 0.4467, "step": 2112 }, { "epoch": 0.13541399641117663, "grad_norm": 2.730252973494939, "learning_rate": 1e-06, "loss": 0.4407, "step": 2113 }, { "epoch": 0.1354780825429377, "grad_norm": 2.655324313984564, "learning_rate": 1e-06, "loss": 0.4708, "step": 2114 }, { "epoch": 0.1355421686746988, "grad_norm": 2.8728419806844454, "learning_rate": 1e-06, "loss": 0.473, "step": 2115 }, { "epoch": 0.13560625480645988, "grad_norm": 2.5073844860924184, "learning_rate": 1e-06, "loss": 0.3807, "step": 2116 }, { "epoch": 0.13567034093822097, "grad_norm": 2.60753256323996, "learning_rate": 1e-06, "loss": 0.4838, "step": 2117 }, { "epoch": 0.13573442706998207, "grad_norm": 2.7082299427616907, "learning_rate": 1e-06, "loss": 0.3978, "step": 2118 }, { "epoch": 0.13579851320174313, "grad_norm": 2.4118401233619253, "learning_rate": 1e-06, "loss": 0.4567, "step": 2119 }, { "epoch": 0.13586259933350422, "grad_norm": 2.4643325105841503, "learning_rate": 1e-06, "loss": 0.4006, "step": 2120 }, { "epoch": 0.13592668546526532, "grad_norm": 2.5275100662919554, "learning_rate": 1e-06, "loss": 0.4766, "step": 2121 }, { "epoch": 0.1359907715970264, "grad_norm": 2.655859438369582, "learning_rate": 1e-06, "loss": 0.4157, "step": 2122 }, { "epoch": 0.1360548577287875, "grad_norm": 2.6427058505139187, "learning_rate": 1e-06, "loss": 0.3924, "step": 2123 }, { "epoch": 0.13611894386054857, "grad_norm": 2.6632577787547, "learning_rate": 1e-06, "loss": 0.4052, "step": 2124 }, { "epoch": 0.13618302999230966, "grad_norm": 2.6287929102425003, "learning_rate": 1e-06, "loss": 0.3918, "step": 2125 }, { "epoch": 0.13624711612407076, "grad_norm": 2.8016371088295475, "learning_rate": 1e-06, "loss": 0.4356, "step": 2126 }, { "epoch": 0.13631120225583185, "grad_norm": 3.3386006555368057, "learning_rate": 1e-06, "loss": 0.3933, "step": 2127 }, { "epoch": 0.1363752883875929, "grad_norm": 2.7097965814412226, "learning_rate": 1e-06, "loss": 0.4021, "step": 2128 }, { "epoch": 0.136439374519354, "grad_norm": 2.7644699066410983, "learning_rate": 1e-06, "loss": 0.4464, "step": 2129 }, { "epoch": 0.1365034606511151, "grad_norm": 2.4771892454096793, "learning_rate": 1e-06, "loss": 0.3746, "step": 2130 }, { "epoch": 0.1365675467828762, "grad_norm": 2.760564997369234, "learning_rate": 1e-06, "loss": 0.3566, "step": 2131 }, { "epoch": 0.13663163291463729, "grad_norm": 2.661760302416408, "learning_rate": 1e-06, "loss": 0.5215, "step": 2132 }, { "epoch": 0.13669571904639835, "grad_norm": 2.623671240870864, "learning_rate": 1e-06, "loss": 0.4042, "step": 2133 }, { "epoch": 0.13675980517815944, "grad_norm": 2.9925947621163673, "learning_rate": 1e-06, "loss": 0.4295, "step": 2134 }, { "epoch": 0.13682389130992054, "grad_norm": 2.7417367216554207, "learning_rate": 1e-06, "loss": 0.4468, "step": 2135 }, { "epoch": 0.13688797744168163, "grad_norm": 2.669990654570271, "learning_rate": 1e-06, "loss": 0.4087, "step": 2136 }, { "epoch": 0.1369520635734427, "grad_norm": 2.8533638654559805, "learning_rate": 1e-06, "loss": 0.4808, "step": 2137 }, { "epoch": 0.1370161497052038, "grad_norm": 2.7643977807163793, "learning_rate": 1e-06, "loss": 0.3754, "step": 2138 }, { "epoch": 0.13708023583696488, "grad_norm": 2.536365532104418, "learning_rate": 1e-06, "loss": 0.3998, "step": 2139 }, { "epoch": 0.13714432196872597, "grad_norm": 2.637414739617339, "learning_rate": 1e-06, "loss": 0.4129, "step": 2140 }, { "epoch": 0.13720840810048707, "grad_norm": 2.5290361793526963, "learning_rate": 1e-06, "loss": 0.4058, "step": 2141 }, { "epoch": 0.13727249423224813, "grad_norm": 2.600362258487816, "learning_rate": 1e-06, "loss": 0.3776, "step": 2142 }, { "epoch": 0.13733658036400923, "grad_norm": 2.5573978268464623, "learning_rate": 1e-06, "loss": 0.3914, "step": 2143 }, { "epoch": 0.13740066649577032, "grad_norm": 2.680943994912968, "learning_rate": 1e-06, "loss": 0.4214, "step": 2144 }, { "epoch": 0.1374647526275314, "grad_norm": 2.6925962306306146, "learning_rate": 1e-06, "loss": 0.4573, "step": 2145 }, { "epoch": 0.13752883875929248, "grad_norm": 2.689427306845986, "learning_rate": 1e-06, "loss": 0.4184, "step": 2146 }, { "epoch": 0.13759292489105357, "grad_norm": 2.7293189543709766, "learning_rate": 1e-06, "loss": 0.3834, "step": 2147 }, { "epoch": 0.13765701102281466, "grad_norm": 2.8777615193581676, "learning_rate": 1e-06, "loss": 0.4442, "step": 2148 }, { "epoch": 0.13772109715457576, "grad_norm": 2.6444638826899634, "learning_rate": 1e-06, "loss": 0.4287, "step": 2149 }, { "epoch": 0.13778518328633685, "grad_norm": 2.7759250640439217, "learning_rate": 1e-06, "loss": 0.3773, "step": 2150 }, { "epoch": 0.13784926941809791, "grad_norm": 2.616339376376333, "learning_rate": 1e-06, "loss": 0.3957, "step": 2151 }, { "epoch": 0.137913355549859, "grad_norm": 2.538884599658828, "learning_rate": 1e-06, "loss": 0.4364, "step": 2152 }, { "epoch": 0.1379774416816201, "grad_norm": 2.75943965508161, "learning_rate": 1e-06, "loss": 0.404, "step": 2153 }, { "epoch": 0.1380415278133812, "grad_norm": 2.5818273891275867, "learning_rate": 1e-06, "loss": 0.455, "step": 2154 }, { "epoch": 0.13810561394514226, "grad_norm": 2.9759449959751176, "learning_rate": 1e-06, "loss": 0.4126, "step": 2155 }, { "epoch": 0.13816970007690335, "grad_norm": 2.595854159746892, "learning_rate": 1e-06, "loss": 0.4279, "step": 2156 }, { "epoch": 0.13823378620866444, "grad_norm": 2.970107649172304, "learning_rate": 1e-06, "loss": 0.3991, "step": 2157 }, { "epoch": 0.13829787234042554, "grad_norm": 2.5620174383001, "learning_rate": 1e-06, "loss": 0.3853, "step": 2158 }, { "epoch": 0.13836195847218663, "grad_norm": 2.682996794741283, "learning_rate": 1e-06, "loss": 0.3926, "step": 2159 }, { "epoch": 0.1384260446039477, "grad_norm": 2.640623080908026, "learning_rate": 1e-06, "loss": 0.4437, "step": 2160 }, { "epoch": 0.1384901307357088, "grad_norm": 2.562450172958601, "learning_rate": 1e-06, "loss": 0.4001, "step": 2161 }, { "epoch": 0.13855421686746988, "grad_norm": 2.625741101049455, "learning_rate": 1e-06, "loss": 0.4063, "step": 2162 }, { "epoch": 0.13861830299923097, "grad_norm": 2.926628566586209, "learning_rate": 1e-06, "loss": 0.4099, "step": 2163 }, { "epoch": 0.13868238913099204, "grad_norm": 2.532631132337974, "learning_rate": 1e-06, "loss": 0.4167, "step": 2164 }, { "epoch": 0.13874647526275313, "grad_norm": 2.41138765437668, "learning_rate": 1e-06, "loss": 0.3966, "step": 2165 }, { "epoch": 0.13881056139451423, "grad_norm": 2.7163462717552247, "learning_rate": 1e-06, "loss": 0.4605, "step": 2166 }, { "epoch": 0.13887464752627532, "grad_norm": 2.7349179779173682, "learning_rate": 1e-06, "loss": 0.4129, "step": 2167 }, { "epoch": 0.1389387336580364, "grad_norm": 2.705844709704702, "learning_rate": 1e-06, "loss": 0.3875, "step": 2168 }, { "epoch": 0.13900281978979748, "grad_norm": 3.077466061475745, "learning_rate": 1e-06, "loss": 0.461, "step": 2169 }, { "epoch": 0.13906690592155857, "grad_norm": 2.7230463846895163, "learning_rate": 1e-06, "loss": 0.4436, "step": 2170 }, { "epoch": 0.13913099205331966, "grad_norm": 2.8623601827737355, "learning_rate": 1e-06, "loss": 0.4651, "step": 2171 }, { "epoch": 0.13919507818508076, "grad_norm": 2.681961203424956, "learning_rate": 1e-06, "loss": 0.3852, "step": 2172 }, { "epoch": 0.13925916431684185, "grad_norm": 2.646391464509139, "learning_rate": 1e-06, "loss": 0.3898, "step": 2173 }, { "epoch": 0.13932325044860291, "grad_norm": 2.7593356672279805, "learning_rate": 1e-06, "loss": 0.3854, "step": 2174 }, { "epoch": 0.139387336580364, "grad_norm": 2.742273416019991, "learning_rate": 1e-06, "loss": 0.4334, "step": 2175 }, { "epoch": 0.1394514227121251, "grad_norm": 2.609949808118975, "learning_rate": 1e-06, "loss": 0.3883, "step": 2176 }, { "epoch": 0.1395155088438862, "grad_norm": 2.688282306305331, "learning_rate": 1e-06, "loss": 0.3954, "step": 2177 }, { "epoch": 0.13957959497564726, "grad_norm": 2.664368149793294, "learning_rate": 1e-06, "loss": 0.3988, "step": 2178 }, { "epoch": 0.13964368110740835, "grad_norm": 2.5754163962200782, "learning_rate": 1e-06, "loss": 0.3572, "step": 2179 }, { "epoch": 0.13970776723916944, "grad_norm": 2.7535770236048647, "learning_rate": 1e-06, "loss": 0.4486, "step": 2180 }, { "epoch": 0.13977185337093054, "grad_norm": 2.5586750005179812, "learning_rate": 1e-06, "loss": 0.4398, "step": 2181 }, { "epoch": 0.13983593950269163, "grad_norm": 2.5801312411637634, "learning_rate": 1e-06, "loss": 0.4103, "step": 2182 }, { "epoch": 0.1399000256344527, "grad_norm": 2.6418380770398233, "learning_rate": 1e-06, "loss": 0.384, "step": 2183 }, { "epoch": 0.1399641117662138, "grad_norm": 2.8406851039525205, "learning_rate": 1e-06, "loss": 0.4637, "step": 2184 }, { "epoch": 0.14002819789797488, "grad_norm": 2.6629292430069524, "learning_rate": 1e-06, "loss": 0.4501, "step": 2185 }, { "epoch": 0.14009228402973598, "grad_norm": 2.654798177824672, "learning_rate": 1e-06, "loss": 0.4054, "step": 2186 }, { "epoch": 0.14015637016149704, "grad_norm": 2.9168291610332195, "learning_rate": 1e-06, "loss": 0.4182, "step": 2187 }, { "epoch": 0.14022045629325813, "grad_norm": 2.600267663410967, "learning_rate": 1e-06, "loss": 0.4414, "step": 2188 }, { "epoch": 0.14028454242501923, "grad_norm": 2.5417581238364906, "learning_rate": 1e-06, "loss": 0.4318, "step": 2189 }, { "epoch": 0.14034862855678032, "grad_norm": 2.454596888462331, "learning_rate": 1e-06, "loss": 0.3628, "step": 2190 }, { "epoch": 0.1404127146885414, "grad_norm": 2.8569961486821867, "learning_rate": 1e-06, "loss": 0.4434, "step": 2191 }, { "epoch": 0.14047680082030248, "grad_norm": 2.8200076888696706, "learning_rate": 1e-06, "loss": 0.3914, "step": 2192 }, { "epoch": 0.14054088695206357, "grad_norm": 2.7538541707394315, "learning_rate": 1e-06, "loss": 0.3932, "step": 2193 }, { "epoch": 0.14060497308382466, "grad_norm": 2.756896816994633, "learning_rate": 1e-06, "loss": 0.3879, "step": 2194 }, { "epoch": 0.14066905921558576, "grad_norm": 2.387845561333487, "learning_rate": 1e-06, "loss": 0.418, "step": 2195 }, { "epoch": 0.14073314534734682, "grad_norm": 2.7006071057783876, "learning_rate": 1e-06, "loss": 0.4469, "step": 2196 }, { "epoch": 0.14079723147910791, "grad_norm": 2.6467935141543086, "learning_rate": 1e-06, "loss": 0.4038, "step": 2197 }, { "epoch": 0.140861317610869, "grad_norm": 2.6198363896498282, "learning_rate": 1e-06, "loss": 0.425, "step": 2198 }, { "epoch": 0.1409254037426301, "grad_norm": 2.5636411622366264, "learning_rate": 1e-06, "loss": 0.4789, "step": 2199 }, { "epoch": 0.1409894898743912, "grad_norm": 2.4631479300062114, "learning_rate": 1e-06, "loss": 0.4166, "step": 2200 }, { "epoch": 0.14105357600615226, "grad_norm": 2.691147262921431, "learning_rate": 1e-06, "loss": 0.4125, "step": 2201 }, { "epoch": 0.14111766213791335, "grad_norm": 2.653751571043374, "learning_rate": 1e-06, "loss": 0.4041, "step": 2202 }, { "epoch": 0.14118174826967445, "grad_norm": 2.457050148938937, "learning_rate": 1e-06, "loss": 0.3652, "step": 2203 }, { "epoch": 0.14124583440143554, "grad_norm": 2.3973561946298103, "learning_rate": 1e-06, "loss": 0.3735, "step": 2204 }, { "epoch": 0.1413099205331966, "grad_norm": 2.7407883027389155, "learning_rate": 1e-06, "loss": 0.4125, "step": 2205 }, { "epoch": 0.1413740066649577, "grad_norm": 2.6688132463232317, "learning_rate": 1e-06, "loss": 0.4421, "step": 2206 }, { "epoch": 0.1414380927967188, "grad_norm": 2.643644919200164, "learning_rate": 1e-06, "loss": 0.3683, "step": 2207 }, { "epoch": 0.14150217892847988, "grad_norm": 2.65080615124718, "learning_rate": 1e-06, "loss": 0.4559, "step": 2208 }, { "epoch": 0.14156626506024098, "grad_norm": 2.776708320652998, "learning_rate": 1e-06, "loss": 0.4148, "step": 2209 }, { "epoch": 0.14163035119200204, "grad_norm": 2.7020700688413513, "learning_rate": 1e-06, "loss": 0.4608, "step": 2210 }, { "epoch": 0.14169443732376313, "grad_norm": 2.4552041930115904, "learning_rate": 1e-06, "loss": 0.391, "step": 2211 }, { "epoch": 0.14175852345552423, "grad_norm": 2.5767945349775854, "learning_rate": 1e-06, "loss": 0.401, "step": 2212 }, { "epoch": 0.14182260958728532, "grad_norm": 2.6559174118536046, "learning_rate": 1e-06, "loss": 0.4242, "step": 2213 }, { "epoch": 0.14188669571904639, "grad_norm": 2.7023876372387985, "learning_rate": 1e-06, "loss": 0.4348, "step": 2214 }, { "epoch": 0.14195078185080748, "grad_norm": 2.610694095494199, "learning_rate": 1e-06, "loss": 0.4084, "step": 2215 }, { "epoch": 0.14201486798256857, "grad_norm": 2.6285159451990623, "learning_rate": 1e-06, "loss": 0.4375, "step": 2216 }, { "epoch": 0.14207895411432966, "grad_norm": 2.640627442850314, "learning_rate": 1e-06, "loss": 0.4541, "step": 2217 }, { "epoch": 0.14214304024609076, "grad_norm": 2.6134124413492756, "learning_rate": 1e-06, "loss": 0.4073, "step": 2218 }, { "epoch": 0.14220712637785182, "grad_norm": 2.6094590815898457, "learning_rate": 1e-06, "loss": 0.4346, "step": 2219 }, { "epoch": 0.14227121250961292, "grad_norm": 2.6490584464866247, "learning_rate": 1e-06, "loss": 0.4433, "step": 2220 }, { "epoch": 0.142335298641374, "grad_norm": 2.5338550549194956, "learning_rate": 1e-06, "loss": 0.4395, "step": 2221 }, { "epoch": 0.1423993847731351, "grad_norm": 2.6873280903762553, "learning_rate": 1e-06, "loss": 0.3746, "step": 2222 }, { "epoch": 0.14246347090489617, "grad_norm": 2.882287571490444, "learning_rate": 1e-06, "loss": 0.4053, "step": 2223 }, { "epoch": 0.14252755703665726, "grad_norm": 2.6621684855736145, "learning_rate": 1e-06, "loss": 0.4098, "step": 2224 }, { "epoch": 0.14259164316841835, "grad_norm": 2.5399516145610406, "learning_rate": 1e-06, "loss": 0.4198, "step": 2225 }, { "epoch": 0.14265572930017945, "grad_norm": 5.304960869663389, "learning_rate": 1e-06, "loss": 0.4114, "step": 2226 }, { "epoch": 0.14271981543194054, "grad_norm": 2.6465131106356563, "learning_rate": 1e-06, "loss": 0.4118, "step": 2227 }, { "epoch": 0.1427839015637016, "grad_norm": 2.823785893975285, "learning_rate": 1e-06, "loss": 0.35, "step": 2228 }, { "epoch": 0.1428479876954627, "grad_norm": 2.6252988135275728, "learning_rate": 1e-06, "loss": 0.4251, "step": 2229 }, { "epoch": 0.1429120738272238, "grad_norm": 2.5715196655427395, "learning_rate": 1e-06, "loss": 0.4214, "step": 2230 }, { "epoch": 0.14297615995898488, "grad_norm": 2.5374478270268215, "learning_rate": 1e-06, "loss": 0.392, "step": 2231 }, { "epoch": 0.14304024609074598, "grad_norm": 2.693764343659205, "learning_rate": 1e-06, "loss": 0.4185, "step": 2232 }, { "epoch": 0.14310433222250704, "grad_norm": 2.5538527593794145, "learning_rate": 1e-06, "loss": 0.4159, "step": 2233 }, { "epoch": 0.14316841835426813, "grad_norm": 2.643995614450662, "learning_rate": 1e-06, "loss": 0.4139, "step": 2234 }, { "epoch": 0.14323250448602923, "grad_norm": 2.525937144073715, "learning_rate": 1e-06, "loss": 0.4341, "step": 2235 }, { "epoch": 0.14329659061779032, "grad_norm": 2.451146215447884, "learning_rate": 1e-06, "loss": 0.4535, "step": 2236 }, { "epoch": 0.14336067674955139, "grad_norm": 2.9775043321883623, "learning_rate": 1e-06, "loss": 0.4192, "step": 2237 }, { "epoch": 0.14342476288131248, "grad_norm": 2.6372943700404314, "learning_rate": 1e-06, "loss": 0.3781, "step": 2238 }, { "epoch": 0.14348884901307357, "grad_norm": 2.681809857926576, "learning_rate": 1e-06, "loss": 0.3501, "step": 2239 }, { "epoch": 0.14355293514483466, "grad_norm": 2.6330684169629888, "learning_rate": 1e-06, "loss": 0.4125, "step": 2240 }, { "epoch": 0.14361702127659576, "grad_norm": 2.5635021280630146, "learning_rate": 1e-06, "loss": 0.4494, "step": 2241 }, { "epoch": 0.14368110740835682, "grad_norm": 2.5420088820932087, "learning_rate": 1e-06, "loss": 0.4034, "step": 2242 }, { "epoch": 0.14374519354011792, "grad_norm": 2.5737910098068064, "learning_rate": 1e-06, "loss": 0.3646, "step": 2243 }, { "epoch": 0.143809279671879, "grad_norm": 2.824372043653846, "learning_rate": 1e-06, "loss": 0.4076, "step": 2244 }, { "epoch": 0.1438733658036401, "grad_norm": 2.4381851437931386, "learning_rate": 1e-06, "loss": 0.377, "step": 2245 }, { "epoch": 0.14393745193540117, "grad_norm": 2.508371697735693, "learning_rate": 1e-06, "loss": 0.4176, "step": 2246 }, { "epoch": 0.14400153806716226, "grad_norm": 2.720177214825913, "learning_rate": 1e-06, "loss": 0.4186, "step": 2247 }, { "epoch": 0.14406562419892335, "grad_norm": 2.6059820662859936, "learning_rate": 1e-06, "loss": 0.4461, "step": 2248 }, { "epoch": 0.14412971033068445, "grad_norm": 2.6734684323623865, "learning_rate": 1e-06, "loss": 0.4172, "step": 2249 }, { "epoch": 0.14419379646244554, "grad_norm": 2.71227587875518, "learning_rate": 1e-06, "loss": 0.3907, "step": 2250 }, { "epoch": 0.1442578825942066, "grad_norm": 2.63605657042964, "learning_rate": 1e-06, "loss": 0.4382, "step": 2251 }, { "epoch": 0.1443219687259677, "grad_norm": 2.596026537683679, "learning_rate": 1e-06, "loss": 0.4052, "step": 2252 }, { "epoch": 0.1443860548577288, "grad_norm": 2.6134149718188047, "learning_rate": 1e-06, "loss": 0.4782, "step": 2253 }, { "epoch": 0.14445014098948988, "grad_norm": 2.692257498204221, "learning_rate": 1e-06, "loss": 0.4078, "step": 2254 }, { "epoch": 0.14451422712125095, "grad_norm": 2.4847848114936006, "learning_rate": 1e-06, "loss": 0.3496, "step": 2255 }, { "epoch": 0.14457831325301204, "grad_norm": 2.642340400044151, "learning_rate": 1e-06, "loss": 0.3692, "step": 2256 }, { "epoch": 0.14464239938477313, "grad_norm": 2.8905906794526293, "learning_rate": 1e-06, "loss": 0.4426, "step": 2257 }, { "epoch": 0.14470648551653423, "grad_norm": 2.6608506377012247, "learning_rate": 1e-06, "loss": 0.408, "step": 2258 }, { "epoch": 0.14477057164829532, "grad_norm": 2.6869403811241064, "learning_rate": 1e-06, "loss": 0.4095, "step": 2259 }, { "epoch": 0.1448346577800564, "grad_norm": 2.512352252639373, "learning_rate": 1e-06, "loss": 0.3989, "step": 2260 }, { "epoch": 0.14489874391181748, "grad_norm": 2.622494628319042, "learning_rate": 1e-06, "loss": 0.4209, "step": 2261 }, { "epoch": 0.14496283004357857, "grad_norm": 2.7770276907918974, "learning_rate": 1e-06, "loss": 0.4578, "step": 2262 }, { "epoch": 0.14502691617533967, "grad_norm": 2.666828543397899, "learning_rate": 1e-06, "loss": 0.3942, "step": 2263 }, { "epoch": 0.14509100230710073, "grad_norm": 2.765447332478381, "learning_rate": 1e-06, "loss": 0.4545, "step": 2264 }, { "epoch": 0.14515508843886182, "grad_norm": 2.4732133303198967, "learning_rate": 1e-06, "loss": 0.4159, "step": 2265 }, { "epoch": 0.14521917457062292, "grad_norm": 2.715075164848874, "learning_rate": 1e-06, "loss": 0.4377, "step": 2266 }, { "epoch": 0.145283260702384, "grad_norm": 2.647220900812757, "learning_rate": 1e-06, "loss": 0.414, "step": 2267 }, { "epoch": 0.1453473468341451, "grad_norm": 2.799960861199678, "learning_rate": 1e-06, "loss": 0.3925, "step": 2268 }, { "epoch": 0.14541143296590617, "grad_norm": 2.4458853797244573, "learning_rate": 1e-06, "loss": 0.4423, "step": 2269 }, { "epoch": 0.14547551909766726, "grad_norm": 2.6759906456774503, "learning_rate": 1e-06, "loss": 0.4692, "step": 2270 }, { "epoch": 0.14553960522942835, "grad_norm": 2.570308914424076, "learning_rate": 1e-06, "loss": 0.3719, "step": 2271 }, { "epoch": 0.14560369136118945, "grad_norm": 2.4318165071673215, "learning_rate": 1e-06, "loss": 0.3948, "step": 2272 }, { "epoch": 0.1456677774929505, "grad_norm": 2.747814037626672, "learning_rate": 1e-06, "loss": 0.4243, "step": 2273 }, { "epoch": 0.1457318636247116, "grad_norm": 2.6257289323903743, "learning_rate": 1e-06, "loss": 0.4053, "step": 2274 }, { "epoch": 0.1457959497564727, "grad_norm": 2.4752797780084985, "learning_rate": 1e-06, "loss": 0.4473, "step": 2275 }, { "epoch": 0.1458600358882338, "grad_norm": 2.3987218490349473, "learning_rate": 1e-06, "loss": 0.3736, "step": 2276 }, { "epoch": 0.14592412201999488, "grad_norm": 2.8867281314680997, "learning_rate": 1e-06, "loss": 0.3696, "step": 2277 }, { "epoch": 0.14598820815175595, "grad_norm": 2.7716528014364146, "learning_rate": 1e-06, "loss": 0.3906, "step": 2278 }, { "epoch": 0.14605229428351704, "grad_norm": 2.8974582112907363, "learning_rate": 1e-06, "loss": 0.421, "step": 2279 }, { "epoch": 0.14611638041527814, "grad_norm": 2.880131925690081, "learning_rate": 1e-06, "loss": 0.4175, "step": 2280 }, { "epoch": 0.14618046654703923, "grad_norm": 2.8585496372328723, "learning_rate": 1e-06, "loss": 0.3892, "step": 2281 }, { "epoch": 0.14624455267880032, "grad_norm": 2.4339756990712744, "learning_rate": 1e-06, "loss": 0.4184, "step": 2282 }, { "epoch": 0.1463086388105614, "grad_norm": 2.5992029115648316, "learning_rate": 1e-06, "loss": 0.3717, "step": 2283 }, { "epoch": 0.14637272494232248, "grad_norm": 2.797688498581479, "learning_rate": 1e-06, "loss": 0.4105, "step": 2284 }, { "epoch": 0.14643681107408357, "grad_norm": 2.4833588908902557, "learning_rate": 1e-06, "loss": 0.3732, "step": 2285 }, { "epoch": 0.14650089720584467, "grad_norm": 2.58446876123646, "learning_rate": 1e-06, "loss": 0.3839, "step": 2286 }, { "epoch": 0.14656498333760573, "grad_norm": 2.606881969638494, "learning_rate": 1e-06, "loss": 0.4249, "step": 2287 }, { "epoch": 0.14662906946936682, "grad_norm": 2.6604866091293475, "learning_rate": 1e-06, "loss": 0.4077, "step": 2288 }, { "epoch": 0.14669315560112792, "grad_norm": 2.6989920957953246, "learning_rate": 1e-06, "loss": 0.4315, "step": 2289 }, { "epoch": 0.146757241732889, "grad_norm": 2.725493235912062, "learning_rate": 1e-06, "loss": 0.3907, "step": 2290 }, { "epoch": 0.1468213278646501, "grad_norm": 2.69132660874459, "learning_rate": 1e-06, "loss": 0.3964, "step": 2291 }, { "epoch": 0.14688541399641117, "grad_norm": 2.610103572519288, "learning_rate": 1e-06, "loss": 0.4651, "step": 2292 }, { "epoch": 0.14694950012817226, "grad_norm": 2.4991515605638455, "learning_rate": 1e-06, "loss": 0.4226, "step": 2293 }, { "epoch": 0.14701358625993335, "grad_norm": 2.538974285721927, "learning_rate": 1e-06, "loss": 0.4055, "step": 2294 }, { "epoch": 0.14707767239169445, "grad_norm": 2.8066736531056464, "learning_rate": 1e-06, "loss": 0.387, "step": 2295 }, { "epoch": 0.1471417585234555, "grad_norm": 2.855173144029738, "learning_rate": 1e-06, "loss": 0.4252, "step": 2296 }, { "epoch": 0.1472058446552166, "grad_norm": 2.8441153351557293, "learning_rate": 1e-06, "loss": 0.3698, "step": 2297 }, { "epoch": 0.1472699307869777, "grad_norm": 2.6372999046635357, "learning_rate": 1e-06, "loss": 0.4374, "step": 2298 }, { "epoch": 0.1473340169187388, "grad_norm": 2.561119349595809, "learning_rate": 1e-06, "loss": 0.4233, "step": 2299 }, { "epoch": 0.14739810305049988, "grad_norm": 2.6508050362160485, "learning_rate": 1e-06, "loss": 0.4292, "step": 2300 }, { "epoch": 0.14746218918226095, "grad_norm": 2.627610268613044, "learning_rate": 1e-06, "loss": 0.419, "step": 2301 }, { "epoch": 0.14752627531402204, "grad_norm": 2.811581016043246, "learning_rate": 1e-06, "loss": 0.4672, "step": 2302 }, { "epoch": 0.14759036144578314, "grad_norm": 2.8176789877873993, "learning_rate": 1e-06, "loss": 0.44, "step": 2303 }, { "epoch": 0.14765444757754423, "grad_norm": 2.6136882447196848, "learning_rate": 1e-06, "loss": 0.4411, "step": 2304 }, { "epoch": 0.1477185337093053, "grad_norm": 2.631618330649024, "learning_rate": 1e-06, "loss": 0.4069, "step": 2305 }, { "epoch": 0.1477826198410664, "grad_norm": 2.8307020910722445, "learning_rate": 1e-06, "loss": 0.445, "step": 2306 }, { "epoch": 0.14784670597282748, "grad_norm": 2.859794204166223, "learning_rate": 1e-06, "loss": 0.3921, "step": 2307 }, { "epoch": 0.14791079210458857, "grad_norm": 3.029930250277341, "learning_rate": 1e-06, "loss": 0.3989, "step": 2308 }, { "epoch": 0.14797487823634967, "grad_norm": 2.769615320491857, "learning_rate": 1e-06, "loss": 0.467, "step": 2309 }, { "epoch": 0.14803896436811073, "grad_norm": 2.8163409163128414, "learning_rate": 1e-06, "loss": 0.4292, "step": 2310 }, { "epoch": 0.14810305049987182, "grad_norm": 2.9181548672688438, "learning_rate": 1e-06, "loss": 0.398, "step": 2311 }, { "epoch": 0.14816713663163292, "grad_norm": 2.515786063454079, "learning_rate": 1e-06, "loss": 0.396, "step": 2312 }, { "epoch": 0.148231222763394, "grad_norm": 2.6501859655841136, "learning_rate": 1e-06, "loss": 0.3726, "step": 2313 }, { "epoch": 0.14829530889515508, "grad_norm": 2.727336603534406, "learning_rate": 1e-06, "loss": 0.3959, "step": 2314 }, { "epoch": 0.14835939502691617, "grad_norm": 2.7322829607818786, "learning_rate": 1e-06, "loss": 0.3931, "step": 2315 }, { "epoch": 0.14842348115867726, "grad_norm": 2.4220749790430496, "learning_rate": 1e-06, "loss": 0.4277, "step": 2316 }, { "epoch": 0.14848756729043835, "grad_norm": 2.6338981053065598, "learning_rate": 1e-06, "loss": 0.3565, "step": 2317 }, { "epoch": 0.14855165342219945, "grad_norm": 2.773079409430958, "learning_rate": 1e-06, "loss": 0.4168, "step": 2318 }, { "epoch": 0.1486157395539605, "grad_norm": 2.5455067815807704, "learning_rate": 1e-06, "loss": 0.4192, "step": 2319 }, { "epoch": 0.1486798256857216, "grad_norm": 2.5151484665388555, "learning_rate": 1e-06, "loss": 0.3734, "step": 2320 }, { "epoch": 0.1487439118174827, "grad_norm": 2.6818970061144043, "learning_rate": 1e-06, "loss": 0.4192, "step": 2321 }, { "epoch": 0.1488079979492438, "grad_norm": 2.831179594461404, "learning_rate": 1e-06, "loss": 0.4075, "step": 2322 }, { "epoch": 0.14887208408100486, "grad_norm": 2.8459896863335987, "learning_rate": 1e-06, "loss": 0.4173, "step": 2323 }, { "epoch": 0.14893617021276595, "grad_norm": 2.393045917617131, "learning_rate": 1e-06, "loss": 0.3683, "step": 2324 }, { "epoch": 0.14900025634452704, "grad_norm": 2.592468826362506, "learning_rate": 1e-06, "loss": 0.448, "step": 2325 }, { "epoch": 0.14906434247628814, "grad_norm": 2.5695432484816036, "learning_rate": 1e-06, "loss": 0.4202, "step": 2326 }, { "epoch": 0.14912842860804923, "grad_norm": 2.7467870217923966, "learning_rate": 1e-06, "loss": 0.3948, "step": 2327 }, { "epoch": 0.1491925147398103, "grad_norm": 2.7219181762815765, "learning_rate": 1e-06, "loss": 0.3861, "step": 2328 }, { "epoch": 0.1492566008715714, "grad_norm": 2.6606309439094575, "learning_rate": 1e-06, "loss": 0.3892, "step": 2329 }, { "epoch": 0.14932068700333248, "grad_norm": 2.4477701980906903, "learning_rate": 1e-06, "loss": 0.3845, "step": 2330 }, { "epoch": 0.14938477313509357, "grad_norm": 2.5710900386400555, "learning_rate": 1e-06, "loss": 0.4313, "step": 2331 }, { "epoch": 0.14944885926685464, "grad_norm": 2.9202947763246847, "learning_rate": 1e-06, "loss": 0.4593, "step": 2332 }, { "epoch": 0.14951294539861573, "grad_norm": 2.8278221339598812, "learning_rate": 1e-06, "loss": 0.4257, "step": 2333 }, { "epoch": 0.14957703153037682, "grad_norm": 2.8006536711305854, "learning_rate": 1e-06, "loss": 0.3817, "step": 2334 }, { "epoch": 0.14964111766213792, "grad_norm": 2.5025824047107026, "learning_rate": 1e-06, "loss": 0.4401, "step": 2335 }, { "epoch": 0.149705203793899, "grad_norm": 2.647992832106811, "learning_rate": 1e-06, "loss": 0.3943, "step": 2336 }, { "epoch": 0.14976928992566008, "grad_norm": 2.534047410212519, "learning_rate": 1e-06, "loss": 0.4123, "step": 2337 }, { "epoch": 0.14983337605742117, "grad_norm": 2.544201360530356, "learning_rate": 1e-06, "loss": 0.4222, "step": 2338 }, { "epoch": 0.14989746218918226, "grad_norm": 2.598334654859618, "learning_rate": 1e-06, "loss": 0.4778, "step": 2339 }, { "epoch": 0.14996154832094336, "grad_norm": 2.6037963285409806, "learning_rate": 1e-06, "loss": 0.4313, "step": 2340 }, { "epoch": 0.15002563445270445, "grad_norm": 2.5511618288184152, "learning_rate": 1e-06, "loss": 0.4308, "step": 2341 }, { "epoch": 0.1500897205844655, "grad_norm": 2.596673832687979, "learning_rate": 1e-06, "loss": 0.392, "step": 2342 }, { "epoch": 0.1501538067162266, "grad_norm": 2.5935959167733493, "learning_rate": 1e-06, "loss": 0.3799, "step": 2343 }, { "epoch": 0.1502178928479877, "grad_norm": 2.8157616888072776, "learning_rate": 1e-06, "loss": 0.3875, "step": 2344 }, { "epoch": 0.1502819789797488, "grad_norm": 2.7239095516119964, "learning_rate": 1e-06, "loss": 0.3579, "step": 2345 }, { "epoch": 0.15034606511150986, "grad_norm": 2.8732628594396914, "learning_rate": 1e-06, "loss": 0.4426, "step": 2346 }, { "epoch": 0.15041015124327095, "grad_norm": 2.6053261102088596, "learning_rate": 1e-06, "loss": 0.4361, "step": 2347 }, { "epoch": 0.15047423737503204, "grad_norm": 2.7501538950837006, "learning_rate": 1e-06, "loss": 0.4225, "step": 2348 }, { "epoch": 0.15053832350679314, "grad_norm": 2.6748933692579016, "learning_rate": 1e-06, "loss": 0.4139, "step": 2349 }, { "epoch": 0.15060240963855423, "grad_norm": 2.5140175391129467, "learning_rate": 1e-06, "loss": 0.396, "step": 2350 }, { "epoch": 0.1506664957703153, "grad_norm": 2.51220473120615, "learning_rate": 1e-06, "loss": 0.4299, "step": 2351 }, { "epoch": 0.1507305819020764, "grad_norm": 2.5533477749431985, "learning_rate": 1e-06, "loss": 0.4048, "step": 2352 }, { "epoch": 0.15079466803383748, "grad_norm": 2.7622224664376276, "learning_rate": 1e-06, "loss": 0.3982, "step": 2353 }, { "epoch": 0.15085875416559857, "grad_norm": 2.745535809237263, "learning_rate": 1e-06, "loss": 0.4543, "step": 2354 }, { "epoch": 0.15092284029735964, "grad_norm": 2.7300386583191885, "learning_rate": 1e-06, "loss": 0.4357, "step": 2355 }, { "epoch": 0.15098692642912073, "grad_norm": 2.7017399198890613, "learning_rate": 1e-06, "loss": 0.3976, "step": 2356 }, { "epoch": 0.15105101256088183, "grad_norm": 2.641428987366016, "learning_rate": 1e-06, "loss": 0.4643, "step": 2357 }, { "epoch": 0.15111509869264292, "grad_norm": 2.6569814357613186, "learning_rate": 1e-06, "loss": 0.4498, "step": 2358 }, { "epoch": 0.151179184824404, "grad_norm": 2.5401208849561416, "learning_rate": 1e-06, "loss": 0.4235, "step": 2359 }, { "epoch": 0.15124327095616508, "grad_norm": 2.5054149135493136, "learning_rate": 1e-06, "loss": 0.4279, "step": 2360 }, { "epoch": 0.15130735708792617, "grad_norm": 2.6694292857092905, "learning_rate": 1e-06, "loss": 0.4528, "step": 2361 }, { "epoch": 0.15137144321968726, "grad_norm": 2.5114041686713646, "learning_rate": 1e-06, "loss": 0.4483, "step": 2362 }, { "epoch": 0.15143552935144836, "grad_norm": 2.6480189800249803, "learning_rate": 1e-06, "loss": 0.4174, "step": 2363 }, { "epoch": 0.15149961548320942, "grad_norm": 2.4534126730482573, "learning_rate": 1e-06, "loss": 0.3561, "step": 2364 }, { "epoch": 0.15156370161497051, "grad_norm": 2.6671292785945866, "learning_rate": 1e-06, "loss": 0.4201, "step": 2365 }, { "epoch": 0.1516277877467316, "grad_norm": 2.726065605093534, "learning_rate": 1e-06, "loss": 0.4679, "step": 2366 }, { "epoch": 0.1516918738784927, "grad_norm": 2.545323534174155, "learning_rate": 1e-06, "loss": 0.401, "step": 2367 }, { "epoch": 0.1517559600102538, "grad_norm": 2.791402105635689, "learning_rate": 1e-06, "loss": 0.3948, "step": 2368 }, { "epoch": 0.15182004614201486, "grad_norm": 2.6468669705500565, "learning_rate": 1e-06, "loss": 0.4129, "step": 2369 }, { "epoch": 0.15188413227377595, "grad_norm": 2.5476833598403528, "learning_rate": 1e-06, "loss": 0.4278, "step": 2370 }, { "epoch": 0.15194821840553704, "grad_norm": 2.506687865478058, "learning_rate": 1e-06, "loss": 0.3836, "step": 2371 }, { "epoch": 0.15201230453729814, "grad_norm": 2.750669796319302, "learning_rate": 1e-06, "loss": 0.4487, "step": 2372 }, { "epoch": 0.1520763906690592, "grad_norm": 2.6545688375212264, "learning_rate": 1e-06, "loss": 0.4, "step": 2373 }, { "epoch": 0.1521404768008203, "grad_norm": 2.5413046028139235, "learning_rate": 1e-06, "loss": 0.4104, "step": 2374 }, { "epoch": 0.1522045629325814, "grad_norm": 2.554444200234787, "learning_rate": 1e-06, "loss": 0.395, "step": 2375 }, { "epoch": 0.15226864906434248, "grad_norm": 2.579414741406172, "learning_rate": 1e-06, "loss": 0.3627, "step": 2376 }, { "epoch": 0.15233273519610357, "grad_norm": 2.6671368075780446, "learning_rate": 1e-06, "loss": 0.4369, "step": 2377 }, { "epoch": 0.15239682132786464, "grad_norm": 2.6810079593267346, "learning_rate": 1e-06, "loss": 0.4162, "step": 2378 }, { "epoch": 0.15246090745962573, "grad_norm": 2.683487202560093, "learning_rate": 1e-06, "loss": 0.4557, "step": 2379 }, { "epoch": 0.15252499359138683, "grad_norm": 2.589917499641482, "learning_rate": 1e-06, "loss": 0.3939, "step": 2380 }, { "epoch": 0.15258907972314792, "grad_norm": 2.8205977099230477, "learning_rate": 1e-06, "loss": 0.4398, "step": 2381 }, { "epoch": 0.15265316585490898, "grad_norm": 2.592663049668745, "learning_rate": 1e-06, "loss": 0.3787, "step": 2382 }, { "epoch": 0.15271725198667008, "grad_norm": 2.5460240252707265, "learning_rate": 1e-06, "loss": 0.4179, "step": 2383 }, { "epoch": 0.15278133811843117, "grad_norm": 2.5645214681159696, "learning_rate": 1e-06, "loss": 0.3777, "step": 2384 }, { "epoch": 0.15284542425019226, "grad_norm": 2.7015014505643604, "learning_rate": 1e-06, "loss": 0.4084, "step": 2385 }, { "epoch": 0.15290951038195336, "grad_norm": 2.6132337605579186, "learning_rate": 1e-06, "loss": 0.3922, "step": 2386 }, { "epoch": 0.15297359651371442, "grad_norm": 2.565897987852246, "learning_rate": 1e-06, "loss": 0.4156, "step": 2387 }, { "epoch": 0.15303768264547551, "grad_norm": 2.5809983715596645, "learning_rate": 1e-06, "loss": 0.3894, "step": 2388 }, { "epoch": 0.1531017687772366, "grad_norm": 2.5560680323288376, "learning_rate": 1e-06, "loss": 0.3725, "step": 2389 }, { "epoch": 0.1531658549089977, "grad_norm": 2.6934001340487073, "learning_rate": 1e-06, "loss": 0.4946, "step": 2390 }, { "epoch": 0.1532299410407588, "grad_norm": 2.4350422689075937, "learning_rate": 1e-06, "loss": 0.3635, "step": 2391 }, { "epoch": 0.15329402717251986, "grad_norm": 2.5978625164002436, "learning_rate": 1e-06, "loss": 0.3755, "step": 2392 }, { "epoch": 0.15335811330428095, "grad_norm": 2.7060153807077065, "learning_rate": 1e-06, "loss": 0.3844, "step": 2393 }, { "epoch": 0.15342219943604204, "grad_norm": 2.5977471758004533, "learning_rate": 1e-06, "loss": 0.398, "step": 2394 }, { "epoch": 0.15348628556780314, "grad_norm": 2.8192146974315007, "learning_rate": 1e-06, "loss": 0.4224, "step": 2395 }, { "epoch": 0.1535503716995642, "grad_norm": 2.8507772617205984, "learning_rate": 1e-06, "loss": 0.4593, "step": 2396 }, { "epoch": 0.1536144578313253, "grad_norm": 2.757912230882947, "learning_rate": 1e-06, "loss": 0.376, "step": 2397 }, { "epoch": 0.1536785439630864, "grad_norm": 2.914634273117128, "learning_rate": 1e-06, "loss": 0.3884, "step": 2398 }, { "epoch": 0.15374263009484748, "grad_norm": 2.7028190039958164, "learning_rate": 1e-06, "loss": 0.429, "step": 2399 }, { "epoch": 0.15380671622660858, "grad_norm": 2.805975220284593, "learning_rate": 1e-06, "loss": 0.4336, "step": 2400 }, { "epoch": 0.15387080235836964, "grad_norm": 2.612350568348285, "learning_rate": 1e-06, "loss": 0.3692, "step": 2401 }, { "epoch": 0.15393488849013073, "grad_norm": 2.6235487697848012, "learning_rate": 1e-06, "loss": 0.3941, "step": 2402 }, { "epoch": 0.15399897462189183, "grad_norm": 2.5075163297958416, "learning_rate": 1e-06, "loss": 0.443, "step": 2403 }, { "epoch": 0.15406306075365292, "grad_norm": 2.5941254002232106, "learning_rate": 1e-06, "loss": 0.4505, "step": 2404 }, { "epoch": 0.15412714688541398, "grad_norm": 2.646111568843153, "learning_rate": 1e-06, "loss": 0.4259, "step": 2405 }, { "epoch": 0.15419123301717508, "grad_norm": 2.627844708101551, "learning_rate": 1e-06, "loss": 0.4292, "step": 2406 }, { "epoch": 0.15425531914893617, "grad_norm": 2.6155267950712315, "learning_rate": 1e-06, "loss": 0.3833, "step": 2407 }, { "epoch": 0.15431940528069726, "grad_norm": 2.80650626799486, "learning_rate": 1e-06, "loss": 0.4245, "step": 2408 }, { "epoch": 0.15438349141245836, "grad_norm": 2.61101795676931, "learning_rate": 1e-06, "loss": 0.359, "step": 2409 }, { "epoch": 0.15444757754421942, "grad_norm": 2.707694683825272, "learning_rate": 1e-06, "loss": 0.4255, "step": 2410 }, { "epoch": 0.15451166367598052, "grad_norm": 2.5831471926173624, "learning_rate": 1e-06, "loss": 0.4173, "step": 2411 }, { "epoch": 0.1545757498077416, "grad_norm": 2.6242498641680334, "learning_rate": 1e-06, "loss": 0.448, "step": 2412 }, { "epoch": 0.1546398359395027, "grad_norm": 2.72655226953322, "learning_rate": 1e-06, "loss": 0.4317, "step": 2413 }, { "epoch": 0.15470392207126377, "grad_norm": 2.7383815110827583, "learning_rate": 1e-06, "loss": 0.4047, "step": 2414 }, { "epoch": 0.15476800820302486, "grad_norm": 2.5026739932341155, "learning_rate": 1e-06, "loss": 0.3867, "step": 2415 }, { "epoch": 0.15483209433478595, "grad_norm": 2.674607203930584, "learning_rate": 1e-06, "loss": 0.3793, "step": 2416 }, { "epoch": 0.15489618046654705, "grad_norm": 2.754064762987339, "learning_rate": 1e-06, "loss": 0.4281, "step": 2417 }, { "epoch": 0.15496026659830814, "grad_norm": 2.570533524925514, "learning_rate": 1e-06, "loss": 0.4598, "step": 2418 }, { "epoch": 0.1550243527300692, "grad_norm": 2.597622703572672, "learning_rate": 1e-06, "loss": 0.371, "step": 2419 }, { "epoch": 0.1550884388618303, "grad_norm": 2.8288481023355776, "learning_rate": 1e-06, "loss": 0.4526, "step": 2420 }, { "epoch": 0.1551525249935914, "grad_norm": 2.4192969540719527, "learning_rate": 1e-06, "loss": 0.4239, "step": 2421 }, { "epoch": 0.15521661112535248, "grad_norm": 2.570303124194049, "learning_rate": 1e-06, "loss": 0.4859, "step": 2422 }, { "epoch": 0.15528069725711355, "grad_norm": 2.5962623992860463, "learning_rate": 1e-06, "loss": 0.414, "step": 2423 }, { "epoch": 0.15534478338887464, "grad_norm": 2.6407161187844586, "learning_rate": 1e-06, "loss": 0.4539, "step": 2424 }, { "epoch": 0.15540886952063573, "grad_norm": 2.7032853850977983, "learning_rate": 1e-06, "loss": 0.4852, "step": 2425 }, { "epoch": 0.15547295565239683, "grad_norm": 2.6186822936277574, "learning_rate": 1e-06, "loss": 0.4252, "step": 2426 }, { "epoch": 0.15553704178415792, "grad_norm": 2.7692792260713768, "learning_rate": 1e-06, "loss": 0.4144, "step": 2427 }, { "epoch": 0.15560112791591899, "grad_norm": 2.819212145403548, "learning_rate": 1e-06, "loss": 0.3948, "step": 2428 }, { "epoch": 0.15566521404768008, "grad_norm": 2.7820635144861376, "learning_rate": 1e-06, "loss": 0.4357, "step": 2429 }, { "epoch": 0.15572930017944117, "grad_norm": 2.540855770044114, "learning_rate": 1e-06, "loss": 0.3847, "step": 2430 }, { "epoch": 0.15579338631120226, "grad_norm": 2.5881439472756202, "learning_rate": 1e-06, "loss": 0.3905, "step": 2431 }, { "epoch": 0.15585747244296333, "grad_norm": 2.628112799287706, "learning_rate": 1e-06, "loss": 0.4897, "step": 2432 }, { "epoch": 0.15592155857472442, "grad_norm": 2.8470469351320666, "learning_rate": 1e-06, "loss": 0.4372, "step": 2433 }, { "epoch": 0.15598564470648552, "grad_norm": 2.959718617534213, "learning_rate": 1e-06, "loss": 0.3556, "step": 2434 }, { "epoch": 0.1560497308382466, "grad_norm": 2.63349545605712, "learning_rate": 1e-06, "loss": 0.3966, "step": 2435 }, { "epoch": 0.1561138169700077, "grad_norm": 2.6920193790596687, "learning_rate": 1e-06, "loss": 0.4002, "step": 2436 }, { "epoch": 0.15617790310176877, "grad_norm": 2.5515125234377916, "learning_rate": 1e-06, "loss": 0.4704, "step": 2437 }, { "epoch": 0.15624198923352986, "grad_norm": 2.7845164985771738, "learning_rate": 1e-06, "loss": 0.3741, "step": 2438 }, { "epoch": 0.15630607536529095, "grad_norm": 2.7051788981886715, "learning_rate": 1e-06, "loss": 0.4248, "step": 2439 }, { "epoch": 0.15637016149705205, "grad_norm": 2.5027319502036103, "learning_rate": 1e-06, "loss": 0.4275, "step": 2440 }, { "epoch": 0.1564342476288131, "grad_norm": 2.67872865736227, "learning_rate": 1e-06, "loss": 0.3707, "step": 2441 }, { "epoch": 0.1564983337605742, "grad_norm": 2.6234605610607797, "learning_rate": 1e-06, "loss": 0.4122, "step": 2442 }, { "epoch": 0.1565624198923353, "grad_norm": 2.660891907681123, "learning_rate": 1e-06, "loss": 0.4079, "step": 2443 }, { "epoch": 0.1566265060240964, "grad_norm": 2.469269141511082, "learning_rate": 1e-06, "loss": 0.3787, "step": 2444 }, { "epoch": 0.15669059215585748, "grad_norm": 2.9128965615675084, "learning_rate": 1e-06, "loss": 0.4011, "step": 2445 }, { "epoch": 0.15675467828761855, "grad_norm": 2.3634946944799338, "learning_rate": 1e-06, "loss": 0.4086, "step": 2446 }, { "epoch": 0.15681876441937964, "grad_norm": 2.487803849131197, "learning_rate": 1e-06, "loss": 0.381, "step": 2447 }, { "epoch": 0.15688285055114073, "grad_norm": 2.73280436377226, "learning_rate": 1e-06, "loss": 0.4247, "step": 2448 }, { "epoch": 0.15694693668290183, "grad_norm": 2.645994090911685, "learning_rate": 1e-06, "loss": 0.4245, "step": 2449 }, { "epoch": 0.15701102281466292, "grad_norm": 2.7045428079749887, "learning_rate": 1e-06, "loss": 0.3821, "step": 2450 }, { "epoch": 0.15707510894642399, "grad_norm": 2.6721213176800283, "learning_rate": 1e-06, "loss": 0.3749, "step": 2451 }, { "epoch": 0.15713919507818508, "grad_norm": 2.5446333446323575, "learning_rate": 1e-06, "loss": 0.4501, "step": 2452 }, { "epoch": 0.15720328120994617, "grad_norm": 2.7025878221632214, "learning_rate": 1e-06, "loss": 0.4365, "step": 2453 }, { "epoch": 0.15726736734170726, "grad_norm": 2.4377600454198673, "learning_rate": 1e-06, "loss": 0.393, "step": 2454 }, { "epoch": 0.15733145347346833, "grad_norm": 2.3395968765113704, "learning_rate": 1e-06, "loss": 0.4026, "step": 2455 }, { "epoch": 0.15739553960522942, "grad_norm": 2.5508080145151895, "learning_rate": 1e-06, "loss": 0.4117, "step": 2456 }, { "epoch": 0.15745962573699052, "grad_norm": 2.9131013972018893, "learning_rate": 1e-06, "loss": 0.406, "step": 2457 }, { "epoch": 0.1575237118687516, "grad_norm": 2.6926586987036005, "learning_rate": 1e-06, "loss": 0.383, "step": 2458 }, { "epoch": 0.1575877980005127, "grad_norm": 2.661373242133559, "learning_rate": 1e-06, "loss": 0.4544, "step": 2459 }, { "epoch": 0.15765188413227377, "grad_norm": 2.865019871633759, "learning_rate": 1e-06, "loss": 0.3793, "step": 2460 }, { "epoch": 0.15771597026403486, "grad_norm": 2.7673917992905857, "learning_rate": 1e-06, "loss": 0.3693, "step": 2461 }, { "epoch": 0.15778005639579595, "grad_norm": 2.6496428816256183, "learning_rate": 1e-06, "loss": 0.4039, "step": 2462 }, { "epoch": 0.15784414252755705, "grad_norm": 2.5833209913336788, "learning_rate": 1e-06, "loss": 0.3253, "step": 2463 }, { "epoch": 0.1579082286593181, "grad_norm": 2.9927054051853594, "learning_rate": 1e-06, "loss": 0.4455, "step": 2464 }, { "epoch": 0.1579723147910792, "grad_norm": 2.688891946243902, "learning_rate": 1e-06, "loss": 0.4572, "step": 2465 }, { "epoch": 0.1580364009228403, "grad_norm": 2.8014701175146275, "learning_rate": 1e-06, "loss": 0.4, "step": 2466 }, { "epoch": 0.1581004870546014, "grad_norm": 2.6209516151437016, "learning_rate": 1e-06, "loss": 0.4293, "step": 2467 }, { "epoch": 0.15816457318636248, "grad_norm": 2.6968176829286077, "learning_rate": 1e-06, "loss": 0.4418, "step": 2468 }, { "epoch": 0.15822865931812355, "grad_norm": 2.716996681830155, "learning_rate": 1e-06, "loss": 0.4425, "step": 2469 }, { "epoch": 0.15829274544988464, "grad_norm": 2.7238337604748373, "learning_rate": 1e-06, "loss": 0.4307, "step": 2470 }, { "epoch": 0.15835683158164574, "grad_norm": 2.739047909106007, "learning_rate": 1e-06, "loss": 0.4299, "step": 2471 }, { "epoch": 0.15842091771340683, "grad_norm": 2.526469438645234, "learning_rate": 1e-06, "loss": 0.4244, "step": 2472 }, { "epoch": 0.1584850038451679, "grad_norm": 2.6278675097925763, "learning_rate": 1e-06, "loss": 0.4373, "step": 2473 }, { "epoch": 0.158549089976929, "grad_norm": 2.5826759075688055, "learning_rate": 1e-06, "loss": 0.4258, "step": 2474 }, { "epoch": 0.15861317610869008, "grad_norm": 2.77590367710701, "learning_rate": 1e-06, "loss": 0.4196, "step": 2475 }, { "epoch": 0.15867726224045117, "grad_norm": 2.7835205766483893, "learning_rate": 1e-06, "loss": 0.3705, "step": 2476 }, { "epoch": 0.15874134837221227, "grad_norm": 2.863480380954405, "learning_rate": 1e-06, "loss": 0.4371, "step": 2477 }, { "epoch": 0.15880543450397333, "grad_norm": 2.8855879891557374, "learning_rate": 1e-06, "loss": 0.4355, "step": 2478 }, { "epoch": 0.15886952063573442, "grad_norm": 2.721920994882, "learning_rate": 1e-06, "loss": 0.4384, "step": 2479 }, { "epoch": 0.15893360676749552, "grad_norm": 2.6621503936105686, "learning_rate": 1e-06, "loss": 0.4144, "step": 2480 }, { "epoch": 0.1589976928992566, "grad_norm": 2.625862393236496, "learning_rate": 1e-06, "loss": 0.4072, "step": 2481 }, { "epoch": 0.15906177903101767, "grad_norm": 2.698437048521841, "learning_rate": 1e-06, "loss": 0.4212, "step": 2482 }, { "epoch": 0.15912586516277877, "grad_norm": 2.747135808007895, "learning_rate": 1e-06, "loss": 0.3773, "step": 2483 }, { "epoch": 0.15918995129453986, "grad_norm": 2.6924238591324436, "learning_rate": 1e-06, "loss": 0.4419, "step": 2484 }, { "epoch": 0.15925403742630095, "grad_norm": 2.864874679432206, "learning_rate": 1e-06, "loss": 0.4116, "step": 2485 }, { "epoch": 0.15931812355806205, "grad_norm": 2.584090760115717, "learning_rate": 1e-06, "loss": 0.4204, "step": 2486 }, { "epoch": 0.1593822096898231, "grad_norm": 2.3501320571795565, "learning_rate": 1e-06, "loss": 0.4432, "step": 2487 }, { "epoch": 0.1594462958215842, "grad_norm": 2.62447790662663, "learning_rate": 1e-06, "loss": 0.3803, "step": 2488 }, { "epoch": 0.1595103819533453, "grad_norm": 2.809122378803803, "learning_rate": 1e-06, "loss": 0.4318, "step": 2489 }, { "epoch": 0.1595744680851064, "grad_norm": 2.560302012329596, "learning_rate": 1e-06, "loss": 0.4023, "step": 2490 }, { "epoch": 0.15963855421686746, "grad_norm": 2.7315313434872364, "learning_rate": 1e-06, "loss": 0.419, "step": 2491 }, { "epoch": 0.15970264034862855, "grad_norm": 2.7487724870084707, "learning_rate": 1e-06, "loss": 0.4438, "step": 2492 }, { "epoch": 0.15976672648038964, "grad_norm": 2.5211532370360317, "learning_rate": 1e-06, "loss": 0.4091, "step": 2493 }, { "epoch": 0.15983081261215074, "grad_norm": 2.69198583244194, "learning_rate": 1e-06, "loss": 0.3919, "step": 2494 }, { "epoch": 0.15989489874391183, "grad_norm": 2.6710258015363175, "learning_rate": 1e-06, "loss": 0.3612, "step": 2495 }, { "epoch": 0.1599589848756729, "grad_norm": 2.6456233748049787, "learning_rate": 1e-06, "loss": 0.4136, "step": 2496 }, { "epoch": 0.160023071007434, "grad_norm": 2.5859243386442037, "learning_rate": 1e-06, "loss": 0.4355, "step": 2497 }, { "epoch": 0.16008715713919508, "grad_norm": 2.7520692263098416, "learning_rate": 1e-06, "loss": 0.4324, "step": 2498 }, { "epoch": 0.16015124327095617, "grad_norm": 2.7816574091909723, "learning_rate": 1e-06, "loss": 0.3854, "step": 2499 }, { "epoch": 0.16021532940271727, "grad_norm": 2.6344550709051044, "learning_rate": 1e-06, "loss": 0.4278, "step": 2500 }, { "epoch": 0.16027941553447833, "grad_norm": 3.197606757914543, "learning_rate": 1e-06, "loss": 0.3987, "step": 2501 }, { "epoch": 0.16034350166623942, "grad_norm": 2.6471620383147676, "learning_rate": 1e-06, "loss": 0.4028, "step": 2502 }, { "epoch": 0.16040758779800052, "grad_norm": 2.882593526216847, "learning_rate": 1e-06, "loss": 0.4053, "step": 2503 }, { "epoch": 0.1604716739297616, "grad_norm": 2.6876055696015246, "learning_rate": 1e-06, "loss": 0.4112, "step": 2504 }, { "epoch": 0.16053576006152268, "grad_norm": 2.6908732798019157, "learning_rate": 1e-06, "loss": 0.4084, "step": 2505 }, { "epoch": 0.16059984619328377, "grad_norm": 2.696421366995068, "learning_rate": 1e-06, "loss": 0.391, "step": 2506 }, { "epoch": 0.16066393232504486, "grad_norm": 2.5708012883800184, "learning_rate": 1e-06, "loss": 0.4442, "step": 2507 }, { "epoch": 0.16072801845680595, "grad_norm": 2.5577193706830594, "learning_rate": 1e-06, "loss": 0.3862, "step": 2508 }, { "epoch": 0.16079210458856705, "grad_norm": 2.7187413971424266, "learning_rate": 1e-06, "loss": 0.4327, "step": 2509 }, { "epoch": 0.1608561907203281, "grad_norm": 2.5817179456414037, "learning_rate": 1e-06, "loss": 0.3879, "step": 2510 }, { "epoch": 0.1609202768520892, "grad_norm": 2.7269113290519593, "learning_rate": 1e-06, "loss": 0.4325, "step": 2511 }, { "epoch": 0.1609843629838503, "grad_norm": 2.572578117655859, "learning_rate": 1e-06, "loss": 0.3851, "step": 2512 }, { "epoch": 0.1610484491156114, "grad_norm": 2.6450180571019937, "learning_rate": 1e-06, "loss": 0.4411, "step": 2513 }, { "epoch": 0.16111253524737246, "grad_norm": 2.5789710591909545, "learning_rate": 1e-06, "loss": 0.4102, "step": 2514 }, { "epoch": 0.16117662137913355, "grad_norm": 2.4838537947972177, "learning_rate": 1e-06, "loss": 0.405, "step": 2515 }, { "epoch": 0.16124070751089464, "grad_norm": 2.534907881809524, "learning_rate": 1e-06, "loss": 0.3469, "step": 2516 }, { "epoch": 0.16130479364265574, "grad_norm": 2.500800182833469, "learning_rate": 1e-06, "loss": 0.3948, "step": 2517 }, { "epoch": 0.16136887977441683, "grad_norm": 2.6929560857938974, "learning_rate": 1e-06, "loss": 0.4144, "step": 2518 }, { "epoch": 0.1614329659061779, "grad_norm": 2.6113591154458287, "learning_rate": 1e-06, "loss": 0.3989, "step": 2519 }, { "epoch": 0.161497052037939, "grad_norm": 2.537840773620322, "learning_rate": 1e-06, "loss": 0.3568, "step": 2520 }, { "epoch": 0.16156113816970008, "grad_norm": 2.7707118455203767, "learning_rate": 1e-06, "loss": 0.3732, "step": 2521 }, { "epoch": 0.16162522430146117, "grad_norm": 2.7225252383237275, "learning_rate": 1e-06, "loss": 0.3935, "step": 2522 }, { "epoch": 0.16168931043322224, "grad_norm": 2.828852689766831, "learning_rate": 1e-06, "loss": 0.4267, "step": 2523 }, { "epoch": 0.16175339656498333, "grad_norm": 2.603720737428551, "learning_rate": 1e-06, "loss": 0.4425, "step": 2524 }, { "epoch": 0.16181748269674442, "grad_norm": 2.5478451431697033, "learning_rate": 1e-06, "loss": 0.3837, "step": 2525 }, { "epoch": 0.16188156882850552, "grad_norm": 2.661807124509187, "learning_rate": 1e-06, "loss": 0.384, "step": 2526 }, { "epoch": 0.1619456549602666, "grad_norm": 2.6041595365534604, "learning_rate": 1e-06, "loss": 0.3774, "step": 2527 }, { "epoch": 0.16200974109202768, "grad_norm": 2.570551916458256, "learning_rate": 1e-06, "loss": 0.4089, "step": 2528 }, { "epoch": 0.16207382722378877, "grad_norm": 2.691936352986267, "learning_rate": 1e-06, "loss": 0.3846, "step": 2529 }, { "epoch": 0.16213791335554986, "grad_norm": 2.630771561034015, "learning_rate": 1e-06, "loss": 0.4067, "step": 2530 }, { "epoch": 0.16220199948731095, "grad_norm": 2.812901706927222, "learning_rate": 1e-06, "loss": 0.4493, "step": 2531 }, { "epoch": 0.16226608561907202, "grad_norm": 2.895255542589514, "learning_rate": 1e-06, "loss": 0.4126, "step": 2532 }, { "epoch": 0.1623301717508331, "grad_norm": 2.689542921264475, "learning_rate": 1e-06, "loss": 0.4611, "step": 2533 }, { "epoch": 0.1623942578825942, "grad_norm": 2.7578900917283318, "learning_rate": 1e-06, "loss": 0.4068, "step": 2534 }, { "epoch": 0.1624583440143553, "grad_norm": 2.7909225863700104, "learning_rate": 1e-06, "loss": 0.4066, "step": 2535 }, { "epoch": 0.1625224301461164, "grad_norm": 2.7296440103447535, "learning_rate": 1e-06, "loss": 0.4286, "step": 2536 }, { "epoch": 0.16258651627787746, "grad_norm": 2.7413253180770747, "learning_rate": 1e-06, "loss": 0.398, "step": 2537 }, { "epoch": 0.16265060240963855, "grad_norm": 2.968023303872132, "learning_rate": 1e-06, "loss": 0.4264, "step": 2538 }, { "epoch": 0.16271468854139964, "grad_norm": 2.5240106724224285, "learning_rate": 1e-06, "loss": 0.4667, "step": 2539 }, { "epoch": 0.16277877467316074, "grad_norm": 2.8291621239977434, "learning_rate": 1e-06, "loss": 0.4637, "step": 2540 }, { "epoch": 0.1628428608049218, "grad_norm": 2.685586301138915, "learning_rate": 1e-06, "loss": 0.4277, "step": 2541 }, { "epoch": 0.1629069469366829, "grad_norm": 2.7011354343541827, "learning_rate": 1e-06, "loss": 0.4338, "step": 2542 }, { "epoch": 0.162971033068444, "grad_norm": 2.652397024902033, "learning_rate": 1e-06, "loss": 0.454, "step": 2543 }, { "epoch": 0.16303511920020508, "grad_norm": 2.977816057807566, "learning_rate": 1e-06, "loss": 0.4084, "step": 2544 }, { "epoch": 0.16309920533196617, "grad_norm": 2.7882639087445233, "learning_rate": 1e-06, "loss": 0.4856, "step": 2545 }, { "epoch": 0.16316329146372724, "grad_norm": 2.5045074481642526, "learning_rate": 1e-06, "loss": 0.4176, "step": 2546 }, { "epoch": 0.16322737759548833, "grad_norm": 2.617452267392621, "learning_rate": 1e-06, "loss": 0.4572, "step": 2547 }, { "epoch": 0.16329146372724943, "grad_norm": 2.658894789581529, "learning_rate": 1e-06, "loss": 0.4675, "step": 2548 }, { "epoch": 0.16335554985901052, "grad_norm": 2.852357932664824, "learning_rate": 1e-06, "loss": 0.4418, "step": 2549 }, { "epoch": 0.16341963599077158, "grad_norm": 2.6360746418969017, "learning_rate": 1e-06, "loss": 0.3874, "step": 2550 }, { "epoch": 0.16348372212253268, "grad_norm": 2.8766537155424214, "learning_rate": 1e-06, "loss": 0.4384, "step": 2551 }, { "epoch": 0.16354780825429377, "grad_norm": 2.625135910693668, "learning_rate": 1e-06, "loss": 0.4223, "step": 2552 }, { "epoch": 0.16361189438605486, "grad_norm": 2.564132710733575, "learning_rate": 1e-06, "loss": 0.4263, "step": 2553 }, { "epoch": 0.16367598051781596, "grad_norm": 2.533594805547048, "learning_rate": 1e-06, "loss": 0.4281, "step": 2554 }, { "epoch": 0.16374006664957702, "grad_norm": 2.6563807475777854, "learning_rate": 1e-06, "loss": 0.474, "step": 2555 }, { "epoch": 0.1638041527813381, "grad_norm": 2.6555639273705953, "learning_rate": 1e-06, "loss": 0.386, "step": 2556 }, { "epoch": 0.1638682389130992, "grad_norm": 3.423681981284491, "learning_rate": 1e-06, "loss": 0.3963, "step": 2557 }, { "epoch": 0.1639323250448603, "grad_norm": 2.6343181562972364, "learning_rate": 1e-06, "loss": 0.4535, "step": 2558 }, { "epoch": 0.1639964111766214, "grad_norm": 2.558480507768559, "learning_rate": 1e-06, "loss": 0.4296, "step": 2559 }, { "epoch": 0.16406049730838246, "grad_norm": 2.5653732110638154, "learning_rate": 1e-06, "loss": 0.4264, "step": 2560 }, { "epoch": 0.16412458344014355, "grad_norm": 2.7535149483462025, "learning_rate": 1e-06, "loss": 0.408, "step": 2561 }, { "epoch": 0.16418866957190464, "grad_norm": 2.7378653936579553, "learning_rate": 1e-06, "loss": 0.4314, "step": 2562 }, { "epoch": 0.16425275570366574, "grad_norm": 2.6625159731761268, "learning_rate": 1e-06, "loss": 0.3955, "step": 2563 }, { "epoch": 0.1643168418354268, "grad_norm": 2.6482824209454137, "learning_rate": 1e-06, "loss": 0.3903, "step": 2564 }, { "epoch": 0.1643809279671879, "grad_norm": 2.8188480793044666, "learning_rate": 1e-06, "loss": 0.3948, "step": 2565 }, { "epoch": 0.164445014098949, "grad_norm": 2.563521159898662, "learning_rate": 1e-06, "loss": 0.401, "step": 2566 }, { "epoch": 0.16450910023071008, "grad_norm": 2.7131990194512623, "learning_rate": 1e-06, "loss": 0.4144, "step": 2567 }, { "epoch": 0.16457318636247117, "grad_norm": 2.4797567023822937, "learning_rate": 1e-06, "loss": 0.3669, "step": 2568 }, { "epoch": 0.16463727249423224, "grad_norm": 2.467398013163161, "learning_rate": 1e-06, "loss": 0.3637, "step": 2569 }, { "epoch": 0.16470135862599333, "grad_norm": 2.544381041213776, "learning_rate": 1e-06, "loss": 0.4389, "step": 2570 }, { "epoch": 0.16476544475775443, "grad_norm": 2.5132164527889427, "learning_rate": 1e-06, "loss": 0.3867, "step": 2571 }, { "epoch": 0.16482953088951552, "grad_norm": 2.5655755742496074, "learning_rate": 1e-06, "loss": 0.3894, "step": 2572 }, { "epoch": 0.16489361702127658, "grad_norm": 2.5545087984725794, "learning_rate": 1e-06, "loss": 0.4292, "step": 2573 }, { "epoch": 0.16495770315303768, "grad_norm": 2.724930949542696, "learning_rate": 1e-06, "loss": 0.4255, "step": 2574 }, { "epoch": 0.16502178928479877, "grad_norm": 2.9083920762744464, "learning_rate": 1e-06, "loss": 0.4484, "step": 2575 }, { "epoch": 0.16508587541655986, "grad_norm": 2.647945420853656, "learning_rate": 1e-06, "loss": 0.4454, "step": 2576 }, { "epoch": 0.16514996154832096, "grad_norm": 2.656887468380951, "learning_rate": 1e-06, "loss": 0.3911, "step": 2577 }, { "epoch": 0.16521404768008202, "grad_norm": 2.4060271256432095, "learning_rate": 1e-06, "loss": 0.3968, "step": 2578 }, { "epoch": 0.16527813381184311, "grad_norm": 2.673050335073366, "learning_rate": 1e-06, "loss": 0.4298, "step": 2579 }, { "epoch": 0.1653422199436042, "grad_norm": 2.6629568710429488, "learning_rate": 1e-06, "loss": 0.4281, "step": 2580 }, { "epoch": 0.1654063060753653, "grad_norm": 2.6124306527589414, "learning_rate": 1e-06, "loss": 0.4531, "step": 2581 }, { "epoch": 0.16547039220712637, "grad_norm": 2.7811035047254764, "learning_rate": 1e-06, "loss": 0.4631, "step": 2582 }, { "epoch": 0.16553447833888746, "grad_norm": 2.5917185980322937, "learning_rate": 1e-06, "loss": 0.4477, "step": 2583 }, { "epoch": 0.16559856447064855, "grad_norm": 2.591034780645144, "learning_rate": 1e-06, "loss": 0.4439, "step": 2584 }, { "epoch": 0.16566265060240964, "grad_norm": 2.794625908520945, "learning_rate": 1e-06, "loss": 0.3689, "step": 2585 }, { "epoch": 0.16572673673417074, "grad_norm": 2.6106494060829046, "learning_rate": 1e-06, "loss": 0.4439, "step": 2586 }, { "epoch": 0.1657908228659318, "grad_norm": 2.6363465931070245, "learning_rate": 1e-06, "loss": 0.4187, "step": 2587 }, { "epoch": 0.1658549089976929, "grad_norm": 2.8385488313877363, "learning_rate": 1e-06, "loss": 0.4257, "step": 2588 }, { "epoch": 0.165918995129454, "grad_norm": 2.5718888642010556, "learning_rate": 1e-06, "loss": 0.4606, "step": 2589 }, { "epoch": 0.16598308126121508, "grad_norm": 2.824032193073902, "learning_rate": 1e-06, "loss": 0.4144, "step": 2590 }, { "epoch": 0.16604716739297615, "grad_norm": 2.6419319673150614, "learning_rate": 1e-06, "loss": 0.3877, "step": 2591 }, { "epoch": 0.16611125352473724, "grad_norm": 2.6349648425656222, "learning_rate": 1e-06, "loss": 0.4523, "step": 2592 }, { "epoch": 0.16617533965649833, "grad_norm": 2.5687018277447673, "learning_rate": 1e-06, "loss": 0.4163, "step": 2593 }, { "epoch": 0.16623942578825943, "grad_norm": 2.7502135847373, "learning_rate": 1e-06, "loss": 0.3938, "step": 2594 }, { "epoch": 0.16630351192002052, "grad_norm": 2.7870082246775536, "learning_rate": 1e-06, "loss": 0.3712, "step": 2595 }, { "epoch": 0.16636759805178158, "grad_norm": 2.664471483180359, "learning_rate": 1e-06, "loss": 0.3785, "step": 2596 }, { "epoch": 0.16643168418354268, "grad_norm": 2.8006579800788405, "learning_rate": 1e-06, "loss": 0.4322, "step": 2597 }, { "epoch": 0.16649577031530377, "grad_norm": 2.651191581602123, "learning_rate": 1e-06, "loss": 0.428, "step": 2598 }, { "epoch": 0.16655985644706486, "grad_norm": 2.7140854600700695, "learning_rate": 1e-06, "loss": 0.3945, "step": 2599 }, { "epoch": 0.16662394257882593, "grad_norm": 2.7856278395057856, "learning_rate": 1e-06, "loss": 0.4592, "step": 2600 }, { "epoch": 0.16668802871058702, "grad_norm": 2.607410882542063, "learning_rate": 1e-06, "loss": 0.4324, "step": 2601 }, { "epoch": 0.16675211484234811, "grad_norm": 2.732714333661932, "learning_rate": 1e-06, "loss": 0.4252, "step": 2602 }, { "epoch": 0.1668162009741092, "grad_norm": 2.921716514174632, "learning_rate": 1e-06, "loss": 0.4345, "step": 2603 }, { "epoch": 0.1668802871058703, "grad_norm": 2.4597493378828497, "learning_rate": 1e-06, "loss": 0.4015, "step": 2604 }, { "epoch": 0.16694437323763137, "grad_norm": 2.6806802003367127, "learning_rate": 1e-06, "loss": 0.3903, "step": 2605 }, { "epoch": 0.16700845936939246, "grad_norm": 2.5394049202373004, "learning_rate": 1e-06, "loss": 0.3787, "step": 2606 }, { "epoch": 0.16707254550115355, "grad_norm": 2.537658335645286, "learning_rate": 1e-06, "loss": 0.4692, "step": 2607 }, { "epoch": 0.16713663163291465, "grad_norm": 2.486190723866445, "learning_rate": 1e-06, "loss": 0.4244, "step": 2608 }, { "epoch": 0.1672007177646757, "grad_norm": 2.6499647012087504, "learning_rate": 1e-06, "loss": 0.3738, "step": 2609 }, { "epoch": 0.1672648038964368, "grad_norm": 2.4785537911649365, "learning_rate": 1e-06, "loss": 0.415, "step": 2610 }, { "epoch": 0.1673288900281979, "grad_norm": 2.6183916526906894, "learning_rate": 1e-06, "loss": 0.5018, "step": 2611 }, { "epoch": 0.167392976159959, "grad_norm": 2.6916594666182996, "learning_rate": 1e-06, "loss": 0.4613, "step": 2612 }, { "epoch": 0.16745706229172008, "grad_norm": 2.731776274248501, "learning_rate": 1e-06, "loss": 0.338, "step": 2613 }, { "epoch": 0.16752114842348115, "grad_norm": 2.5633563923010367, "learning_rate": 1e-06, "loss": 0.4063, "step": 2614 }, { "epoch": 0.16758523455524224, "grad_norm": 2.6792211962059986, "learning_rate": 1e-06, "loss": 0.4274, "step": 2615 }, { "epoch": 0.16764932068700333, "grad_norm": 2.704490572915443, "learning_rate": 1e-06, "loss": 0.3596, "step": 2616 }, { "epoch": 0.16771340681876443, "grad_norm": 2.7040420901466127, "learning_rate": 1e-06, "loss": 0.3669, "step": 2617 }, { "epoch": 0.16777749295052552, "grad_norm": 2.508603329270507, "learning_rate": 1e-06, "loss": 0.383, "step": 2618 }, { "epoch": 0.16784157908228658, "grad_norm": 2.667668834729716, "learning_rate": 1e-06, "loss": 0.3944, "step": 2619 }, { "epoch": 0.16790566521404768, "grad_norm": 2.788009681704992, "learning_rate": 1e-06, "loss": 0.393, "step": 2620 }, { "epoch": 0.16796975134580877, "grad_norm": 2.9943683712234295, "learning_rate": 1e-06, "loss": 0.4375, "step": 2621 }, { "epoch": 0.16803383747756986, "grad_norm": 2.4580934140045576, "learning_rate": 1e-06, "loss": 0.4295, "step": 2622 }, { "epoch": 0.16809792360933093, "grad_norm": 2.682255157750206, "learning_rate": 1e-06, "loss": 0.415, "step": 2623 }, { "epoch": 0.16816200974109202, "grad_norm": 2.6326858723207915, "learning_rate": 1e-06, "loss": 0.4016, "step": 2624 }, { "epoch": 0.16822609587285312, "grad_norm": 2.7531460345683945, "learning_rate": 1e-06, "loss": 0.4366, "step": 2625 }, { "epoch": 0.1682901820046142, "grad_norm": 2.4582924292223063, "learning_rate": 1e-06, "loss": 0.3714, "step": 2626 }, { "epoch": 0.1683542681363753, "grad_norm": 2.4189133463074413, "learning_rate": 1e-06, "loss": 0.3395, "step": 2627 }, { "epoch": 0.16841835426813637, "grad_norm": 2.4978464701468854, "learning_rate": 1e-06, "loss": 0.3388, "step": 2628 }, { "epoch": 0.16848244039989746, "grad_norm": 2.6293887606762003, "learning_rate": 1e-06, "loss": 0.4318, "step": 2629 }, { "epoch": 0.16854652653165855, "grad_norm": 2.688163877974156, "learning_rate": 1e-06, "loss": 0.4566, "step": 2630 }, { "epoch": 0.16861061266341965, "grad_norm": 2.6615797889998385, "learning_rate": 1e-06, "loss": 0.4286, "step": 2631 }, { "epoch": 0.1686746987951807, "grad_norm": 2.6150907943144452, "learning_rate": 1e-06, "loss": 0.3935, "step": 2632 }, { "epoch": 0.1687387849269418, "grad_norm": 2.920403244685697, "learning_rate": 1e-06, "loss": 0.4117, "step": 2633 }, { "epoch": 0.1688028710587029, "grad_norm": 2.748707499569203, "learning_rate": 1e-06, "loss": 0.4312, "step": 2634 }, { "epoch": 0.168866957190464, "grad_norm": 2.7044982203747323, "learning_rate": 1e-06, "loss": 0.3967, "step": 2635 }, { "epoch": 0.16893104332222508, "grad_norm": 2.695912823716649, "learning_rate": 1e-06, "loss": 0.4081, "step": 2636 }, { "epoch": 0.16899512945398615, "grad_norm": 2.5569026627371625, "learning_rate": 1e-06, "loss": 0.4057, "step": 2637 }, { "epoch": 0.16905921558574724, "grad_norm": 2.617933717691224, "learning_rate": 1e-06, "loss": 0.4184, "step": 2638 }, { "epoch": 0.16912330171750833, "grad_norm": 2.828806100247617, "learning_rate": 1e-06, "loss": 0.4145, "step": 2639 }, { "epoch": 0.16918738784926943, "grad_norm": 2.6495628081292613, "learning_rate": 1e-06, "loss": 0.4279, "step": 2640 }, { "epoch": 0.1692514739810305, "grad_norm": 2.762389598809753, "learning_rate": 1e-06, "loss": 0.4279, "step": 2641 }, { "epoch": 0.16931556011279159, "grad_norm": 2.5771748842832647, "learning_rate": 1e-06, "loss": 0.3541, "step": 2642 }, { "epoch": 0.16937964624455268, "grad_norm": 2.5680137462087944, "learning_rate": 1e-06, "loss": 0.3764, "step": 2643 }, { "epoch": 0.16944373237631377, "grad_norm": 2.533760471382165, "learning_rate": 1e-06, "loss": 0.3492, "step": 2644 }, { "epoch": 0.16950781850807486, "grad_norm": 2.537156219708976, "learning_rate": 1e-06, "loss": 0.4071, "step": 2645 }, { "epoch": 0.16957190463983593, "grad_norm": 2.662693914933848, "learning_rate": 1e-06, "loss": 0.3899, "step": 2646 }, { "epoch": 0.16963599077159702, "grad_norm": 2.6963640114525202, "learning_rate": 1e-06, "loss": 0.3973, "step": 2647 }, { "epoch": 0.16970007690335812, "grad_norm": 2.381564264533936, "learning_rate": 1e-06, "loss": 0.393, "step": 2648 }, { "epoch": 0.1697641630351192, "grad_norm": 2.666993056664914, "learning_rate": 1e-06, "loss": 0.4031, "step": 2649 }, { "epoch": 0.16982824916688027, "grad_norm": 2.7697832526098214, "learning_rate": 1e-06, "loss": 0.4172, "step": 2650 }, { "epoch": 0.16989233529864137, "grad_norm": 2.77774648574453, "learning_rate": 1e-06, "loss": 0.3955, "step": 2651 }, { "epoch": 0.16995642143040246, "grad_norm": 2.773426081646378, "learning_rate": 1e-06, "loss": 0.4396, "step": 2652 }, { "epoch": 0.17002050756216355, "grad_norm": 2.349409710796212, "learning_rate": 1e-06, "loss": 0.3712, "step": 2653 }, { "epoch": 0.17008459369392465, "grad_norm": 2.6672881479515587, "learning_rate": 1e-06, "loss": 0.4144, "step": 2654 }, { "epoch": 0.1701486798256857, "grad_norm": 2.5934156121316865, "learning_rate": 1e-06, "loss": 0.4574, "step": 2655 }, { "epoch": 0.1702127659574468, "grad_norm": 3.0540709948188893, "learning_rate": 1e-06, "loss": 0.4623, "step": 2656 }, { "epoch": 0.1702768520892079, "grad_norm": 2.5464630959332926, "learning_rate": 1e-06, "loss": 0.3893, "step": 2657 }, { "epoch": 0.170340938220969, "grad_norm": 2.6894763480997854, "learning_rate": 1e-06, "loss": 0.4528, "step": 2658 }, { "epoch": 0.17040502435273006, "grad_norm": 2.640883933595137, "learning_rate": 1e-06, "loss": 0.4243, "step": 2659 }, { "epoch": 0.17046911048449115, "grad_norm": 2.84348627663285, "learning_rate": 1e-06, "loss": 0.3694, "step": 2660 }, { "epoch": 0.17053319661625224, "grad_norm": 2.7193478058528147, "learning_rate": 1e-06, "loss": 0.4163, "step": 2661 }, { "epoch": 0.17059728274801333, "grad_norm": 2.3724490315311173, "learning_rate": 1e-06, "loss": 0.4099, "step": 2662 }, { "epoch": 0.17066136887977443, "grad_norm": 2.7397106390320998, "learning_rate": 1e-06, "loss": 0.4298, "step": 2663 }, { "epoch": 0.1707254550115355, "grad_norm": 2.928854559865521, "learning_rate": 1e-06, "loss": 0.3542, "step": 2664 }, { "epoch": 0.17078954114329659, "grad_norm": 3.174576889900393, "learning_rate": 1e-06, "loss": 0.4487, "step": 2665 }, { "epoch": 0.17085362727505768, "grad_norm": 2.768162935078266, "learning_rate": 1e-06, "loss": 0.4126, "step": 2666 }, { "epoch": 0.17091771340681877, "grad_norm": 2.6517696398341006, "learning_rate": 1e-06, "loss": 0.419, "step": 2667 }, { "epoch": 0.17098179953857987, "grad_norm": 2.4838150857756656, "learning_rate": 1e-06, "loss": 0.4224, "step": 2668 }, { "epoch": 0.17104588567034093, "grad_norm": 2.6833635728441023, "learning_rate": 1e-06, "loss": 0.4171, "step": 2669 }, { "epoch": 0.17110997180210202, "grad_norm": 2.603487459743295, "learning_rate": 1e-06, "loss": 0.3906, "step": 2670 }, { "epoch": 0.17117405793386312, "grad_norm": 2.5774146996288136, "learning_rate": 1e-06, "loss": 0.3956, "step": 2671 }, { "epoch": 0.1712381440656242, "grad_norm": 2.938289624792572, "learning_rate": 1e-06, "loss": 0.3997, "step": 2672 }, { "epoch": 0.17130223019738527, "grad_norm": 2.8039532904551585, "learning_rate": 1e-06, "loss": 0.4422, "step": 2673 }, { "epoch": 0.17136631632914637, "grad_norm": 2.750175646020322, "learning_rate": 1e-06, "loss": 0.4173, "step": 2674 }, { "epoch": 0.17143040246090746, "grad_norm": 2.4505574568016857, "learning_rate": 1e-06, "loss": 0.386, "step": 2675 }, { "epoch": 0.17149448859266855, "grad_norm": 2.5900624275998285, "learning_rate": 1e-06, "loss": 0.4086, "step": 2676 }, { "epoch": 0.17155857472442965, "grad_norm": 2.573632710786655, "learning_rate": 1e-06, "loss": 0.4297, "step": 2677 }, { "epoch": 0.1716226608561907, "grad_norm": 2.868666362706998, "learning_rate": 1e-06, "loss": 0.4225, "step": 2678 }, { "epoch": 0.1716867469879518, "grad_norm": 2.653904345921513, "learning_rate": 1e-06, "loss": 0.433, "step": 2679 }, { "epoch": 0.1717508331197129, "grad_norm": 2.55586101100343, "learning_rate": 1e-06, "loss": 0.3604, "step": 2680 }, { "epoch": 0.171814919251474, "grad_norm": 2.8104819328821535, "learning_rate": 1e-06, "loss": 0.4086, "step": 2681 }, { "epoch": 0.17187900538323506, "grad_norm": 2.706876954065412, "learning_rate": 1e-06, "loss": 0.506, "step": 2682 }, { "epoch": 0.17194309151499615, "grad_norm": 2.510943495458629, "learning_rate": 1e-06, "loss": 0.4211, "step": 2683 }, { "epoch": 0.17200717764675724, "grad_norm": 2.9015099476835777, "learning_rate": 1e-06, "loss": 0.3541, "step": 2684 }, { "epoch": 0.17207126377851834, "grad_norm": 2.6421129558259504, "learning_rate": 1e-06, "loss": 0.4685, "step": 2685 }, { "epoch": 0.17213534991027943, "grad_norm": 2.6910006860551583, "learning_rate": 1e-06, "loss": 0.4363, "step": 2686 }, { "epoch": 0.1721994360420405, "grad_norm": 3.002895992145651, "learning_rate": 1e-06, "loss": 0.4241, "step": 2687 }, { "epoch": 0.1722635221738016, "grad_norm": 2.630505341919852, "learning_rate": 1e-06, "loss": 0.3906, "step": 2688 }, { "epoch": 0.17232760830556268, "grad_norm": 2.5058688409268504, "learning_rate": 1e-06, "loss": 0.4239, "step": 2689 }, { "epoch": 0.17239169443732377, "grad_norm": 2.9556913711850963, "learning_rate": 1e-06, "loss": 0.4375, "step": 2690 }, { "epoch": 0.17245578056908484, "grad_norm": 2.7664247622675684, "learning_rate": 1e-06, "loss": 0.4922, "step": 2691 }, { "epoch": 0.17251986670084593, "grad_norm": 2.619465355014572, "learning_rate": 1e-06, "loss": 0.4407, "step": 2692 }, { "epoch": 0.17258395283260702, "grad_norm": 2.686744915784644, "learning_rate": 1e-06, "loss": 0.4147, "step": 2693 }, { "epoch": 0.17264803896436812, "grad_norm": 2.6546237480998762, "learning_rate": 1e-06, "loss": 0.391, "step": 2694 }, { "epoch": 0.1727121250961292, "grad_norm": 2.898017643183714, "learning_rate": 1e-06, "loss": 0.4154, "step": 2695 }, { "epoch": 0.17277621122789028, "grad_norm": 2.6544734467667817, "learning_rate": 1e-06, "loss": 0.4169, "step": 2696 }, { "epoch": 0.17284029735965137, "grad_norm": 2.6309737535988593, "learning_rate": 1e-06, "loss": 0.4016, "step": 2697 }, { "epoch": 0.17290438349141246, "grad_norm": 2.643895300769825, "learning_rate": 1e-06, "loss": 0.4367, "step": 2698 }, { "epoch": 0.17296846962317355, "grad_norm": 2.6365834195624682, "learning_rate": 1e-06, "loss": 0.466, "step": 2699 }, { "epoch": 0.17303255575493462, "grad_norm": 2.5310055725501313, "learning_rate": 1e-06, "loss": 0.4427, "step": 2700 }, { "epoch": 0.1730966418866957, "grad_norm": 2.6053804421251727, "learning_rate": 1e-06, "loss": 0.3722, "step": 2701 }, { "epoch": 0.1731607280184568, "grad_norm": 2.5949485014712925, "learning_rate": 1e-06, "loss": 0.3716, "step": 2702 }, { "epoch": 0.1732248141502179, "grad_norm": 2.461700301283681, "learning_rate": 1e-06, "loss": 0.4243, "step": 2703 }, { "epoch": 0.173288900281979, "grad_norm": 2.667172217132449, "learning_rate": 1e-06, "loss": 0.4409, "step": 2704 }, { "epoch": 0.17335298641374006, "grad_norm": 2.676509271450921, "learning_rate": 1e-06, "loss": 0.3738, "step": 2705 }, { "epoch": 0.17341707254550115, "grad_norm": 2.8242422751607963, "learning_rate": 1e-06, "loss": 0.4045, "step": 2706 }, { "epoch": 0.17348115867726224, "grad_norm": 2.476650612552705, "learning_rate": 1e-06, "loss": 0.4105, "step": 2707 }, { "epoch": 0.17354524480902334, "grad_norm": 2.993760001961813, "learning_rate": 1e-06, "loss": 0.4191, "step": 2708 }, { "epoch": 0.1736093309407844, "grad_norm": 3.0046453722930457, "learning_rate": 1e-06, "loss": 0.4066, "step": 2709 }, { "epoch": 0.1736734170725455, "grad_norm": 2.9357275097901137, "learning_rate": 1e-06, "loss": 0.4118, "step": 2710 }, { "epoch": 0.1737375032043066, "grad_norm": 2.610844895489968, "learning_rate": 1e-06, "loss": 0.4334, "step": 2711 }, { "epoch": 0.17380158933606768, "grad_norm": 2.825996241401419, "learning_rate": 1e-06, "loss": 0.4395, "step": 2712 }, { "epoch": 0.17386567546782877, "grad_norm": 2.8467469193619923, "learning_rate": 1e-06, "loss": 0.4601, "step": 2713 }, { "epoch": 0.17392976159958984, "grad_norm": 2.908167454995991, "learning_rate": 1e-06, "loss": 0.4204, "step": 2714 }, { "epoch": 0.17399384773135093, "grad_norm": 2.67761276554012, "learning_rate": 1e-06, "loss": 0.386, "step": 2715 }, { "epoch": 0.17405793386311202, "grad_norm": 2.6317302005851384, "learning_rate": 1e-06, "loss": 0.4263, "step": 2716 }, { "epoch": 0.17412201999487312, "grad_norm": 2.747385895018015, "learning_rate": 1e-06, "loss": 0.3776, "step": 2717 }, { "epoch": 0.17418610612663418, "grad_norm": 2.3660503979944356, "learning_rate": 1e-06, "loss": 0.3765, "step": 2718 }, { "epoch": 0.17425019225839528, "grad_norm": 2.5496448902297444, "learning_rate": 1e-06, "loss": 0.4378, "step": 2719 }, { "epoch": 0.17431427839015637, "grad_norm": 2.6347752728926244, "learning_rate": 1e-06, "loss": 0.4169, "step": 2720 }, { "epoch": 0.17437836452191746, "grad_norm": 2.5835556645401136, "learning_rate": 1e-06, "loss": 0.4, "step": 2721 }, { "epoch": 0.17444245065367855, "grad_norm": 2.712552510654322, "learning_rate": 1e-06, "loss": 0.4285, "step": 2722 }, { "epoch": 0.17450653678543962, "grad_norm": 2.641969170333069, "learning_rate": 1e-06, "loss": 0.4099, "step": 2723 }, { "epoch": 0.1745706229172007, "grad_norm": 3.0637998879801187, "learning_rate": 1e-06, "loss": 0.4272, "step": 2724 }, { "epoch": 0.1746347090489618, "grad_norm": 2.727006901877962, "learning_rate": 1e-06, "loss": 0.374, "step": 2725 }, { "epoch": 0.1746987951807229, "grad_norm": 3.023247813234385, "learning_rate": 1e-06, "loss": 0.4034, "step": 2726 }, { "epoch": 0.174762881312484, "grad_norm": 2.814083489198847, "learning_rate": 1e-06, "loss": 0.393, "step": 2727 }, { "epoch": 0.17482696744424506, "grad_norm": 2.449041367738676, "learning_rate": 1e-06, "loss": 0.4182, "step": 2728 }, { "epoch": 0.17489105357600615, "grad_norm": 2.8231543234962806, "learning_rate": 1e-06, "loss": 0.3886, "step": 2729 }, { "epoch": 0.17495513970776724, "grad_norm": 2.7766829388477325, "learning_rate": 1e-06, "loss": 0.4284, "step": 2730 }, { "epoch": 0.17501922583952834, "grad_norm": 2.847867939571184, "learning_rate": 1e-06, "loss": 0.4885, "step": 2731 }, { "epoch": 0.1750833119712894, "grad_norm": 2.4603559459072897, "learning_rate": 1e-06, "loss": 0.3952, "step": 2732 }, { "epoch": 0.1751473981030505, "grad_norm": 2.9335988133488913, "learning_rate": 1e-06, "loss": 0.367, "step": 2733 }, { "epoch": 0.1752114842348116, "grad_norm": 2.801463474487573, "learning_rate": 1e-06, "loss": 0.4461, "step": 2734 }, { "epoch": 0.17527557036657268, "grad_norm": 2.6157190091020146, "learning_rate": 1e-06, "loss": 0.4039, "step": 2735 }, { "epoch": 0.17533965649833377, "grad_norm": 2.860532946073638, "learning_rate": 1e-06, "loss": 0.4914, "step": 2736 }, { "epoch": 0.17540374263009484, "grad_norm": 2.6380029352671186, "learning_rate": 1e-06, "loss": 0.4389, "step": 2737 }, { "epoch": 0.17546782876185593, "grad_norm": 2.7757439534033725, "learning_rate": 1e-06, "loss": 0.3723, "step": 2738 }, { "epoch": 0.17553191489361702, "grad_norm": 2.5525342561888293, "learning_rate": 1e-06, "loss": 0.4242, "step": 2739 }, { "epoch": 0.17559600102537812, "grad_norm": 2.8172344485968264, "learning_rate": 1e-06, "loss": 0.447, "step": 2740 }, { "epoch": 0.17566008715713918, "grad_norm": 2.609296582257566, "learning_rate": 1e-06, "loss": 0.3896, "step": 2741 }, { "epoch": 0.17572417328890028, "grad_norm": 2.4000086904756732, "learning_rate": 1e-06, "loss": 0.3644, "step": 2742 }, { "epoch": 0.17578825942066137, "grad_norm": 2.8350571223146908, "learning_rate": 1e-06, "loss": 0.3783, "step": 2743 }, { "epoch": 0.17585234555242246, "grad_norm": 2.7509441663604552, "learning_rate": 1e-06, "loss": 0.4094, "step": 2744 }, { "epoch": 0.17591643168418356, "grad_norm": 2.5301745440674135, "learning_rate": 1e-06, "loss": 0.3968, "step": 2745 }, { "epoch": 0.17598051781594462, "grad_norm": 2.798905110335789, "learning_rate": 1e-06, "loss": 0.3853, "step": 2746 }, { "epoch": 0.1760446039477057, "grad_norm": 2.705049111037033, "learning_rate": 1e-06, "loss": 0.4149, "step": 2747 }, { "epoch": 0.1761086900794668, "grad_norm": 2.8010432178773823, "learning_rate": 1e-06, "loss": 0.3797, "step": 2748 }, { "epoch": 0.1761727762112279, "grad_norm": 2.4673188572577125, "learning_rate": 1e-06, "loss": 0.3891, "step": 2749 }, { "epoch": 0.17623686234298896, "grad_norm": 2.345419411995933, "learning_rate": 1e-06, "loss": 0.3931, "step": 2750 }, { "epoch": 0.17630094847475006, "grad_norm": 2.652929007808521, "learning_rate": 1e-06, "loss": 0.402, "step": 2751 }, { "epoch": 0.17636503460651115, "grad_norm": 2.4664570988941397, "learning_rate": 1e-06, "loss": 0.3661, "step": 2752 }, { "epoch": 0.17642912073827224, "grad_norm": 2.5442145233577733, "learning_rate": 1e-06, "loss": 0.387, "step": 2753 }, { "epoch": 0.17649320687003334, "grad_norm": 2.663665531636393, "learning_rate": 1e-06, "loss": 0.5124, "step": 2754 }, { "epoch": 0.1765572930017944, "grad_norm": 2.6084492727596986, "learning_rate": 1e-06, "loss": 0.4015, "step": 2755 }, { "epoch": 0.1766213791335555, "grad_norm": 2.6078674777982007, "learning_rate": 1e-06, "loss": 0.477, "step": 2756 }, { "epoch": 0.1766854652653166, "grad_norm": 2.548574378570834, "learning_rate": 1e-06, "loss": 0.3738, "step": 2757 }, { "epoch": 0.17674955139707768, "grad_norm": 2.5789832769642036, "learning_rate": 1e-06, "loss": 0.3945, "step": 2758 }, { "epoch": 0.17681363752883875, "grad_norm": 2.717502081315403, "learning_rate": 1e-06, "loss": 0.4145, "step": 2759 }, { "epoch": 0.17687772366059984, "grad_norm": 2.568127521668392, "learning_rate": 1e-06, "loss": 0.3814, "step": 2760 }, { "epoch": 0.17694180979236093, "grad_norm": 2.789005011677915, "learning_rate": 1e-06, "loss": 0.4004, "step": 2761 }, { "epoch": 0.17700589592412203, "grad_norm": 2.477575732446859, "learning_rate": 1e-06, "loss": 0.4279, "step": 2762 }, { "epoch": 0.17706998205588312, "grad_norm": 2.7186598813432994, "learning_rate": 1e-06, "loss": 0.4073, "step": 2763 }, { "epoch": 0.17713406818764418, "grad_norm": 2.7710934300979773, "learning_rate": 1e-06, "loss": 0.4294, "step": 2764 }, { "epoch": 0.17719815431940528, "grad_norm": 2.7693788880209076, "learning_rate": 1e-06, "loss": 0.3503, "step": 2765 }, { "epoch": 0.17726224045116637, "grad_norm": 2.6829776861796404, "learning_rate": 1e-06, "loss": 0.4504, "step": 2766 }, { "epoch": 0.17732632658292746, "grad_norm": 2.4899668268787583, "learning_rate": 1e-06, "loss": 0.4042, "step": 2767 }, { "epoch": 0.17739041271468853, "grad_norm": 2.5376629009155374, "learning_rate": 1e-06, "loss": 0.4113, "step": 2768 }, { "epoch": 0.17745449884644962, "grad_norm": 2.6190589519635847, "learning_rate": 1e-06, "loss": 0.4423, "step": 2769 }, { "epoch": 0.17751858497821071, "grad_norm": 2.733587140196812, "learning_rate": 1e-06, "loss": 0.3711, "step": 2770 }, { "epoch": 0.1775826711099718, "grad_norm": 2.7646668438152378, "learning_rate": 1e-06, "loss": 0.4172, "step": 2771 }, { "epoch": 0.1776467572417329, "grad_norm": 2.595584633191652, "learning_rate": 1e-06, "loss": 0.3571, "step": 2772 }, { "epoch": 0.17771084337349397, "grad_norm": 2.7361207178039266, "learning_rate": 1e-06, "loss": 0.407, "step": 2773 }, { "epoch": 0.17777492950525506, "grad_norm": 2.876956812662481, "learning_rate": 1e-06, "loss": 0.4357, "step": 2774 }, { "epoch": 0.17783901563701615, "grad_norm": 2.5906483777638485, "learning_rate": 1e-06, "loss": 0.4096, "step": 2775 }, { "epoch": 0.17790310176877724, "grad_norm": 2.604898683351699, "learning_rate": 1e-06, "loss": 0.3946, "step": 2776 }, { "epoch": 0.17796718790053834, "grad_norm": 2.6280129282501243, "learning_rate": 1e-06, "loss": 0.4328, "step": 2777 }, { "epoch": 0.1780312740322994, "grad_norm": 2.773509257973345, "learning_rate": 1e-06, "loss": 0.4847, "step": 2778 }, { "epoch": 0.1780953601640605, "grad_norm": 2.669028302656756, "learning_rate": 1e-06, "loss": 0.4049, "step": 2779 }, { "epoch": 0.1781594462958216, "grad_norm": 2.5119037751744466, "learning_rate": 1e-06, "loss": 0.3751, "step": 2780 }, { "epoch": 0.17822353242758268, "grad_norm": 2.719489579389454, "learning_rate": 1e-06, "loss": 0.4567, "step": 2781 }, { "epoch": 0.17828761855934375, "grad_norm": 2.802127986585095, "learning_rate": 1e-06, "loss": 0.4662, "step": 2782 }, { "epoch": 0.17835170469110484, "grad_norm": 3.488029860507148, "learning_rate": 1e-06, "loss": 0.419, "step": 2783 }, { "epoch": 0.17841579082286593, "grad_norm": 2.7624002719807117, "learning_rate": 1e-06, "loss": 0.4514, "step": 2784 }, { "epoch": 0.17847987695462703, "grad_norm": 2.7263045947433677, "learning_rate": 1e-06, "loss": 0.4049, "step": 2785 }, { "epoch": 0.17854396308638812, "grad_norm": 2.664083750414536, "learning_rate": 1e-06, "loss": 0.4095, "step": 2786 }, { "epoch": 0.17860804921814918, "grad_norm": 2.5985944580963025, "learning_rate": 1e-06, "loss": 0.4391, "step": 2787 }, { "epoch": 0.17867213534991028, "grad_norm": 2.6064454548319884, "learning_rate": 1e-06, "loss": 0.4302, "step": 2788 }, { "epoch": 0.17873622148167137, "grad_norm": 2.5802910154946668, "learning_rate": 1e-06, "loss": 0.3561, "step": 2789 }, { "epoch": 0.17880030761343246, "grad_norm": 2.571872682594762, "learning_rate": 1e-06, "loss": 0.3856, "step": 2790 }, { "epoch": 0.17886439374519353, "grad_norm": 2.6983518939151896, "learning_rate": 1e-06, "loss": 0.4202, "step": 2791 }, { "epoch": 0.17892847987695462, "grad_norm": 2.599966184965254, "learning_rate": 1e-06, "loss": 0.4119, "step": 2792 }, { "epoch": 0.17899256600871571, "grad_norm": 3.207940867000454, "learning_rate": 1e-06, "loss": 0.4075, "step": 2793 }, { "epoch": 0.1790566521404768, "grad_norm": 2.714492976153829, "learning_rate": 1e-06, "loss": 0.406, "step": 2794 }, { "epoch": 0.1791207382722379, "grad_norm": 2.6020945100146124, "learning_rate": 1e-06, "loss": 0.4085, "step": 2795 }, { "epoch": 0.17918482440399897, "grad_norm": 2.751146216088707, "learning_rate": 1e-06, "loss": 0.3513, "step": 2796 }, { "epoch": 0.17924891053576006, "grad_norm": 2.7125630629926847, "learning_rate": 1e-06, "loss": 0.4104, "step": 2797 }, { "epoch": 0.17931299666752115, "grad_norm": 2.5691944151852666, "learning_rate": 1e-06, "loss": 0.3925, "step": 2798 }, { "epoch": 0.17937708279928224, "grad_norm": 2.4952783268939, "learning_rate": 1e-06, "loss": 0.3993, "step": 2799 }, { "epoch": 0.1794411689310433, "grad_norm": 2.6788205945919157, "learning_rate": 1e-06, "loss": 0.3886, "step": 2800 }, { "epoch": 0.1795052550628044, "grad_norm": 2.6060361462237736, "learning_rate": 1e-06, "loss": 0.3713, "step": 2801 }, { "epoch": 0.1795693411945655, "grad_norm": 2.825696798513869, "learning_rate": 1e-06, "loss": 0.4254, "step": 2802 }, { "epoch": 0.1796334273263266, "grad_norm": 2.6233720527199287, "learning_rate": 1e-06, "loss": 0.3849, "step": 2803 }, { "epoch": 0.17969751345808768, "grad_norm": 2.72767923666059, "learning_rate": 1e-06, "loss": 0.3825, "step": 2804 }, { "epoch": 0.17976159958984875, "grad_norm": 2.6288577192437903, "learning_rate": 1e-06, "loss": 0.4019, "step": 2805 }, { "epoch": 0.17982568572160984, "grad_norm": 2.897506293696981, "learning_rate": 1e-06, "loss": 0.4932, "step": 2806 }, { "epoch": 0.17988977185337093, "grad_norm": 2.6155573925589124, "learning_rate": 1e-06, "loss": 0.3817, "step": 2807 }, { "epoch": 0.17995385798513203, "grad_norm": 2.7440180522487627, "learning_rate": 1e-06, "loss": 0.3971, "step": 2808 }, { "epoch": 0.1800179441168931, "grad_norm": 2.8646368360187378, "learning_rate": 1e-06, "loss": 0.4188, "step": 2809 }, { "epoch": 0.18008203024865418, "grad_norm": 2.569865671345972, "learning_rate": 1e-06, "loss": 0.382, "step": 2810 }, { "epoch": 0.18014611638041528, "grad_norm": 2.5413764281681055, "learning_rate": 1e-06, "loss": 0.3716, "step": 2811 }, { "epoch": 0.18021020251217637, "grad_norm": 2.438466070103246, "learning_rate": 1e-06, "loss": 0.366, "step": 2812 }, { "epoch": 0.18027428864393746, "grad_norm": 2.5496759695348814, "learning_rate": 1e-06, "loss": 0.4054, "step": 2813 }, { "epoch": 0.18033837477569853, "grad_norm": 2.6402962681310065, "learning_rate": 1e-06, "loss": 0.4161, "step": 2814 }, { "epoch": 0.18040246090745962, "grad_norm": 2.670866895541432, "learning_rate": 1e-06, "loss": 0.4144, "step": 2815 }, { "epoch": 0.18046654703922071, "grad_norm": 2.791334110746202, "learning_rate": 1e-06, "loss": 0.3661, "step": 2816 }, { "epoch": 0.1805306331709818, "grad_norm": 2.509276363670558, "learning_rate": 1e-06, "loss": 0.4076, "step": 2817 }, { "epoch": 0.18059471930274287, "grad_norm": 2.5760759381567775, "learning_rate": 1e-06, "loss": 0.3704, "step": 2818 }, { "epoch": 0.18065880543450397, "grad_norm": 2.823011500253391, "learning_rate": 1e-06, "loss": 0.3794, "step": 2819 }, { "epoch": 0.18072289156626506, "grad_norm": 2.5784308273181895, "learning_rate": 1e-06, "loss": 0.4494, "step": 2820 }, { "epoch": 0.18078697769802615, "grad_norm": 2.8354990552703767, "learning_rate": 1e-06, "loss": 0.4224, "step": 2821 }, { "epoch": 0.18085106382978725, "grad_norm": 2.545519235918881, "learning_rate": 1e-06, "loss": 0.3917, "step": 2822 }, { "epoch": 0.1809151499615483, "grad_norm": 2.650679339899046, "learning_rate": 1e-06, "loss": 0.4316, "step": 2823 }, { "epoch": 0.1809792360933094, "grad_norm": 2.5115419988434167, "learning_rate": 1e-06, "loss": 0.4015, "step": 2824 }, { "epoch": 0.1810433222250705, "grad_norm": 2.5266254302391333, "learning_rate": 1e-06, "loss": 0.3854, "step": 2825 }, { "epoch": 0.1811074083568316, "grad_norm": 2.7285294453224895, "learning_rate": 1e-06, "loss": 0.3494, "step": 2826 }, { "epoch": 0.18117149448859265, "grad_norm": 2.6957667643101453, "learning_rate": 1e-06, "loss": 0.4484, "step": 2827 }, { "epoch": 0.18123558062035375, "grad_norm": 2.5773499213332327, "learning_rate": 1e-06, "loss": 0.3713, "step": 2828 }, { "epoch": 0.18129966675211484, "grad_norm": 2.605720550620889, "learning_rate": 1e-06, "loss": 0.4316, "step": 2829 }, { "epoch": 0.18136375288387593, "grad_norm": 2.852712735022763, "learning_rate": 1e-06, "loss": 0.4289, "step": 2830 }, { "epoch": 0.18142783901563703, "grad_norm": 2.75059295048179, "learning_rate": 1e-06, "loss": 0.3943, "step": 2831 }, { "epoch": 0.1814919251473981, "grad_norm": 3.210091013374015, "learning_rate": 1e-06, "loss": 0.4356, "step": 2832 }, { "epoch": 0.18155601127915919, "grad_norm": 2.7176044087136577, "learning_rate": 1e-06, "loss": 0.4638, "step": 2833 }, { "epoch": 0.18162009741092028, "grad_norm": 2.6859048781918156, "learning_rate": 1e-06, "loss": 0.3808, "step": 2834 }, { "epoch": 0.18168418354268137, "grad_norm": 2.690898630883437, "learning_rate": 1e-06, "loss": 0.4282, "step": 2835 }, { "epoch": 0.18174826967444246, "grad_norm": 2.670934781807855, "learning_rate": 1e-06, "loss": 0.3632, "step": 2836 }, { "epoch": 0.18181235580620353, "grad_norm": 2.5683697269090775, "learning_rate": 1e-06, "loss": 0.3937, "step": 2837 }, { "epoch": 0.18187644193796462, "grad_norm": 2.721953999228212, "learning_rate": 1e-06, "loss": 0.4822, "step": 2838 }, { "epoch": 0.18194052806972572, "grad_norm": 2.761994453753278, "learning_rate": 1e-06, "loss": 0.4528, "step": 2839 }, { "epoch": 0.1820046142014868, "grad_norm": 2.4924789215698047, "learning_rate": 1e-06, "loss": 0.4148, "step": 2840 }, { "epoch": 0.18206870033324787, "grad_norm": 2.6911603552831953, "learning_rate": 1e-06, "loss": 0.4053, "step": 2841 }, { "epoch": 0.18213278646500897, "grad_norm": 2.4914369771221514, "learning_rate": 1e-06, "loss": 0.4142, "step": 2842 }, { "epoch": 0.18219687259677006, "grad_norm": 2.670264209025931, "learning_rate": 1e-06, "loss": 0.4526, "step": 2843 }, { "epoch": 0.18226095872853115, "grad_norm": 2.655186995685657, "learning_rate": 1e-06, "loss": 0.4319, "step": 2844 }, { "epoch": 0.18232504486029225, "grad_norm": 2.7994660696485654, "learning_rate": 1e-06, "loss": 0.3766, "step": 2845 }, { "epoch": 0.1823891309920533, "grad_norm": 2.6672566061037983, "learning_rate": 1e-06, "loss": 0.3663, "step": 2846 }, { "epoch": 0.1824532171238144, "grad_norm": 2.4995281084688403, "learning_rate": 1e-06, "loss": 0.3993, "step": 2847 }, { "epoch": 0.1825173032555755, "grad_norm": 2.9307889954258664, "learning_rate": 1e-06, "loss": 0.4265, "step": 2848 }, { "epoch": 0.1825813893873366, "grad_norm": 2.418817204994986, "learning_rate": 1e-06, "loss": 0.399, "step": 2849 }, { "epoch": 0.18264547551909766, "grad_norm": 2.683877473241475, "learning_rate": 1e-06, "loss": 0.4107, "step": 2850 }, { "epoch": 0.18270956165085875, "grad_norm": 2.4528568962910615, "learning_rate": 1e-06, "loss": 0.4044, "step": 2851 }, { "epoch": 0.18277364778261984, "grad_norm": 2.507911995843844, "learning_rate": 1e-06, "loss": 0.4158, "step": 2852 }, { "epoch": 0.18283773391438093, "grad_norm": 2.6538955269222164, "learning_rate": 1e-06, "loss": 0.4015, "step": 2853 }, { "epoch": 0.18290182004614203, "grad_norm": 2.3812884712864055, "learning_rate": 1e-06, "loss": 0.3987, "step": 2854 }, { "epoch": 0.1829659061779031, "grad_norm": 2.7519692295648457, "learning_rate": 1e-06, "loss": 0.4249, "step": 2855 }, { "epoch": 0.18302999230966419, "grad_norm": 2.5769076247826717, "learning_rate": 1e-06, "loss": 0.4546, "step": 2856 }, { "epoch": 0.18309407844142528, "grad_norm": 2.7671252476956343, "learning_rate": 1e-06, "loss": 0.4223, "step": 2857 }, { "epoch": 0.18315816457318637, "grad_norm": 2.7236996174804853, "learning_rate": 1e-06, "loss": 0.4156, "step": 2858 }, { "epoch": 0.18322225070494744, "grad_norm": 2.777018765978659, "learning_rate": 1e-06, "loss": 0.4837, "step": 2859 }, { "epoch": 0.18328633683670853, "grad_norm": 2.7820808558817247, "learning_rate": 1e-06, "loss": 0.4043, "step": 2860 }, { "epoch": 0.18335042296846962, "grad_norm": 2.728486937802698, "learning_rate": 1e-06, "loss": 0.4269, "step": 2861 }, { "epoch": 0.18341450910023072, "grad_norm": 2.4941825007689644, "learning_rate": 1e-06, "loss": 0.4187, "step": 2862 }, { "epoch": 0.1834785952319918, "grad_norm": 2.5636554116823262, "learning_rate": 1e-06, "loss": 0.4574, "step": 2863 }, { "epoch": 0.18354268136375287, "grad_norm": 2.745168426594291, "learning_rate": 1e-06, "loss": 0.4249, "step": 2864 }, { "epoch": 0.18360676749551397, "grad_norm": 2.55893475455084, "learning_rate": 1e-06, "loss": 0.3796, "step": 2865 }, { "epoch": 0.18367085362727506, "grad_norm": 2.48777488032714, "learning_rate": 1e-06, "loss": 0.432, "step": 2866 }, { "epoch": 0.18373493975903615, "grad_norm": 3.1189156752141023, "learning_rate": 1e-06, "loss": 0.4482, "step": 2867 }, { "epoch": 0.18379902589079722, "grad_norm": 2.5609793040069584, "learning_rate": 1e-06, "loss": 0.4464, "step": 2868 }, { "epoch": 0.1838631120225583, "grad_norm": 2.538218566718866, "learning_rate": 1e-06, "loss": 0.4741, "step": 2869 }, { "epoch": 0.1839271981543194, "grad_norm": 2.473628489684195, "learning_rate": 1e-06, "loss": 0.4571, "step": 2870 }, { "epoch": 0.1839912842860805, "grad_norm": 2.6655771340155425, "learning_rate": 1e-06, "loss": 0.4382, "step": 2871 }, { "epoch": 0.1840553704178416, "grad_norm": 2.5370217160344426, "learning_rate": 1e-06, "loss": 0.363, "step": 2872 }, { "epoch": 0.18411945654960266, "grad_norm": 2.4867648521029797, "learning_rate": 1e-06, "loss": 0.3996, "step": 2873 }, { "epoch": 0.18418354268136375, "grad_norm": 2.835924752123543, "learning_rate": 1e-06, "loss": 0.4362, "step": 2874 }, { "epoch": 0.18424762881312484, "grad_norm": 2.405751638794753, "learning_rate": 1e-06, "loss": 0.4, "step": 2875 }, { "epoch": 0.18431171494488593, "grad_norm": 2.8384661298225087, "learning_rate": 1e-06, "loss": 0.3972, "step": 2876 }, { "epoch": 0.184375801076647, "grad_norm": 2.4594890443427184, "learning_rate": 1e-06, "loss": 0.4313, "step": 2877 }, { "epoch": 0.1844398872084081, "grad_norm": 2.526104120739134, "learning_rate": 1e-06, "loss": 0.3513, "step": 2878 }, { "epoch": 0.1845039733401692, "grad_norm": 2.6250238766805585, "learning_rate": 1e-06, "loss": 0.4057, "step": 2879 }, { "epoch": 0.18456805947193028, "grad_norm": 2.633967543674042, "learning_rate": 1e-06, "loss": 0.435, "step": 2880 }, { "epoch": 0.18463214560369137, "grad_norm": 2.7900500444927507, "learning_rate": 1e-06, "loss": 0.462, "step": 2881 }, { "epoch": 0.18469623173545244, "grad_norm": 2.9289848277770814, "learning_rate": 1e-06, "loss": 0.453, "step": 2882 }, { "epoch": 0.18476031786721353, "grad_norm": 2.6918411760353536, "learning_rate": 1e-06, "loss": 0.3917, "step": 2883 }, { "epoch": 0.18482440399897462, "grad_norm": 2.9527899042701615, "learning_rate": 1e-06, "loss": 0.3835, "step": 2884 }, { "epoch": 0.18488849013073572, "grad_norm": 2.6012306739317785, "learning_rate": 1e-06, "loss": 0.4306, "step": 2885 }, { "epoch": 0.1849525762624968, "grad_norm": 2.9987158571479977, "learning_rate": 1e-06, "loss": 0.365, "step": 2886 }, { "epoch": 0.18501666239425787, "grad_norm": 2.800871847179712, "learning_rate": 1e-06, "loss": 0.3935, "step": 2887 }, { "epoch": 0.18508074852601897, "grad_norm": 2.828246130679259, "learning_rate": 1e-06, "loss": 0.3956, "step": 2888 }, { "epoch": 0.18514483465778006, "grad_norm": 2.619825070399056, "learning_rate": 1e-06, "loss": 0.4172, "step": 2889 }, { "epoch": 0.18520892078954115, "grad_norm": 2.624254162054244, "learning_rate": 1e-06, "loss": 0.4387, "step": 2890 }, { "epoch": 0.18527300692130222, "grad_norm": 2.669571750177714, "learning_rate": 1e-06, "loss": 0.4586, "step": 2891 }, { "epoch": 0.1853370930530633, "grad_norm": 2.58413530972886, "learning_rate": 1e-06, "loss": 0.4605, "step": 2892 }, { "epoch": 0.1854011791848244, "grad_norm": 2.750414580107718, "learning_rate": 1e-06, "loss": 0.4133, "step": 2893 }, { "epoch": 0.1854652653165855, "grad_norm": 2.5738479042844635, "learning_rate": 1e-06, "loss": 0.3957, "step": 2894 }, { "epoch": 0.1855293514483466, "grad_norm": 2.5740432194741394, "learning_rate": 1e-06, "loss": 0.3783, "step": 2895 }, { "epoch": 0.18559343758010766, "grad_norm": 2.7328320766355745, "learning_rate": 1e-06, "loss": 0.4155, "step": 2896 }, { "epoch": 0.18565752371186875, "grad_norm": 2.6266822727179617, "learning_rate": 1e-06, "loss": 0.4322, "step": 2897 }, { "epoch": 0.18572160984362984, "grad_norm": 2.7423123147741624, "learning_rate": 1e-06, "loss": 0.3899, "step": 2898 }, { "epoch": 0.18578569597539094, "grad_norm": 2.6644779759023405, "learning_rate": 1e-06, "loss": 0.3878, "step": 2899 }, { "epoch": 0.185849782107152, "grad_norm": 2.8416891170218164, "learning_rate": 1e-06, "loss": 0.3696, "step": 2900 }, { "epoch": 0.1859138682389131, "grad_norm": 2.651614683525343, "learning_rate": 1e-06, "loss": 0.4604, "step": 2901 }, { "epoch": 0.1859779543706742, "grad_norm": 2.6711252166907253, "learning_rate": 1e-06, "loss": 0.4333, "step": 2902 }, { "epoch": 0.18604204050243528, "grad_norm": 2.6775615588910275, "learning_rate": 1e-06, "loss": 0.3517, "step": 2903 }, { "epoch": 0.18610612663419637, "grad_norm": 2.702059561627033, "learning_rate": 1e-06, "loss": 0.4167, "step": 2904 }, { "epoch": 0.18617021276595744, "grad_norm": 2.536263599797668, "learning_rate": 1e-06, "loss": 0.3772, "step": 2905 }, { "epoch": 0.18623429889771853, "grad_norm": 2.845862941191916, "learning_rate": 1e-06, "loss": 0.4259, "step": 2906 }, { "epoch": 0.18629838502947962, "grad_norm": 2.700477381709106, "learning_rate": 1e-06, "loss": 0.3883, "step": 2907 }, { "epoch": 0.18636247116124072, "grad_norm": 2.682607738754587, "learning_rate": 1e-06, "loss": 0.3964, "step": 2908 }, { "epoch": 0.18642655729300178, "grad_norm": 2.554052321832966, "learning_rate": 1e-06, "loss": 0.3855, "step": 2909 }, { "epoch": 0.18649064342476288, "grad_norm": 2.6403351465968274, "learning_rate": 1e-06, "loss": 0.4517, "step": 2910 }, { "epoch": 0.18655472955652397, "grad_norm": 2.5477057026439476, "learning_rate": 1e-06, "loss": 0.3922, "step": 2911 }, { "epoch": 0.18661881568828506, "grad_norm": 2.5704484585325926, "learning_rate": 1e-06, "loss": 0.4367, "step": 2912 }, { "epoch": 0.18668290182004615, "grad_norm": 2.6724804046778594, "learning_rate": 1e-06, "loss": 0.3791, "step": 2913 }, { "epoch": 0.18674698795180722, "grad_norm": 2.705919945837271, "learning_rate": 1e-06, "loss": 0.4193, "step": 2914 }, { "epoch": 0.1868110740835683, "grad_norm": 2.6275085521905037, "learning_rate": 1e-06, "loss": 0.3668, "step": 2915 }, { "epoch": 0.1868751602153294, "grad_norm": 2.5644893129504993, "learning_rate": 1e-06, "loss": 0.385, "step": 2916 }, { "epoch": 0.1869392463470905, "grad_norm": 2.6374626451766843, "learning_rate": 1e-06, "loss": 0.3871, "step": 2917 }, { "epoch": 0.18700333247885156, "grad_norm": 2.916365013642046, "learning_rate": 1e-06, "loss": 0.4274, "step": 2918 }, { "epoch": 0.18706741861061266, "grad_norm": 2.5881011852417557, "learning_rate": 1e-06, "loss": 0.4246, "step": 2919 }, { "epoch": 0.18713150474237375, "grad_norm": 2.455367104725631, "learning_rate": 1e-06, "loss": 0.4351, "step": 2920 }, { "epoch": 0.18719559087413484, "grad_norm": 2.675434496637276, "learning_rate": 1e-06, "loss": 0.4039, "step": 2921 }, { "epoch": 0.18725967700589594, "grad_norm": 2.7514060307588823, "learning_rate": 1e-06, "loss": 0.3964, "step": 2922 }, { "epoch": 0.187323763137657, "grad_norm": 2.8818081578888135, "learning_rate": 1e-06, "loss": 0.4365, "step": 2923 }, { "epoch": 0.1873878492694181, "grad_norm": 2.722378179353567, "learning_rate": 1e-06, "loss": 0.4214, "step": 2924 }, { "epoch": 0.1874519354011792, "grad_norm": 2.6386298247760442, "learning_rate": 1e-06, "loss": 0.4116, "step": 2925 }, { "epoch": 0.18751602153294028, "grad_norm": 2.6530040293298693, "learning_rate": 1e-06, "loss": 0.3865, "step": 2926 }, { "epoch": 0.18758010766470135, "grad_norm": 2.8852720002345364, "learning_rate": 1e-06, "loss": 0.4578, "step": 2927 }, { "epoch": 0.18764419379646244, "grad_norm": 2.624833979077509, "learning_rate": 1e-06, "loss": 0.4535, "step": 2928 }, { "epoch": 0.18770827992822353, "grad_norm": 2.5224928226685557, "learning_rate": 1e-06, "loss": 0.3461, "step": 2929 }, { "epoch": 0.18777236605998462, "grad_norm": 2.783588647866934, "learning_rate": 1e-06, "loss": 0.349, "step": 2930 }, { "epoch": 0.18783645219174572, "grad_norm": 2.5465407705725, "learning_rate": 1e-06, "loss": 0.4535, "step": 2931 }, { "epoch": 0.18790053832350678, "grad_norm": 2.5538413117725964, "learning_rate": 1e-06, "loss": 0.421, "step": 2932 }, { "epoch": 0.18796462445526788, "grad_norm": 2.551250691707148, "learning_rate": 1e-06, "loss": 0.4594, "step": 2933 }, { "epoch": 0.18802871058702897, "grad_norm": 3.1452067199513345, "learning_rate": 1e-06, "loss": 0.4302, "step": 2934 }, { "epoch": 0.18809279671879006, "grad_norm": 2.743176730793466, "learning_rate": 1e-06, "loss": 0.3967, "step": 2935 }, { "epoch": 0.18815688285055113, "grad_norm": 2.8114938453740113, "learning_rate": 1e-06, "loss": 0.4454, "step": 2936 }, { "epoch": 0.18822096898231222, "grad_norm": 2.8125690365615457, "learning_rate": 1e-06, "loss": 0.3852, "step": 2937 }, { "epoch": 0.1882850551140733, "grad_norm": 2.7093968409248954, "learning_rate": 1e-06, "loss": 0.4598, "step": 2938 }, { "epoch": 0.1883491412458344, "grad_norm": 2.7084723079218977, "learning_rate": 1e-06, "loss": 0.3802, "step": 2939 }, { "epoch": 0.1884132273775955, "grad_norm": 2.924608356651168, "learning_rate": 1e-06, "loss": 0.4494, "step": 2940 }, { "epoch": 0.18847731350935656, "grad_norm": 2.7623725533237296, "learning_rate": 1e-06, "loss": 0.4018, "step": 2941 }, { "epoch": 0.18854139964111766, "grad_norm": 2.570143623445525, "learning_rate": 1e-06, "loss": 0.3848, "step": 2942 }, { "epoch": 0.18860548577287875, "grad_norm": 2.590379184495267, "learning_rate": 1e-06, "loss": 0.3513, "step": 2943 }, { "epoch": 0.18866957190463984, "grad_norm": 2.6091991566587796, "learning_rate": 1e-06, "loss": 0.3754, "step": 2944 }, { "epoch": 0.18873365803640094, "grad_norm": 2.5426130853423685, "learning_rate": 1e-06, "loss": 0.4436, "step": 2945 }, { "epoch": 0.188797744168162, "grad_norm": 2.4880023974397534, "learning_rate": 1e-06, "loss": 0.3936, "step": 2946 }, { "epoch": 0.1888618302999231, "grad_norm": 2.7254911181430734, "learning_rate": 1e-06, "loss": 0.4069, "step": 2947 }, { "epoch": 0.1889259164316842, "grad_norm": 2.855985760623235, "learning_rate": 1e-06, "loss": 0.4405, "step": 2948 }, { "epoch": 0.18899000256344528, "grad_norm": 2.738984275461709, "learning_rate": 1e-06, "loss": 0.4785, "step": 2949 }, { "epoch": 0.18905408869520635, "grad_norm": 2.693467333245631, "learning_rate": 1e-06, "loss": 0.4415, "step": 2950 }, { "epoch": 0.18911817482696744, "grad_norm": 2.725268201313213, "learning_rate": 1e-06, "loss": 0.4535, "step": 2951 }, { "epoch": 0.18918226095872853, "grad_norm": 2.649478170221162, "learning_rate": 1e-06, "loss": 0.4313, "step": 2952 }, { "epoch": 0.18924634709048962, "grad_norm": 2.7059280275569053, "learning_rate": 1e-06, "loss": 0.4168, "step": 2953 }, { "epoch": 0.18931043322225072, "grad_norm": 2.9448804667390966, "learning_rate": 1e-06, "loss": 0.4091, "step": 2954 }, { "epoch": 0.18937451935401178, "grad_norm": 2.6088760199937036, "learning_rate": 1e-06, "loss": 0.4026, "step": 2955 }, { "epoch": 0.18943860548577288, "grad_norm": 2.792982465429231, "learning_rate": 1e-06, "loss": 0.4355, "step": 2956 }, { "epoch": 0.18950269161753397, "grad_norm": 2.856731613527467, "learning_rate": 1e-06, "loss": 0.4374, "step": 2957 }, { "epoch": 0.18956677774929506, "grad_norm": 2.6591552161854044, "learning_rate": 1e-06, "loss": 0.4328, "step": 2958 }, { "epoch": 0.18963086388105613, "grad_norm": 2.820863474787736, "learning_rate": 1e-06, "loss": 0.4068, "step": 2959 }, { "epoch": 0.18969495001281722, "grad_norm": 3.1564803034432067, "learning_rate": 1e-06, "loss": 0.4894, "step": 2960 }, { "epoch": 0.1897590361445783, "grad_norm": 2.7775467867516723, "learning_rate": 1e-06, "loss": 0.4219, "step": 2961 }, { "epoch": 0.1898231222763394, "grad_norm": 2.570709104763904, "learning_rate": 1e-06, "loss": 0.3836, "step": 2962 }, { "epoch": 0.1898872084081005, "grad_norm": 3.02681267671875, "learning_rate": 1e-06, "loss": 0.4426, "step": 2963 }, { "epoch": 0.18995129453986156, "grad_norm": 2.510522158823479, "learning_rate": 1e-06, "loss": 0.4054, "step": 2964 }, { "epoch": 0.19001538067162266, "grad_norm": 2.693774060885273, "learning_rate": 1e-06, "loss": 0.4705, "step": 2965 }, { "epoch": 0.19007946680338375, "grad_norm": 2.748528804109493, "learning_rate": 1e-06, "loss": 0.4332, "step": 2966 }, { "epoch": 0.19014355293514484, "grad_norm": 2.776594086545492, "learning_rate": 1e-06, "loss": 0.4031, "step": 2967 }, { "epoch": 0.1902076390669059, "grad_norm": 2.797526799261737, "learning_rate": 1e-06, "loss": 0.4434, "step": 2968 }, { "epoch": 0.190271725198667, "grad_norm": 2.716540472496669, "learning_rate": 1e-06, "loss": 0.3527, "step": 2969 }, { "epoch": 0.1903358113304281, "grad_norm": 2.5129230641901255, "learning_rate": 1e-06, "loss": 0.4134, "step": 2970 }, { "epoch": 0.1903998974621892, "grad_norm": 2.600501120594301, "learning_rate": 1e-06, "loss": 0.4419, "step": 2971 }, { "epoch": 0.19046398359395028, "grad_norm": 2.976313279269352, "learning_rate": 1e-06, "loss": 0.4721, "step": 2972 }, { "epoch": 0.19052806972571135, "grad_norm": 2.771766068236897, "learning_rate": 1e-06, "loss": 0.3931, "step": 2973 }, { "epoch": 0.19059215585747244, "grad_norm": 2.6489145777810394, "learning_rate": 1e-06, "loss": 0.4546, "step": 2974 }, { "epoch": 0.19065624198923353, "grad_norm": 2.580312074382169, "learning_rate": 1e-06, "loss": 0.3992, "step": 2975 }, { "epoch": 0.19072032812099463, "grad_norm": 2.7443330078441237, "learning_rate": 1e-06, "loss": 0.4155, "step": 2976 }, { "epoch": 0.1907844142527557, "grad_norm": 2.633306061091404, "learning_rate": 1e-06, "loss": 0.4246, "step": 2977 }, { "epoch": 0.19084850038451678, "grad_norm": 2.604761994040819, "learning_rate": 1e-06, "loss": 0.3895, "step": 2978 }, { "epoch": 0.19091258651627788, "grad_norm": 2.683095016513032, "learning_rate": 1e-06, "loss": 0.4331, "step": 2979 }, { "epoch": 0.19097667264803897, "grad_norm": 2.6835183303069003, "learning_rate": 1e-06, "loss": 0.4427, "step": 2980 }, { "epoch": 0.19104075877980006, "grad_norm": 2.502521760012333, "learning_rate": 1e-06, "loss": 0.3837, "step": 2981 }, { "epoch": 0.19110484491156113, "grad_norm": 2.4357388695095055, "learning_rate": 1e-06, "loss": 0.3592, "step": 2982 }, { "epoch": 0.19116893104332222, "grad_norm": 2.7305965671623036, "learning_rate": 1e-06, "loss": 0.4131, "step": 2983 }, { "epoch": 0.19123301717508331, "grad_norm": 2.362646675213141, "learning_rate": 1e-06, "loss": 0.3743, "step": 2984 }, { "epoch": 0.1912971033068444, "grad_norm": 2.7384238709786413, "learning_rate": 1e-06, "loss": 0.413, "step": 2985 }, { "epoch": 0.19136118943860547, "grad_norm": 2.5007223098863647, "learning_rate": 1e-06, "loss": 0.3885, "step": 2986 }, { "epoch": 0.19142527557036657, "grad_norm": 2.700176270926466, "learning_rate": 1e-06, "loss": 0.4215, "step": 2987 }, { "epoch": 0.19148936170212766, "grad_norm": 2.7085371035105346, "learning_rate": 1e-06, "loss": 0.4206, "step": 2988 }, { "epoch": 0.19155344783388875, "grad_norm": 2.79283849751298, "learning_rate": 1e-06, "loss": 0.4543, "step": 2989 }, { "epoch": 0.19161753396564984, "grad_norm": 2.7877343008577946, "learning_rate": 1e-06, "loss": 0.4294, "step": 2990 }, { "epoch": 0.1916816200974109, "grad_norm": 2.7778817346697493, "learning_rate": 1e-06, "loss": 0.3539, "step": 2991 }, { "epoch": 0.191745706229172, "grad_norm": 2.6074940866398157, "learning_rate": 1e-06, "loss": 0.4145, "step": 2992 }, { "epoch": 0.1918097923609331, "grad_norm": 2.7947299787206887, "learning_rate": 1e-06, "loss": 0.4358, "step": 2993 }, { "epoch": 0.1918738784926942, "grad_norm": 2.550657685199485, "learning_rate": 1e-06, "loss": 0.4123, "step": 2994 }, { "epoch": 0.19193796462445528, "grad_norm": 2.6333770331684154, "learning_rate": 1e-06, "loss": 0.3853, "step": 2995 }, { "epoch": 0.19200205075621635, "grad_norm": 2.6548130755758126, "learning_rate": 1e-06, "loss": 0.3742, "step": 2996 }, { "epoch": 0.19206613688797744, "grad_norm": 2.5175421052166915, "learning_rate": 1e-06, "loss": 0.4083, "step": 2997 }, { "epoch": 0.19213022301973853, "grad_norm": 2.6092498555990433, "learning_rate": 1e-06, "loss": 0.4065, "step": 2998 }, { "epoch": 0.19219430915149963, "grad_norm": 2.6746199446994057, "learning_rate": 1e-06, "loss": 0.3676, "step": 2999 }, { "epoch": 0.1922583952832607, "grad_norm": 2.689102338246197, "learning_rate": 1e-06, "loss": 0.4043, "step": 3000 }, { "epoch": 0.19232248141502178, "grad_norm": 2.8545617517948174, "learning_rate": 1e-06, "loss": 0.3947, "step": 3001 }, { "epoch": 0.19238656754678288, "grad_norm": 2.72399379976401, "learning_rate": 1e-06, "loss": 0.4357, "step": 3002 }, { "epoch": 0.19245065367854397, "grad_norm": 2.6992917271041943, "learning_rate": 1e-06, "loss": 0.3954, "step": 3003 }, { "epoch": 0.19251473981030506, "grad_norm": 2.7469258281355975, "learning_rate": 1e-06, "loss": 0.4817, "step": 3004 }, { "epoch": 0.19257882594206613, "grad_norm": 2.8151942792068714, "learning_rate": 1e-06, "loss": 0.4743, "step": 3005 }, { "epoch": 0.19264291207382722, "grad_norm": 2.717535241339008, "learning_rate": 1e-06, "loss": 0.4756, "step": 3006 }, { "epoch": 0.19270699820558831, "grad_norm": 2.6518470118186936, "learning_rate": 1e-06, "loss": 0.4416, "step": 3007 }, { "epoch": 0.1927710843373494, "grad_norm": 2.745889107117633, "learning_rate": 1e-06, "loss": 0.4452, "step": 3008 }, { "epoch": 0.19283517046911047, "grad_norm": 2.6299402934811225, "learning_rate": 1e-06, "loss": 0.3689, "step": 3009 }, { "epoch": 0.19289925660087157, "grad_norm": 3.3013282687495265, "learning_rate": 1e-06, "loss": 0.4228, "step": 3010 }, { "epoch": 0.19296334273263266, "grad_norm": 2.7392676652603045, "learning_rate": 1e-06, "loss": 0.439, "step": 3011 }, { "epoch": 0.19302742886439375, "grad_norm": 2.5699388413274815, "learning_rate": 1e-06, "loss": 0.4317, "step": 3012 }, { "epoch": 0.19309151499615484, "grad_norm": 2.527230563956275, "learning_rate": 1e-06, "loss": 0.4066, "step": 3013 }, { "epoch": 0.1931556011279159, "grad_norm": 2.4490387549595685, "learning_rate": 1e-06, "loss": 0.4148, "step": 3014 }, { "epoch": 0.193219687259677, "grad_norm": 2.7023787880147108, "learning_rate": 1e-06, "loss": 0.4121, "step": 3015 }, { "epoch": 0.1932837733914381, "grad_norm": 2.6746716675529654, "learning_rate": 1e-06, "loss": 0.4117, "step": 3016 }, { "epoch": 0.1933478595231992, "grad_norm": 2.6345823971450373, "learning_rate": 1e-06, "loss": 0.4575, "step": 3017 }, { "epoch": 0.19341194565496025, "grad_norm": 2.7711286043104755, "learning_rate": 1e-06, "loss": 0.4484, "step": 3018 }, { "epoch": 0.19347603178672135, "grad_norm": 2.4680613743712616, "learning_rate": 1e-06, "loss": 0.4018, "step": 3019 }, { "epoch": 0.19354011791848244, "grad_norm": 2.6003860886706556, "learning_rate": 1e-06, "loss": 0.384, "step": 3020 }, { "epoch": 0.19360420405024353, "grad_norm": 2.6108666448050384, "learning_rate": 1e-06, "loss": 0.4367, "step": 3021 }, { "epoch": 0.19366829018200463, "grad_norm": 2.758026508393086, "learning_rate": 1e-06, "loss": 0.3995, "step": 3022 }, { "epoch": 0.1937323763137657, "grad_norm": 2.4812630624240546, "learning_rate": 1e-06, "loss": 0.373, "step": 3023 }, { "epoch": 0.19379646244552678, "grad_norm": 2.5059074556288685, "learning_rate": 1e-06, "loss": 0.4, "step": 3024 }, { "epoch": 0.19386054857728788, "grad_norm": 2.700687588961143, "learning_rate": 1e-06, "loss": 0.4318, "step": 3025 }, { "epoch": 0.19392463470904897, "grad_norm": 2.5778023098429625, "learning_rate": 1e-06, "loss": 0.4004, "step": 3026 }, { "epoch": 0.19398872084081004, "grad_norm": 2.8092589531817236, "learning_rate": 1e-06, "loss": 0.4292, "step": 3027 }, { "epoch": 0.19405280697257113, "grad_norm": 2.6755152209276702, "learning_rate": 1e-06, "loss": 0.4104, "step": 3028 }, { "epoch": 0.19411689310433222, "grad_norm": 2.717840638318553, "learning_rate": 1e-06, "loss": 0.3854, "step": 3029 }, { "epoch": 0.19418097923609332, "grad_norm": 2.8100797610129864, "learning_rate": 1e-06, "loss": 0.3904, "step": 3030 }, { "epoch": 0.1942450653678544, "grad_norm": 2.8468932816579846, "learning_rate": 1e-06, "loss": 0.4597, "step": 3031 }, { "epoch": 0.19430915149961547, "grad_norm": 2.7200125501208388, "learning_rate": 1e-06, "loss": 0.4227, "step": 3032 }, { "epoch": 0.19437323763137657, "grad_norm": 2.745674467926342, "learning_rate": 1e-06, "loss": 0.3926, "step": 3033 }, { "epoch": 0.19443732376313766, "grad_norm": 2.66764518189243, "learning_rate": 1e-06, "loss": 0.4487, "step": 3034 }, { "epoch": 0.19450140989489875, "grad_norm": 2.72148919183853, "learning_rate": 1e-06, "loss": 0.431, "step": 3035 }, { "epoch": 0.19456549602665982, "grad_norm": 2.766399378265474, "learning_rate": 1e-06, "loss": 0.3967, "step": 3036 }, { "epoch": 0.1946295821584209, "grad_norm": 2.4568489231818256, "learning_rate": 1e-06, "loss": 0.3821, "step": 3037 }, { "epoch": 0.194693668290182, "grad_norm": 2.8245628648840047, "learning_rate": 1e-06, "loss": 0.3791, "step": 3038 }, { "epoch": 0.1947577544219431, "grad_norm": 2.8867994809759514, "learning_rate": 1e-06, "loss": 0.4492, "step": 3039 }, { "epoch": 0.1948218405537042, "grad_norm": 2.7372903443602343, "learning_rate": 1e-06, "loss": 0.3816, "step": 3040 }, { "epoch": 0.19488592668546525, "grad_norm": 2.654619328065438, "learning_rate": 1e-06, "loss": 0.3837, "step": 3041 }, { "epoch": 0.19495001281722635, "grad_norm": 2.6189824595138953, "learning_rate": 1e-06, "loss": 0.4104, "step": 3042 }, { "epoch": 0.19501409894898744, "grad_norm": 2.549845796501405, "learning_rate": 1e-06, "loss": 0.3848, "step": 3043 }, { "epoch": 0.19507818508074853, "grad_norm": 2.7252239626651438, "learning_rate": 1e-06, "loss": 0.4229, "step": 3044 }, { "epoch": 0.1951422712125096, "grad_norm": 2.8890697357641875, "learning_rate": 1e-06, "loss": 0.4447, "step": 3045 }, { "epoch": 0.1952063573442707, "grad_norm": 2.72838732384927, "learning_rate": 1e-06, "loss": 0.4355, "step": 3046 }, { "epoch": 0.19527044347603179, "grad_norm": 2.656321398304245, "learning_rate": 1e-06, "loss": 0.422, "step": 3047 }, { "epoch": 0.19533452960779288, "grad_norm": 2.6185422333681867, "learning_rate": 1e-06, "loss": 0.4524, "step": 3048 }, { "epoch": 0.19539861573955397, "grad_norm": 2.5406421196673437, "learning_rate": 1e-06, "loss": 0.3771, "step": 3049 }, { "epoch": 0.19546270187131504, "grad_norm": 2.7469839128125497, "learning_rate": 1e-06, "loss": 0.3864, "step": 3050 }, { "epoch": 0.19552678800307613, "grad_norm": 2.4965398898043585, "learning_rate": 1e-06, "loss": 0.4065, "step": 3051 }, { "epoch": 0.19559087413483722, "grad_norm": 2.4349462204869314, "learning_rate": 1e-06, "loss": 0.3925, "step": 3052 }, { "epoch": 0.19565496026659832, "grad_norm": 2.466053052427371, "learning_rate": 1e-06, "loss": 0.4485, "step": 3053 }, { "epoch": 0.1957190463983594, "grad_norm": 2.644641993283081, "learning_rate": 1e-06, "loss": 0.3925, "step": 3054 }, { "epoch": 0.19578313253012047, "grad_norm": 2.7286313655607666, "learning_rate": 1e-06, "loss": 0.3796, "step": 3055 }, { "epoch": 0.19584721866188157, "grad_norm": 3.288638532373253, "learning_rate": 1e-06, "loss": 0.5013, "step": 3056 }, { "epoch": 0.19591130479364266, "grad_norm": 2.6024355715319603, "learning_rate": 1e-06, "loss": 0.4452, "step": 3057 }, { "epoch": 0.19597539092540375, "grad_norm": 2.781418640960001, "learning_rate": 1e-06, "loss": 0.4034, "step": 3058 }, { "epoch": 0.19603947705716482, "grad_norm": 2.5251976082178884, "learning_rate": 1e-06, "loss": 0.3554, "step": 3059 }, { "epoch": 0.1961035631889259, "grad_norm": 2.7218225486270264, "learning_rate": 1e-06, "loss": 0.3854, "step": 3060 }, { "epoch": 0.196167649320687, "grad_norm": 2.465634369754827, "learning_rate": 1e-06, "loss": 0.3811, "step": 3061 }, { "epoch": 0.1962317354524481, "grad_norm": 2.637439083827366, "learning_rate": 1e-06, "loss": 0.4004, "step": 3062 }, { "epoch": 0.1962958215842092, "grad_norm": 2.615911737590689, "learning_rate": 1e-06, "loss": 0.4588, "step": 3063 }, { "epoch": 0.19635990771597026, "grad_norm": 2.5254899516331526, "learning_rate": 1e-06, "loss": 0.4321, "step": 3064 }, { "epoch": 0.19642399384773135, "grad_norm": 2.6078553888979394, "learning_rate": 1e-06, "loss": 0.3808, "step": 3065 }, { "epoch": 0.19648807997949244, "grad_norm": 2.7298428056608466, "learning_rate": 1e-06, "loss": 0.4116, "step": 3066 }, { "epoch": 0.19655216611125353, "grad_norm": 2.636498777465297, "learning_rate": 1e-06, "loss": 0.4114, "step": 3067 }, { "epoch": 0.1966162522430146, "grad_norm": 2.6319615141742934, "learning_rate": 1e-06, "loss": 0.3911, "step": 3068 }, { "epoch": 0.1966803383747757, "grad_norm": 2.5563489648377313, "learning_rate": 1e-06, "loss": 0.468, "step": 3069 }, { "epoch": 0.19674442450653679, "grad_norm": 2.4495929287604232, "learning_rate": 1e-06, "loss": 0.3965, "step": 3070 }, { "epoch": 0.19680851063829788, "grad_norm": 2.4717484721741254, "learning_rate": 1e-06, "loss": 0.4132, "step": 3071 }, { "epoch": 0.19687259677005897, "grad_norm": 2.590478233813598, "learning_rate": 1e-06, "loss": 0.4312, "step": 3072 }, { "epoch": 0.19693668290182004, "grad_norm": 2.7616833115828707, "learning_rate": 1e-06, "loss": 0.3987, "step": 3073 }, { "epoch": 0.19700076903358113, "grad_norm": 2.5261292650531884, "learning_rate": 1e-06, "loss": 0.3813, "step": 3074 }, { "epoch": 0.19706485516534222, "grad_norm": 2.7011738929384284, "learning_rate": 1e-06, "loss": 0.3816, "step": 3075 }, { "epoch": 0.19712894129710332, "grad_norm": 2.879203428383538, "learning_rate": 1e-06, "loss": 0.4445, "step": 3076 }, { "epoch": 0.19719302742886438, "grad_norm": 2.595528866437978, "learning_rate": 1e-06, "loss": 0.4057, "step": 3077 }, { "epoch": 0.19725711356062547, "grad_norm": 2.94515771596298, "learning_rate": 1e-06, "loss": 0.4529, "step": 3078 }, { "epoch": 0.19732119969238657, "grad_norm": 2.6736790736746445, "learning_rate": 1e-06, "loss": 0.4309, "step": 3079 }, { "epoch": 0.19738528582414766, "grad_norm": 2.5140252504503726, "learning_rate": 1e-06, "loss": 0.4023, "step": 3080 }, { "epoch": 0.19744937195590875, "grad_norm": 2.81911651094041, "learning_rate": 1e-06, "loss": 0.4125, "step": 3081 }, { "epoch": 0.19751345808766982, "grad_norm": 2.5216675333964185, "learning_rate": 1e-06, "loss": 0.4035, "step": 3082 }, { "epoch": 0.1975775442194309, "grad_norm": 2.597791352413284, "learning_rate": 1e-06, "loss": 0.4371, "step": 3083 }, { "epoch": 0.197641630351192, "grad_norm": 2.6051238849878824, "learning_rate": 1e-06, "loss": 0.4179, "step": 3084 }, { "epoch": 0.1977057164829531, "grad_norm": 2.4626622081291916, "learning_rate": 1e-06, "loss": 0.3564, "step": 3085 }, { "epoch": 0.19776980261471416, "grad_norm": 2.6135316427042787, "learning_rate": 1e-06, "loss": 0.4043, "step": 3086 }, { "epoch": 0.19783388874647526, "grad_norm": 2.497469648863096, "learning_rate": 1e-06, "loss": 0.3922, "step": 3087 }, { "epoch": 0.19789797487823635, "grad_norm": 2.5303900994698476, "learning_rate": 1e-06, "loss": 0.3694, "step": 3088 }, { "epoch": 0.19796206100999744, "grad_norm": 2.6047418213304003, "learning_rate": 1e-06, "loss": 0.4524, "step": 3089 }, { "epoch": 0.19802614714175854, "grad_norm": 2.6321807982006797, "learning_rate": 1e-06, "loss": 0.3732, "step": 3090 }, { "epoch": 0.1980902332735196, "grad_norm": 2.5343685708188834, "learning_rate": 1e-06, "loss": 0.4256, "step": 3091 }, { "epoch": 0.1981543194052807, "grad_norm": 2.8626866297721123, "learning_rate": 1e-06, "loss": 0.3833, "step": 3092 }, { "epoch": 0.1982184055370418, "grad_norm": 2.648616331118623, "learning_rate": 1e-06, "loss": 0.3815, "step": 3093 }, { "epoch": 0.19828249166880288, "grad_norm": 2.7536516489830976, "learning_rate": 1e-06, "loss": 0.4149, "step": 3094 }, { "epoch": 0.19834657780056394, "grad_norm": 2.7451936420156873, "learning_rate": 1e-06, "loss": 0.4143, "step": 3095 }, { "epoch": 0.19841066393232504, "grad_norm": 2.559391512260891, "learning_rate": 1e-06, "loss": 0.4169, "step": 3096 }, { "epoch": 0.19847475006408613, "grad_norm": 2.5924661584948376, "learning_rate": 1e-06, "loss": 0.398, "step": 3097 }, { "epoch": 0.19853883619584722, "grad_norm": 2.7921584349548145, "learning_rate": 1e-06, "loss": 0.45, "step": 3098 }, { "epoch": 0.19860292232760832, "grad_norm": 2.659738608230625, "learning_rate": 1e-06, "loss": 0.3917, "step": 3099 }, { "epoch": 0.19866700845936938, "grad_norm": 2.5305396379037197, "learning_rate": 1e-06, "loss": 0.3791, "step": 3100 }, { "epoch": 0.19873109459113047, "grad_norm": 2.756190416272402, "learning_rate": 1e-06, "loss": 0.3839, "step": 3101 }, { "epoch": 0.19879518072289157, "grad_norm": 2.539580583309545, "learning_rate": 1e-06, "loss": 0.4275, "step": 3102 }, { "epoch": 0.19885926685465266, "grad_norm": 2.67813254086445, "learning_rate": 1e-06, "loss": 0.3512, "step": 3103 }, { "epoch": 0.19892335298641373, "grad_norm": 2.640991640157843, "learning_rate": 1e-06, "loss": 0.4512, "step": 3104 }, { "epoch": 0.19898743911817482, "grad_norm": 2.5533029619456853, "learning_rate": 1e-06, "loss": 0.3762, "step": 3105 }, { "epoch": 0.1990515252499359, "grad_norm": 2.6127542430810635, "learning_rate": 1e-06, "loss": 0.3938, "step": 3106 }, { "epoch": 0.199115611381697, "grad_norm": 2.6179136331616215, "learning_rate": 1e-06, "loss": 0.3971, "step": 3107 }, { "epoch": 0.1991796975134581, "grad_norm": 2.6944181569516914, "learning_rate": 1e-06, "loss": 0.4073, "step": 3108 }, { "epoch": 0.19924378364521916, "grad_norm": 2.8686676163236613, "learning_rate": 1e-06, "loss": 0.4284, "step": 3109 }, { "epoch": 0.19930786977698026, "grad_norm": 2.679658713052307, "learning_rate": 1e-06, "loss": 0.3844, "step": 3110 }, { "epoch": 0.19937195590874135, "grad_norm": 2.905490286430153, "learning_rate": 1e-06, "loss": 0.4576, "step": 3111 }, { "epoch": 0.19943604204050244, "grad_norm": 2.77202972977206, "learning_rate": 1e-06, "loss": 0.4071, "step": 3112 }, { "epoch": 0.19950012817226354, "grad_norm": 2.6987682861458877, "learning_rate": 1e-06, "loss": 0.3778, "step": 3113 }, { "epoch": 0.1995642143040246, "grad_norm": 2.9302419214602664, "learning_rate": 1e-06, "loss": 0.4353, "step": 3114 }, { "epoch": 0.1996283004357857, "grad_norm": 2.7112189817286887, "learning_rate": 1e-06, "loss": 0.4163, "step": 3115 }, { "epoch": 0.1996923865675468, "grad_norm": 2.6842811724796687, "learning_rate": 1e-06, "loss": 0.4416, "step": 3116 }, { "epoch": 0.19975647269930788, "grad_norm": 2.5186808347832708, "learning_rate": 1e-06, "loss": 0.3756, "step": 3117 }, { "epoch": 0.19982055883106895, "grad_norm": 2.891265829188758, "learning_rate": 1e-06, "loss": 0.4248, "step": 3118 }, { "epoch": 0.19988464496283004, "grad_norm": 2.445277384717998, "learning_rate": 1e-06, "loss": 0.3837, "step": 3119 }, { "epoch": 0.19994873109459113, "grad_norm": 2.4402702476916227, "learning_rate": 1e-06, "loss": 0.38, "step": 3120 }, { "epoch": 0.20001281722635222, "grad_norm": 2.6536177038610727, "learning_rate": 1e-06, "loss": 0.3974, "step": 3121 }, { "epoch": 0.20007690335811332, "grad_norm": 2.68284801164674, "learning_rate": 1e-06, "loss": 0.4214, "step": 3122 }, { "epoch": 0.20014098948987438, "grad_norm": 2.9008015151571342, "learning_rate": 1e-06, "loss": 0.4362, "step": 3123 }, { "epoch": 0.20020507562163548, "grad_norm": 2.640711993107474, "learning_rate": 1e-06, "loss": 0.4139, "step": 3124 }, { "epoch": 0.20026916175339657, "grad_norm": 2.7448204574462416, "learning_rate": 1e-06, "loss": 0.407, "step": 3125 }, { "epoch": 0.20033324788515766, "grad_norm": 2.6303804716884183, "learning_rate": 1e-06, "loss": 0.3674, "step": 3126 }, { "epoch": 0.20039733401691873, "grad_norm": 2.7394724745610897, "learning_rate": 1e-06, "loss": 0.3837, "step": 3127 }, { "epoch": 0.20046142014867982, "grad_norm": 2.407575755511553, "learning_rate": 1e-06, "loss": 0.3786, "step": 3128 }, { "epoch": 0.2005255062804409, "grad_norm": 2.7833525732649353, "learning_rate": 1e-06, "loss": 0.4346, "step": 3129 }, { "epoch": 0.200589592412202, "grad_norm": 2.7948666514800657, "learning_rate": 1e-06, "loss": 0.4425, "step": 3130 }, { "epoch": 0.2006536785439631, "grad_norm": 2.619044236684681, "learning_rate": 1e-06, "loss": 0.3851, "step": 3131 }, { "epoch": 0.20071776467572416, "grad_norm": 2.7631433482197596, "learning_rate": 1e-06, "loss": 0.3831, "step": 3132 }, { "epoch": 0.20078185080748526, "grad_norm": 2.7735001022993155, "learning_rate": 1e-06, "loss": 0.4302, "step": 3133 }, { "epoch": 0.20084593693924635, "grad_norm": 2.6649390403366198, "learning_rate": 1e-06, "loss": 0.4513, "step": 3134 }, { "epoch": 0.20091002307100744, "grad_norm": 2.642064035206794, "learning_rate": 1e-06, "loss": 0.4495, "step": 3135 }, { "epoch": 0.2009741092027685, "grad_norm": 2.6129073896361414, "learning_rate": 1e-06, "loss": 0.3816, "step": 3136 }, { "epoch": 0.2010381953345296, "grad_norm": 2.9648669735240447, "learning_rate": 1e-06, "loss": 0.4165, "step": 3137 }, { "epoch": 0.2011022814662907, "grad_norm": 2.8669137466757326, "learning_rate": 1e-06, "loss": 0.3595, "step": 3138 }, { "epoch": 0.2011663675980518, "grad_norm": 2.5460585330292003, "learning_rate": 1e-06, "loss": 0.3667, "step": 3139 }, { "epoch": 0.20123045372981288, "grad_norm": 2.3987256825811274, "learning_rate": 1e-06, "loss": 0.3765, "step": 3140 }, { "epoch": 0.20129453986157395, "grad_norm": 2.5263050591029867, "learning_rate": 1e-06, "loss": 0.4062, "step": 3141 }, { "epoch": 0.20135862599333504, "grad_norm": 2.985354785460851, "learning_rate": 1e-06, "loss": 0.396, "step": 3142 }, { "epoch": 0.20142271212509613, "grad_norm": 2.7659888764684264, "learning_rate": 1e-06, "loss": 0.4453, "step": 3143 }, { "epoch": 0.20148679825685722, "grad_norm": 2.591450612167699, "learning_rate": 1e-06, "loss": 0.4083, "step": 3144 }, { "epoch": 0.2015508843886183, "grad_norm": 2.8521468462132544, "learning_rate": 1e-06, "loss": 0.4404, "step": 3145 }, { "epoch": 0.20161497052037938, "grad_norm": 2.436797817729498, "learning_rate": 1e-06, "loss": 0.4238, "step": 3146 }, { "epoch": 0.20167905665214048, "grad_norm": 2.8810258226794265, "learning_rate": 1e-06, "loss": 0.4529, "step": 3147 }, { "epoch": 0.20174314278390157, "grad_norm": 2.749593307275725, "learning_rate": 1e-06, "loss": 0.3718, "step": 3148 }, { "epoch": 0.20180722891566266, "grad_norm": 2.5819133802039698, "learning_rate": 1e-06, "loss": 0.4202, "step": 3149 }, { "epoch": 0.20187131504742373, "grad_norm": 2.814319540956971, "learning_rate": 1e-06, "loss": 0.4702, "step": 3150 }, { "epoch": 0.20193540117918482, "grad_norm": 2.755121410304437, "learning_rate": 1e-06, "loss": 0.4292, "step": 3151 }, { "epoch": 0.2019994873109459, "grad_norm": 2.8607357345261186, "learning_rate": 1e-06, "loss": 0.4081, "step": 3152 }, { "epoch": 0.202063573442707, "grad_norm": 2.7644998286160414, "learning_rate": 1e-06, "loss": 0.4148, "step": 3153 }, { "epoch": 0.20212765957446807, "grad_norm": 3.1791677414919937, "learning_rate": 1e-06, "loss": 0.4024, "step": 3154 }, { "epoch": 0.20219174570622916, "grad_norm": 2.761445190168834, "learning_rate": 1e-06, "loss": 0.4556, "step": 3155 }, { "epoch": 0.20225583183799026, "grad_norm": 2.6524443855935744, "learning_rate": 1e-06, "loss": 0.3857, "step": 3156 }, { "epoch": 0.20231991796975135, "grad_norm": 2.6616266519428486, "learning_rate": 1e-06, "loss": 0.4252, "step": 3157 }, { "epoch": 0.20238400410151244, "grad_norm": 2.5016076188972827, "learning_rate": 1e-06, "loss": 0.3676, "step": 3158 }, { "epoch": 0.2024480902332735, "grad_norm": 2.530401535505697, "learning_rate": 1e-06, "loss": 0.3893, "step": 3159 }, { "epoch": 0.2025121763650346, "grad_norm": 2.6587919579612147, "learning_rate": 1e-06, "loss": 0.4009, "step": 3160 }, { "epoch": 0.2025762624967957, "grad_norm": 2.9566194209341, "learning_rate": 1e-06, "loss": 0.42, "step": 3161 }, { "epoch": 0.2026403486285568, "grad_norm": 2.6206510556300033, "learning_rate": 1e-06, "loss": 0.4262, "step": 3162 }, { "epoch": 0.20270443476031788, "grad_norm": 2.927652472371367, "learning_rate": 1e-06, "loss": 0.4889, "step": 3163 }, { "epoch": 0.20276852089207895, "grad_norm": 2.65170729751374, "learning_rate": 1e-06, "loss": 0.4269, "step": 3164 }, { "epoch": 0.20283260702384004, "grad_norm": 2.5867425876599865, "learning_rate": 1e-06, "loss": 0.4889, "step": 3165 }, { "epoch": 0.20289669315560113, "grad_norm": 2.7063246897863285, "learning_rate": 1e-06, "loss": 0.4004, "step": 3166 }, { "epoch": 0.20296077928736223, "grad_norm": 2.624579109415967, "learning_rate": 1e-06, "loss": 0.4464, "step": 3167 }, { "epoch": 0.2030248654191233, "grad_norm": 2.7380146988562504, "learning_rate": 1e-06, "loss": 0.3857, "step": 3168 }, { "epoch": 0.20308895155088438, "grad_norm": 2.4525698832360465, "learning_rate": 1e-06, "loss": 0.4224, "step": 3169 }, { "epoch": 0.20315303768264548, "grad_norm": 2.717087375931541, "learning_rate": 1e-06, "loss": 0.3979, "step": 3170 }, { "epoch": 0.20321712381440657, "grad_norm": 2.7561886591445672, "learning_rate": 1e-06, "loss": 0.364, "step": 3171 }, { "epoch": 0.20328120994616766, "grad_norm": 2.7637234247630853, "learning_rate": 1e-06, "loss": 0.3985, "step": 3172 }, { "epoch": 0.20334529607792873, "grad_norm": 2.660887630337373, "learning_rate": 1e-06, "loss": 0.3672, "step": 3173 }, { "epoch": 0.20340938220968982, "grad_norm": 2.7012411259252924, "learning_rate": 1e-06, "loss": 0.5082, "step": 3174 }, { "epoch": 0.2034734683414509, "grad_norm": 2.7199733769770913, "learning_rate": 1e-06, "loss": 0.4146, "step": 3175 }, { "epoch": 0.203537554473212, "grad_norm": 2.8716087266272314, "learning_rate": 1e-06, "loss": 0.3657, "step": 3176 }, { "epoch": 0.20360164060497307, "grad_norm": 2.757204262047918, "learning_rate": 1e-06, "loss": 0.418, "step": 3177 }, { "epoch": 0.20366572673673417, "grad_norm": 2.7868293714034356, "learning_rate": 1e-06, "loss": 0.4133, "step": 3178 }, { "epoch": 0.20372981286849526, "grad_norm": 2.4386658649656097, "learning_rate": 1e-06, "loss": 0.3242, "step": 3179 }, { "epoch": 0.20379389900025635, "grad_norm": 2.745982025953506, "learning_rate": 1e-06, "loss": 0.4203, "step": 3180 }, { "epoch": 0.20385798513201744, "grad_norm": 2.8324684296501697, "learning_rate": 1e-06, "loss": 0.4359, "step": 3181 }, { "epoch": 0.2039220712637785, "grad_norm": 2.997115716317168, "learning_rate": 1e-06, "loss": 0.4222, "step": 3182 }, { "epoch": 0.2039861573955396, "grad_norm": 2.6677174069426366, "learning_rate": 1e-06, "loss": 0.4169, "step": 3183 }, { "epoch": 0.2040502435273007, "grad_norm": 2.5669472730439704, "learning_rate": 1e-06, "loss": 0.4617, "step": 3184 }, { "epoch": 0.2041143296590618, "grad_norm": 2.5467043969436656, "learning_rate": 1e-06, "loss": 0.4144, "step": 3185 }, { "epoch": 0.20417841579082285, "grad_norm": 2.6219195102759723, "learning_rate": 1e-06, "loss": 0.4482, "step": 3186 }, { "epoch": 0.20424250192258395, "grad_norm": 2.753937077422682, "learning_rate": 1e-06, "loss": 0.4078, "step": 3187 }, { "epoch": 0.20430658805434504, "grad_norm": 2.5606663484409355, "learning_rate": 1e-06, "loss": 0.3949, "step": 3188 }, { "epoch": 0.20437067418610613, "grad_norm": 2.6646453207007994, "learning_rate": 1e-06, "loss": 0.4301, "step": 3189 }, { "epoch": 0.20443476031786723, "grad_norm": 2.6862551366161513, "learning_rate": 1e-06, "loss": 0.3938, "step": 3190 }, { "epoch": 0.2044988464496283, "grad_norm": 2.523615636687127, "learning_rate": 1e-06, "loss": 0.4181, "step": 3191 }, { "epoch": 0.20456293258138938, "grad_norm": 2.6884484446187873, "learning_rate": 1e-06, "loss": 0.3618, "step": 3192 }, { "epoch": 0.20462701871315048, "grad_norm": 2.829665325764848, "learning_rate": 1e-06, "loss": 0.4052, "step": 3193 }, { "epoch": 0.20469110484491157, "grad_norm": 2.5953374746890403, "learning_rate": 1e-06, "loss": 0.4389, "step": 3194 }, { "epoch": 0.20475519097667264, "grad_norm": 2.8466331258627604, "learning_rate": 1e-06, "loss": 0.4039, "step": 3195 }, { "epoch": 0.20481927710843373, "grad_norm": 2.7486183212115547, "learning_rate": 1e-06, "loss": 0.3876, "step": 3196 }, { "epoch": 0.20488336324019482, "grad_norm": 2.801298683547888, "learning_rate": 1e-06, "loss": 0.367, "step": 3197 }, { "epoch": 0.20494744937195591, "grad_norm": 2.695730931752156, "learning_rate": 1e-06, "loss": 0.4933, "step": 3198 }, { "epoch": 0.205011535503717, "grad_norm": 2.8125270494981027, "learning_rate": 1e-06, "loss": 0.3954, "step": 3199 }, { "epoch": 0.20507562163547807, "grad_norm": 3.2577167938002796, "learning_rate": 1e-06, "loss": 0.4277, "step": 3200 }, { "epoch": 0.20513970776723917, "grad_norm": 2.758478341893175, "learning_rate": 1e-06, "loss": 0.4352, "step": 3201 }, { "epoch": 0.20520379389900026, "grad_norm": 2.879050943230032, "learning_rate": 1e-06, "loss": 0.4305, "step": 3202 }, { "epoch": 0.20526788003076135, "grad_norm": 2.686643342867524, "learning_rate": 1e-06, "loss": 0.3573, "step": 3203 }, { "epoch": 0.20533196616252242, "grad_norm": 2.721258382735068, "learning_rate": 1e-06, "loss": 0.4019, "step": 3204 }, { "epoch": 0.2053960522942835, "grad_norm": 2.7158203178493854, "learning_rate": 1e-06, "loss": 0.43, "step": 3205 }, { "epoch": 0.2054601384260446, "grad_norm": 2.624134577750634, "learning_rate": 1e-06, "loss": 0.3608, "step": 3206 }, { "epoch": 0.2055242245578057, "grad_norm": 2.720833641453419, "learning_rate": 1e-06, "loss": 0.4991, "step": 3207 }, { "epoch": 0.2055883106895668, "grad_norm": 2.7278965888939255, "learning_rate": 1e-06, "loss": 0.4445, "step": 3208 }, { "epoch": 0.20565239682132785, "grad_norm": 2.9254754982044346, "learning_rate": 1e-06, "loss": 0.3889, "step": 3209 }, { "epoch": 0.20571648295308895, "grad_norm": 2.6542985721484382, "learning_rate": 1e-06, "loss": 0.3975, "step": 3210 }, { "epoch": 0.20578056908485004, "grad_norm": 2.699604037727829, "learning_rate": 1e-06, "loss": 0.3911, "step": 3211 }, { "epoch": 0.20584465521661113, "grad_norm": 2.7276421722192525, "learning_rate": 1e-06, "loss": 0.4107, "step": 3212 }, { "epoch": 0.2059087413483722, "grad_norm": 2.6977864355232715, "learning_rate": 1e-06, "loss": 0.4003, "step": 3213 }, { "epoch": 0.2059728274801333, "grad_norm": 2.6477848771201886, "learning_rate": 1e-06, "loss": 0.4429, "step": 3214 }, { "epoch": 0.20603691361189438, "grad_norm": 2.672606633365545, "learning_rate": 1e-06, "loss": 0.3984, "step": 3215 }, { "epoch": 0.20610099974365548, "grad_norm": 2.4604151399516945, "learning_rate": 1e-06, "loss": 0.4922, "step": 3216 }, { "epoch": 0.20616508587541657, "grad_norm": 2.857586432951352, "learning_rate": 1e-06, "loss": 0.3976, "step": 3217 }, { "epoch": 0.20622917200717764, "grad_norm": 2.4822416440627264, "learning_rate": 1e-06, "loss": 0.4069, "step": 3218 }, { "epoch": 0.20629325813893873, "grad_norm": 2.6436355434453565, "learning_rate": 1e-06, "loss": 0.3792, "step": 3219 }, { "epoch": 0.20635734427069982, "grad_norm": 2.6742355296426212, "learning_rate": 1e-06, "loss": 0.4404, "step": 3220 }, { "epoch": 0.20642143040246091, "grad_norm": 2.8105861214534453, "learning_rate": 1e-06, "loss": 0.39, "step": 3221 }, { "epoch": 0.206485516534222, "grad_norm": 2.5930834712675876, "learning_rate": 1e-06, "loss": 0.4299, "step": 3222 }, { "epoch": 0.20654960266598307, "grad_norm": 2.5829548868133654, "learning_rate": 1e-06, "loss": 0.4105, "step": 3223 }, { "epoch": 0.20661368879774417, "grad_norm": 2.719837435467215, "learning_rate": 1e-06, "loss": 0.3834, "step": 3224 }, { "epoch": 0.20667777492950526, "grad_norm": 2.61535271989138, "learning_rate": 1e-06, "loss": 0.371, "step": 3225 }, { "epoch": 0.20674186106126635, "grad_norm": 2.5771180704151755, "learning_rate": 1e-06, "loss": 0.4275, "step": 3226 }, { "epoch": 0.20680594719302742, "grad_norm": 2.672183893932333, "learning_rate": 1e-06, "loss": 0.4107, "step": 3227 }, { "epoch": 0.2068700333247885, "grad_norm": 2.773792137100908, "learning_rate": 1e-06, "loss": 0.4827, "step": 3228 }, { "epoch": 0.2069341194565496, "grad_norm": 2.681428202763583, "learning_rate": 1e-06, "loss": 0.4176, "step": 3229 }, { "epoch": 0.2069982055883107, "grad_norm": 2.9492158799161796, "learning_rate": 1e-06, "loss": 0.4708, "step": 3230 }, { "epoch": 0.2070622917200718, "grad_norm": 2.8897345694841534, "learning_rate": 1e-06, "loss": 0.4303, "step": 3231 }, { "epoch": 0.20712637785183285, "grad_norm": 2.759597729311294, "learning_rate": 1e-06, "loss": 0.4035, "step": 3232 }, { "epoch": 0.20719046398359395, "grad_norm": 2.840421542454811, "learning_rate": 1e-06, "loss": 0.4273, "step": 3233 }, { "epoch": 0.20725455011535504, "grad_norm": 2.739728384977558, "learning_rate": 1e-06, "loss": 0.4528, "step": 3234 }, { "epoch": 0.20731863624711613, "grad_norm": 2.824433398735102, "learning_rate": 1e-06, "loss": 0.3889, "step": 3235 }, { "epoch": 0.2073827223788772, "grad_norm": 2.6354881167840505, "learning_rate": 1e-06, "loss": 0.3858, "step": 3236 }, { "epoch": 0.2074468085106383, "grad_norm": 2.8945305522820477, "learning_rate": 1e-06, "loss": 0.4126, "step": 3237 }, { "epoch": 0.20751089464239938, "grad_norm": 2.764839911328175, "learning_rate": 1e-06, "loss": 0.4274, "step": 3238 }, { "epoch": 0.20757498077416048, "grad_norm": 2.773496463928611, "learning_rate": 1e-06, "loss": 0.5385, "step": 3239 }, { "epoch": 0.20763906690592157, "grad_norm": 2.7564360890775084, "learning_rate": 1e-06, "loss": 0.4388, "step": 3240 }, { "epoch": 0.20770315303768264, "grad_norm": 2.6660067624046637, "learning_rate": 1e-06, "loss": 0.4451, "step": 3241 }, { "epoch": 0.20776723916944373, "grad_norm": 2.3442950677854797, "learning_rate": 1e-06, "loss": 0.3913, "step": 3242 }, { "epoch": 0.20783132530120482, "grad_norm": 2.6071034795612014, "learning_rate": 1e-06, "loss": 0.35, "step": 3243 }, { "epoch": 0.20789541143296592, "grad_norm": 2.5404513415884065, "learning_rate": 1e-06, "loss": 0.4069, "step": 3244 }, { "epoch": 0.20795949756472698, "grad_norm": 2.732016662782567, "learning_rate": 1e-06, "loss": 0.4083, "step": 3245 }, { "epoch": 0.20802358369648807, "grad_norm": 2.470865179331822, "learning_rate": 1e-06, "loss": 0.366, "step": 3246 }, { "epoch": 0.20808766982824917, "grad_norm": 2.8312892904633227, "learning_rate": 1e-06, "loss": 0.3434, "step": 3247 }, { "epoch": 0.20815175596001026, "grad_norm": 2.9626699856363143, "learning_rate": 1e-06, "loss": 0.3919, "step": 3248 }, { "epoch": 0.20821584209177135, "grad_norm": 2.7202318287031617, "learning_rate": 1e-06, "loss": 0.3718, "step": 3249 }, { "epoch": 0.20827992822353242, "grad_norm": 2.5074669078149814, "learning_rate": 1e-06, "loss": 0.4725, "step": 3250 }, { "epoch": 0.2083440143552935, "grad_norm": 2.9385941662449473, "learning_rate": 1e-06, "loss": 0.4403, "step": 3251 }, { "epoch": 0.2084081004870546, "grad_norm": 2.748212967840643, "learning_rate": 1e-06, "loss": 0.3987, "step": 3252 }, { "epoch": 0.2084721866188157, "grad_norm": 2.6400904306111572, "learning_rate": 1e-06, "loss": 0.4194, "step": 3253 }, { "epoch": 0.20853627275057676, "grad_norm": 2.7657922553991554, "learning_rate": 1e-06, "loss": 0.4343, "step": 3254 }, { "epoch": 0.20860035888233786, "grad_norm": 2.6880821418013046, "learning_rate": 1e-06, "loss": 0.3921, "step": 3255 }, { "epoch": 0.20866444501409895, "grad_norm": 2.5586073503420756, "learning_rate": 1e-06, "loss": 0.3641, "step": 3256 }, { "epoch": 0.20872853114586004, "grad_norm": 2.7781977676069314, "learning_rate": 1e-06, "loss": 0.4149, "step": 3257 }, { "epoch": 0.20879261727762113, "grad_norm": 2.6557419528008372, "learning_rate": 1e-06, "loss": 0.38, "step": 3258 }, { "epoch": 0.2088567034093822, "grad_norm": 2.5621653160572184, "learning_rate": 1e-06, "loss": 0.3763, "step": 3259 }, { "epoch": 0.2089207895411433, "grad_norm": 2.9311715852645515, "learning_rate": 1e-06, "loss": 0.3435, "step": 3260 }, { "epoch": 0.20898487567290439, "grad_norm": 3.0964528455275517, "learning_rate": 1e-06, "loss": 0.4291, "step": 3261 }, { "epoch": 0.20904896180466548, "grad_norm": 2.9231040164499826, "learning_rate": 1e-06, "loss": 0.3814, "step": 3262 }, { "epoch": 0.20911304793642654, "grad_norm": 2.449860144323429, "learning_rate": 1e-06, "loss": 0.3969, "step": 3263 }, { "epoch": 0.20917713406818764, "grad_norm": 2.7797757734601145, "learning_rate": 1e-06, "loss": 0.4768, "step": 3264 }, { "epoch": 0.20924122019994873, "grad_norm": 2.813532878048226, "learning_rate": 1e-06, "loss": 0.4363, "step": 3265 }, { "epoch": 0.20930530633170982, "grad_norm": 2.66716937995186, "learning_rate": 1e-06, "loss": 0.4468, "step": 3266 }, { "epoch": 0.20936939246347092, "grad_norm": 2.5170598395883825, "learning_rate": 1e-06, "loss": 0.3783, "step": 3267 }, { "epoch": 0.20943347859523198, "grad_norm": 2.674849349647163, "learning_rate": 1e-06, "loss": 0.4077, "step": 3268 }, { "epoch": 0.20949756472699307, "grad_norm": 2.624379949822509, "learning_rate": 1e-06, "loss": 0.3982, "step": 3269 }, { "epoch": 0.20956165085875417, "grad_norm": 2.820816841414604, "learning_rate": 1e-06, "loss": 0.395, "step": 3270 }, { "epoch": 0.20962573699051526, "grad_norm": 2.527956786895495, "learning_rate": 1e-06, "loss": 0.4627, "step": 3271 }, { "epoch": 0.20968982312227635, "grad_norm": 2.725541845425045, "learning_rate": 1e-06, "loss": 0.4151, "step": 3272 }, { "epoch": 0.20975390925403742, "grad_norm": 2.878927720490599, "learning_rate": 1e-06, "loss": 0.4412, "step": 3273 }, { "epoch": 0.2098179953857985, "grad_norm": 2.9259836698705963, "learning_rate": 1e-06, "loss": 0.4036, "step": 3274 }, { "epoch": 0.2098820815175596, "grad_norm": 2.7982781093411586, "learning_rate": 1e-06, "loss": 0.3854, "step": 3275 }, { "epoch": 0.2099461676493207, "grad_norm": 2.5793222625428953, "learning_rate": 1e-06, "loss": 0.4232, "step": 3276 }, { "epoch": 0.21001025378108176, "grad_norm": 2.9796808226633757, "learning_rate": 1e-06, "loss": 0.3803, "step": 3277 }, { "epoch": 0.21007433991284286, "grad_norm": 2.4441804847492343, "learning_rate": 1e-06, "loss": 0.3494, "step": 3278 }, { "epoch": 0.21013842604460395, "grad_norm": 2.925764791415414, "learning_rate": 1e-06, "loss": 0.3994, "step": 3279 }, { "epoch": 0.21020251217636504, "grad_norm": 2.633148554481942, "learning_rate": 1e-06, "loss": 0.4208, "step": 3280 }, { "epoch": 0.21026659830812613, "grad_norm": 2.5106080788505984, "learning_rate": 1e-06, "loss": 0.4771, "step": 3281 }, { "epoch": 0.2103306844398872, "grad_norm": 2.6172458561738865, "learning_rate": 1e-06, "loss": 0.4521, "step": 3282 }, { "epoch": 0.2103947705716483, "grad_norm": 2.5929935788049487, "learning_rate": 1e-06, "loss": 0.3985, "step": 3283 }, { "epoch": 0.21045885670340939, "grad_norm": 2.7905184923144657, "learning_rate": 1e-06, "loss": 0.4423, "step": 3284 }, { "epoch": 0.21052294283517048, "grad_norm": 2.5347958919401874, "learning_rate": 1e-06, "loss": 0.3942, "step": 3285 }, { "epoch": 0.21058702896693154, "grad_norm": 2.8626532701014487, "learning_rate": 1e-06, "loss": 0.4876, "step": 3286 }, { "epoch": 0.21065111509869264, "grad_norm": 2.757452199734976, "learning_rate": 1e-06, "loss": 0.4211, "step": 3287 }, { "epoch": 0.21071520123045373, "grad_norm": 2.903120359363833, "learning_rate": 1e-06, "loss": 0.4326, "step": 3288 }, { "epoch": 0.21077928736221482, "grad_norm": 2.880237709233351, "learning_rate": 1e-06, "loss": 0.4016, "step": 3289 }, { "epoch": 0.21084337349397592, "grad_norm": 2.553661952738896, "learning_rate": 1e-06, "loss": 0.3995, "step": 3290 }, { "epoch": 0.21090745962573698, "grad_norm": 2.5558492231930363, "learning_rate": 1e-06, "loss": 0.3656, "step": 3291 }, { "epoch": 0.21097154575749807, "grad_norm": 2.7074908240403066, "learning_rate": 1e-06, "loss": 0.4687, "step": 3292 }, { "epoch": 0.21103563188925917, "grad_norm": 2.505986301029715, "learning_rate": 1e-06, "loss": 0.4268, "step": 3293 }, { "epoch": 0.21109971802102026, "grad_norm": 2.663176507171353, "learning_rate": 1e-06, "loss": 0.3707, "step": 3294 }, { "epoch": 0.21116380415278133, "grad_norm": 2.680302070013859, "learning_rate": 1e-06, "loss": 0.4296, "step": 3295 }, { "epoch": 0.21122789028454242, "grad_norm": 2.561409369888171, "learning_rate": 1e-06, "loss": 0.3794, "step": 3296 }, { "epoch": 0.2112919764163035, "grad_norm": 2.419773725787716, "learning_rate": 1e-06, "loss": 0.4255, "step": 3297 }, { "epoch": 0.2113560625480646, "grad_norm": 2.982157224673509, "learning_rate": 1e-06, "loss": 0.4163, "step": 3298 }, { "epoch": 0.2114201486798257, "grad_norm": 2.5932585562032577, "learning_rate": 1e-06, "loss": 0.3908, "step": 3299 }, { "epoch": 0.21148423481158676, "grad_norm": 2.5018110031718033, "learning_rate": 1e-06, "loss": 0.4069, "step": 3300 }, { "epoch": 0.21154832094334786, "grad_norm": 2.742542865770265, "learning_rate": 1e-06, "loss": 0.4739, "step": 3301 }, { "epoch": 0.21161240707510895, "grad_norm": 2.5360610155104784, "learning_rate": 1e-06, "loss": 0.4523, "step": 3302 }, { "epoch": 0.21167649320687004, "grad_norm": 2.551525084190843, "learning_rate": 1e-06, "loss": 0.4041, "step": 3303 }, { "epoch": 0.2117405793386311, "grad_norm": 2.434809756110649, "learning_rate": 1e-06, "loss": 0.4214, "step": 3304 }, { "epoch": 0.2118046654703922, "grad_norm": 2.411436200322004, "learning_rate": 1e-06, "loss": 0.4291, "step": 3305 }, { "epoch": 0.2118687516021533, "grad_norm": 2.905538512840825, "learning_rate": 1e-06, "loss": 0.4184, "step": 3306 }, { "epoch": 0.2119328377339144, "grad_norm": 2.9864965713027205, "learning_rate": 1e-06, "loss": 0.4506, "step": 3307 }, { "epoch": 0.21199692386567548, "grad_norm": 2.742136252581274, "learning_rate": 1e-06, "loss": 0.3788, "step": 3308 }, { "epoch": 0.21206100999743654, "grad_norm": 2.5326228134750206, "learning_rate": 1e-06, "loss": 0.398, "step": 3309 }, { "epoch": 0.21212509612919764, "grad_norm": 2.830915338256494, "learning_rate": 1e-06, "loss": 0.417, "step": 3310 }, { "epoch": 0.21218918226095873, "grad_norm": 2.562052867540231, "learning_rate": 1e-06, "loss": 0.4002, "step": 3311 }, { "epoch": 0.21225326839271982, "grad_norm": 2.7686620430274655, "learning_rate": 1e-06, "loss": 0.3997, "step": 3312 }, { "epoch": 0.2123173545244809, "grad_norm": 2.7678823836226916, "learning_rate": 1e-06, "loss": 0.4272, "step": 3313 }, { "epoch": 0.21238144065624198, "grad_norm": 2.5608644724506515, "learning_rate": 1e-06, "loss": 0.3728, "step": 3314 }, { "epoch": 0.21244552678800308, "grad_norm": 2.7431587712595635, "learning_rate": 1e-06, "loss": 0.4088, "step": 3315 }, { "epoch": 0.21250961291976417, "grad_norm": 2.6725004227866846, "learning_rate": 1e-06, "loss": 0.4367, "step": 3316 }, { "epoch": 0.21257369905152526, "grad_norm": 2.6414894647068774, "learning_rate": 1e-06, "loss": 0.4033, "step": 3317 }, { "epoch": 0.21263778518328633, "grad_norm": 2.6201335968447186, "learning_rate": 1e-06, "loss": 0.4295, "step": 3318 }, { "epoch": 0.21270187131504742, "grad_norm": 2.6344494226288595, "learning_rate": 1e-06, "loss": 0.443, "step": 3319 }, { "epoch": 0.2127659574468085, "grad_norm": 2.6728837469432785, "learning_rate": 1e-06, "loss": 0.4037, "step": 3320 }, { "epoch": 0.2128300435785696, "grad_norm": 2.5284088187012155, "learning_rate": 1e-06, "loss": 0.3615, "step": 3321 }, { "epoch": 0.21289412971033067, "grad_norm": 2.7400867894135406, "learning_rate": 1e-06, "loss": 0.4012, "step": 3322 }, { "epoch": 0.21295821584209176, "grad_norm": 2.8026338036476974, "learning_rate": 1e-06, "loss": 0.432, "step": 3323 }, { "epoch": 0.21302230197385286, "grad_norm": 2.733888181563427, "learning_rate": 1e-06, "loss": 0.4293, "step": 3324 }, { "epoch": 0.21308638810561395, "grad_norm": 2.587643683316493, "learning_rate": 1e-06, "loss": 0.374, "step": 3325 }, { "epoch": 0.21315047423737504, "grad_norm": 2.6521670469030143, "learning_rate": 1e-06, "loss": 0.4076, "step": 3326 }, { "epoch": 0.2132145603691361, "grad_norm": 2.858247171493016, "learning_rate": 1e-06, "loss": 0.4151, "step": 3327 }, { "epoch": 0.2132786465008972, "grad_norm": 2.9518990696313554, "learning_rate": 1e-06, "loss": 0.3808, "step": 3328 }, { "epoch": 0.2133427326326583, "grad_norm": 2.5807436920995706, "learning_rate": 1e-06, "loss": 0.3848, "step": 3329 }, { "epoch": 0.2134068187644194, "grad_norm": 2.692268668879906, "learning_rate": 1e-06, "loss": 0.4469, "step": 3330 }, { "epoch": 0.21347090489618048, "grad_norm": 2.548409043303377, "learning_rate": 1e-06, "loss": 0.4276, "step": 3331 }, { "epoch": 0.21353499102794155, "grad_norm": 2.457547361289557, "learning_rate": 1e-06, "loss": 0.4154, "step": 3332 }, { "epoch": 0.21359907715970264, "grad_norm": 3.140234343189398, "learning_rate": 1e-06, "loss": 0.3672, "step": 3333 }, { "epoch": 0.21366316329146373, "grad_norm": 2.5891886639159987, "learning_rate": 1e-06, "loss": 0.4243, "step": 3334 }, { "epoch": 0.21372724942322482, "grad_norm": 2.8625826627525175, "learning_rate": 1e-06, "loss": 0.4242, "step": 3335 }, { "epoch": 0.2137913355549859, "grad_norm": 2.509069922553237, "learning_rate": 1e-06, "loss": 0.4424, "step": 3336 }, { "epoch": 0.21385542168674698, "grad_norm": 2.6593254396801727, "learning_rate": 1e-06, "loss": 0.3739, "step": 3337 }, { "epoch": 0.21391950781850808, "grad_norm": 2.5553613531487565, "learning_rate": 1e-06, "loss": 0.4033, "step": 3338 }, { "epoch": 0.21398359395026917, "grad_norm": 2.7534326763941133, "learning_rate": 1e-06, "loss": 0.4361, "step": 3339 }, { "epoch": 0.21404768008203026, "grad_norm": 2.809417915523082, "learning_rate": 1e-06, "loss": 0.4618, "step": 3340 }, { "epoch": 0.21411176621379133, "grad_norm": 2.5990675730161152, "learning_rate": 1e-06, "loss": 0.4367, "step": 3341 }, { "epoch": 0.21417585234555242, "grad_norm": 2.7348091981913756, "learning_rate": 1e-06, "loss": 0.4644, "step": 3342 }, { "epoch": 0.2142399384773135, "grad_norm": 2.4841998191988854, "learning_rate": 1e-06, "loss": 0.4541, "step": 3343 }, { "epoch": 0.2143040246090746, "grad_norm": 2.513354725827738, "learning_rate": 1e-06, "loss": 0.3605, "step": 3344 }, { "epoch": 0.21436811074083567, "grad_norm": 2.6741945967742975, "learning_rate": 1e-06, "loss": 0.3704, "step": 3345 }, { "epoch": 0.21443219687259676, "grad_norm": 2.6481448811114743, "learning_rate": 1e-06, "loss": 0.4211, "step": 3346 }, { "epoch": 0.21449628300435786, "grad_norm": 2.881638476738493, "learning_rate": 1e-06, "loss": 0.4405, "step": 3347 }, { "epoch": 0.21456036913611895, "grad_norm": 2.5160389638004594, "learning_rate": 1e-06, "loss": 0.4447, "step": 3348 }, { "epoch": 0.21462445526788004, "grad_norm": 2.6219438331059846, "learning_rate": 1e-06, "loss": 0.4039, "step": 3349 }, { "epoch": 0.2146885413996411, "grad_norm": 2.7939679761335587, "learning_rate": 1e-06, "loss": 0.4316, "step": 3350 }, { "epoch": 0.2147526275314022, "grad_norm": 2.742546179444123, "learning_rate": 1e-06, "loss": 0.4001, "step": 3351 }, { "epoch": 0.2148167136631633, "grad_norm": 2.7588570120175753, "learning_rate": 1e-06, "loss": 0.4035, "step": 3352 }, { "epoch": 0.2148807997949244, "grad_norm": 2.9727016909828863, "learning_rate": 1e-06, "loss": 0.4292, "step": 3353 }, { "epoch": 0.21494488592668545, "grad_norm": 2.700187053010241, "learning_rate": 1e-06, "loss": 0.377, "step": 3354 }, { "epoch": 0.21500897205844655, "grad_norm": 2.8029003823348546, "learning_rate": 1e-06, "loss": 0.4384, "step": 3355 }, { "epoch": 0.21507305819020764, "grad_norm": 2.6629430980581237, "learning_rate": 1e-06, "loss": 0.3895, "step": 3356 }, { "epoch": 0.21513714432196873, "grad_norm": 2.757531931586532, "learning_rate": 1e-06, "loss": 0.398, "step": 3357 }, { "epoch": 0.21520123045372982, "grad_norm": 2.918032036734727, "learning_rate": 1e-06, "loss": 0.3756, "step": 3358 }, { "epoch": 0.2152653165854909, "grad_norm": 2.989072875608744, "learning_rate": 1e-06, "loss": 0.417, "step": 3359 }, { "epoch": 0.21532940271725198, "grad_norm": 2.754298990911609, "learning_rate": 1e-06, "loss": 0.4508, "step": 3360 }, { "epoch": 0.21539348884901308, "grad_norm": 2.6404117189428407, "learning_rate": 1e-06, "loss": 0.4318, "step": 3361 }, { "epoch": 0.21545757498077417, "grad_norm": 3.3312953457728525, "learning_rate": 1e-06, "loss": 0.3877, "step": 3362 }, { "epoch": 0.21552166111253523, "grad_norm": 2.775685646675011, "learning_rate": 1e-06, "loss": 0.4369, "step": 3363 }, { "epoch": 0.21558574724429633, "grad_norm": 2.7227277955938005, "learning_rate": 1e-06, "loss": 0.439, "step": 3364 }, { "epoch": 0.21564983337605742, "grad_norm": 2.7479986060865693, "learning_rate": 1e-06, "loss": 0.4015, "step": 3365 }, { "epoch": 0.2157139195078185, "grad_norm": 2.749304608901622, "learning_rate": 1e-06, "loss": 0.407, "step": 3366 }, { "epoch": 0.2157780056395796, "grad_norm": 2.677492736617638, "learning_rate": 1e-06, "loss": 0.4595, "step": 3367 }, { "epoch": 0.21584209177134067, "grad_norm": 2.721007325759014, "learning_rate": 1e-06, "loss": 0.3814, "step": 3368 }, { "epoch": 0.21590617790310176, "grad_norm": 2.8715102553121805, "learning_rate": 1e-06, "loss": 0.4608, "step": 3369 }, { "epoch": 0.21597026403486286, "grad_norm": 2.7047494169852784, "learning_rate": 1e-06, "loss": 0.4142, "step": 3370 }, { "epoch": 0.21603435016662395, "grad_norm": 2.390858891462131, "learning_rate": 1e-06, "loss": 0.4006, "step": 3371 }, { "epoch": 0.21609843629838502, "grad_norm": 2.688612240628484, "learning_rate": 1e-06, "loss": 0.3934, "step": 3372 }, { "epoch": 0.2161625224301461, "grad_norm": 2.3726230078498354, "learning_rate": 1e-06, "loss": 0.3854, "step": 3373 }, { "epoch": 0.2162266085619072, "grad_norm": 2.4845931896277924, "learning_rate": 1e-06, "loss": 0.3693, "step": 3374 }, { "epoch": 0.2162906946936683, "grad_norm": 2.569118902647708, "learning_rate": 1e-06, "loss": 0.4183, "step": 3375 }, { "epoch": 0.2163547808254294, "grad_norm": 2.8433001863545884, "learning_rate": 1e-06, "loss": 0.4537, "step": 3376 }, { "epoch": 0.21641886695719045, "grad_norm": 2.6467296962041003, "learning_rate": 1e-06, "loss": 0.3823, "step": 3377 }, { "epoch": 0.21648295308895155, "grad_norm": 2.5467529724807223, "learning_rate": 1e-06, "loss": 0.384, "step": 3378 }, { "epoch": 0.21654703922071264, "grad_norm": 2.6583699584853515, "learning_rate": 1e-06, "loss": 0.5313, "step": 3379 }, { "epoch": 0.21661112535247373, "grad_norm": 2.7413522163195903, "learning_rate": 1e-06, "loss": 0.3918, "step": 3380 }, { "epoch": 0.21667521148423483, "grad_norm": 2.850274265246485, "learning_rate": 1e-06, "loss": 0.3708, "step": 3381 }, { "epoch": 0.2167392976159959, "grad_norm": 2.4545951876671444, "learning_rate": 1e-06, "loss": 0.4202, "step": 3382 }, { "epoch": 0.21680338374775698, "grad_norm": 2.7541006484296826, "learning_rate": 1e-06, "loss": 0.4132, "step": 3383 }, { "epoch": 0.21686746987951808, "grad_norm": 2.654628145662869, "learning_rate": 1e-06, "loss": 0.391, "step": 3384 }, { "epoch": 0.21693155601127917, "grad_norm": 2.679905549390188, "learning_rate": 1e-06, "loss": 0.3952, "step": 3385 }, { "epoch": 0.21699564214304023, "grad_norm": 2.7026520249743853, "learning_rate": 1e-06, "loss": 0.411, "step": 3386 }, { "epoch": 0.21705972827480133, "grad_norm": 2.8935314931031972, "learning_rate": 1e-06, "loss": 0.4039, "step": 3387 }, { "epoch": 0.21712381440656242, "grad_norm": 2.8256596007112678, "learning_rate": 1e-06, "loss": 0.4511, "step": 3388 }, { "epoch": 0.21718790053832351, "grad_norm": 2.666041222942203, "learning_rate": 1e-06, "loss": 0.38, "step": 3389 }, { "epoch": 0.2172519866700846, "grad_norm": 2.6944711686890104, "learning_rate": 1e-06, "loss": 0.3871, "step": 3390 }, { "epoch": 0.21731607280184567, "grad_norm": 2.550478836812208, "learning_rate": 1e-06, "loss": 0.399, "step": 3391 }, { "epoch": 0.21738015893360677, "grad_norm": 2.614636062866319, "learning_rate": 1e-06, "loss": 0.4499, "step": 3392 }, { "epoch": 0.21744424506536786, "grad_norm": 2.668976199272245, "learning_rate": 1e-06, "loss": 0.459, "step": 3393 }, { "epoch": 0.21750833119712895, "grad_norm": 2.5668912370881976, "learning_rate": 1e-06, "loss": 0.3908, "step": 3394 }, { "epoch": 0.21757241732889002, "grad_norm": 2.8245523377207307, "learning_rate": 1e-06, "loss": 0.4191, "step": 3395 }, { "epoch": 0.2176365034606511, "grad_norm": 2.6904693651871763, "learning_rate": 1e-06, "loss": 0.386, "step": 3396 }, { "epoch": 0.2177005895924122, "grad_norm": 2.754562661101229, "learning_rate": 1e-06, "loss": 0.3947, "step": 3397 }, { "epoch": 0.2177646757241733, "grad_norm": 2.63518445508415, "learning_rate": 1e-06, "loss": 0.4651, "step": 3398 }, { "epoch": 0.2178287618559344, "grad_norm": 2.377123367412555, "learning_rate": 1e-06, "loss": 0.3997, "step": 3399 }, { "epoch": 0.21789284798769545, "grad_norm": 2.674782214632184, "learning_rate": 1e-06, "loss": 0.453, "step": 3400 } ], "logging_steps": 1.0, "max_steps": 15604, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 200, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 825505490534400.0, "train_batch_size": 10, "trial_name": null, "trial_params": null }