Step... (500/437388 | Training Loss: -0.0005138383712619543, Learning Rate: 1.9977182091679424e-05) Step... (1000/437388 | Training Loss: -0.0013853885466232896, Learning Rate: 1.9954319213866256e-05) Step... (1500/437388 | Training Loss: -0.0014231076929718256, Learning Rate: 1.993145633605309e-05) Step... (2000/437388 | Training Loss: -0.0015525615308433771, Learning Rate: 1.990859345823992e-05) Step... (2500/437388 | Training Loss: -0.004444368649274111, Learning Rate: 1.9885730580426753e-05) Step... (3000/437388 | Training Loss: -0.003619273891672492, Learning Rate: 1.9862867702613585e-05) Step... (3500/437388 | Training Loss: -9.0174020442646e-05, Learning Rate: 1.9840004824800417e-05) Step... (4000/437388 | Training Loss: -5.372820669435896e-05, Learning Rate: 1.9817140127997845e-05) Step... (4500/437388 | Training Loss: -0.0005610623047687113, Learning Rate: 1.979427906917408e-05) Step... (5000/437388 | Training Loss: -0.0031363563612103462, Learning Rate: 1.9771416191360913e-05) Step... (5500/437388 | Training Loss: -0.00013456691522151232, Learning Rate: 1.9748553313547745e-05) Step... (6000/437388 | Training Loss: -0.003430676180869341, Learning Rate: 1.9725688616745174e-05) Step... (6500/437388 | Training Loss: 2.9760311008431017e-05, Learning Rate: 1.9702825738932006e-05) Step... (7000/437388 | Training Loss: -0.0002961964055430144, Learning Rate: 1.9679962861118838e-05) Step... (7500/437388 | Training Loss: -0.00017826503608375788, Learning Rate: 1.965709998330567e-05) Step... (8000/437388 | Training Loss: -4.724973041447811e-05, Learning Rate: 1.9634237105492502e-05) Step... (8500/437388 | Training Loss: -0.0002406371058896184, Learning Rate: 1.9611374227679335e-05) Step... (9000/437388 | Training Loss: -8.045811409829184e-05, Learning Rate: 1.9588511349866167e-05) Step... (9500/437388 | Training Loss: 9.021518962981645e-06, Learning Rate: 1.9565648472053e-05) Step... (10000/437388 | Training Loss: -0.0004939670907333493, Learning Rate: 1.954278559423983e-05) Step... (10500/437388 | Training Loss: 9.438104825676419e-06, Learning Rate: 1.9519922716426663e-05) Step... (11000/437388 | Training Loss: 2.276919622090645e-05, Learning Rate: 1.9497059838613495e-05) Step... (11500/437388 | Training Loss: -1.4924948118277825e-05, Learning Rate: 1.9474195141810924e-05) Step... (12000/437388 | Training Loss: 8.752908797760028e-06, Learning Rate: 1.9451332263997756e-05) Step... (12500/437388 | Training Loss: -0.0014030194142833352, Learning Rate: 1.942847120517399e-05) Step... (13000/437388 | Training Loss: 7.227043170132674e-06, Learning Rate: 1.9405608327360824e-05) Step... (13500/437388 | Training Loss: -0.003136668587103486, Learning Rate: 1.9382743630558252e-05) Step... (14000/437388 | Training Loss: -4.724322479887633e-06, Learning Rate: 1.9359880752745084e-05) Step... (14500/437388 | Training Loss: 1.9046970919589512e-05, Learning Rate: 1.9337017874931917e-05) Step... (15000/437388 | Training Loss: -0.0003092987462878227, Learning Rate: 1.931415499711875e-05) Step... (15500/437388 | Training Loss: -0.03229433298110962, Learning Rate: 1.929129211930558e-05) Step... (16000/437388 | Training Loss: -1.7652382666710764e-06, Learning Rate: 1.9268429241492413e-05) Step... (16500/437388 | Training Loss: 1.8737622667686082e-05, Learning Rate: 1.9245566363679245e-05) Step... (17000/437388 | Training Loss: 9.301738828071393e-06, Learning Rate: 1.9222703485866077e-05) Step... (17500/437388 | Training Loss: 9.757788575370796e-06, Learning Rate: 1.919984060805291e-05) Step... (18000/437388 | Training Loss: 2.881894033635035e-05, Learning Rate: 1.917697773023974e-05) Step... (18500/437388 | Training Loss: 1.7251250028493814e-05, Learning Rate: 1.9154114852426574e-05) Step... (19000/437388 | Training Loss: -3.8056023186072707e-06, Learning Rate: 1.9131251974613406e-05) Step... (19500/437388 | Training Loss: 1.8639262634678744e-05, Learning Rate: 1.9108387277810834e-05) Step... (20000/437388 | Training Loss: -4.346034074842464e-07, Learning Rate: 1.908552621898707e-05) Step... (20500/437388 | Training Loss: 3.273967195127625e-07, Learning Rate: 1.9062663341173902e-05) Step... (21000/437388 | Training Loss: -0.0003723864210769534, Learning Rate: 1.9039800463360734e-05) Step... (21500/437388 | Training Loss: 9.924142432282679e-06, Learning Rate: 1.9016935766558163e-05) Step... (22000/437388 | Training Loss: 9.93290996120777e-06, Learning Rate: 1.8994072888744995e-05) Step... (22500/437388 | Training Loss: -0.0001418531610397622, Learning Rate: 1.8971210010931827e-05) Step... (23000/437388 | Training Loss: 1.0088506314787082e-05, Learning Rate: 1.894834713311866e-05) Step... (23500/437388 | Training Loss: 7.136069143598434e-06, Learning Rate: 1.892548425530549e-05) Step... (24000/437388 | Training Loss: 9.869800123851746e-06, Learning Rate: 1.8902621377492324e-05) Step... (24500/437388 | Training Loss: 8.915569196688011e-06, Learning Rate: 1.8879758499679156e-05) Step... (25000/437388 | Training Loss: 9.661911462899297e-06, Learning Rate: 1.8856895621865988e-05) Step... (25500/437388 | Training Loss: 9.152301572612487e-06, Learning Rate: 1.883403274405282e-05) Step... (26000/437388 | Training Loss: 9.94595575321e-06, Learning Rate: 1.8811169866239652e-05) Step... (26500/437388 | Training Loss: 9.882722224574536e-06, Learning Rate: 1.8788306988426484e-05) Step... (27000/437388 | Training Loss: 9.786906048248056e-06, Learning Rate: 1.8765442291623913e-05) Step... (27500/437388 | Training Loss: 9.950299499905668e-06, Learning Rate: 1.8742579413810745e-05) Step... (28000/437388 | Training Loss: 9.730408237373922e-06, Learning Rate: 1.871971835498698e-05) Step... (28500/437388 | Training Loss: 1.0129735528607853e-05, Learning Rate: 1.8696855477173813e-05) Step... (29000/437388 | Training Loss: 9.86078885034658e-06, Learning Rate: 1.867399078037124e-05) Step... (29500/437388 | Training Loss: 9.964114724425599e-06, Learning Rate: 1.8651127902558073e-05) Step... (30000/437388 | Training Loss: 9.767300070961937e-06, Learning Rate: 1.8628265024744906e-05) Step... (30500/437388 | Training Loss: 9.961664545699023e-06, Learning Rate: 1.8605402146931738e-05) Step... (31000/437388 | Training Loss: 1.0001157534134109e-05, Learning Rate: 1.858253926911857e-05) Step... (31500/437388 | Training Loss: 9.999861504184082e-06, Learning Rate: 1.8559676391305402e-05) Step... (32000/437388 | Training Loss: 9.999739631894045e-06, Learning Rate: 1.8536813513492234e-05) Step... (32500/437388 | Training Loss: 9.999379471992142e-06, Learning Rate: 1.8513950635679066e-05) Step... (33000/437388 | Training Loss: 9.577275704941712e-06, Learning Rate: 1.84910877578659e-05) Step... (33500/437388 | Training Loss: 9.91380602499703e-06, Learning Rate: 1.846822488005273e-05) Step... (34000/437388 | Training Loss: 9.999487701861653e-06, Learning Rate: 1.8445362002239563e-05) Step... (34500/437388 | Training Loss: 9.984047210309654e-06, Learning Rate: 1.8422499124426395e-05) Step... (35000/437388 | Training Loss: 9.99921940092463e-06, Learning Rate: 1.8399634427623823e-05) Step... (35500/437388 | Training Loss: 9.680366929387674e-06, Learning Rate: 1.8376771549810655e-05) Step... (36000/437388 | Training Loss: 9.998320820159279e-06, Learning Rate: 1.835391049098689e-05) Step... (36500/437388 | Training Loss: 9.958874215953983e-06, Learning Rate: 1.8331047613173723e-05) Step... (37000/437388 | Training Loss: 1.0039606422651559e-05, Learning Rate: 1.8308182916371152e-05) Step... (37500/437388 | Training Loss: 9.99998883344233e-06, Learning Rate: 1.8285320038557984e-05) Step... (38000/437388 | Training Loss: 1.0001829650718719e-05, Learning Rate: 1.8262457160744816e-05) Step... (38500/437388 | Training Loss: 9.997856977861375e-06, Learning Rate: 1.8239594282931648e-05) Step... (39000/437388 | Training Loss: 9.95433765638154e-06, Learning Rate: 1.821673140511848e-05) Step... (39500/437388 | Training Loss: 9.999251233239193e-06, Learning Rate: 1.8193868527305312e-05) Step... (40000/437388 | Training Loss: 9.996479093388189e-06, Learning Rate: 1.8171005649492145e-05) Step... (40500/437388 | Training Loss: 9.999072062782943e-06, Learning Rate: 1.8148142771678977e-05) Step... (41000/437388 | Training Loss: 9.999967005569488e-06, Learning Rate: 1.812527989386581e-05) Step... (41500/437388 | Training Loss: 9.775694707059301e-06, Learning Rate: 1.810241701605264e-05) Step... (42000/437388 | Training Loss: 9.975577995646745e-06, Learning Rate: 1.8079554138239473e-05) Step... (42500/437388 | Training Loss: 9.999113899539225e-06, Learning Rate: 1.80566894414369e-05) Step... (43000/437388 | Training Loss: 9.999290341511369e-06, Learning Rate: 1.8033828382613137e-05) Step... (43500/437388 | Training Loss: 9.998851055570412e-06, Learning Rate: 1.801096550479997e-05) Step... (44000/437388 | Training Loss: 9.995937034545932e-06, Learning Rate: 1.79881026269868e-05) Step... (44500/437388 | Training Loss: 9.999046596931294e-06, Learning Rate: 1.796523793018423e-05) Step... (45000/437388 | Training Loss: 1.0032883437816054e-05, Learning Rate: 1.7942375052371062e-05) Step... (45500/437388 | Training Loss: 9.680898074293509e-06, Learning Rate: 1.7919512174557894e-05) Step... (46000/437388 | Training Loss: 9.454144674236886e-06, Learning Rate: 1.7896649296744727e-05) Step... (46500/437388 | Training Loss: 9.958406735677272e-06, Learning Rate: 1.787378641893156e-05) Step... (47000/437388 | Training Loss: 7.78274807089474e-06, Learning Rate: 1.785092354111839e-05) Step... (47500/437388 | Training Loss: 9.662552656664047e-06, Learning Rate: 1.7828060663305223e-05) Step... (48000/437388 | Training Loss: 9.67618416325422e-06, Learning Rate: 1.7805197785492055e-05) Step... (48500/437388 | Training Loss: 9.7446582003613e-06, Learning Rate: 1.7782334907678887e-05) Step... (49000/437388 | Training Loss: 9.835666787694208e-06, Learning Rate: 1.775947202986572e-05) Step... (49500/437388 | Training Loss: 9.96859853330534e-06, Learning Rate: 1.773660915205255e-05) Step... (50000/437388 | Training Loss: 9.844617125054356e-06, Learning Rate: 1.7713746274239384e-05) Step... (50500/437388 | Training Loss: 9.803032298805192e-06, Learning Rate: 1.7690881577436812e-05) Step... (51000/437388 | Training Loss: 9.660185241955332e-06, Learning Rate: 1.7668020518613048e-05) Step... (51500/437388 | Training Loss: 9.900993063638452e-06, Learning Rate: 1.764515764079988e-05) Step... (52000/437388 | Training Loss: 9.604618753655814e-06, Learning Rate: 1.7622294762986712e-05) Step... (52500/437388 | Training Loss: 6.536213732033502e-06, Learning Rate: 1.759943006618414e-05) Step... (53000/437388 | Training Loss: 9.850440619629808e-06, Learning Rate: 1.7576567188370973e-05) Step... (53500/437388 | Training Loss: 9.80033291853033e-06, Learning Rate: 1.7553704310557805e-05) Step... (54000/437388 | Training Loss: 9.600864359526895e-06, Learning Rate: 1.7530841432744637e-05) Step... (54500/437388 | Training Loss: 9.353519999422133e-06, Learning Rate: 1.750797855493147e-05) Step... (55000/437388 | Training Loss: 9.570900147082284e-06, Learning Rate: 1.74851156771183e-05) Step... (55500/437388 | Training Loss: 7.14508587407181e-06, Learning Rate: 1.7462252799305134e-05) Step... (56000/437388 | Training Loss: 7.058968549245037e-06, Learning Rate: 1.7439389921491966e-05) Step... (56500/437388 | Training Loss: -2.7393669370212592e-05, Learning Rate: 1.7416527043678798e-05) Step... (57000/437388 | Training Loss: 7.528081368946005e-06, Learning Rate: 1.739366416586563e-05) Step... (57500/437388 | Training Loss: -2.508041143300943e-05, Learning Rate: 1.7370801288052462e-05) Step... (58000/437388 | Training Loss: -0.0005006093415431678, Learning Rate: 1.734793659124989e-05) Step... (58500/437388 | Training Loss: -8.784182682575192e-06, Learning Rate: 1.7325073713436723e-05) Step... (59000/437388 | Training Loss: -0.05110815167427063, Learning Rate: 1.730221265461296e-05) Step... (59500/437388 | Training Loss: -1.4655773156846408e-05, Learning Rate: 1.727934977679979e-05) Step... (60000/437388 | Training Loss: 9.906179911922663e-06, Learning Rate: 1.725648507999722e-05) Step... (60500/437388 | Training Loss: 9.99689018499339e-06, Learning Rate: 1.723362220218405e-05) Step... (61000/437388 | Training Loss: 9.974900422093924e-06, Learning Rate: 1.7210759324370883e-05) Step... (61500/437388 | Training Loss: -0.006380284670740366, Learning Rate: 1.7187896446557716e-05) Step... (62000/437388 | Training Loss: 9.996319931815378e-06, Learning Rate: 1.7165033568744548e-05) Step... (62500/437388 | Training Loss: 7.178080522862729e-06, Learning Rate: 1.714217069093138e-05) Step... (63000/437388 | Training Loss: 9.890783985611051e-06, Learning Rate: 1.7119307813118212e-05) Step... (63500/437388 | Training Loss: 9.915613190969452e-06, Learning Rate: 1.7096444935305044e-05) Step... (64000/437388 | Training Loss: 9.99755684460979e-06, Learning Rate: 1.7073582057491876e-05) Step... (64500/437388 | Training Loss: 9.984622920455877e-06, Learning Rate: 1.705071917967871e-05) Step... (65000/437388 | Training Loss: 8.908338713808917e-06, Learning Rate: 1.702785630186554e-05) Step... (65500/437388 | Training Loss: 1.002239605440991e-05, Learning Rate: 1.7004993424052373e-05) Step... (66000/437388 | Training Loss: 9.993064850277733e-06, Learning Rate: 1.69821287272498e-05) Step... (66500/437388 | Training Loss: 9.901496014208533e-06, Learning Rate: 1.6959265849436633e-05) Step... (67000/437388 | Training Loss: 9.920498996507376e-06, Learning Rate: 1.693640479061287e-05) Step... (67500/437388 | Training Loss: 8.78564333106624e-06, Learning Rate: 1.69135419127997e-05) Step... (68000/437388 | Training Loss: 1.0045086128229741e-05, Learning Rate: 1.689067721599713e-05) Step... (68500/437388 | Training Loss: 9.996912012866233e-06, Learning Rate: 1.6867814338183962e-05) Step... (69000/437388 | Training Loss: 1.0577703505987301e-05, Learning Rate: 1.6844951460370794e-05) Step... (69500/437388 | Training Loss: 6.859060704300646e-06, Learning Rate: 1.6822088582557626e-05) Step... (70000/437388 | Training Loss: 9.735958883538842e-06, Learning Rate: 1.6799225704744458e-05) Step... (70500/437388 | Training Loss: 3.69073313777335e-06, Learning Rate: 1.677636282693129e-05) Step... (71000/437388 | Training Loss: 1.5452194929821417e-05, Learning Rate: 1.6753499949118122e-05) Step... (71500/437388 | Training Loss: 7.551490853074938e-06, Learning Rate: 1.6730637071304955e-05) Step... (72000/437388 | Training Loss: 9.999574103858322e-06, Learning Rate: 1.6707774193491787e-05) Step... (72500/437388 | Training Loss: 1.0001018381444737e-05, Learning Rate: 1.668491131567862e-05) Step... (73000/437388 | Training Loss: 9.91267279459862e-06, Learning Rate: 1.666204843786545e-05) Step... (73500/437388 | Training Loss: 1.0612275218591094e-05, Learning Rate: 1.663918374106288e-05) Step... (74000/437388 | Training Loss: 9.93158118944848e-06, Learning Rate: 1.6616322682239115e-05) Step... (74500/437388 | Training Loss: 9.999830581364222e-06, Learning Rate: 1.6593457985436544e-05) Step... (75000/437388 | Training Loss: 9.99673648038879e-06, Learning Rate: 1.657059692661278e-05) Step... (75500/437388 | Training Loss: 9.667587619333062e-06, Learning Rate: 1.6547732229810208e-05) Step... (76000/437388 | Training Loss: 6.908721843501553e-06, Learning Rate: 1.652486935199704e-05) Step... (76500/437388 | Training Loss: 9.978556590795051e-06, Learning Rate: 1.6502006474183872e-05) Step... (77000/437388 | Training Loss: 9.992380000767298e-06, Learning Rate: 1.6479143596370704e-05) Step... (77500/437388 | Training Loss: 9.999144822359085e-06, Learning Rate: 1.6456280718557537e-05) Step... (78000/437388 | Training Loss: 9.959425369743258e-06, Learning Rate: 1.643341784074437e-05) Step... (78500/437388 | Training Loss: 4.806013748748228e-06, Learning Rate: 1.64105549629312e-05) Step... (79000/437388 | Training Loss: 9.999519534176216e-06, Learning Rate: 1.6387692085118033e-05) Step... (79500/437388 | Training Loss: -3.0628798413090408e-06, Learning Rate: 1.6364829207304865e-05) Step... (80000/437388 | Training Loss: 3.14687640639022e-05, Learning Rate: 1.6341966329491697e-05) Step... (80500/437388 | Training Loss: -5.9503574448172e-05, Learning Rate: 1.631910345167853e-05) Step... (81000/437388 | Training Loss: 9.830844646785408e-06, Learning Rate: 1.629624057386536e-05) Step... (81500/437388 | Training Loss: -0.0007751630619168282, Learning Rate: 1.627337587706279e-05) Step... (82000/437388 | Training Loss: 7.780356099829078e-06, Learning Rate: 1.6250514818239026e-05) Step... (82500/437388 | Training Loss: 9.727603355713654e-06, Learning Rate: 1.6227650121436454e-05) Step... (83000/437388 | Training Loss: 9.734287232276984e-06, Learning Rate: 1.620478906261269e-05) Step... (83500/437388 | Training Loss: 8.165938197635114e-06, Learning Rate: 1.618192436581012e-05) Step... (84000/437388 | Training Loss: -4.21609138356871e-06, Learning Rate: 1.615906148799695e-05) Step... (84500/437388 | Training Loss: -2.6465124392416328e-05, Learning Rate: 1.6136198610183783e-05) Step... (85000/437388 | Training Loss: 1.0105111869052052e-05, Learning Rate: 1.6113335732370615e-05) Step... (85500/437388 | Training Loss: -0.00010695862147258595, Learning Rate: 1.6090472854557447e-05) Step... (86000/437388 | Training Loss: -0.00014172535156831145, Learning Rate: 1.606760997674428e-05) Step... (86500/437388 | Training Loss: 9.377619790029712e-06, Learning Rate: 1.604474709893111e-05) Step... (87000/437388 | Training Loss: 1.3652050256496295e-05, Learning Rate: 1.6021884221117944e-05) Step... (87500/437388 | Training Loss: 0.00031775905517861247, Learning Rate: 1.5999021343304776e-05) Step... (88000/437388 | Training Loss: 7.545562766608782e-06, Learning Rate: 1.5976158465491608e-05) Step... (88500/437388 | Training Loss: -0.0010930340504273772, Learning Rate: 1.595329558767844e-05) Step... (89000/437388 | Training Loss: 3.0137512112560216e-06, Learning Rate: 1.593043089087587e-05) Step... (89500/437388 | Training Loss: 0.009223783388733864, Learning Rate: 1.59075680130627e-05) Step... (90000/437388 | Training Loss: -0.0023263858165591955, Learning Rate: 1.5884706954238936e-05) Step... (90500/437388 | Training Loss: -0.0009863653685897589, Learning Rate: 1.586184407642577e-05) Step... (91000/437388 | Training Loss: -8.11091304058209e-05, Learning Rate: 1.5838979379623197e-05) Step... (91500/437388 | Training Loss: -0.004317307844758034, Learning Rate: 1.581611650181003e-05) Step... (92000/437388 | Training Loss: -0.0005784975364804268, Learning Rate: 1.579325362399686e-05) Step... (92500/437388 | Training Loss: 9.436316759092733e-06, Learning Rate: 1.5770390746183693e-05) Step... (93000/437388 | Training Loss: -0.00013581309758592397, Learning Rate: 1.5747527868370526e-05) Step... (93500/437388 | Training Loss: 8.767711733526085e-06, Learning Rate: 1.5724664990557358e-05) Step... (94000/437388 | Training Loss: -0.001196634373627603, Learning Rate: 1.570180211274419e-05) Step... (94500/437388 | Training Loss: -4.4073633034713566e-05, Learning Rate: 1.5678939234931022e-05) Step... (95000/437388 | Training Loss: -0.0004681225400418043, Learning Rate: 1.5656076357117854e-05) Step... (95500/437388 | Training Loss: -0.03802439197897911, Learning Rate: 1.5633213479304686e-05) Step... (96000/437388 | Training Loss: 72.98323059082031, Learning Rate: 1.561035060149152e-05) Step... (96500/437388 | Training Loss: -7.45706565794535e-05, Learning Rate: 1.558748772367835e-05) Step... (97000/437388 | Training Loss: -1.4632112652179785e-05, Learning Rate: 1.556462302687578e-05) Step... (97500/437388 | Training Loss: -0.0013888446846976876, Learning Rate: 1.554176014906261e-05) Step... (98000/437388 | Training Loss: -0.009409919381141663, Learning Rate: 1.5518899090238847e-05) Step... (98500/437388 | Training Loss: -0.3626460134983063, Learning Rate: 1.549603621242568e-05) Step... (99000/437388 | Training Loss: 9.256023986381479e-06, Learning Rate: 1.5473171515623108e-05) Step... (99500/437388 | Training Loss: 1.0022363312600646e-05, Learning Rate: 1.545030863780994e-05) Step... (100000/437388 | Training Loss: -0.006803448777645826, Learning Rate: 1.5427445759996772e-05) Step... (100500/437388 | Training Loss: -0.0002515804662834853, Learning Rate: 1.5404582882183604e-05) Step... (101000/437388 | Training Loss: -0.0071137938648462296, Learning Rate: 1.5381720004370436e-05) Step... (101500/437388 | Training Loss: -0.0006095135468058288, Learning Rate: 1.5358857126557268e-05) Step... (102000/437388 | Training Loss: 0.0003835784154944122, Learning Rate: 1.53359942487441e-05) Step... (102500/437388 | Training Loss: 7.004499821050558e-06, Learning Rate: 1.5313131370930932e-05) Step... (103000/437388 | Training Loss: 6.723953447362874e-07, Learning Rate: 1.5290268493117765e-05) Step... (103500/437388 | Training Loss: -9.461044101044536e-06, Learning Rate: 1.5267405615304597e-05) Step... (104000/437388 | Training Loss: -3.5358179957256652e-06, Learning Rate: 1.5244541827996727e-05) Step... (104500/437388 | Training Loss: -95.99297332763672, Learning Rate: 1.522167895018356e-05) Step... (105000/437388 | Training Loss: -1.1236647878831718e-05, Learning Rate: 1.5198816981865093e-05) Step... (105500/437388 | Training Loss: 8.958060789154842e-06, Learning Rate: 1.5175952285062522e-05) Step... (106000/437388 | Training Loss: 9.924059668264817e-06, Learning Rate: 1.5153090316744056e-05) Step... (106500/437388 | Training Loss: 5.033837169321487e-06, Learning Rate: 1.5130227438930888e-05) Step... (107000/437388 | Training Loss: 9.999862413678784e-06, Learning Rate: 1.510736456111772e-05) Step... (107500/437388 | Training Loss: 9.653651432017796e-06, Learning Rate: 1.508450077380985e-05) Step... (108000/437388 | Training Loss: 9.98562973109074e-06, Learning Rate: 1.5061637895996682e-05) Step... (108500/437388 | Training Loss: 9.999821486417204e-06, Learning Rate: 1.5038775018183514e-05) Step... (109000/437388 | Training Loss: 7.144420123950113e-06, Learning Rate: 1.5015911230875645e-05) Step... (109500/437388 | Training Loss: 9.997345841838978e-06, Learning Rate: 1.499305017205188e-05) Step... (110000/437388 | Training Loss: 9.997858796850778e-06, Learning Rate: 1.4970185475249309e-05) Step... (110500/437388 | Training Loss: 9.991859769797884e-06, Learning Rate: 1.4947323506930843e-05) Step... (111000/437388 | Training Loss: -2.8533329896163195e-05, Learning Rate: 1.4924460629117675e-05) Step... (111500/437388 | Training Loss: -8.355713362107053e-06, Learning Rate: 1.4901596841809805e-05) Step... (112000/437388 | Training Loss: 8.365332178073004e-06, Learning Rate: 1.4878733963996638e-05) Step... (112500/437388 | Training Loss: 9.927052815328352e-06, Learning Rate: 1.485587108618347e-05) Step... (113000/437388 | Training Loss: -3.364314034115523e-05, Learning Rate: 1.4833009117865004e-05) Step... (113500/437388 | Training Loss: -20.275516510009766, Learning Rate: 1.4810144421062432e-05) Step... (114000/437388 | Training Loss: 9.999565008911304e-06, Learning Rate: 1.4787282452743966e-05) Step... (114500/437388 | Training Loss: 9.948298611561768e-06, Learning Rate: 1.4764419574930798e-05) Step... (115000/437388 | Training Loss: 9.998351742979139e-06, Learning Rate: 1.474155669711763e-05) Step... (115500/437388 | Training Loss: -0.4263361990451813, Learning Rate: 1.471869290980976e-05) Step... (116000/437388 | Training Loss: 9.999733265431132e-06, Learning Rate: 1.4695830031996593e-05) Step... (116500/437388 | Training Loss: 9.999770554713905e-06, Learning Rate: 1.4672968063678127e-05) Step... (117000/437388 | Training Loss: 1.01969098977861e-05, Learning Rate: 1.4650103366875555e-05) Step... (117500/437388 | Training Loss: 9.999991561926436e-06, Learning Rate: 1.462724139855709e-05) Step... (118000/437388 | Training Loss: 1.2863829397247173e-05, Learning Rate: 1.4604378520743921e-05) Step... (118500/437388 | Training Loss: 9.999992471421137e-06, Learning Rate: 1.4581515642930754e-05) Step... (119000/437388 | Training Loss: 9.999997928389348e-06, Learning Rate: 1.4558651855622884e-05) Step... (119500/437388 | Training Loss: 9.99999883788405e-06, Learning Rate: 1.4535788977809716e-05) Step... (120000/437388 | Training Loss: 9.99999883788405e-06, Learning Rate: 1.451292700949125e-05) Step... (120500/437388 | Training Loss: 1.0000000656873453e-05, Learning Rate: 1.449006322218338e-05) Step... (121000/437388 | Training Loss: 9.999996109399945e-06, Learning Rate: 1.4467201253864914e-05) Step... (121500/437388 | Training Loss: 9.99999883788405e-06, Learning Rate: 1.4444336557062343e-05) Step... (122000/437388 | Training Loss: 9.99996336759068e-06, Learning Rate: 1.4421474588743877e-05) Step... (122500/437388 | Training Loss: 0.21177683770656586, Learning Rate: 1.4398611710930709e-05) Step... (123000/437388 | Training Loss: -96.042724609375, Learning Rate: 1.4375747923622839e-05) Step... (123500/437388 | Training Loss: -1.1273064046690706e-05, Learning Rate: 1.4352885045809671e-05) Step... (124000/437388 | Training Loss: -0.0012545139761641622, Learning Rate: 1.4330022167996503e-05) Step... (124500/437388 | Training Loss: -2.347531795501709, Learning Rate: 1.4307160199678037e-05) Step... (125000/437388 | Training Loss: 9.999992471421137e-06, Learning Rate: 1.4284295502875466e-05) Step... (125500/437388 | Training Loss: 9.999503163271584e-06, Learning Rate: 1.4261433534557e-05) Step... (126000/437388 | Training Loss: 1.1671419997583143e-05, Learning Rate: 1.4238570656743832e-05) Step... (126500/437388 | Training Loss: 9.99999883788405e-06, Learning Rate: 1.4215707778930664e-05) Step... (127000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.4192843991622794e-05) Step... (127500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.4169981113809627e-05) Step... (128000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.414711914549116e-05) Step... (128500/437388 | Training Loss: -95.99994659423828, Learning Rate: 1.4124254448688589e-05) Step... (129000/437388 | Training Loss: -0.0011440961388871074, Learning Rate: 1.4101392480370123e-05) Step... (129500/437388 | Training Loss: 9.9998705991311e-06, Learning Rate: 1.4078528693062253e-05) Step... (130000/437388 | Training Loss: 9.925997801474296e-06, Learning Rate: 1.4055666724743787e-05) Step... (130500/437388 | Training Loss: -0.0002359842910664156, Learning Rate: 1.403280384693062e-05) Step... (131000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.400994005962275e-05) Step... (131500/437388 | Training Loss: 9.999987923947629e-06, Learning Rate: 1.3987077181809582e-05) Step... (132000/437388 | Training Loss: 9.998908353736624e-06, Learning Rate: 1.3964214303996414e-05) Step... (132500/437388 | Training Loss: 9.866323125606868e-06, Learning Rate: 1.3941352335677948e-05) Step... (133000/437388 | Training Loss: 9.999997928389348e-06, Learning Rate: 1.3918487638875376e-05) Step... (133500/437388 | Training Loss: 9.99999883788405e-06, Learning Rate: 1.389562567055691e-05) Step... (134000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3872762792743742e-05) Step... (134500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3849899005435873e-05) Step... (135000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3827036127622705e-05) Step... (135500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3804173249809537e-05) Step... (136000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3781311281491071e-05) Step... (136500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.37584465846885e-05) Step... (137000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3735584616370033e-05) Step... (137500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3712721738556866e-05) Step... (138000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3689858860743698e-05) Step... (138500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3666995073435828e-05) Step... (139000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.364413219562266e-05) Step... (139500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3621270227304194e-05) Step... (140000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3598405530501623e-05) Step... (140500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3575543562183157e-05) Step... (141000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3552679774875287e-05) Step... (141500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3529817806556821e-05) Step... (142000/437388 | Training Loss: 1.0000000656873453e-05, Learning Rate: 1.3506954928743653e-05) Step... (142500/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3484091141435783e-05) Step... (143000/437388 | Training Loss: 9.999999747378752e-06, Learning Rate: 1.3461228263622615e-05)