Step... (500/437388 | Training Loss: 0.000404452090151608, Learning Rate: 1.9977182091679424e-05) Step... (1000/437388 | Training Loss: 0.0006339488318189979, Learning Rate: 1.9954319213866256e-05) Step... (1500/437388 | Training Loss: 9.516239515505731e-05, Learning Rate: 1.993145633605309e-05) Step... (2000/437388 | Training Loss: 0.00023419701028615236, Learning Rate: 1.990859345823992e-05) Step... (2500/437388 | Training Loss: 0.00013391178799793124, Learning Rate: 1.9885730580426753e-05) Step... (3000/437388 | Training Loss: 0.0026682857424020767, Learning Rate: 1.9862867702613585e-05) Step... (3500/437388 | Training Loss: 7.625477883266285e-05, Learning Rate: 1.9840004824800417e-05) Step... (4000/437388 | Training Loss: 2.2841697500552982e-05, Learning Rate: 1.9817140127997845e-05) Step... (4500/437388 | Training Loss: 4.078699566889554e-05, Learning Rate: 1.979427906917408e-05) Step... (5000/437388 | Training Loss: 0.0001533488102722913, Learning Rate: 1.9771416191360913e-05) Step... (5500/437388 | Training Loss: -7.60231387175736e-07, Learning Rate: 1.9748553313547745e-05) Step... (6000/437388 | Training Loss: 6.25096436124295e-05, Learning Rate: 1.9725688616745174e-05) Step... (6500/437388 | Training Loss: -5.6734875215624925e-06, Learning Rate: 1.9702825738932006e-05) Step... (7000/437388 | Training Loss: 2.8425463369785575e-06, Learning Rate: 1.9679962861118838e-05) Step... (7500/437388 | Training Loss: -1.0971100437018322e-06, Learning Rate: 1.965709998330567e-05) Step... (8000/437388 | Training Loss: -3.7226100175757892e-06, Learning Rate: 1.9634237105492502e-05) Step... (8500/437388 | Training Loss: -9.15726377570536e-06, Learning Rate: 1.9611374227679335e-05) Step... (9000/437388 | Training Loss: 0.00028566704713739455, Learning Rate: 1.9588511349866167e-05) Step... (9500/437388 | Training Loss: -2.5296901640103897e-06, Learning Rate: 1.9565648472053e-05) Step... (10000/437388 | Training Loss: 4.72695282951463e-05, Learning Rate: 1.954278559423983e-05) Step... (10500/437388 | Training Loss: 2.3120959667721763e-05, Learning Rate: 1.9519922716426663e-05) Step... (11000/437388 | Training Loss: 1.824241735448595e-05, Learning Rate: 1.9497059838613495e-05) Step... (11500/437388 | Training Loss: 2.931378548964858e-05, Learning Rate: 1.9474195141810924e-05) Step... (12000/437388 | Training Loss: -5.238102403382072e-06, Learning Rate: 1.9451332263997756e-05) Step... (12500/437388 | Training Loss: 4.2583487811498344e-05, Learning Rate: 1.942847120517399e-05) Step... (13000/437388 | Training Loss: -6.542205483128782e-06, Learning Rate: 1.9405608327360824e-05) Step... (13500/437388 | Training Loss: 1.2718789548671339e-05, Learning Rate: 1.9382743630558252e-05) Step... (14000/437388 | Training Loss: -7.005878615018446e-06, Learning Rate: 1.9359880752745084e-05) Step... (14500/437388 | Training Loss: -7.702277798671275e-06, Learning Rate: 1.9337017874931917e-05) Step... (15000/437388 | Training Loss: 1.781577338988427e-07, Learning Rate: 1.931415499711875e-05) Step... (15500/437388 | Training Loss: -5.5821979003667366e-06, Learning Rate: 1.929129211930558e-05) Step... (16000/437388 | Training Loss: 4.509760856308276e-06, Learning Rate: 1.9268429241492413e-05) Step... (16500/437388 | Training Loss: -3.0017952667549253e-05, Learning Rate: 1.9245566363679245e-05) Step... (17000/437388 | Training Loss: -6.2593853726866655e-06, Learning Rate: 1.9222703485866077e-05) Step... (17500/437388 | Training Loss: -4.433305548445787e-06, Learning Rate: 1.919984060805291e-05) Step... (18000/437388 | Training Loss: -7.731199730187654e-06, Learning Rate: 1.917697773023974e-05) Step... (18500/437388 | Training Loss: -8.37458446767414e-06, Learning Rate: 1.9154114852426574e-05) Step... (19000/437388 | Training Loss: -5.067255642643431e-06, Learning Rate: 1.9131251974613406e-05) Step... (19500/437388 | Training Loss: 6.776190730306553e-06, Learning Rate: 1.9108387277810834e-05) Step... (20000/437388 | Training Loss: -7.4958870754926465e-06, Learning Rate: 1.908552621898707e-05) Step... (20500/437388 | Training Loss: -5.895290541957365e-06, Learning Rate: 1.9062663341173902e-05) Step... (21000/437388 | Training Loss: -6.40215785097098e-06, Learning Rate: 1.9039800463360734e-05) Step... (21500/437388 | Training Loss: 3.1176207357930252e-06, Learning Rate: 1.9016935766558163e-05) Step... (22000/437388 | Training Loss: -3.3340879781462718e-06, Learning Rate: 1.8994072888744995e-05) Step... (22500/437388 | Training Loss: -5.5457257985835895e-06, Learning Rate: 1.8971210010931827e-05) Step... (23000/437388 | Training Loss: 2.9257062124088407e-06, Learning Rate: 1.894834713311866e-05) Step... (23500/437388 | Training Loss: -5.124312338011805e-06, Learning Rate: 1.892548425530549e-05) Step... (24000/437388 | Training Loss: -1.773118128767237e-06, Learning Rate: 1.8902621377492324e-05) Step... (24500/437388 | Training Loss: -5.4094566621643025e-06, Learning Rate: 1.8879758499679156e-05) Step... (25000/437388 | Training Loss: -3.4828276511689182e-06, Learning Rate: 1.8856895621865988e-05) Step... (25500/437388 | Training Loss: -4.534229901764775e-06, Learning Rate: 1.883403274405282e-05) Step... (26000/437388 | Training Loss: -8.387982234125957e-06, Learning Rate: 1.8811169866239652e-05) Step... (26500/437388 | Training Loss: -5.165531547390856e-06, Learning Rate: 1.8788306988426484e-05) Step... (27000/437388 | Training Loss: -6.8571389419957995e-06, Learning Rate: 1.8765442291623913e-05) Step... (27500/437388 | Training Loss: -7.399700734822545e-06, Learning Rate: 1.8742579413810745e-05) Step... (28000/437388 | Training Loss: -2.5704005111037986e-06, Learning Rate: 1.871971835498698e-05) Step... (28500/437388 | Training Loss: -6.7595292421174236e-06, Learning Rate: 1.8696855477173813e-05) Step... (29000/437388 | Training Loss: -6.415868483600207e-06, Learning Rate: 1.867399078037124e-05) Step... (29500/437388 | Training Loss: -1.9005401554750279e-06, Learning Rate: 1.8651127902558073e-05) Step... (30000/437388 | Training Loss: -5.97090274823131e-06, Learning Rate: 1.8628265024744906e-05) Step... (30500/437388 | Training Loss: -4.2556366679491475e-06, Learning Rate: 1.8605402146931738e-05) Step... (31000/437388 | Training Loss: -5.529957434191601e-06, Learning Rate: 1.858253926911857e-05) Step... (31500/437388 | Training Loss: -9.014551324071363e-06, Learning Rate: 1.8559676391305402e-05) Step... (32000/437388 | Training Loss: -7.940849172882736e-06, Learning Rate: 1.8536813513492234e-05) Step... (32500/437388 | Training Loss: -5.163095920579508e-06, Learning Rate: 1.8513950635679066e-05) Step... (33000/437388 | Training Loss: -3.989764081779867e-06, Learning Rate: 1.84910877578659e-05) Step... (33500/437388 | Training Loss: -4.6107038542686496e-07, Learning Rate: 1.846822488005273e-05) Step... (34000/437388 | Training Loss: -5.280284767650301e-06, Learning Rate: 1.8445362002239563e-05) Step... (34500/437388 | Training Loss: -7.053782155708177e-06, Learning Rate: 1.8422499124426395e-05) Step... (35000/437388 | Training Loss: -8.036065992200747e-06, Learning Rate: 1.8399634427623823e-05) Step... (35500/437388 | Training Loss: -3.392528924450744e-06, Learning Rate: 1.8376771549810655e-05) Step... (36000/437388 | Training Loss: -3.18766433338169e-06, Learning Rate: 1.835391049098689e-05) Step... (36500/437388 | Training Loss: -5.097432222100906e-06, Learning Rate: 1.8331047613173723e-05) Step... (37000/437388 | Training Loss: -5.385624717746396e-06, Learning Rate: 1.8308182916371152e-05) Step... (37500/437388 | Training Loss: 2.5853719307633583e-06, Learning Rate: 1.8285320038557984e-05) Step... (38000/437388 | Training Loss: -4.720991000795038e-06, Learning Rate: 1.8262457160744816e-05) Step... (38500/437388 | Training Loss: -5.782361768069677e-06, Learning Rate: 1.8239594282931648e-05) Step... (39000/437388 | Training Loss: -1.1970331570410053e-06, Learning Rate: 1.821673140511848e-05) Step... (39500/437388 | Training Loss: -6.66629148327047e-06, Learning Rate: 1.8193868527305312e-05) Step... (40000/437388 | Training Loss: -3.6221426853444427e-06, Learning Rate: 1.8171005649492145e-05) Step... (40500/437388 | Training Loss: -5.708717253583018e-06, Learning Rate: 1.8148142771678977e-05) Step... (41000/437388 | Training Loss: -7.3245118983322755e-06, Learning Rate: 1.812527989386581e-05) Step... (41500/437388 | Training Loss: -8.422952305409126e-06, Learning Rate: 1.810241701605264e-05) Step... (42000/437388 | Training Loss: 6.409215984604089e-06, Learning Rate: 1.8079554138239473e-05) Step... (42500/437388 | Training Loss: -7.179387466749176e-06, Learning Rate: 1.80566894414369e-05) Step... (43000/437388 | Training Loss: 4.655762040783884e-06, Learning Rate: 1.8033828382613137e-05) Step... (43500/437388 | Training Loss: -6.4161577029153705e-06, Learning Rate: 1.801096550479997e-05) Step... (44000/437388 | Training Loss: -4.054519195051398e-06, Learning Rate: 1.79881026269868e-05) Step... (44500/437388 | Training Loss: -6.536903129017446e-06, Learning Rate: 1.796523793018423e-05) Step... (45000/437388 | Training Loss: -6.904315341671463e-06, Learning Rate: 1.7942375052371062e-05) Step... (45500/437388 | Training Loss: -6.406115062418394e-06, Learning Rate: 1.7919512174557894e-05) Step... (46000/437388 | Training Loss: -7.637635462742765e-06, Learning Rate: 1.7896649296744727e-05) Step... (46500/437388 | Training Loss: -7.8167386163841e-06, Learning Rate: 1.787378641893156e-05) Step... (47000/437388 | Training Loss: -4.7537982936773915e-06, Learning Rate: 1.785092354111839e-05) Step... (47500/437388 | Training Loss: -6.590669727302156e-06, Learning Rate: 1.7828060663305223e-05) Step... (48000/437388 | Training Loss: -6.844691597507335e-06, Learning Rate: 1.7805197785492055e-05) Step... (48500/437388 | Training Loss: -5.927687197981868e-06, Learning Rate: 1.7782334907678887e-05) Step... (49000/437388 | Training Loss: -3.12098063659505e-06, Learning Rate: 1.775947202986572e-05) Step... (49500/437388 | Training Loss: -5.57123212274746e-06, Learning Rate: 1.773660915205255e-05) Step... (50000/437388 | Training Loss: -7.441677098540822e-06, Learning Rate: 1.7713746274239384e-05) Step... (50500/437388 | Training Loss: -6.516596386063611e-06, Learning Rate: 1.7690881577436812e-05) Step... (51000/437388 | Training Loss: -4.794296728505287e-06, Learning Rate: 1.7668020518613048e-05) Step... (51500/437388 | Training Loss: -6.038122592144646e-06, Learning Rate: 1.764515764079988e-05) Step... (52000/437388 | Training Loss: -5.160728505870793e-06, Learning Rate: 1.7622294762986712e-05) Step... (52500/437388 | Training Loss: -6.0460461099864915e-06, Learning Rate: 1.759943006618414e-05) Step... (53000/437388 | Training Loss: -6.9430088842636906e-06, Learning Rate: 1.7576567188370973e-05) Step... (53500/437388 | Training Loss: -6.698157903883839e-06, Learning Rate: 1.7553704310557805e-05) Step... (54000/437388 | Training Loss: 2.8181757443235256e-06, Learning Rate: 1.7530841432744637e-05) Step... (54500/437388 | Training Loss: -4.628562692232663e-06, Learning Rate: 1.750797855493147e-05) Step... (55000/437388 | Training Loss: -7.183040906966198e-06, Learning Rate: 1.74851156771183e-05) Step... (55500/437388 | Training Loss: -6.296264018601505e-06, Learning Rate: 1.7462252799305134e-05) Step... (56000/437388 | Training Loss: -5.903525561734568e-06, Learning Rate: 1.7439389921491966e-05) Step... (56500/437388 | Training Loss: -5.366368895920459e-06, Learning Rate: 1.7416527043678798e-05) Step... (57000/437388 | Training Loss: 7.901328444859246e-07, Learning Rate: 1.739366416586563e-05) Step... (57500/437388 | Training Loss: -5.448615866043838e-06, Learning Rate: 1.7370801288052462e-05) Step... (58000/437388 | Training Loss: 3.7966519812471233e-06, Learning Rate: 1.734793659124989e-05) Step... (58500/437388 | Training Loss: -5.885602149646729e-06, Learning Rate: 1.7325073713436723e-05) Step... (59000/437388 | Training Loss: -3.6664628169091884e-06, Learning Rate: 1.730221265461296e-05) Step... (59500/437388 | Training Loss: -1.0451103662489913e-05, Learning Rate: 1.727934977679979e-05) Step... (60000/437388 | Training Loss: -4.403611001180252e-06, Learning Rate: 1.725648507999722e-05) Step... (60500/437388 | Training Loss: -6.125190338934772e-06, Learning Rate: 1.723362220218405e-05) Step... (61000/437388 | Training Loss: -5.913780114497058e-06, Learning Rate: 1.7210759324370883e-05) Step... (61500/437388 | Training Loss: -5.16153249918716e-06, Learning Rate: 1.7187896446557716e-05) Step... (62000/437388 | Training Loss: -6.906470844114665e-06, Learning Rate: 1.7165033568744548e-05) Step... (62500/437388 | Training Loss: -6.748471605533268e-06, Learning Rate: 1.714217069093138e-05) Step... (63000/437388 | Training Loss: -6.144377948658075e-06, Learning Rate: 1.7119307813118212e-05) Step... (63500/437388 | Training Loss: -7.6089936555945314e-06, Learning Rate: 1.7096444935305044e-05) Step... (64000/437388 | Training Loss: -6.796373781980947e-06, Learning Rate: 1.7073582057491876e-05) Step... (64500/437388 | Training Loss: -1.0295361789758317e-05, Learning Rate: 1.705071917967871e-05) Step... (65000/437388 | Training Loss: -6.661573934252374e-06, Learning Rate: 1.702785630186554e-05) Step... (65500/437388 | Training Loss: -6.960311111470219e-06, Learning Rate: 1.7004993424052373e-05) Step... (66000/437388 | Training Loss: -8.780200005276129e-06, Learning Rate: 1.69821287272498e-05) Step... (66500/437388 | Training Loss: -7.80259051680332e-06, Learning Rate: 1.6959265849436633e-05) Step... (67000/437388 | Training Loss: -5.190452611714136e-06, Learning Rate: 1.693640479061287e-05) Step... (67500/437388 | Training Loss: -6.170910637592897e-06, Learning Rate: 1.69135419127997e-05) Step... (68000/437388 | Training Loss: -5.451665401778882e-06, Learning Rate: 1.689067721599713e-05) Step... (68500/437388 | Training Loss: -8.33153444546042e-06, Learning Rate: 1.6867814338183962e-05) Step... (69000/437388 | Training Loss: -8.836791494104546e-06, Learning Rate: 1.6844951460370794e-05) Step... (69500/437388 | Training Loss: -7.965396434883587e-06, Learning Rate: 1.6822088582557626e-05) Step... (70000/437388 | Training Loss: -7.993636245373636e-06, Learning Rate: 1.6799225704744458e-05) Step... (70500/437388 | Training Loss: -7.944276148919016e-06, Learning Rate: 1.677636282693129e-05) Step... (71000/437388 | Training Loss: -9.243218300980516e-06, Learning Rate: 1.6753499949118122e-05) Step... (71500/437388 | Training Loss: -8.733037248020992e-06, Learning Rate: 1.6730637071304955e-05) Step... (72000/437388 | Training Loss: -8.596071893407498e-06, Learning Rate: 1.6707774193491787e-05) Step... (72500/437388 | Training Loss: -8.663163498567883e-06, Learning Rate: 1.668491131567862e-05) Step... (73000/437388 | Training Loss: -9.036175470100716e-06, Learning Rate: 1.666204843786545e-05) Step... (73500/437388 | Training Loss: -9.52473419602029e-06, Learning Rate: 1.663918374106288e-05) Step... (74000/437388 | Training Loss: -8.939436156651936e-06, Learning Rate: 1.6616322682239115e-05) Step... (74500/437388 | Training Loss: -9.713441613712348e-06, Learning Rate: 1.6593457985436544e-05) Step... (75000/437388 | Training Loss: -9.852315997704864e-06, Learning Rate: 1.657059692661278e-05) Step... (75500/437388 | Training Loss: -1.306520243815612e-05, Learning Rate: 1.6547732229810208e-05) Step... (76000/437388 | Training Loss: -9.346391379949637e-06, Learning Rate: 1.652486935199704e-05) Step... (76500/437388 | Training Loss: -9.491493983659893e-06, Learning Rate: 1.6502006474183872e-05) Step... (77000/437388 | Training Loss: -9.394856533617713e-06, Learning Rate: 1.6479143596370704e-05) Step... (77500/437388 | Training Loss: -9.725947165861726e-06, Learning Rate: 1.6456280718557537e-05) Step... (78000/437388 | Training Loss: -9.789433534024283e-06, Learning Rate: 1.643341784074437e-05) Step... (78500/437388 | Training Loss: -9.560701073496602e-06, Learning Rate: 1.64105549629312e-05) Step... (79000/437388 | Training Loss: -9.880196557787713e-06, Learning Rate: 1.6387692085118033e-05) Step... (79500/437388 | Training Loss: -9.761097317095846e-06, Learning Rate: 1.6364829207304865e-05) Step... (80000/437388 | Training Loss: -9.930587111739442e-06, Learning Rate: 1.6341966329491697e-05) Step... (80500/437388 | Training Loss: -9.742347174324095e-06, Learning Rate: 1.631910345167853e-05) Step... (81000/437388 | Training Loss: -9.903991667670198e-06, Learning Rate: 1.629624057386536e-05) Step... (81500/437388 | Training Loss: -9.69627762970049e-06, Learning Rate: 1.627337587706279e-05) Step... (82000/437388 | Training Loss: -9.94446236290969e-06, Learning Rate: 1.6250514818239026e-05) Step... (82500/437388 | Training Loss: -9.852805305854417e-06, Learning Rate: 1.6227650121436454e-05) Step... (83000/437388 | Training Loss: -9.919120202539489e-06, Learning Rate: 1.620478906261269e-05) Step... (83500/437388 | Training Loss: -9.893648893921636e-06, Learning Rate: 1.618192436581012e-05) Step... (84000/437388 | Training Loss: -9.873612725641578e-06, Learning Rate: 1.615906148799695e-05) Step... (84500/437388 | Training Loss: -9.74766953731887e-06, Learning Rate: 1.6136198610183783e-05) Step... (85000/437388 | Training Loss: -9.893889910017606e-06, Learning Rate: 1.6113335732370615e-05) Step... (85500/437388 | Training Loss: -9.625448910810519e-06, Learning Rate: 1.6090472854557447e-05) Step... (86000/437388 | Training Loss: -9.15635609999299e-06, Learning Rate: 1.606760997674428e-05) Step... (86500/437388 | Training Loss: -9.932768080034293e-06, Learning Rate: 1.604474709893111e-05) Step... (87000/437388 | Training Loss: -9.676634363131598e-06, Learning Rate: 1.6021884221117944e-05) Step... (87500/437388 | Training Loss: -1.0389778253738768e-05, Learning Rate: 1.5999021343304776e-05) Step... (88000/437388 | Training Loss: -9.875341675069649e-06, Learning Rate: 1.5976158465491608e-05) Step... (88500/437388 | Training Loss: -9.83661630016286e-06, Learning Rate: 1.595329558767844e-05) Step... (89000/437388 | Training Loss: -9.88773626886541e-06, Learning Rate: 1.593043089087587e-05) Step... (89500/437388 | Training Loss: -1.005065132630989e-05, Learning Rate: 1.59075680130627e-05) Step... (90000/437388 | Training Loss: -8.606668416177854e-06, Learning Rate: 1.5884706954238936e-05) Step... (90500/437388 | Training Loss: -9.940247764461674e-06, Learning Rate: 1.586184407642577e-05) Step... (91000/437388 | Training Loss: -9.934198715200182e-06, Learning Rate: 1.5838979379623197e-05) Step... (91500/437388 | Training Loss: -9.929631232807878e-06, Learning Rate: 1.581611650181003e-05) Step... (92000/437388 | Training Loss: -9.918112482409924e-06, Learning Rate: 1.579325362399686e-05) Step... (92500/437388 | Training Loss: -9.936074093275238e-06, Learning Rate: 1.5770390746183693e-05) Step... (93000/437388 | Training Loss: -9.881726327876095e-06, Learning Rate: 1.5747527868370526e-05) Step... (93500/437388 | Training Loss: -9.933090041158721e-06, Learning Rate: 1.5724664990557358e-05) Step... (94000/437388 | Training Loss: -9.976016372093e-06, Learning Rate: 1.570180211274419e-05) Step... (94500/437388 | Training Loss: -9.939818482962437e-06, Learning Rate: 1.5678939234931022e-05) Step... (95000/437388 | Training Loss: -9.946526915882714e-06, Learning Rate: 1.5656076357117854e-05) Step... (95500/437388 | Training Loss: -9.949440936907195e-06, Learning Rate: 1.5633213479304686e-05) Step... (96000/437388 | Training Loss: -1.0023382856161334e-05, Learning Rate: 1.561035060149152e-05) Step... (96500/437388 | Training Loss: -9.958578630175907e-06, Learning Rate: 1.558748772367835e-05) Step... (97000/437388 | Training Loss: -9.940342351910658e-06, Learning Rate: 1.556462302687578e-05) Step... (97500/437388 | Training Loss: -9.942121323547326e-06, Learning Rate: 1.554176014906261e-05) Step... (98000/437388 | Training Loss: -9.938939911080524e-06, Learning Rate: 1.5518899090238847e-05) Step... (98500/437388 | Training Loss: -9.912233508657664e-06, Learning Rate: 1.549603621242568e-05) Step... (99000/437388 | Training Loss: -9.934283298207447e-06, Learning Rate: 1.5473171515623108e-05) Step... (99500/437388 | Training Loss: -9.91287743090652e-06, Learning Rate: 1.545030863780994e-05) Step... (100000/437388 | Training Loss: -9.90589614957571e-06, Learning Rate: 1.5427445759996772e-05) Step... (100500/437388 | Training Loss: -9.936301466950681e-06, Learning Rate: 1.5404582882183604e-05) Step... (101000/437388 | Training Loss: -9.946139471139759e-06, Learning Rate: 1.5381720004370436e-05) Step... (101500/437388 | Training Loss: -9.907307685352862e-06, Learning Rate: 1.5358857126557268e-05) Step... (102000/437388 | Training Loss: -9.979352398659103e-06, Learning Rate: 1.53359942487441e-05) Step... (102500/437388 | Training Loss: -9.954264896805398e-06, Learning Rate: 1.5313131370930932e-05) Step... (103000/437388 | Training Loss: -9.955115274351556e-06, Learning Rate: 1.5290268493117765e-05) Step... (103500/437388 | Training Loss: -9.925246558850631e-06, Learning Rate: 1.5267405615304597e-05) Step... (104000/437388 | Training Loss: -9.947918442776427e-06, Learning Rate: 1.5244541827996727e-05) Step... (104500/437388 | Training Loss: -9.944302291842178e-06, Learning Rate: 1.522167895018356e-05) Step... (105000/437388 | Training Loss: -9.955795576388482e-06, Learning Rate: 1.5198816981865093e-05) Step... (105500/437388 | Training Loss: -9.942156793840695e-06, Learning Rate: 1.5175952285062522e-05) Step... (106000/437388 | Training Loss: -9.936501555785071e-06, Learning Rate: 1.5153090316744056e-05) Step... (106500/437388 | Training Loss: -9.950296771421563e-06, Learning Rate: 1.5130227438930888e-05) Step... (107000/437388 | Training Loss: -9.797213351703249e-06, Learning Rate: 1.510736456111772e-05) Step... (107500/437388 | Training Loss: -9.955976565834135e-06, Learning Rate: 1.508450077380985e-05) Step... (108000/437388 | Training Loss: -9.953116204997059e-06, Learning Rate: 1.5061637895996682e-05) Step... (108500/437388 | Training Loss: -9.957444490282796e-06, Learning Rate: 1.5038775018183514e-05) Step... (109000/437388 | Training Loss: -9.821238563745283e-06, Learning Rate: 1.5015911230875645e-05) Step... (109500/437388 | Training Loss: -9.946938916982617e-06, Learning Rate: 1.499305017205188e-05) Step... (110000/437388 | Training Loss: -9.967616279027425e-06, Learning Rate: 1.4970185475249309e-05) Step... (110500/437388 | Training Loss: -9.965857316274196e-06, Learning Rate: 1.4947323506930843e-05) Step... (111000/437388 | Training Loss: -9.944502380676568e-06, Learning Rate: 1.4924460629117675e-05) Step... (111500/437388 | Training Loss: -9.968231097445823e-06, Learning Rate: 1.4901596841809805e-05) Step... (112000/437388 | Training Loss: -9.950863386620767e-06, Learning Rate: 1.4878733963996638e-05) Step... (112500/437388 | Training Loss: -9.969849088520277e-06, Learning Rate: 1.485587108618347e-05) Step... (113000/437388 | Training Loss: -9.920340744429268e-06, Learning Rate: 1.4833009117865004e-05) Step... (113500/437388 | Training Loss: -9.993400453822687e-06, Learning Rate: 1.4810144421062432e-05) Step... (114000/437388 | Training Loss: -9.957962902262807e-06, Learning Rate: 1.4787282452743966e-05) Step... (114500/437388 | Training Loss: -9.957393558579497e-06, Learning Rate: 1.4764419574930798e-05) Step... (115000/437388 | Training Loss: -9.974402928492054e-06, Learning Rate: 1.474155669711763e-05) Step... (115500/437388 | Training Loss: -9.969589882530272e-06, Learning Rate: 1.471869290980976e-05) Step... (116000/437388 | Training Loss: -9.969175152946264e-06, Learning Rate: 1.4695830031996593e-05) Step... (116500/437388 | Training Loss: -9.960567695088685e-06, Learning Rate: 1.4672968063678127e-05) Step... (117000/437388 | Training Loss: -9.816652891458943e-06, Learning Rate: 1.4650103366875555e-05) Step... (117500/437388 | Training Loss: -9.960506758943666e-06, Learning Rate: 1.462724139855709e-05) Step... (118000/437388 | Training Loss: -9.966113793780096e-06, Learning Rate: 1.4604378520743921e-05) Step... (118500/437388 | Training Loss: -9.95407117443392e-06, Learning Rate: 1.4581515642930754e-05) Step... (119000/437388 | Training Loss: -9.978957677958533e-06, Learning Rate: 1.4558651855622884e-05) Step... (119500/437388 | Training Loss: -9.961753676179796e-06, Learning Rate: 1.4535788977809716e-05) Step... (120000/437388 | Training Loss: -9.842288818617817e-06, Learning Rate: 1.451292700949125e-05) Step... (120500/437388 | Training Loss: -9.98788709694054e-06, Learning Rate: 1.449006322218338e-05) Step... (121000/437388 | Training Loss: -9.951994798029773e-06, Learning Rate: 1.4467201253864914e-05) Step... (121500/437388 | Training Loss: -9.9709377536783e-06, Learning Rate: 1.4444336557062343e-05) Step... (122000/437388 | Training Loss: -9.959147064364515e-06, Learning Rate: 1.4421474588743877e-05) Step... (122500/437388 | Training Loss: -9.685417353466619e-06, Learning Rate: 1.4398611710930709e-05) Step... (123000/437388 | Training Loss: -9.959947419702075e-06, Learning Rate: 1.4375747923622839e-05) Step... (123500/437388 | Training Loss: -9.967046935344115e-06, Learning Rate: 1.4352885045809671e-05) Step... (124000/437388 | Training Loss: -9.98882569547277e-06, Learning Rate: 1.4330022167996503e-05) Step... (124500/437388 | Training Loss: -9.963216143660247e-06, Learning Rate: 1.4307160199678037e-05) Step... (125000/437388 | Training Loss: -9.949379091267474e-06, Learning Rate: 1.4284295502875466e-05) Step... (125500/437388 | Training Loss: -9.868399502011016e-06, Learning Rate: 1.4261433534557e-05) Step... (126000/437388 | Training Loss: -9.719717127154581e-06, Learning Rate: 1.4238570656743832e-05) Step... (126500/437388 | Training Loss: -9.97211282083299e-06, Learning Rate: 1.4215707778930664e-05) Step... (127000/437388 | Training Loss: -9.905468687065877e-06, Learning Rate: 1.4192843991622794e-05) Step... (127500/437388 | Training Loss: -9.968744052457623e-06, Learning Rate: 1.4169981113809627e-05) Step... (128000/437388 | Training Loss: -9.980949471355416e-06, Learning Rate: 1.414711914549116e-05) Step... (128500/437388 | Training Loss: -9.967592632165179e-06, Learning Rate: 1.4124254448688589e-05) Step... (129000/437388 | Training Loss: -9.979932656278834e-06, Learning Rate: 1.4101392480370123e-05) Step... (129500/437388 | Training Loss: -9.441755537409335e-06, Learning Rate: 1.4078528693062253e-05) Step... (130000/437388 | Training Loss: -9.931108252203558e-06, Learning Rate: 1.4055666724743787e-05) Step... (130500/437388 | Training Loss: -9.979726200981531e-06, Learning Rate: 1.403280384693062e-05) Step... (131000/437388 | Training Loss: -9.972496627597138e-06, Learning Rate: 1.400994005962275e-05) Step... (131500/437388 | Training Loss: -9.980236427509226e-06, Learning Rate: 1.3987077181809582e-05) Step... (132000/437388 | Training Loss: -9.980353752325755e-06, Learning Rate: 1.3964214303996414e-05) Step... (132500/437388 | Training Loss: -9.980927643482573e-06, Learning Rate: 1.3941352335677948e-05) Step... (133000/437388 | Training Loss: -9.694988875708077e-06, Learning Rate: 1.3918487638875376e-05) Step... (133500/437388 | Training Loss: -9.752675396157429e-06, Learning Rate: 1.389562567055691e-05) Step... (134000/437388 | Training Loss: -9.952703294402454e-06, Learning Rate: 1.3872762792743742e-05) Step... (134500/437388 | Training Loss: -9.968332960852422e-06, Learning Rate: 1.3849899005435873e-05) Step... (135000/437388 | Training Loss: -9.869803761830553e-06, Learning Rate: 1.3827036127622705e-05) Step... (135500/437388 | Training Loss: -9.96959897747729e-06, Learning Rate: 1.3804173249809537e-05) Step... (136000/437388 | Training Loss: -9.982661140384153e-06, Learning Rate: 1.3781311281491071e-05) Step... (136500/437388 | Training Loss: -9.980454706237651e-06, Learning Rate: 1.37584465846885e-05) Step... (137000/437388 | Training Loss: -9.969302482204512e-06, Learning Rate: 1.3735584616370033e-05) Step... (137500/437388 | Training Loss: -9.979442438634578e-06, Learning Rate: 1.3712721738556866e-05) Step... (138000/437388 | Training Loss: -9.97651386569487e-06, Learning Rate: 1.3689858860743698e-05) Step... (138500/437388 | Training Loss: -9.985508768295404e-06, Learning Rate: 1.3666995073435828e-05) Step... (139000/437388 | Training Loss: -9.726589269121177e-06, Learning Rate: 1.364413219562266e-05) Step... (139500/437388 | Training Loss: -9.964971468434669e-06, Learning Rate: 1.3621270227304194e-05) Step... (140000/437388 | Training Loss: -9.96951166598592e-06, Learning Rate: 1.3598405530501623e-05) Step... (140500/437388 | Training Loss: -9.984936696127988e-06, Learning Rate: 1.3575543562183157e-05) Step... (141000/437388 | Training Loss: -9.892564776237123e-06, Learning Rate: 1.3552679774875287e-05) Step... (141500/437388 | Training Loss: -9.977737136068754e-06, Learning Rate: 1.3529817806556821e-05) Step... (142000/437388 | Training Loss: -9.97265669866465e-06, Learning Rate: 1.3506954928743653e-05) Step... (142500/437388 | Training Loss: -9.97040478978306e-06, Learning Rate: 1.3484091141435783e-05) Step... (143000/437388 | Training Loss: -9.97698589344509e-06, Learning Rate: 1.3461228263622615e-05) Step... (146000/437388 | Training Loss: -9.630007298255805e-06, Learning Rate: 1.3445407603285275e-05) Step... (146500/437388 | Training Loss: -9.965676326828543e-06, Learning Rate: 1.3422543815977406e-05) Step... (147000/437388 | Training Loss: -9.72058842307888e-06, Learning Rate: 1.339968184765894e-05) Step... (147500/437388 | Training Loss: -9.968893209588714e-06, Learning Rate: 1.3376818969845772e-05) Step... (148000/437388 | Training Loss: -9.978568414226174e-06, Learning Rate: 1.3353955182537902e-05) Step... (148500/437388 | Training Loss: -9.978695743484423e-06, Learning Rate: 1.3331092304724734e-05) Step... (149000/437388 | Training Loss: -9.98615360003896e-06, Learning Rate: 1.3308229426911566e-05) Step... (149500/437388 | Training Loss: -9.963588126993272e-06, Learning Rate: 1.32853674585931e-05) Step... (150000/437388 | Training Loss: -9.972392945201136e-06, Learning Rate: 1.3262502761790529e-05) Step... (150500/437388 | Training Loss: -9.998041605285835e-06, Learning Rate: 1.3239640793472063e-05) Step... (151000/437388 | Training Loss: -9.98711766442284e-06, Learning Rate: 1.3216776096669491e-05) Step... (151500/437388 | Training Loss: -9.854455129243433e-06, Learning Rate: 1.3193914128351025e-05) Step... (152000/437388 | Training Loss: -9.965891877072863e-06, Learning Rate: 1.3171051250537857e-05) Step... (152500/437388 | Training Loss: -9.975628927350044e-06, Learning Rate: 1.314818837272469e-05) Step... (153000/437388 | Training Loss: -9.9787357612513e-06, Learning Rate: 1.3125325494911522e-05) Step... (153500/437388 | Training Loss: -9.78644129645545e-06, Learning Rate: 1.3102461707603652e-05) Step... (154000/437388 | Training Loss: -9.984996722778305e-06, Learning Rate: 1.3079599739285186e-05) Step... (154500/437388 | Training Loss: -9.972067346097901e-06, Learning Rate: 1.3056735951977316e-05) Step... (155000/437388 | Training Loss: -9.980914910556749e-06, Learning Rate: 1.303387398365885e-05) Step... (155500/437388 | Training Loss: -9.970542123483028e-06, Learning Rate: 1.301101019635098e-05) Step... (156000/437388 | Training Loss: -9.967713594960514e-06, Learning Rate: 1.2988147318537813e-05) Step... (156500/437388 | Training Loss: -9.9806848083972e-06, Learning Rate: 1.2965284440724645e-05) Step... (157000/437388 | Training Loss: -9.98432187770959e-06, Learning Rate: 1.2942421562911477e-05) Step... (157500/437388 | Training Loss: -9.975950888474472e-06, Learning Rate: 1.291955959459301e-05) Step... (158000/437388 | Training Loss: -9.968489393941127e-06, Learning Rate: 1.289669489779044e-05) Step... (158500/437388 | Training Loss: -9.981140465242788e-06, Learning Rate: 1.2873832929471973e-05) Step... (159000/437388 | Training Loss: -9.988563760998659e-06, Learning Rate: 1.2850970051658805e-05) Step... (159500/437388 | Training Loss: -9.971276085707359e-06, Learning Rate: 1.2828106264350936e-05) Step... (160000/437388 | Training Loss: -9.995190339395776e-06, Learning Rate: 1.2805243386537768e-05) Step... (160500/437388 | Training Loss: -9.97090683085844e-06, Learning Rate: 1.27823805087246e-05) Step... (161000/437388 | Training Loss: -9.982477422454394e-06, Learning Rate: 1.2759518540406134e-05) Step... (161500/437388 | Training Loss: -9.980572940548882e-06, Learning Rate: 1.2736653843603563e-05) Step... (162000/437388 | Training Loss: -9.981999028241262e-06, Learning Rate: 1.2713791875285096e-05) Step... (162500/437388 | Training Loss: -9.979303285945207e-06, Learning Rate: 1.2690928087977227e-05) Step... (163000/437388 | Training Loss: -9.984481948777102e-06, Learning Rate: 1.266806611965876e-05) Step... (163500/437388 | Training Loss: -9.98444011202082e-06, Learning Rate: 1.2645202332350891e-05) Step... (164000/437388 | Training Loss: -9.993951607611962e-06, Learning Rate: 1.2622339454537723e-05) Step... (164500/437388 | Training Loss: -9.982698429666925e-06, Learning Rate: 1.2599476576724555e-05) Step... (165000/437388 | Training Loss: -9.805340596358292e-06, Learning Rate: 1.2576612789416686e-05) Step... (165500/437388 | Training Loss: -9.72814996202942e-06, Learning Rate: 1.255375082109822e-05) Step... (166000/437388 | Training Loss: -9.966535799321719e-06, Learning Rate: 1.253088703379035e-05) Step... (166500/437388 | Training Loss: -9.917591341945808e-06, Learning Rate: 1.2508025065471884e-05) Step... (167000/437388 | Training Loss: -9.977657100534998e-06, Learning Rate: 1.2485161278164014e-05) Step... (167500/437388 | Training Loss: -9.982955816667527e-06, Learning Rate: 1.2462298400350846e-05) Step... (168000/437388 | Training Loss: -1.0105099136126228e-05, Learning Rate: 1.2439435522537678e-05) Step... (168500/437388 | Training Loss: -9.98173163679894e-06, Learning Rate: 1.241657264472451e-05) Step... (169000/437388 | Training Loss: -9.995868822443299e-06, Learning Rate: 1.2393710676406045e-05) Step... (169500/437388 | Training Loss: -9.98168525256915e-06, Learning Rate: 1.2370845979603473e-05) Step... (170000/437388 | Training Loss: -9.965764547814615e-06, Learning Rate: 1.2347984011285007e-05) Step... (170500/437388 | Training Loss: -9.795734513318166e-06, Learning Rate: 1.2325119314482436e-05) Step... (171000/437388 | Training Loss: -9.98737959889695e-06, Learning Rate: 1.230225734616397e-05) Step... (171500/437388 | Training Loss: -9.990029866457917e-06, Learning Rate: 1.2279394468350802e-05) Step... (172000/437388 | Training Loss: -9.971741746994667e-06, Learning Rate: 1.2256531590537634e-05) Step... (172500/437388 | Training Loss: -9.986046279664151e-06, Learning Rate: 1.2233669622219168e-05) Step... (173000/437388 | Training Loss: -9.985391443478875e-06, Learning Rate: 1.2210804925416596e-05) Step... (173500/437388 | Training Loss: -1.0004563591792248e-05, Learning Rate: 1.218794295709813e-05) Step... (174000/437388 | Training Loss: -9.979454262065701e-06, Learning Rate: 1.216507916979026e-05) Step... (174500/437388 | Training Loss: -1.0027236385212746e-05, Learning Rate: 1.2142217201471794e-05) Step... (175000/437388 | Training Loss: -9.978477464755997e-06, Learning Rate: 1.2119353414163925e-05) Step... (175500/437388 | Training Loss: -9.809462426346727e-06, Learning Rate: 1.2096490536350757e-05) Step... (176000/437388 | Training Loss: -9.970652172341943e-06, Learning Rate: 1.2073627658537589e-05) Step... (176500/437388 | Training Loss: -9.984391908801626e-06, Learning Rate: 1.205076387122972e-05) Step... (177000/437388 | Training Loss: -9.953098924597725e-06, Learning Rate: 1.2027901902911253e-05) Step... (177500/437388 | Training Loss: -1.0114017641171813e-05, Learning Rate: 1.2005038115603384e-05) Step... (178000/437388 | Training Loss: -9.99200165097136e-06, Learning Rate: 1.1982176147284918e-05) Step... (178500/437388 | Training Loss: -9.968525773729198e-06, Learning Rate: 1.195931326947175e-05) Step... (179000/437388 | Training Loss: -9.986906661652029e-06, Learning Rate: 1.193644948216388e-05) Step... (179500/437388 | Training Loss: -9.977569789043628e-06, Learning Rate: 1.1913586604350712e-05) Step... (180000/437388 | Training Loss: -9.949051673174836e-06, Learning Rate: 1.1890723726537544e-05) Step... (180500/437388 | Training Loss: -9.964109267457388e-06, Learning Rate: 1.1867861758219078e-05) Step... (181000/437388 | Training Loss: -9.971396139007993e-06, Learning Rate: 1.1844997061416507e-05) Step... (181500/437388 | Training Loss: -9.980771210393868e-06, Learning Rate: 1.182213509309804e-05) Step... (182000/437388 | Training Loss: -9.988624697143678e-06, Learning Rate: 1.179927039629547e-05) Step... (182500/437388 | Training Loss: -9.988372767111287e-06, Learning Rate: 1.1776408427977003e-05) Step... (183000/437388 | Training Loss: -9.979199603549205e-06, Learning Rate: 1.1753545550163835e-05) Step... (183500/437388 | Training Loss: -9.967914593289606e-06, Learning Rate: 1.1730682672350667e-05) Step... (184000/437388 | Training Loss: -9.89844193099998e-06, Learning Rate: 1.17078197945375e-05) Step... (184500/437388 | Training Loss: -1.0040076631412376e-05, Learning Rate: 1.168495600722963e-05) Step... (185000/437388 | Training Loss: -9.980107279261574e-06, Learning Rate: 1.1662094038911164e-05) Step... (185500/437388 | Training Loss: -9.925603080773726e-06, Learning Rate: 1.1639230251603294e-05) Step... (186000/437388 | Training Loss: -9.989335012505762e-06, Learning Rate: 1.1616368283284828e-05) Step... (186500/437388 | Training Loss: -9.844949090620503e-06, Learning Rate: 1.1593504495976958e-05) Step... (187000/437388 | Training Loss: -9.827876965573523e-06, Learning Rate: 1.157064161816379e-05) Step... (187500/437388 | Training Loss: -9.985111319110729e-06, Learning Rate: 1.1547778740350623e-05) Step... (188000/437388 | Training Loss: -9.98419909592485e-06, Learning Rate: 1.1524914953042753e-05) Step... (188500/437388 | Training Loss: -9.822408173931763e-06, Learning Rate: 1.1502053894218989e-05) Step... (189000/437388 | Training Loss: -9.985793440137058e-06, Learning Rate: 1.1479189197416417e-05) Step... (189500/437388 | Training Loss: -9.969493476091884e-06, Learning Rate: 1.1456327229097951e-05) Step... (190000/437388 | Training Loss: -9.972950465453323e-06, Learning Rate: 1.1433464351284783e-05) Step... (190500/437388 | Training Loss: -9.980501090467442e-06, Learning Rate: 1.1410600563976914e-05) Step... (191000/437388 | Training Loss: -9.985233191400766e-06, Learning Rate: 1.1387737686163746e-05) Step... (191500/437388 | Training Loss: -9.974917702493258e-06, Learning Rate: 1.1364874808350578e-05) Step... (192000/437388 | Training Loss: -9.982271876651794e-06, Learning Rate: 1.1342012840032112e-05) Step... (192500/437388 | Training Loss: -9.983750715036876e-06, Learning Rate: 1.131914814322954e-05) Step... (193000/437388 | Training Loss: -9.986609256884549e-06, Learning Rate: 1.1296286174911074e-05) Step... (193500/437388 | Training Loss: -9.981191396946087e-06, Learning Rate: 1.1273422387603205e-05) Step... (194000/437388 | Training Loss: -9.990637408918701e-06, Learning Rate: 1.1250560419284739e-05) Step... (194500/437388 | Training Loss: -9.983663403545506e-06, Learning Rate: 1.1227696631976869e-05) Step... (195000/437388 | Training Loss: -9.875216164800804e-06, Learning Rate: 1.1204833754163701e-05) Step... (195500/437388 | Training Loss: -9.98624091153033e-06, Learning Rate: 1.1181970876350533e-05) Step... (196000/437388 | Training Loss: -9.985752512875479e-06, Learning Rate: 1.1159107089042664e-05) Step... (196500/437388 | Training Loss: -9.983903510146774e-06, Learning Rate: 1.1136245120724197e-05) Step... (197000/437388 | Training Loss: -9.985045835492201e-06, Learning Rate: 1.1113381333416328e-05) Step... (197500/437388 | Training Loss: -9.98608174995752e-06, Learning Rate: 1.1090519365097862e-05) Step... (198000/437388 | Training Loss: -9.988999408960808e-06, Learning Rate: 1.1067655577789992e-05) Step... (198500/437388 | Training Loss: -9.985302312998101e-06, Learning Rate: 1.1044792699976824e-05) Step... (199000/437388 | Training Loss: -9.98492214421276e-06, Learning Rate: 1.1021929822163656e-05) Step... (199500/437388 | Training Loss: -9.997691449825652e-06, Learning Rate: 1.0999066944350488e-05) Step... (200000/437388 | Training Loss: -9.995275831897743e-06, Learning Rate: 1.0976204976032022e-05) Step... (200500/437388 | Training Loss: -9.980434697354212e-06, Learning Rate: 1.0953340279229451e-05) Step... (201000/437388 | Training Loss: -9.986708391807042e-06, Learning Rate: 1.0930478310910985e-05) Step... (201500/437388 | Training Loss: -9.995856089517474e-06, Learning Rate: 1.0907613614108413e-05) Step... (202000/437388 | Training Loss: -9.988008969230577e-06, Learning Rate: 1.0884751645789947e-05) Step... (202500/437388 | Training Loss: -9.97873030428309e-06, Learning Rate: 1.086188876797678e-05) Step... (203000/437388 | Training Loss: -9.833105650614016e-06, Learning Rate: 1.0839025890163612e-05) Step... (203500/437388 | Training Loss: -9.990989383368287e-06, Learning Rate: 1.0816162102855742e-05) Step... (204000/437388 | Training Loss: -9.993271305575036e-06, Learning Rate: 1.0793299225042574e-05) Step... (204500/437388 | Training Loss: -9.977482477552257e-06, Learning Rate: 1.0770437256724108e-05) Step... (205000/437388 | Training Loss: -9.986313671106473e-06, Learning Rate: 1.0747573469416238e-05) Step... (205500/437388 | Training Loss: -9.995596883527469e-06, Learning Rate: 1.0724711501097772e-05) Step... (206000/437388 | Training Loss: -9.83860172709683e-06, Learning Rate: 1.0701847713789903e-05) Step... (206500/437388 | Training Loss: -9.98814903141465e-06, Learning Rate: 1.0678984835976735e-05) Step... (207000/437388 | Training Loss: -9.978743037208915e-06, Learning Rate: 1.0656121958163567e-05) Step... (207500/437388 | Training Loss: -9.858611520030536e-06, Learning Rate: 1.0633258170855697e-05) Step... (208000/437388 | Training Loss: -9.97613460640423e-06, Learning Rate: 1.0610396202537231e-05) Step... (208500/437388 | Training Loss: -9.987576049752533e-06, Learning Rate: 1.0587532415229362e-05) Step... (209000/437388 | Training Loss: -9.988930287363473e-06, Learning Rate: 1.0564670446910895e-05) Step... (209500/437388 | Training Loss: -9.988874808186665e-06, Learning Rate: 1.0541807569097728e-05) Step... (210000/437388 | Training Loss: -9.970885002985597e-06, Learning Rate: 1.0518943781789858e-05) Step... (210500/437388 | Training Loss: -9.983324162021745e-06, Learning Rate: 1.049608090397669e-05) Step... (211000/437388 | Training Loss: -9.991470506065525e-06, Learning Rate: 1.0473218026163522e-05) Step... (211500/437388 | Training Loss: -9.98959058051696e-06, Learning Rate: 1.0450356057845056e-05) Step... (212000/437388 | Training Loss: -9.996974768000655e-06, Learning Rate: 1.0427491361042485e-05) Step... (212500/437388 | Training Loss: -9.991736078518443e-06, Learning Rate: 1.0404629392724019e-05) Step... (213000/437388 | Training Loss: -9.993411367759109e-06, Learning Rate: 1.0381764695921447e-05) Step... (213500/437388 | Training Loss: -9.990038051910233e-06, Learning Rate: 1.0358902727602981e-05) Step... (214000/437388 | Training Loss: -9.994332685892005e-06, Learning Rate: 1.0336039849789813e-05) Step... (214500/437388 | Training Loss: -9.982057235902175e-06, Learning Rate: 1.0313176971976645e-05) Step... (215000/437388 | Training Loss: -9.99447911453899e-06, Learning Rate: 1.0290314094163477e-05) Step... (215500/437388 | Training Loss: -9.990168109652586e-06, Learning Rate: 1.0267450306855608e-05) Step... (216000/437388 | Training Loss: -9.987637895392254e-06, Learning Rate: 1.0244588338537142e-05) Step... (216500/437388 | Training Loss: -9.981984476326033e-06, Learning Rate: 1.0221724551229272e-05) Step... (217000/437388 | Training Loss: -9.984448297473136e-06, Learning Rate: 1.0198862582910806e-05) Step... (217500/437388 | Training Loss: -9.993505045713391e-06, Learning Rate: 1.0175998795602936e-05) Step... (218000/437388 | Training Loss: -9.99086751107825e-06, Learning Rate: 1.0153135917789768e-05) Step... (218500/437388 | Training Loss: -9.909585060086101e-06, Learning Rate: 1.01302730399766e-05) Step... (219000/437388 | Training Loss: -1.0126263987331185e-05, Learning Rate: 1.0107409252668731e-05) Step... (219500/437388 | Training Loss: -9.997625966207124e-06, Learning Rate: 1.0084548193844967e-05) Step... (220000/437388 | Training Loss: -9.99296025838703e-06, Learning Rate: 1.0061683497042395e-05) Step... (220500/437388 | Training Loss: -9.985401447920594e-06, Learning Rate: 1.0038821528723929e-05) Step... (221000/437388 | Training Loss: -9.983653399103787e-06, Learning Rate: 1.0015958650910761e-05) Step... (221500/437388 | Training Loss: -9.855179087026045e-06, Learning Rate: 9.993094863602892e-06) Step... (222000/437388 | Training Loss: -9.978019079426304e-06, Learning Rate: 9.970231985789724e-06) Step... (222500/437388 | Training Loss: -9.991362276196014e-06, Learning Rate: 9.947369107976556e-06) Step... (223000/437388 | Training Loss: -9.984462849388365e-06, Learning Rate: 9.924505320668686e-06) Step... (223500/437388 | Training Loss: -9.991919796448201e-06, Learning Rate: 9.901642442855518e-06) Step... (224000/437388 | Training Loss: -9.852660696196835e-06, Learning Rate: 9.878780474537052e-06) Step... (224500/437388 | Training Loss: -9.99262192635797e-06, Learning Rate: 9.855917596723884e-06) Step... (225000/437388 | Training Loss: -9.992707418859936e-06, Learning Rate: 9.833054718910716e-06) Step... (225500/437388 | Training Loss: -9.999449503084179e-06, Learning Rate: 9.810190931602847e-06) Step... (226000/437388 | Training Loss: -9.58389227889711e-06, Learning Rate: 9.787328053789679e-06) Step... (226500/437388 | Training Loss: -9.977780791814439e-06, Learning Rate: 9.764465175976511e-06) Step... (227000/437388 | Training Loss: -9.989915270125493e-06, Learning Rate: 9.741601388668641e-06) Step... (227500/437388 | Training Loss: -9.993409548769705e-06, Learning Rate: 9.718738510855474e-06) Step... (228000/437388 | Training Loss: -9.988070814870298e-06, Learning Rate: 9.695876542537007e-06) Step... (228500/437388 | Training Loss: -9.985919859900605e-06, Learning Rate: 9.67301366472384e-06) Step... (229000/437388 | Training Loss: -9.696464985609055e-06, Learning Rate: 9.65014987741597e-06) Step... (229500/437388 | Training Loss: -9.992392733693123e-06, Learning Rate: 9.627286999602802e-06) Step... (230000/437388 | Training Loss: -9.97125061985571e-06, Learning Rate: 9.604424121789634e-06) Step... (230500/437388 | Training Loss: -9.985516044253018e-06, Learning Rate: 9.581561243976466e-06) Step... (231000/437388 | Training Loss: -9.988828423956875e-06, Learning Rate: 9.558697456668597e-06) Step... (231500/437388 | Training Loss: -9.853993105934933e-06, Learning Rate: 9.535834578855429e-06) Step... (232000/437388 | Training Loss: -9.98645555227995e-06, Learning Rate: 9.512972610536963e-06) Step... (232500/437388 | Training Loss: -9.985138603951782e-06, Learning Rate: 9.490109732723795e-06) Step... (233000/437388 | Training Loss: -9.986102668335661e-06, Learning Rate: 9.467245945415925e-06) Step... (233500/437388 | Training Loss: -9.983748896047473e-06, Learning Rate: 9.444383067602757e-06) Step... (234000/437388 | Training Loss: -9.987346857087687e-06, Learning Rate: 9.42152018978959e-06) Step... (234500/437388 | Training Loss: -9.971370673156343e-06, Learning Rate: 9.39865640248172e-06) Step... (235000/437388 | Training Loss: -9.881186997517943e-06, Learning Rate: 9.375793524668552e-06) Step... (235500/437388 | Training Loss: -9.987215889850631e-06, Learning Rate: 9.352930646855384e-06) Step... (236000/437388 | Training Loss: -9.983035852201283e-06, Learning Rate: 9.330068678536918e-06) Step... (236500/437388 | Training Loss: -9.984838470700197e-06, Learning Rate: 9.30720580072375e-06) Step... (237000/437388 | Training Loss: -9.986998520616908e-06, Learning Rate: 9.28434201341588e-06) Step... (237500/437388 | Training Loss: -9.974377462640405e-06, Learning Rate: 9.261479135602713e-06) Step... (238000/437388 | Training Loss: -9.987925295718014e-06, Learning Rate: 9.238616257789545e-06) Step... (238500/437388 | Training Loss: -9.986457371269353e-06, Learning Rate: 9.215752470481675e-06) Step... (239000/437388 | Training Loss: -9.99090843833983e-06, Learning Rate: 9.192889592668507e-06) Step... (239500/437388 | Training Loss: -9.988229066948406e-06, Learning Rate: 9.170027624350041e-06) Step... (240000/437388 | Training Loss: -9.994514584832359e-06, Learning Rate: 9.147164746536873e-06) Step... (240500/437388 | Training Loss: -9.993158528232016e-06, Learning Rate: 9.124301868723705e-06) Step... (241000/437388 | Training Loss: -9.996389962907415e-06, Learning Rate: 9.101438081415836e-06) Step... (241500/437388 | Training Loss: -9.979153219319414e-06, Learning Rate: 9.078575203602668e-06) Step... (242000/437388 | Training Loss: -9.989636055252049e-06, Learning Rate: 9.0557123257895e-06) Step... (242500/437388 | Training Loss: -9.993711501010694e-06, Learning Rate: 9.03284853848163e-06) Step... (243000/437388 | Training Loss: -9.888614840747323e-06, Learning Rate: 9.009985660668463e-06) Step... (243500/437388 | Training Loss: -9.989265890908428e-06, Learning Rate: 8.987123692349996e-06) Step... (244000/437388 | Training Loss: -9.879224307951517e-06, Learning Rate: 8.964260814536829e-06) Step... (244500/437388 | Training Loss: -9.99042094917968e-06, Learning Rate: 8.941397027228959e-06) Step... (245000/437388 | Training Loss: -9.990229045797605e-06, Learning Rate: 8.918534149415791e-06) Step... (245500/437388 | Training Loss: -9.98977520794142e-06, Learning Rate: 8.895671271602623e-06) Step... (246000/437388 | Training Loss: -9.997487723012455e-06, Learning Rate: 8.872808393789455e-06) Step... (246500/437388 | Training Loss: -9.986724762711674e-06, Learning Rate: 8.849944606481586e-06) Step... (247000/437388 | Training Loss: -9.988412784878165e-06, Learning Rate: 8.827081728668418e-06) Step... (247500/437388 | Training Loss: -9.990839316742495e-06, Learning Rate: 8.804219760349952e-06) Step... (248000/437388 | Training Loss: -9.991884326154832e-06, Learning Rate: 8.781356882536784e-06) Step... (248500/437388 | Training Loss: -9.986639270209707e-06, Learning Rate: 8.758493095228914e-06) Step... (249000/437388 | Training Loss: -9.890503861242905e-06, Learning Rate: 8.735630217415746e-06) Step... (249500/437388 | Training Loss: -9.99127769318875e-06, Learning Rate: 8.712767339602578e-06) Step... (250000/437388 | Training Loss: -9.987336852645967e-06, Learning Rate: 8.689903552294709e-06) Step... (250500/437388 | Training Loss: -9.989980753744021e-06, Learning Rate: 8.667040674481541e-06) Step... (251000/437388 | Training Loss: -9.989371392293833e-06, Learning Rate: 8.644177796668373e-06) Step... (251500/437388 | Training Loss: -9.898749340209179e-06, Learning Rate: 8.621315828349907e-06) Step... (252000/437388 | Training Loss: -9.987928933696821e-06, Learning Rate: 8.598452950536739e-06) Step... (252500/437388 | Training Loss: -9.976372894016095e-06, Learning Rate: 8.57558916322887e-06) Step... (253000/437388 | Training Loss: -9.986349141399842e-06, Learning Rate: 8.552726285415702e-06) Step... (253500/437388 | Training Loss: -9.896906703943387e-06, Learning Rate: 8.529863407602534e-06) Step... (254000/437388 | Training Loss: -9.98828727460932e-06, Learning Rate: 8.506999620294664e-06) Step... (254500/437388 | Training Loss: -9.989612408389803e-06, Learning Rate: 8.484136742481496e-06) Step... (255000/437388 | Training Loss: -9.949071682058275e-06, Learning Rate: 8.46127477416303e-06) Step... (255500/437388 | Training Loss: -9.882474842015654e-06, Learning Rate: 8.438411896349862e-06) Step... (256000/437388 | Training Loss: -9.836573553911876e-06, Learning Rate: 8.415549018536694e-06) Step... (256500/437388 | Training Loss: -9.986971235775854e-06, Learning Rate: 8.392685231228825e-06) Step... (257000/437388 | Training Loss: -9.986095392378047e-06, Learning Rate: 8.369822353415657e-06) Step... (257500/437388 | Training Loss: -9.99188887362834e-06, Learning Rate: 8.346959475602489e-06) Step... (258000/437388 | Training Loss: -9.967543519451283e-06, Learning Rate: 8.32409568829462e-06) Step... (258500/437388 | Training Loss: -9.990917533286847e-06, Learning Rate: 8.301232810481451e-06) Step... (259000/437388 | Training Loss: -9.987032171920873e-06, Learning Rate: 8.278370842162985e-06) Step... (259500/437388 | Training Loss: -9.992443665396422e-06, Learning Rate: 8.255507964349817e-06) Step... (260000/437388 | Training Loss: -9.987326848204248e-06, Learning Rate: 8.232644177041948e-06) Step... (260500/437388 | Training Loss: -9.988285455619916e-06, Learning Rate: 8.20978129922878e-06) Step... (261000/437388 | Training Loss: -9.995150321628898e-06, Learning Rate: 8.186918421415612e-06) Step... (261500/437388 | Training Loss: -9.988363672164269e-06, Learning Rate: 8.164055543602444e-06) Step... (262000/437388 | Training Loss: -9.995621439884417e-06, Learning Rate: 8.141191756294575e-06) Step... (262500/437388 | Training Loss: -9.986416444007773e-06, Learning Rate: 8.118328878481407e-06) Step... (263000/437388 | Training Loss: -9.989635145757347e-06, Learning Rate: 8.09546691016294e-06) Step... (263500/437388 | Training Loss: -9.992758350563236e-06, Learning Rate: 8.072604032349773e-06) Step... (264000/437388 | Training Loss: -9.988560123019852e-06, Learning Rate: 8.049740245041903e-06) Step... (264500/437388 | Training Loss: -9.983908967114985e-06, Learning Rate: 8.026877367228735e-06) Step... (265000/437388 | Training Loss: -9.989878890337422e-06, Learning Rate: 8.004014489415567e-06) Step... (265500/437388 | Training Loss: -9.982097253669053e-06, Learning Rate: 7.981150702107698e-06) Step... (266000/437388 | Training Loss: -9.986391887650825e-06, Learning Rate: 7.95828782429453e-06) Step... (266500/437388 | Training Loss: -9.987573321268428e-06, Learning Rate: 7.935424946481362e-06) Step... (267000/437388 | Training Loss: -9.99004168988904e-06, Learning Rate: 7.912562978162896e-06) Step... (267500/437388 | Training Loss: -9.990327271225397e-06, Learning Rate: 7.889700100349728e-06) Step... (268000/437388 | Training Loss: -9.991719707613811e-06, Learning Rate: 7.866836313041858e-06) Step... (268500/437388 | Training Loss: -9.99336589302402e-06, Learning Rate: 7.84397343522869e-06) Step... (269000/437388 | Training Loss: -9.927234714268707e-06, Learning Rate: 7.821110557415523e-06) Step... (269500/437388 | Training Loss: -9.992361810873263e-06, Learning Rate: 7.798246770107653e-06) Step... (270000/437388 | Training Loss: -9.868877896224149e-06, Learning Rate: 7.775383892294485e-06) Step... (270500/437388 | Training Loss: -9.974767635867465e-06, Learning Rate: 7.752521923976019e-06) Step... (271000/437388 | Training Loss: -9.993795174523257e-06, Learning Rate: 7.729659046162851e-06) Step... (271500/437388 | Training Loss: -9.992438208428212e-06, Learning Rate: 7.706796168349683e-06) Step... (272000/437388 | Training Loss: -9.864586900221184e-06, Learning Rate: 7.683932381041814e-06) Step... (272500/437388 | Training Loss: -9.992800187319517e-06, Learning Rate: 7.661069503228646e-06) Step... (273000/437388 | Training Loss: -9.991427759814542e-06, Learning Rate: 7.638206625415478e-06) Step... (273500/437388 | Training Loss: -9.99362782749813e-06, Learning Rate: 7.615343292854959e-06) Step... (274000/437388 | Training Loss: -9.998650966736022e-06, Learning Rate: 7.59247996029444e-06) Step... (274500/437388 | Training Loss: -9.986426448449492e-06, Learning Rate: 7.569617991975974e-06) Step... (275000/437388 | Training Loss: -9.892217349261045e-06, Learning Rate: 7.5467546594154555e-06) Step... (275500/437388 | Training Loss: -9.984280040953308e-06, Learning Rate: 7.523891781602288e-06) Step... (276000/437388 | Training Loss: -9.993502317229286e-06, Learning Rate: 7.501028449041769e-06) Step... (276500/437388 | Training Loss: -9.994282663683407e-06, Learning Rate: 7.478165571228601e-06) Step... (277000/437388 | Training Loss: -9.989940735977143e-06, Learning Rate: 7.455302238668082e-06) Step... (277500/437388 | Training Loss: -9.871511792880483e-06, Learning Rate: 7.432439360854914e-06) Step... (278000/437388 | Training Loss: -9.991127626562957e-06, Learning Rate: 7.409576028294396e-06) Step... (278500/437388 | Training Loss: -9.99518670141697e-06, Learning Rate: 7.3867140599759296e-06) Step... (279000/437388 | Training Loss: -9.990813850890845e-06, Learning Rate: 7.363850727415411e-06) Step... (279500/437388 | Training Loss: -9.992672858061269e-06, Learning Rate: 7.340987849602243e-06) Step... (280000/437388 | Training Loss: -9.99051371763926e-06, Learning Rate: 7.318124517041724e-06) Step... (280500/437388 | Training Loss: -9.989868885895703e-06, Learning Rate: 7.295261639228556e-06) Step... (281000/437388 | Training Loss: -9.992634659283794e-06, Learning Rate: 7.2723983066680375e-06) Step... (281500/437388 | Training Loss: -9.985859833250288e-06, Learning Rate: 7.249534974107519e-06) Step... (282000/437388 | Training Loss: -9.987268640543334e-06, Learning Rate: 7.226672096294351e-06) Step... (282500/437388 | Training Loss: -9.997535016736947e-06, Learning Rate: 7.203810127975885e-06) Step... (283000/437388 | Training Loss: -9.988288184104022e-06, Learning Rate: 7.180946795415366e-06) Step... (283500/437388 | Training Loss: -9.813671567826532e-06, Learning Rate: 7.158083917602198e-06) Step... (284000/437388 | Training Loss: -9.99168332782574e-06, Learning Rate: 7.1352205850416794e-06) Step... (284500/437388 | Training Loss: -9.990868420572951e-06, Learning Rate: 7.112357252481161e-06) Step... (285000/437388 | Training Loss: -9.990955732064322e-06, Learning Rate: 7.089494374667993e-06) Step... (285500/437388 | Training Loss: -9.994293577619828e-06, Learning Rate: 7.066631042107474e-06) Step... (286000/437388 | Training Loss: -9.994688298320398e-06, Learning Rate: 7.043769073789008e-06) Step... (286500/437388 | Training Loss: -9.88380634225905e-06, Learning Rate: 7.02090619597584e-06) Step... (287000/437388 | Training Loss: -9.993795174523257e-06, Learning Rate: 6.998042863415321e-06) Step... (287500/437388 | Training Loss: -9.992005288950168e-06, Learning Rate: 6.975179530854803e-06) Step... (288000/437388 | Training Loss: -9.986464647226967e-06, Learning Rate: 6.952316653041635e-06) Step... (288500/437388 | Training Loss: -9.993902494898066e-06, Learning Rate: 6.929453320481116e-06) Step... (292000/437388 | Training Loss: -9.987356861529406e-06, Learning Rate: 6.890768872835906e-06) Step... (292500/437388 | Training Loss: -9.986496479541529e-06, Learning Rate: 6.86790690451744e-06) Step... (293000/437388 | Training Loss: -9.991899787564762e-06, Learning Rate: 6.845043571956921e-06) Step... (293500/437388 | Training Loss: -9.889041393762454e-06, Learning Rate: 6.8221806941437535e-06) Step... (294000/437388 | Training Loss: -9.888466593110934e-06, Learning Rate: 6.799317361583235e-06) Step... (294500/437388 | Training Loss: -9.98623545456212e-06, Learning Rate: 6.776454483770067e-06) Step... (295000/437388 | Training Loss: -9.99187341221841e-06, Learning Rate: 6.753591151209548e-06) Step... (295500/437388 | Training Loss: -9.994157153414562e-06, Learning Rate: 6.730727818649029e-06) Step... (296000/437388 | Training Loss: -9.98769701254787e-06, Learning Rate: 6.707865850330563e-06) Step... (296500/437388 | Training Loss: -9.991597835323773e-06, Learning Rate: 6.685002972517395e-06) Step... (297000/437388 | Training Loss: -9.990687431127299e-06, Learning Rate: 6.662139639956877e-06) Step... (297500/437388 | Training Loss: -9.989196769311093e-06, Learning Rate: 6.639276762143709e-06) Step... (298000/437388 | Training Loss: -9.984323696698993e-06, Learning Rate: 6.61641342958319e-06) Step... (298500/437388 | Training Loss: -9.990490070777014e-06, Learning Rate: 6.593550097022671e-06) Step... (299000/437388 | Training Loss: -9.995187610911671e-06, Learning Rate: 6.570687219209503e-06) Step... (299500/437388 | Training Loss: -9.994940228352789e-06, Learning Rate: 6.547823886648985e-06) Step... (300000/437388 | Training Loss: -9.991530532715842e-06, Learning Rate: 6.5249619183305185e-06) Step... (300500/437388 | Training Loss: -9.991787010221742e-06, Learning Rate: 6.502099040517351e-06) Step... (301000/437388 | Training Loss: -9.996144399337936e-06, Learning Rate: 6.479235707956832e-06) Step... (301500/437388 | Training Loss: -9.986103577830363e-06, Learning Rate: 6.456372375396313e-06) Step... (302000/437388 | Training Loss: -9.995266736950725e-06, Learning Rate: 6.433509497583145e-06) Step... (302500/437388 | Training Loss: -9.98793893813854e-06, Learning Rate: 6.4106461650226265e-06) Step... (303000/437388 | Training Loss: -9.90675289358478e-06, Learning Rate: 6.387783287209459e-06) Step... (303500/437388 | Training Loss: -9.99127769318875e-06, Learning Rate: 6.36491995464894e-06) Step... (304000/437388 | Training Loss: -9.99108851829078e-06, Learning Rate: 6.342057986330474e-06) Step... (304500/437388 | Training Loss: -9.986890290747397e-06, Learning Rate: 6.319194653769955e-06) Step... (305000/437388 | Training Loss: -9.970584869734012e-06, Learning Rate: 6.296331775956787e-06) Step... (305500/437388 | Training Loss: -9.912827408697922e-06, Learning Rate: 6.273468443396268e-06) Step... (306000/437388 | Training Loss: -9.989652426156681e-06, Learning Rate: 6.2506055655831005e-06) Step... (306500/437388 | Training Loss: -9.927895916916896e-06, Learning Rate: 6.227742233022582e-06) Step... (307000/437388 | Training Loss: -9.994610081776045e-06, Learning Rate: 6.204878900462063e-06) Step... (307500/437388 | Training Loss: -9.992891136789694e-06, Learning Rate: 6.182016022648895e-06) Step... (308000/437388 | Training Loss: -9.995262189477216e-06, Learning Rate: 6.159154054330429e-06) Step... (308500/437388 | Training Loss: -9.99057010631077e-06, Learning Rate: 6.13629072176991e-06) Step... (309000/437388 | Training Loss: -9.898338248603977e-06, Learning Rate: 6.1134278439567424e-06) Step... (309500/437388 | Training Loss: -9.992212653742172e-06, Learning Rate: 6.090564511396224e-06) Step... (310000/437388 | Training Loss: -9.99668009171728e-06, Learning Rate: 6.067701633583056e-06) Step... (310500/437388 | Training Loss: -9.90634271147428e-06, Learning Rate: 6.044838301022537e-06) Step... (311000/437388 | Training Loss: -9.994634638132993e-06, Learning Rate: 6.021974968462018e-06) Step... (311500/437388 | Training Loss: -9.987345038098283e-06, Learning Rate: 5.999113000143552e-06) Step... (312000/437388 | Training Loss: -9.997144843509886e-06, Learning Rate: 5.976250122330384e-06) Step... (312500/437388 | Training Loss: -9.992167179007083e-06, Learning Rate: 5.9533867897698656e-06) Step... (313000/437388 | Training Loss: -9.994231731980108e-06, Learning Rate: 5.930523911956698e-06) Step... (313500/437388 | Training Loss: -9.989461432269309e-06, Learning Rate: 5.907660579396179e-06) Step... (314000/437388 | Training Loss: -9.991224033001345e-06, Learning Rate: 5.88479724683566e-06) Step... (314500/437388 | Training Loss: -1.0000232578022406e-05, Learning Rate: 5.861934369022492e-06) Step... (315000/437388 | Training Loss: -9.993785170081537e-06, Learning Rate: 5.8390710364619736e-06) Step... (315500/437388 | Training Loss: -9.992422747018281e-06, Learning Rate: 5.8162090681435075e-06) Step... (316000/437388 | Training Loss: -9.84826147032436e-06, Learning Rate: 5.79334619033034e-06) Step... (316500/437388 | Training Loss: -9.992265404434875e-06, Learning Rate: 5.770482857769821e-06) Step... (317000/437388 | Training Loss: -9.891874469758477e-06, Learning Rate: 5.747619525209302e-06) Step... (317500/437388 | Training Loss: -9.993611456593499e-06, Learning Rate: 5.724756647396134e-06) Step... (318000/437388 | Training Loss: -9.983097697841004e-06, Learning Rate: 5.7018933148356155e-06) Step... (318500/437388 | Training Loss: -9.994835636462085e-06, Learning Rate: 5.679030437022448e-06) Step... (319000/437388 | Training Loss: -9.995305845222902e-06, Learning Rate: 5.656167104461929e-06) Step... (319500/437388 | Training Loss: -9.988099918700755e-06, Learning Rate: 5.633305136143463e-06) Step... (320000/437388 | Training Loss: -9.989049431169406e-06, Learning Rate: 5.610441803582944e-06) Step... (320500/437388 | Training Loss: -9.993370440497529e-06, Learning Rate: 5.587578925769776e-06) Step... (321000/437388 | Training Loss: -9.994093488785438e-06, Learning Rate: 5.564715593209257e-06) Step... (321500/437388 | Training Loss: -9.994993888540193e-06, Learning Rate: 5.5418527153960895e-06) Step... (322000/437388 | Training Loss: -9.990870239562355e-06, Learning Rate: 5.518989382835571e-06) Step... (322500/437388 | Training Loss: -9.895560651784763e-06, Learning Rate: 5.496126050275052e-06) Step... (323000/437388 | Training Loss: -9.910821972880512e-06, Learning Rate: 5.473263172461884e-06) Step... (323500/437388 | Training Loss: -9.99239637167193e-06, Learning Rate: 5.450401204143418e-06) Step... (324000/437388 | Training Loss: -9.987401426769793e-06, Learning Rate: 5.427537871582899e-06) Step... (324500/437388 | Training Loss: -9.89317413768731e-06, Learning Rate: 5.404674993769731e-06) Step... (325000/437388 | Training Loss: -9.997502274927683e-06, Learning Rate: 5.381811661209213e-06) Step... (325500/437388 | Training Loss: -9.915451300912537e-06, Learning Rate: 5.358948783396045e-06) Step... (326000/437388 | Training Loss: -9.890471119433641e-06, Learning Rate: 5.336085450835526e-06) Step... (326500/437388 | Training Loss: -9.988754754886031e-06, Learning Rate: 5.313222118275007e-06) Step... (327000/437388 | Training Loss: -9.989450518332887e-06, Learning Rate: 5.290360149956541e-06) Step... (327500/437388 | Training Loss: -9.902462807076517e-06, Learning Rate: 5.267497272143373e-06) Step... (328000/437388 | Training Loss: -9.993040293920785e-06, Learning Rate: 5.2446339395828545e-06) Step... (328500/437388 | Training Loss: -9.992443665396422e-06, Learning Rate: 5.221771061769687e-06) Step... (329000/437388 | Training Loss: -9.988685633288696e-06, Learning Rate: 5.198907729209168e-06) Step... (329500/437388 | Training Loss: -9.994968422688544e-06, Learning Rate: 5.176044396648649e-06) Step... (330000/437388 | Training Loss: -9.987100384023506e-06, Learning Rate: 5.153181518835481e-06) Step... (330500/437388 | Training Loss: -9.991315891966224e-06, Learning Rate: 5.1303181862749625e-06) Step... (331000/437388 | Training Loss: -9.987525118049234e-06, Learning Rate: 5.107456217956496e-06) Step... (331500/437388 | Training Loss: -9.993723324441817e-06, Learning Rate: 5.0845933401433285e-06) Step... (332000/437388 | Training Loss: -9.989145837607794e-06, Learning Rate: 5.06173000758281e-06) Step... (332500/437388 | Training Loss: -9.993260391638614e-06, Learning Rate: 5.038866675022291e-06) Step... (333000/437388 | Training Loss: -9.987928933696821e-06, Learning Rate: 5.016003797209123e-06) Step... (333500/437388 | Training Loss: -9.771886652742978e-06, Learning Rate: 4.993140464648604e-06) Step... (334000/437388 | Training Loss: -9.991541446652263e-06, Learning Rate: 4.9702775868354365e-06) Step... (334500/437388 | Training Loss: -9.918113391904626e-06, Learning Rate: 4.947414254274918e-06) Step... (335000/437388 | Training Loss: -9.993021194532048e-06, Learning Rate: 4.924552285956452e-06) Step... (335500/437388 | Training Loss: -9.99302210402675e-06, Learning Rate: 4.901688953395933e-06) Step... (336000/437388 | Training Loss: -9.90634725894779e-06, Learning Rate: 4.878826075582765e-06) Step... (336500/437388 | Training Loss: -9.987375051423442e-06, Learning Rate: 4.855962743022246e-06) Step... (337000/437388 | Training Loss: -9.898597454593983e-06, Learning Rate: 4.833099865209078e-06) Step... (337500/437388 | Training Loss: -9.994385436584707e-06, Learning Rate: 4.81023653264856e-06) Step... (338000/437388 | Training Loss: -9.990299986384343e-06, Learning Rate: 4.787373200088041e-06) Step... (338500/437388 | Training Loss: -9.99488474917598e-06, Learning Rate: 4.764510322274873e-06) Step... (339000/437388 | Training Loss: -9.988407327909954e-06, Learning Rate: 4.741648353956407e-06) Step... (339500/437388 | Training Loss: -1.0000549082178622e-05, Learning Rate: 4.718785021395888e-06) Step... (340000/437388 | Training Loss: -9.993271305575036e-06, Learning Rate: 4.69592214358272e-06) Step... (340500/437388 | Training Loss: -9.868866982287727e-06, Learning Rate: 4.6730588110222016e-06) Step... (341000/437388 | Training Loss: -9.99502844933886e-06, Learning Rate: 4.650195933209034e-06) Step... (341500/437388 | Training Loss: -9.997817869589198e-06, Learning Rate: 4.627332600648515e-06) Step... (342000/437388 | Training Loss: -9.98912310024025e-06, Learning Rate: 4.604469268087996e-06) Step... (342500/437388 | Training Loss: -9.993742423830554e-06, Learning Rate: 4.58160729976953e-06) Step... (343000/437388 | Training Loss: -9.991261322284117e-06, Learning Rate: 4.558744421956362e-06) Step... (343500/437388 | Training Loss: -9.91877732303692e-06, Learning Rate: 4.5358810893958434e-06) Step... (344000/437388 | Training Loss: -9.973462510970421e-06, Learning Rate: 4.5130182115826756e-06) Step... (344500/437388 | Training Loss: -9.988431884266902e-06, Learning Rate: 4.490154879022157e-06) Step... (345000/437388 | Training Loss: -9.998928362620063e-06, Learning Rate: 4.467291546461638e-06) Step... (345500/437388 | Training Loss: -9.994077117880806e-06, Learning Rate: 4.44442866864847e-06) Step... (346000/437388 | Training Loss: -9.992885679821484e-06, Learning Rate: 4.4215653360879514e-06) Step... (346500/437388 | Training Loss: -9.991732440539636e-06, Learning Rate: 4.398703367769485e-06) Step... (347000/437388 | Training Loss: -9.994559150072746e-06, Learning Rate: 4.3758404899563175e-06) Step... (347500/437388 | Training Loss: -9.996765584219247e-06, Learning Rate: 4.352977157395799e-06) Step... (348000/437388 | Training Loss: -1.0088226190418936e-05, Learning Rate: 4.33011382483528e-06) Step... (348500/437388 | Training Loss: -9.913166650221683e-06, Learning Rate: 4.307250947022112e-06) Step... (349000/437388 | Training Loss: -9.994585525419097e-06, Learning Rate: 4.284387614461593e-06) Step... (349500/437388 | Training Loss: -9.991217666538432e-06, Learning Rate: 4.2615247366484255e-06) Step... (350000/437388 | Training Loss: -9.992018021875992e-06, Learning Rate: 4.238661404087907e-06) Step... (350500/437388 | Training Loss: -9.99391522782389e-06, Learning Rate: 4.215799435769441e-06) Step... (351000/437388 | Training Loss: -9.992203558795154e-06, Learning Rate: 4.192936103208922e-06) Step... (351500/437388 | Training Loss: -9.995798791351262e-06, Learning Rate: 4.170073225395754e-06) Step... (352000/437388 | Training Loss: -9.99713665805757e-06, Learning Rate: 4.147209892835235e-06) Step... (352500/437388 | Training Loss: -9.913122994475998e-06, Learning Rate: 4.124347015022067e-06) Step... (353000/437388 | Training Loss: -9.98831274046097e-06, Learning Rate: 4.101483682461549e-06) Step... (353500/437388 | Training Loss: -9.990230864787009e-06, Learning Rate: 4.07862034990103e-06) Step... (354000/437388 | Training Loss: -9.911616871249862e-06, Learning Rate: 4.055757472087862e-06) Step... (354500/437388 | Training Loss: -9.994477295549586e-06, Learning Rate: 4.032895503769396e-06) Step... (355000/437388 | Training Loss: -9.994131687562913e-06, Learning Rate: 4.010032171208877e-06) Step... (355500/437388 | Training Loss: -9.99648000288289e-06, Learning Rate: 3.987169293395709e-06) Step... (356000/437388 | Training Loss: -9.877321645035408e-06, Learning Rate: 3.9643059608351905e-06) Step... (356500/437388 | Training Loss: -9.99268377199769e-06, Learning Rate: 3.941443083022023e-06) Step... (357000/437388 | Training Loss: -9.993210369430017e-06, Learning Rate: 3.918579750461504e-06) Step... (357500/437388 | Training Loss: -9.997766028391197e-06, Learning Rate: 3.895716417900985e-06) Step... (358000/437388 | Training Loss: -9.99319127004128e-06, Learning Rate: 3.872854449582519e-06) Step... (358500/437388 | Training Loss: -9.991641491069458e-06, Learning Rate: 3.849991571769351e-06) Step... (359000/437388 | Training Loss: -9.994190804718528e-06, Learning Rate: 3.827128239208832e-06) Step... (359500/437388 | Training Loss: -9.990322723751888e-06, Learning Rate: 3.804265134021989e-06) Step... (360000/437388 | Training Loss: -9.967581718228757e-06, Learning Rate: 3.7814020288351458e-06) Step... (360500/437388 | Training Loss: -9.993122148443945e-06, Learning Rate: 3.7585389236483024e-06) Step... (361000/437388 | Training Loss: -9.911255801853258e-06, Learning Rate: 3.735675818461459e-06) Step... (361500/437388 | Training Loss: -9.991317710955627e-06, Learning Rate: 3.7128124859009404e-06) Step... (362000/437388 | Training Loss: -9.990660146286245e-06, Learning Rate: 3.6899505175824743e-06) Step... (362500/437388 | Training Loss: -9.98957511910703e-06, Learning Rate: 3.667087412395631e-06) Step... (363000/437388 | Training Loss: -9.99259646050632e-06, Learning Rate: 3.6442243072087876e-06) Step... (363500/437388 | Training Loss: -9.908624633681029e-06, Learning Rate: 3.6213612020219443e-06) Step... (364000/437388 | Training Loss: -9.996019798563793e-06, Learning Rate: 3.598498096835101e-06) Step... (364500/437388 | Training Loss: -9.999227586376946e-06, Learning Rate: 3.5756347642745823e-06) Step... (365000/437388 | Training Loss: -1.0177258445764892e-05, Learning Rate: 3.552771659087739e-06) Step... (365500/437388 | Training Loss: -9.996550943469629e-06, Learning Rate: 3.5299085539008956e-06) Step... (366000/437388 | Training Loss: -9.991243132390082e-06, Learning Rate: 3.5070465855824295e-06) Step... (366500/437388 | Training Loss: -9.994861102313735e-06, Learning Rate: 3.4841834803955862e-06) Step... (367000/437388 | Training Loss: -9.992957529902924e-06, Learning Rate: 3.461320375208743e-06) Step... (367500/437388 | Training Loss: -9.995338587032165e-06, Learning Rate: 3.4384572700218996e-06) Step... (368000/437388 | Training Loss: -9.993656931328587e-06, Learning Rate: 3.415593937461381e-06) Step... (368500/437388 | Training Loss: -9.993989806389436e-06, Learning Rate: 3.3927308322745375e-06) Step... (369000/437388 | Training Loss: -9.97816641756799e-06, Learning Rate: 3.3698677270876942e-06) Step... (369500/437388 | Training Loss: -9.991295883082785e-06, Learning Rate: 3.347004621900851e-06) Step... (370000/437388 | Training Loss: -9.983924428524915e-06, Learning Rate: 3.324142653582385e-06) Step... (370500/437388 | Training Loss: -9.992636478273198e-06, Learning Rate: 3.3012795483955415e-06) Step... (371000/437388 | Training Loss: -9.99076473817695e-06, Learning Rate: 3.2784162158350227e-06) Step... (371500/437388 | Training Loss: -9.991391380026471e-06, Learning Rate: 3.2555531106481794e-06) Step... (372000/437388 | Training Loss: -1.008028175419895e-05, Learning Rate: 3.232690005461336e-06) Step... (372500/437388 | Training Loss: -9.99081112240674e-06, Learning Rate: 3.209826900274493e-06) Step... (373000/437388 | Training Loss: -9.99004168988904e-06, Learning Rate: 3.1869637950876495e-06) Step... (373500/437388 | Training Loss: -9.99938310997095e-06, Learning Rate: 3.1641018267691834e-06) Step... (374000/437388 | Training Loss: -1.0080255378852598e-05, Learning Rate: 3.14123872158234e-06) Step... (374500/437388 | Training Loss: -9.997427696362138e-06, Learning Rate: 3.1183753890218213e-06) Step... (375000/437388 | Training Loss: -9.996185326599516e-06, Learning Rate: 3.095512283834978e-06) Step... (375500/437388 | Training Loss: -9.999781468650326e-06, Learning Rate: 3.0726491786481347e-06) Step... (376000/437388 | Training Loss: -9.994220818043686e-06, Learning Rate: 3.0497860734612914e-06) Step... (376500/437388 | Training Loss: -9.953191693057306e-06, Learning Rate: 3.026922968274448e-06) Step... (377000/437388 | Training Loss: -9.992988452722784e-06, Learning Rate: 3.0040596357139293e-06) Step... (377500/437388 | Training Loss: -9.907163985189982e-06, Learning Rate: 2.9811976673954632e-06) Step... (378000/437388 | Training Loss: -9.990209946408868e-06, Learning Rate: 2.95833456220862e-06) Step... (378500/437388 | Training Loss: -9.990634680434596e-06, Learning Rate: 2.9354714570217766e-06) Step... (379000/437388 | Training Loss: -9.993327694246545e-06, Learning Rate: 2.9126083518349333e-06) Step... (379500/437388 | Training Loss: -9.99444364424562e-06, Learning Rate: 2.88974524664809e-06) Step... (380000/437388 | Training Loss: -9.992084414989222e-06, Learning Rate: 2.866881914087571e-06) Step... (380500/437388 | Training Loss: -9.99133408186026e-06, Learning Rate: 2.844018808900728e-06) Step... (381000/437388 | Training Loss: -1.0079415915242862e-05, Learning Rate: 2.8211557037138846e-06) Step... (381500/437388 | Training Loss: -9.916247108776588e-06, Learning Rate: 2.7982937353954185e-06) Step... (382000/437388 | Training Loss: -9.982487426896114e-06, Learning Rate: 2.775430630208575e-06) Step... (382500/437388 | Training Loss: -9.9933040473843e-06, Learning Rate: 2.752567525021732e-06) Step... (383000/437388 | Training Loss: -9.994908396038227e-06, Learning Rate: 2.7297044198348885e-06) Step... (383500/437388 | Training Loss: -9.994670108426362e-06, Learning Rate: 2.7068410872743698e-06) Step... (384000/437388 | Training Loss: -9.991264960262924e-06, Learning Rate: 2.6839779820875265e-06) Step... (384500/437388 | Training Loss: -9.991925253416412e-06, Learning Rate: 2.661114876900683e-06) Step... (385000/437388 | Training Loss: -9.99308394966647e-06, Learning Rate: 2.63825177171384e-06) Step... (385500/437388 | Training Loss: -9.891340596368536e-06, Learning Rate: 2.6153898033953737e-06) Step... (386000/437388 | Training Loss: -9.992604645958636e-06, Learning Rate: 2.5925266982085304e-06) Step... (386500/437388 | Training Loss: -9.993276762543246e-06, Learning Rate: 2.5696633656480117e-06) Step... (387000/437388 | Training Loss: -9.99683379632188e-06, Learning Rate: 2.5468002604611684e-06) Step... (387500/437388 | Training Loss: -9.990859325625934e-06, Learning Rate: 2.523937155274325e-06) Step... (388000/437388 | Training Loss: -9.989444151869975e-06, Learning Rate: 2.5010740500874817e-06) Step... (388500/437388 | Training Loss: -9.994049833039753e-06, Learning Rate: 2.4782109449006384e-06) Step... (389000/437388 | Training Loss: -9.992872946895659e-06, Learning Rate: 2.4553476123401197e-06) Step... (389500/437388 | Training Loss: -9.992896593757905e-06, Learning Rate: 2.4324856440216536e-06) Step... (390000/437388 | Training Loss: -9.908483662002254e-06, Learning Rate: 2.4096225388348103e-06) Step... (390500/437388 | Training Loss: -9.917429451888893e-06, Learning Rate: 2.386759433647967e-06) Step... (391000/437388 | Training Loss: -9.994932042900473e-06, Learning Rate: 2.3638963284611236e-06) Step... (391500/437388 | Training Loss: -9.983873496821616e-06, Learning Rate: 2.3410332232742803e-06) Step... (392000/437388 | Training Loss: -9.989249520003796e-06, Learning Rate: 2.318170118087437e-06) Step... (392500/437388 | Training Loss: -9.915595001075417e-06, Learning Rate: 2.2953067855269182e-06) Step... (393000/437388 | Training Loss: -9.958464033843484e-06, Learning Rate: 2.272444817208452e-06) Step... (393500/437388 | Training Loss: -9.877664524537977e-06, Learning Rate: 2.249581712021609e-06) Step... (394000/437388 | Training Loss: -9.991355909733102e-06, Learning Rate: 2.2267186068347655e-06) Step... (394500/437388 | Training Loss: -1.0070090866065584e-05, Learning Rate: 2.203855501647922e-06) Step... (395000/437388 | Training Loss: -9.988858437282033e-06, Learning Rate: 2.180992396461079e-06) Step... (395500/437388 | Training Loss: -9.995630534831434e-06, Learning Rate: 2.15812906390056e-06) Step... (396000/437388 | Training Loss: -9.991104889195412e-06, Learning Rate: 2.135265958713717e-06) Step... (396500/437388 | Training Loss: -9.994979336624965e-06, Learning Rate: 2.1124028535268735e-06) Step... (397000/437388 | Training Loss: -9.99915755528491e-06, Learning Rate: 2.0895408852084074e-06) Step... (397500/437388 | Training Loss: -9.982877600123174e-06, Learning Rate: 2.066677780021564e-06) Step... (398000/437388 | Training Loss: -9.996765584219247e-06, Learning Rate: 2.043814674834721e-06) Step... (398500/437388 | Training Loss: -9.995555046771187e-06, Learning Rate: 2.0209515696478775e-06) Step... (399000/437388 | Training Loss: -9.994274478231091e-06, Learning Rate: 1.9980882370873587e-06) Step... (399500/437388 | Training Loss: -9.898638381855562e-06, Learning Rate: 1.9752251319005154e-06) Step... (400000/437388 | Training Loss: -9.997678716899827e-06, Learning Rate: 1.952362026713672e-06) Step... (400500/437388 | Training Loss: -9.995639629778452e-06, Learning Rate: 1.9294989215268288e-06) Step... (401000/437388 | Training Loss: -9.981082257581875e-06, Learning Rate: 1.9066369532083627e-06) Step... (401500/437388 | Training Loss: -9.994221727538388e-06, Learning Rate: 1.8837737343346816e-06) Step... (402000/437388 | Training Loss: -1.0003201168728992e-05, Learning Rate: 1.8609106291478383e-06) Step... (402500/437388 | Training Loss: -9.985319593397435e-06, Learning Rate: 1.8380474102741573e-06) Step... (403000/437388 | Training Loss: -9.895094081002753e-06, Learning Rate: 1.815184305087314e-06) Step... (403500/437388 | Training Loss: -9.990133548853919e-06, Learning Rate: 1.7923211999004707e-06) Step... (404000/437388 | Training Loss: -9.994495485443622e-06, Learning Rate: 1.7694579810267896e-06) Step... (404500/437388 | Training Loss: -9.98931864160113e-06, Learning Rate: 1.7465948758399463e-06) Step... (405000/437388 | Training Loss: -9.990131729864515e-06, Learning Rate: 1.7237329075214802e-06) Step... (405500/437388 | Training Loss: -9.997333108913153e-06, Learning Rate: 1.700869802334637e-06) Step... (406000/437388 | Training Loss: -9.993593266699463e-06, Learning Rate: 1.6780065834609559e-06) Step... (406500/437388 | Training Loss: -9.99224266706733e-06, Learning Rate: 1.6551434782741126e-06) Step... (407000/437388 | Training Loss: -9.988008969230577e-06, Learning Rate: 1.6322802594004315e-06) Step... (407500/437388 | Training Loss: -9.990412763727363e-06, Learning Rate: 1.6094171542135882e-06) Step... (408000/437388 | Training Loss: -9.993975254474208e-06, Learning Rate: 1.586554049026745e-06) Step... (408500/437388 | Training Loss: -9.990597391151823e-06, Learning Rate: 1.5636920807082788e-06) Step... (409000/437388 | Training Loss: -9.989906175178476e-06, Learning Rate: 1.5408288618345978e-06) Step... (409500/437388 | Training Loss: -9.99287840386387e-06, Learning Rate: 1.5179657566477545e-06) Step... (410000/437388 | Training Loss: -9.995594155043364e-06, Learning Rate: 1.4951026514609111e-06) Step... (410500/437388 | Training Loss: -9.992272680392489e-06, Learning Rate: 1.4722394325872301e-06) Step... (411000/437388 | Training Loss: -9.995179425459355e-06, Learning Rate: 1.4493763274003868e-06) Step... (411500/437388 | Training Loss: -9.901167686621193e-06, Learning Rate: 1.4265131085267058e-06) Step... (412000/437388 | Training Loss: -9.92278910416644e-06, Learning Rate: 1.4036500033398625e-06) Step... (412500/437388 | Training Loss: -1.0071958968183026e-05, Learning Rate: 1.3807880350213964e-06) Step... (413000/437388 | Training Loss: -9.992265404434875e-06, Learning Rate: 1.357924929834553e-06) Step... (413500/437388 | Training Loss: -9.994447282224428e-06, Learning Rate: 1.335061710960872e-06) Step... (414000/437388 | Training Loss: -9.840313396125566e-06, Learning Rate: 1.3121986057740287e-06) Step... (414500/437388 | Training Loss: -9.993370440497529e-06, Learning Rate: 1.2893355005871854e-06) Step... (415000/437388 | Training Loss: -9.991905244532973e-06, Learning Rate: 1.2664722817135043e-06) Step... (415500/437388 | Training Loss: -9.994333595386706e-06, Learning Rate: 1.243609176526661e-06) Step... (416000/437388 | Training Loss: -9.99580788629828e-06, Learning Rate: 1.22074595765298e-06) Step... (416500/437388 | Training Loss: -9.992885679821484e-06, Learning Rate: 1.1978841030213516e-06) Step... (417000/437388 | Training Loss: -9.9920735010528e-06, Learning Rate: 1.1750208841476706e-06) Step... (417500/437388 | Training Loss: -9.990299076889642e-06, Learning Rate: 1.1521577789608273e-06) Step... (418000/437388 | Training Loss: -9.992359991883859e-06, Learning Rate: 1.1292945600871462e-06) Step... (418500/437388 | Training Loss: -9.990686521632597e-06, Learning Rate: 1.106431454900303e-06) Step... (419000/437388 | Training Loss: -9.99644908006303e-06, Learning Rate: 1.0835683497134596e-06) Step... (419500/437388 | Training Loss: -9.996568223868962e-06, Learning Rate: 1.0607051308397786e-06) Step... (420000/437388 | Training Loss: -9.910583685268648e-06, Learning Rate: 1.0378420256529353e-06) Step... (420500/437388 | Training Loss: -9.99430994852446e-06, Learning Rate: 1.0149800573344692e-06) Step... (421000/437388 | Training Loss: -9.995170330512337e-06, Learning Rate: 9.921169521476259e-07) Step... (421500/437388 | Training Loss: -9.99375060928287e-06, Learning Rate: 9.692537332739448e-07) Step... (422000/437388 | Training Loss: -9.801300620893016e-06, Learning Rate: 9.463906280871015e-07) Step... (422500/437388 | Training Loss: -9.993218554882333e-06, Learning Rate: 9.235274660568393e-07) Step... (423000/437388 | Training Loss: -9.99222174868919e-06, Learning Rate: 9.006643040265772e-07) Step... (423500/437388 | Training Loss: -9.997004781325813e-06, Learning Rate: 8.77801141996315e-07) Step... (424000/437388 | Training Loss: -9.988641977543011e-06, Learning Rate: 8.549391736778489e-07) Step... (424500/437388 | Training Loss: -9.991374099627137e-06, Learning Rate: 8.320760684910056e-07) Step... (425000/437388 | Training Loss: -9.996907465392724e-06, Learning Rate: 8.092129064607434e-07) Step... (425500/437388 | Training Loss: -9.989498721552081e-06, Learning Rate: 7.863497444304812e-07) Step... (426000/437388 | Training Loss: -9.996458175010048e-06, Learning Rate: 7.63486582400219e-07) Step... (426500/437388 | Training Loss: -9.996458175010048e-06, Learning Rate: 7.406234772133757e-07) Step... (427000/437388 | Training Loss: -9.994309039029758e-06, Learning Rate: 7.177603151831136e-07) Step... (427500/437388 | Training Loss: -9.993862477131188e-06, Learning Rate: 6.948971531528514e-07) Step... (428000/437388 | Training Loss: -9.992753803089727e-06, Learning Rate: 6.720351848343853e-07) Step... (428500/437388 | Training Loss: -9.992030754801817e-06, Learning Rate: 6.49172079647542e-07) Step... (429000/437388 | Training Loss: -9.994111678679474e-06, Learning Rate: 6.263089176172798e-07) Step... (429500/437388 | Training Loss: -9.992382729251403e-06, Learning Rate: 6.034457555870176e-07) Step... (430000/437388 | Training Loss: -9.993003004638012e-06, Learning Rate: 5.805825935567555e-07) Step... (430500/437388 | Training Loss: -9.997980669140816e-06, Learning Rate: 5.577194315264933e-07) Step... (431000/437388 | Training Loss: -9.994604624807835e-06, Learning Rate: 5.3485632633965e-07) Step... (431500/437388 | Training Loss: -9.988767487811856e-06, Learning Rate: 5.119931643093878e-07) Step... (432000/437388 | Training Loss: -9.927052815328352e-06, Learning Rate: 4.891311959909217e-07) Step... (432500/437388 | Training Loss: -9.995008440455422e-06, Learning Rate: 4.6626806238236895e-07) Step... (433000/437388 | Training Loss: -9.990942999138497e-06, Learning Rate: 4.434049003521068e-07) Step... (433500/437388 | Training Loss: -9.991314982471522e-06, Learning Rate: 4.2054176674355404e-07) Step... (434000/437388 | Training Loss: -9.911815141094849e-06, Learning Rate: 3.9767860471329186e-07) Step... (434500/437388 | Training Loss: -9.984001735574566e-06, Learning Rate: 3.748154426830297e-07)