{ "best_metric": 0.09185712039470673, "best_model_checkpoint": "checkpoints_corpus_v2/checkpoint-211352", "epoch": 40.0, "eval_steps": 500, "global_step": 1056760, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "learning_rate": 1.9990537113441085e-05, "loss": 1.5304, "step": 500 }, { "epoch": 0.04, "learning_rate": 1.998107422688217e-05, "loss": 0.8661, "step": 1000 }, { "epoch": 0.06, "learning_rate": 1.9971611340323253e-05, "loss": 0.6917, "step": 1500 }, { "epoch": 0.08, "learning_rate": 1.996214845376434e-05, "loss": 0.573, "step": 2000 }, { "epoch": 0.09, "learning_rate": 1.995268556720542e-05, "loss": 0.5146, "step": 2500 }, { "epoch": 0.11, "learning_rate": 1.9943222680646507e-05, "loss": 0.4646, "step": 3000 }, { "epoch": 0.13, "learning_rate": 1.993375979408759e-05, "loss": 0.4205, "step": 3500 }, { "epoch": 0.15, "learning_rate": 1.9924315833301794e-05, "loss": 0.3912, "step": 4000 }, { "epoch": 0.17, "learning_rate": 1.9914852946742874e-05, "loss": 0.3759, "step": 4500 }, { "epoch": 0.19, "learning_rate": 1.990539006018396e-05, "loss": 0.3575, "step": 5000 }, { "epoch": 0.21, "learning_rate": 1.9895927173625045e-05, "loss": 0.3415, "step": 5500 }, { "epoch": 0.23, "learning_rate": 1.988646428706613e-05, "loss": 0.3225, "step": 6000 }, { "epoch": 0.25, "learning_rate": 1.9877001400507212e-05, "loss": 0.3104, "step": 6500 }, { "epoch": 0.26, "learning_rate": 1.9867557439721415e-05, "loss": 0.2976, "step": 7000 }, { "epoch": 0.28, "learning_rate": 1.98580945531625e-05, "loss": 0.2939, "step": 7500 }, { "epoch": 0.3, "learning_rate": 1.9848631666603582e-05, "loss": 0.2803, "step": 8000 }, { "epoch": 0.32, "learning_rate": 1.9839168780044666e-05, "loss": 0.2759, "step": 8500 }, { "epoch": 0.34, "learning_rate": 1.982970589348575e-05, "loss": 0.2741, "step": 9000 }, { "epoch": 0.36, "learning_rate": 1.9820261932699953e-05, "loss": 0.2661, "step": 9500 }, { "epoch": 0.38, "learning_rate": 1.9810817971914152e-05, "loss": 0.261, "step": 10000 }, { "epoch": 0.4, "learning_rate": 1.980135508535524e-05, "loss": 0.2514, "step": 10500 }, { "epoch": 0.42, "learning_rate": 1.9791892198796323e-05, "loss": 0.2514, "step": 11000 }, { "epoch": 0.44, "learning_rate": 1.9782429312237407e-05, "loss": 0.2437, "step": 11500 }, { "epoch": 0.45, "learning_rate": 1.977296642567849e-05, "loss": 0.2403, "step": 12000 }, { "epoch": 0.47, "learning_rate": 1.9763503539119574e-05, "loss": 0.2282, "step": 12500 }, { "epoch": 0.49, "learning_rate": 1.9754040652560658e-05, "loss": 0.2308, "step": 13000 }, { "epoch": 0.51, "learning_rate": 1.974457776600174e-05, "loss": 0.2303, "step": 13500 }, { "epoch": 0.53, "learning_rate": 1.973511487944283e-05, "loss": 0.2212, "step": 14000 }, { "epoch": 0.55, "learning_rate": 1.9725651992883912e-05, "loss": 0.2171, "step": 14500 }, { "epoch": 0.57, "learning_rate": 1.9716189106324996e-05, "loss": 0.2156, "step": 15000 }, { "epoch": 0.59, "learning_rate": 1.9706745145539195e-05, "loss": 0.2159, "step": 15500 }, { "epoch": 0.61, "learning_rate": 1.9697282258980282e-05, "loss": 0.2095, "step": 16000 }, { "epoch": 0.62, "learning_rate": 1.9687819372421363e-05, "loss": 0.209, "step": 16500 }, { "epoch": 0.64, "learning_rate": 1.967835648586245e-05, "loss": 0.2079, "step": 17000 }, { "epoch": 0.66, "learning_rate": 1.9668893599303533e-05, "loss": 0.1994, "step": 17500 }, { "epoch": 0.68, "learning_rate": 1.9659449638517736e-05, "loss": 0.2032, "step": 18000 }, { "epoch": 0.7, "learning_rate": 1.964998675195882e-05, "loss": 0.1983, "step": 18500 }, { "epoch": 0.72, "learning_rate": 1.9640523865399904e-05, "loss": 0.1989, "step": 19000 }, { "epoch": 0.74, "learning_rate": 1.9631060978840987e-05, "loss": 0.1938, "step": 19500 }, { "epoch": 0.76, "learning_rate": 1.962161701805519e-05, "loss": 0.2004, "step": 20000 }, { "epoch": 0.78, "learning_rate": 1.9612154131496274e-05, "loss": 0.1946, "step": 20500 }, { "epoch": 0.79, "learning_rate": 1.9602691244937358e-05, "loss": 0.1849, "step": 21000 }, { "epoch": 0.81, "learning_rate": 1.959322835837844e-05, "loss": 0.1874, "step": 21500 }, { "epoch": 0.83, "learning_rate": 1.9583765471819525e-05, "loss": 0.1896, "step": 22000 }, { "epoch": 0.85, "learning_rate": 1.957430258526061e-05, "loss": 0.1865, "step": 22500 }, { "epoch": 0.87, "learning_rate": 1.9564839698701696e-05, "loss": 0.1815, "step": 23000 }, { "epoch": 0.89, "learning_rate": 1.9555376812142776e-05, "loss": 0.1819, "step": 23500 }, { "epoch": 0.91, "learning_rate": 1.9545913925583863e-05, "loss": 0.187, "step": 24000 }, { "epoch": 0.93, "learning_rate": 1.9536451039024947e-05, "loss": 0.179, "step": 24500 }, { "epoch": 0.95, "learning_rate": 1.952700707823915e-05, "loss": 0.1775, "step": 25000 }, { "epoch": 0.97, "learning_rate": 1.951754419168023e-05, "loss": 0.1751, "step": 25500 }, { "epoch": 0.98, "learning_rate": 1.9508100230894433e-05, "loss": 0.1748, "step": 26000 }, { "epoch": 1.0, "eval_bleu": 88.9021, "eval_gen_len": 16.3286, "eval_loss": 0.14062626659870148, "eval_runtime": 1229.1102, "eval_samples_per_second": 37.155, "eval_steps_per_second": 1.162, "step": 26419 }, { "epoch": 1.0, "learning_rate": 1.949863734433552e-05, "loss": 0.1731, "step": 26500 }, { "epoch": 1.02, "learning_rate": 1.94891744577766e-05, "loss": 0.163, "step": 27000 }, { "epoch": 1.04, "learning_rate": 1.9479711571217684e-05, "loss": 0.1602, "step": 27500 }, { "epoch": 1.06, "learning_rate": 1.947024868465877e-05, "loss": 0.1604, "step": 28000 }, { "epoch": 1.08, "learning_rate": 1.9460785798099855e-05, "loss": 0.1592, "step": 28500 }, { "epoch": 1.1, "learning_rate": 1.945132291154094e-05, "loss": 0.1611, "step": 29000 }, { "epoch": 1.12, "learning_rate": 1.9441860024982022e-05, "loss": 0.1591, "step": 29500 }, { "epoch": 1.14, "learning_rate": 1.9432397138423106e-05, "loss": 0.1564, "step": 30000 }, { "epoch": 1.15, "learning_rate": 1.942293425186419e-05, "loss": 0.1589, "step": 30500 }, { "epoch": 1.17, "learning_rate": 1.9413471365305273e-05, "loss": 0.1566, "step": 31000 }, { "epoch": 1.19, "learning_rate": 1.9404027404519476e-05, "loss": 0.1536, "step": 31500 }, { "epoch": 1.21, "learning_rate": 1.939456451796056e-05, "loss": 0.1566, "step": 32000 }, { "epoch": 1.23, "learning_rate": 1.9385101631401643e-05, "loss": 0.155, "step": 32500 }, { "epoch": 1.25, "learning_rate": 1.9375638744842727e-05, "loss": 0.1528, "step": 33000 }, { "epoch": 1.27, "learning_rate": 1.9366175858283814e-05, "loss": 0.153, "step": 33500 }, { "epoch": 1.29, "learning_rate": 1.9356712971724894e-05, "loss": 0.1523, "step": 34000 }, { "epoch": 1.31, "learning_rate": 1.934725008516598e-05, "loss": 0.1526, "step": 34500 }, { "epoch": 1.32, "learning_rate": 1.9337787198607065e-05, "loss": 0.1489, "step": 35000 }, { "epoch": 1.34, "learning_rate": 1.9328362163594384e-05, "loss": 0.152, "step": 35500 }, { "epoch": 1.36, "learning_rate": 1.9318899277035467e-05, "loss": 0.1506, "step": 36000 }, { "epoch": 1.38, "learning_rate": 1.930943639047655e-05, "loss": 0.1498, "step": 36500 }, { "epoch": 1.4, "learning_rate": 1.9299973503917638e-05, "loss": 0.1467, "step": 37000 }, { "epoch": 1.42, "learning_rate": 1.929052954313184e-05, "loss": 0.148, "step": 37500 }, { "epoch": 1.44, "learning_rate": 1.928106665657292e-05, "loss": 0.1436, "step": 38000 }, { "epoch": 1.46, "learning_rate": 1.927160377001401e-05, "loss": 0.1457, "step": 38500 }, { "epoch": 1.48, "learning_rate": 1.9262140883455092e-05, "loss": 0.1443, "step": 39000 }, { "epoch": 1.5, "learning_rate": 1.9252677996896172e-05, "loss": 0.1442, "step": 39500 }, { "epoch": 1.51, "learning_rate": 1.9243234036110375e-05, "loss": 0.1435, "step": 40000 }, { "epoch": 1.53, "learning_rate": 1.9233771149551462e-05, "loss": 0.1467, "step": 40500 }, { "epoch": 1.55, "learning_rate": 1.9224308262992543e-05, "loss": 0.1455, "step": 41000 }, { "epoch": 1.57, "learning_rate": 1.921484537643363e-05, "loss": 0.1437, "step": 41500 }, { "epoch": 1.59, "learning_rate": 1.920540141564783e-05, "loss": 0.142, "step": 42000 }, { "epoch": 1.61, "learning_rate": 1.9195938529088916e-05, "loss": 0.1417, "step": 42500 }, { "epoch": 1.63, "learning_rate": 1.9186475642529997e-05, "loss": 0.1408, "step": 43000 }, { "epoch": 1.65, "learning_rate": 1.91770316817442e-05, "loss": 0.1438, "step": 43500 }, { "epoch": 1.67, "learning_rate": 1.9167568795185286e-05, "loss": 0.1432, "step": 44000 }, { "epoch": 1.68, "learning_rate": 1.915810590862637e-05, "loss": 0.1396, "step": 44500 }, { "epoch": 1.7, "learning_rate": 1.9148643022067454e-05, "loss": 0.14, "step": 45000 }, { "epoch": 1.72, "learning_rate": 1.9139180135508538e-05, "loss": 0.1384, "step": 45500 }, { "epoch": 1.74, "learning_rate": 1.912971724894962e-05, "loss": 0.1402, "step": 46000 }, { "epoch": 1.76, "learning_rate": 1.9120254362390705e-05, "loss": 0.1393, "step": 46500 }, { "epoch": 1.78, "learning_rate": 1.911079147583179e-05, "loss": 0.1385, "step": 47000 }, { "epoch": 1.8, "learning_rate": 1.9101328589272872e-05, "loss": 0.1379, "step": 47500 }, { "epoch": 1.82, "learning_rate": 1.9091865702713956e-05, "loss": 0.1391, "step": 48000 }, { "epoch": 1.84, "learning_rate": 1.908242174192816e-05, "loss": 0.1362, "step": 48500 }, { "epoch": 1.85, "learning_rate": 1.9072958855369243e-05, "loss": 0.1332, "step": 49000 }, { "epoch": 1.87, "learning_rate": 1.906349596881033e-05, "loss": 0.1368, "step": 49500 }, { "epoch": 1.89, "learning_rate": 1.905403308225141e-05, "loss": 0.1369, "step": 50000 }, { "epoch": 1.91, "learning_rate": 1.9044570195692497e-05, "loss": 0.1368, "step": 50500 }, { "epoch": 1.93, "learning_rate": 1.9035145160679816e-05, "loss": 0.1361, "step": 51000 }, { "epoch": 1.95, "learning_rate": 1.90256822741209e-05, "loss": 0.1365, "step": 51500 }, { "epoch": 1.97, "learning_rate": 1.9016219387561983e-05, "loss": 0.1338, "step": 52000 }, { "epoch": 1.99, "learning_rate": 1.9006756501003067e-05, "loss": 0.1344, "step": 52500 }, { "epoch": 2.0, "eval_bleu": 90.5089, "eval_gen_len": 16.3365, "eval_loss": 0.11337216943502426, "eval_runtime": 1219.6509, "eval_samples_per_second": 37.443, "eval_steps_per_second": 1.171, "step": 52838 }, { "epoch": 2.01, "learning_rate": 1.8997293614444154e-05, "loss": 0.1299, "step": 53000 }, { "epoch": 2.03, "learning_rate": 1.8987830727885234e-05, "loss": 0.1238, "step": 53500 }, { "epoch": 2.04, "learning_rate": 1.897836784132632e-05, "loss": 0.1217, "step": 54000 }, { "epoch": 2.06, "learning_rate": 1.8968904954767405e-05, "loss": 0.1209, "step": 54500 }, { "epoch": 2.08, "learning_rate": 1.8959442068208485e-05, "loss": 0.1217, "step": 55000 }, { "epoch": 2.1, "learning_rate": 1.8949979181649572e-05, "loss": 0.1217, "step": 55500 }, { "epoch": 2.12, "learning_rate": 1.8940535220863775e-05, "loss": 0.1196, "step": 56000 }, { "epoch": 2.14, "learning_rate": 1.893107233430486e-05, "loss": 0.123, "step": 56500 }, { "epoch": 2.16, "learning_rate": 1.8921609447745942e-05, "loss": 0.1229, "step": 57000 }, { "epoch": 2.18, "learning_rate": 1.8912146561187026e-05, "loss": 0.1264, "step": 57500 }, { "epoch": 2.2, "learning_rate": 1.890268367462811e-05, "loss": 0.1187, "step": 58000 }, { "epoch": 2.21, "learning_rate": 1.8893220788069194e-05, "loss": 0.1226, "step": 58500 }, { "epoch": 2.23, "learning_rate": 1.8883757901510277e-05, "loss": 0.1213, "step": 59000 }, { "epoch": 2.25, "learning_rate": 1.8874295014951364e-05, "loss": 0.1207, "step": 59500 }, { "epoch": 2.27, "learning_rate": 1.8864851054165564e-05, "loss": 0.1208, "step": 60000 }, { "epoch": 2.29, "learning_rate": 1.8855407093379767e-05, "loss": 0.1219, "step": 60500 }, { "epoch": 2.31, "learning_rate": 1.884594420682085e-05, "loss": 0.1216, "step": 61000 }, { "epoch": 2.33, "learning_rate": 1.8836481320261934e-05, "loss": 0.1184, "step": 61500 }, { "epoch": 2.35, "learning_rate": 1.8827018433703018e-05, "loss": 0.1187, "step": 62000 }, { "epoch": 2.37, "learning_rate": 1.88175555471441e-05, "loss": 0.1188, "step": 62500 }, { "epoch": 2.38, "learning_rate": 1.8808092660585185e-05, "loss": 0.1187, "step": 63000 }, { "epoch": 2.4, "learning_rate": 1.8798629774026272e-05, "loss": 0.1207, "step": 63500 }, { "epoch": 2.42, "learning_rate": 1.878918581324047e-05, "loss": 0.1218, "step": 64000 }, { "epoch": 2.44, "learning_rate": 1.8779722926681555e-05, "loss": 0.1173, "step": 64500 }, { "epoch": 2.46, "learning_rate": 1.8770260040122642e-05, "loss": 0.1181, "step": 65000 }, { "epoch": 2.48, "learning_rate": 1.8760797153563726e-05, "loss": 0.1202, "step": 65500 }, { "epoch": 2.5, "learning_rate": 1.875133426700481e-05, "loss": 0.1181, "step": 66000 }, { "epoch": 2.52, "learning_rate": 1.8741871380445893e-05, "loss": 0.1178, "step": 66500 }, { "epoch": 2.54, "learning_rate": 1.8732408493886977e-05, "loss": 0.1189, "step": 67000 }, { "epoch": 2.55, "learning_rate": 1.872294560732806e-05, "loss": 0.1163, "step": 67500 }, { "epoch": 2.57, "learning_rate": 1.8713501646542264e-05, "loss": 0.1179, "step": 68000 }, { "epoch": 2.59, "learning_rate": 1.8704057685756466e-05, "loss": 0.1161, "step": 68500 }, { "epoch": 2.61, "learning_rate": 1.869459479919755e-05, "loss": 0.1185, "step": 69000 }, { "epoch": 2.63, "learning_rate": 1.8685131912638634e-05, "loss": 0.1172, "step": 69500 }, { "epoch": 2.65, "learning_rate": 1.8675669026079718e-05, "loss": 0.1191, "step": 70000 }, { "epoch": 2.67, "learning_rate": 1.86662061395208e-05, "loss": 0.1174, "step": 70500 }, { "epoch": 2.69, "learning_rate": 1.8656762178735004e-05, "loss": 0.1159, "step": 71000 }, { "epoch": 2.71, "learning_rate": 1.8647299292176088e-05, "loss": 0.1195, "step": 71500 }, { "epoch": 2.73, "learning_rate": 1.863783640561717e-05, "loss": 0.1144, "step": 72000 }, { "epoch": 2.74, "learning_rate": 1.8628392444831374e-05, "loss": 0.1165, "step": 72500 }, { "epoch": 2.76, "learning_rate": 1.8618929558272455e-05, "loss": 0.1155, "step": 73000 }, { "epoch": 2.78, "learning_rate": 1.860946667171354e-05, "loss": 0.1155, "step": 73500 }, { "epoch": 2.8, "learning_rate": 1.8600003785154625e-05, "loss": 0.1164, "step": 74000 }, { "epoch": 2.82, "learning_rate": 1.859054089859571e-05, "loss": 0.1129, "step": 74500 }, { "epoch": 2.84, "learning_rate": 1.8581096937809912e-05, "loss": 0.115, "step": 75000 }, { "epoch": 2.86, "learning_rate": 1.8571634051250996e-05, "loss": 0.1158, "step": 75500 }, { "epoch": 2.88, "learning_rate": 1.856217116469208e-05, "loss": 0.1136, "step": 76000 }, { "epoch": 2.9, "learning_rate": 1.8552708278133163e-05, "loss": 0.1163, "step": 76500 }, { "epoch": 2.91, "learning_rate": 1.8543245391574247e-05, "loss": 0.1145, "step": 77000 }, { "epoch": 2.93, "learning_rate": 1.853378250501533e-05, "loss": 0.1156, "step": 77500 }, { "epoch": 2.95, "learning_rate": 1.8524319618456414e-05, "loss": 0.1122, "step": 78000 }, { "epoch": 2.97, "learning_rate": 1.8514856731897498e-05, "loss": 0.1143, "step": 78500 }, { "epoch": 2.99, "learning_rate": 1.8505393845338585e-05, "loss": 0.1143, "step": 79000 }, { "epoch": 3.0, "eval_bleu": 91.1209, "eval_gen_len": 16.3757, "eval_loss": 0.10402025282382965, "eval_runtime": 1204.974, "eval_samples_per_second": 37.899, "eval_steps_per_second": 1.185, "step": 79257 }, { "epoch": 3.01, "learning_rate": 1.8495949884552788e-05, "loss": 0.1113, "step": 79500 }, { "epoch": 3.03, "learning_rate": 1.8486486997993868e-05, "loss": 0.1047, "step": 80000 }, { "epoch": 3.05, "learning_rate": 1.8477024111434955e-05, "loss": 0.1023, "step": 80500 }, { "epoch": 3.07, "learning_rate": 1.846756122487604e-05, "loss": 0.1044, "step": 81000 }, { "epoch": 3.08, "learning_rate": 1.8458098338317122e-05, "loss": 0.1057, "step": 81500 }, { "epoch": 3.1, "learning_rate": 1.8448635451758206e-05, "loss": 0.1026, "step": 82000 }, { "epoch": 3.12, "learning_rate": 1.843917256519929e-05, "loss": 0.1019, "step": 82500 }, { "epoch": 3.14, "learning_rate": 1.8429709678640374e-05, "loss": 0.1034, "step": 83000 }, { "epoch": 3.16, "learning_rate": 1.8420246792081457e-05, "loss": 0.1037, "step": 83500 }, { "epoch": 3.18, "learning_rate": 1.841080283129566e-05, "loss": 0.107, "step": 84000 }, { "epoch": 3.2, "learning_rate": 1.8401358870509863e-05, "loss": 0.1029, "step": 84500 }, { "epoch": 3.22, "learning_rate": 1.8391895983950947e-05, "loss": 0.1027, "step": 85000 }, { "epoch": 3.24, "learning_rate": 1.838243309739203e-05, "loss": 0.1055, "step": 85500 }, { "epoch": 3.26, "learning_rate": 1.8372970210833114e-05, "loss": 0.1031, "step": 86000 }, { "epoch": 3.27, "learning_rate": 1.8363526250047317e-05, "loss": 0.1044, "step": 86500 }, { "epoch": 3.29, "learning_rate": 1.83540633634884e-05, "loss": 0.1044, "step": 87000 }, { "epoch": 3.31, "learning_rate": 1.8344600476929484e-05, "loss": 0.1037, "step": 87500 }, { "epoch": 3.33, "learning_rate": 1.8335137590370568e-05, "loss": 0.103, "step": 88000 }, { "epoch": 3.35, "learning_rate": 1.832567470381165e-05, "loss": 0.1032, "step": 88500 }, { "epoch": 3.37, "learning_rate": 1.8316211817252735e-05, "loss": 0.1029, "step": 89000 }, { "epoch": 3.39, "learning_rate": 1.8306748930693822e-05, "loss": 0.1002, "step": 89500 }, { "epoch": 3.41, "learning_rate": 1.8297286044134906e-05, "loss": 0.1044, "step": 90000 }, { "epoch": 3.43, "learning_rate": 1.8287861009122224e-05, "loss": 0.1038, "step": 90500 }, { "epoch": 3.44, "learning_rate": 1.8278398122563308e-05, "loss": 0.1039, "step": 91000 }, { "epoch": 3.46, "learning_rate": 1.8268935236004392e-05, "loss": 0.1026, "step": 91500 }, { "epoch": 3.48, "learning_rate": 1.825947234944548e-05, "loss": 0.102, "step": 92000 }, { "epoch": 3.5, "learning_rate": 1.825000946288656e-05, "loss": 0.1036, "step": 92500 }, { "epoch": 3.52, "learning_rate": 1.8240546576327643e-05, "loss": 0.1024, "step": 93000 }, { "epoch": 3.54, "learning_rate": 1.823108368976873e-05, "loss": 0.1041, "step": 93500 }, { "epoch": 3.56, "learning_rate": 1.822162080320981e-05, "loss": 0.102, "step": 94000 }, { "epoch": 3.58, "learning_rate": 1.8212157916650897e-05, "loss": 0.1049, "step": 94500 }, { "epoch": 3.6, "learning_rate": 1.820269503009198e-05, "loss": 0.0992, "step": 95000 }, { "epoch": 3.61, "learning_rate": 1.8193232143533065e-05, "loss": 0.104, "step": 95500 }, { "epoch": 3.63, "learning_rate": 1.818376925697415e-05, "loss": 0.1007, "step": 96000 }, { "epoch": 3.65, "learning_rate": 1.817432529618835e-05, "loss": 0.1037, "step": 96500 }, { "epoch": 3.67, "learning_rate": 1.8164862409629435e-05, "loss": 0.1036, "step": 97000 }, { "epoch": 3.69, "learning_rate": 1.8155418448843635e-05, "loss": 0.1012, "step": 97500 }, { "epoch": 3.71, "learning_rate": 1.814595556228472e-05, "loss": 0.1041, "step": 98000 }, { "epoch": 3.73, "learning_rate": 1.8136492675725805e-05, "loss": 0.1011, "step": 98500 }, { "epoch": 3.75, "learning_rate": 1.812702978916689e-05, "loss": 0.1025, "step": 99000 }, { "epoch": 3.77, "learning_rate": 1.8117566902607973e-05, "loss": 0.1012, "step": 99500 }, { "epoch": 3.79, "learning_rate": 1.8108104016049056e-05, "loss": 0.1014, "step": 100000 }, { "epoch": 3.8, "learning_rate": 1.8098641129490143e-05, "loss": 0.1011, "step": 100500 }, { "epoch": 3.82, "learning_rate": 1.8089178242931224e-05, "loss": 0.1008, "step": 101000 }, { "epoch": 3.84, "learning_rate": 1.8079734282145427e-05, "loss": 0.1009, "step": 101500 }, { "epoch": 3.86, "learning_rate": 1.807027139558651e-05, "loss": 0.1014, "step": 102000 }, { "epoch": 3.88, "learning_rate": 1.8060808509027594e-05, "loss": 0.099, "step": 102500 }, { "epoch": 3.9, "learning_rate": 1.8051345622468678e-05, "loss": 0.1014, "step": 103000 }, { "epoch": 3.92, "learning_rate": 1.8041882735909765e-05, "loss": 0.1012, "step": 103500 }, { "epoch": 3.94, "learning_rate": 1.803241984935085e-05, "loss": 0.1008, "step": 104000 }, { "epoch": 3.96, "learning_rate": 1.8022975888565048e-05, "loss": 0.1014, "step": 104500 }, { "epoch": 3.97, "learning_rate": 1.801351300200613e-05, "loss": 0.1021, "step": 105000 }, { "epoch": 3.99, "learning_rate": 1.800405011544722e-05, "loss": 0.0991, "step": 105500 }, { "epoch": 4.0, "eval_bleu": 91.4665, "eval_gen_len": 16.423, "eval_loss": 0.09734214097261429, "eval_runtime": 1213.4766, "eval_samples_per_second": 37.633, "eval_steps_per_second": 1.177, "step": 105676 }, { "epoch": 4.01, "learning_rate": 1.79945872288883e-05, "loss": 0.0962, "step": 106000 }, { "epoch": 4.03, "learning_rate": 1.7985124342329386e-05, "loss": 0.0908, "step": 106500 }, { "epoch": 4.05, "learning_rate": 1.797568038154359e-05, "loss": 0.0924, "step": 107000 }, { "epoch": 4.07, "learning_rate": 1.7966217494984673e-05, "loss": 0.0894, "step": 107500 }, { "epoch": 4.09, "learning_rate": 1.7956754608425756e-05, "loss": 0.0908, "step": 108000 }, { "epoch": 4.11, "learning_rate": 1.794729172186684e-05, "loss": 0.094, "step": 108500 }, { "epoch": 4.13, "learning_rate": 1.7937828835307924e-05, "loss": 0.0921, "step": 109000 }, { "epoch": 4.14, "learning_rate": 1.7928365948749007e-05, "loss": 0.0891, "step": 109500 }, { "epoch": 4.16, "learning_rate": 1.791890306219009e-05, "loss": 0.0895, "step": 110000 }, { "epoch": 4.18, "learning_rate": 1.7909459101404294e-05, "loss": 0.0905, "step": 110500 }, { "epoch": 4.2, "learning_rate": 1.7899996214845378e-05, "loss": 0.091, "step": 111000 }, { "epoch": 4.22, "learning_rate": 1.789053332828646e-05, "loss": 0.0949, "step": 111500 }, { "epoch": 4.24, "learning_rate": 1.7881070441727545e-05, "loss": 0.0922, "step": 112000 }, { "epoch": 4.26, "learning_rate": 1.7871607555168632e-05, "loss": 0.0929, "step": 112500 }, { "epoch": 4.28, "learning_rate": 1.7862144668609712e-05, "loss": 0.0907, "step": 113000 }, { "epoch": 4.3, "learning_rate": 1.78526817820508e-05, "loss": 0.0932, "step": 113500 }, { "epoch": 4.32, "learning_rate": 1.7843218895491883e-05, "loss": 0.093, "step": 114000 }, { "epoch": 4.33, "learning_rate": 1.7833774934706086e-05, "loss": 0.0921, "step": 114500 }, { "epoch": 4.35, "learning_rate": 1.7824330973920285e-05, "loss": 0.0902, "step": 115000 }, { "epoch": 4.37, "learning_rate": 1.781486808736137e-05, "loss": 0.0911, "step": 115500 }, { "epoch": 4.39, "learning_rate": 1.7805405200802456e-05, "loss": 0.0903, "step": 116000 }, { "epoch": 4.41, "learning_rate": 1.7795942314243536e-05, "loss": 0.094, "step": 116500 }, { "epoch": 4.43, "learning_rate": 1.778649835345774e-05, "loss": 0.0912, "step": 117000 }, { "epoch": 4.45, "learning_rate": 1.7777035466898823e-05, "loss": 0.0913, "step": 117500 }, { "epoch": 4.47, "learning_rate": 1.776757258033991e-05, "loss": 0.0938, "step": 118000 }, { "epoch": 4.49, "learning_rate": 1.775810969378099e-05, "loss": 0.0903, "step": 118500 }, { "epoch": 4.5, "learning_rate": 1.7748646807222077e-05, "loss": 0.0917, "step": 119000 }, { "epoch": 4.52, "learning_rate": 1.773920284643628e-05, "loss": 0.0906, "step": 119500 }, { "epoch": 4.54, "learning_rate": 1.7729739959877364e-05, "loss": 0.0928, "step": 120000 }, { "epoch": 4.56, "learning_rate": 1.7720277073318444e-05, "loss": 0.0919, "step": 120500 }, { "epoch": 4.58, "learning_rate": 1.771081418675953e-05, "loss": 0.0933, "step": 121000 }, { "epoch": 4.6, "learning_rate": 1.7701370225973734e-05, "loss": 0.0925, "step": 121500 }, { "epoch": 4.62, "learning_rate": 1.7691907339414814e-05, "loss": 0.0909, "step": 122000 }, { "epoch": 4.64, "learning_rate": 1.76824444528559e-05, "loss": 0.0923, "step": 122500 }, { "epoch": 4.66, "learning_rate": 1.7672981566296985e-05, "loss": 0.0898, "step": 123000 }, { "epoch": 4.67, "learning_rate": 1.766351867973807e-05, "loss": 0.0906, "step": 123500 }, { "epoch": 4.69, "learning_rate": 1.7654055793179153e-05, "loss": 0.0921, "step": 124000 }, { "epoch": 4.71, "learning_rate": 1.7644592906620236e-05, "loss": 0.09, "step": 124500 }, { "epoch": 4.73, "learning_rate": 1.763514894583444e-05, "loss": 0.0898, "step": 125000 }, { "epoch": 4.75, "learning_rate": 1.7625686059275523e-05, "loss": 0.091, "step": 125500 }, { "epoch": 4.77, "learning_rate": 1.7616223172716607e-05, "loss": 0.09, "step": 126000 }, { "epoch": 4.79, "learning_rate": 1.760676028615769e-05, "loss": 0.0943, "step": 126500 }, { "epoch": 4.81, "learning_rate": 1.7597297399598777e-05, "loss": 0.0904, "step": 127000 }, { "epoch": 4.83, "learning_rate": 1.7587834513039858e-05, "loss": 0.0919, "step": 127500 }, { "epoch": 4.84, "learning_rate": 1.7578371626480945e-05, "loss": 0.0904, "step": 128000 }, { "epoch": 4.86, "learning_rate": 1.7568927665695144e-05, "loss": 0.0935, "step": 128500 }, { "epoch": 4.88, "learning_rate": 1.7559464779136228e-05, "loss": 0.0916, "step": 129000 }, { "epoch": 4.9, "learning_rate": 1.755000189257731e-05, "loss": 0.0886, "step": 129500 }, { "epoch": 4.92, "learning_rate": 1.75405390060184e-05, "loss": 0.0943, "step": 130000 }, { "epoch": 4.94, "learning_rate": 1.75310950452326e-05, "loss": 0.0911, "step": 130500 }, { "epoch": 4.96, "learning_rate": 1.7521632158673682e-05, "loss": 0.0917, "step": 131000 }, { "epoch": 4.98, "learning_rate": 1.751216927211477e-05, "loss": 0.0925, "step": 131500 }, { "epoch": 5.0, "learning_rate": 1.7502706385555853e-05, "loss": 0.0917, "step": 132000 }, { "epoch": 5.0, "eval_bleu": 91.7718, "eval_gen_len": 16.394, "eval_loss": 0.09411130845546722, "eval_runtime": 1205.451, "eval_samples_per_second": 37.884, "eval_steps_per_second": 1.185, "step": 132095 }, { "epoch": 5.02, "learning_rate": 1.7493243498996936e-05, "loss": 0.0822, "step": 132500 }, { "epoch": 5.03, "learning_rate": 1.748378061243802e-05, "loss": 0.0819, "step": 133000 }, { "epoch": 5.05, "learning_rate": 1.7474317725879104e-05, "loss": 0.0835, "step": 133500 }, { "epoch": 5.07, "learning_rate": 1.7464854839320187e-05, "loss": 0.0805, "step": 134000 }, { "epoch": 5.09, "learning_rate": 1.745539195276127e-05, "loss": 0.0806, "step": 134500 }, { "epoch": 5.11, "learning_rate": 1.7445947991975474e-05, "loss": 0.085, "step": 135000 }, { "epoch": 5.13, "learning_rate": 1.7436485105416558e-05, "loss": 0.0823, "step": 135500 }, { "epoch": 5.15, "learning_rate": 1.742702221885764e-05, "loss": 0.0832, "step": 136000 }, { "epoch": 5.17, "learning_rate": 1.7417559332298725e-05, "loss": 0.085, "step": 136500 }, { "epoch": 5.19, "learning_rate": 1.7408096445739812e-05, "loss": 0.0823, "step": 137000 }, { "epoch": 5.2, "learning_rate": 1.7398633559180892e-05, "loss": 0.0837, "step": 137500 }, { "epoch": 5.22, "learning_rate": 1.7389170672621976e-05, "loss": 0.0835, "step": 138000 }, { "epoch": 5.24, "learning_rate": 1.7379707786063063e-05, "loss": 0.0808, "step": 138500 }, { "epoch": 5.26, "learning_rate": 1.7370244899504147e-05, "loss": 0.0815, "step": 139000 }, { "epoch": 5.28, "learning_rate": 1.7360819864491465e-05, "loss": 0.0857, "step": 139500 }, { "epoch": 5.3, "learning_rate": 1.735135697793255e-05, "loss": 0.0824, "step": 140000 }, { "epoch": 5.32, "learning_rate": 1.7341894091373633e-05, "loss": 0.0833, "step": 140500 }, { "epoch": 5.34, "learning_rate": 1.733243120481472e-05, "loss": 0.0828, "step": 141000 }, { "epoch": 5.36, "learning_rate": 1.73229683182558e-05, "loss": 0.085, "step": 141500 }, { "epoch": 5.37, "learning_rate": 1.7313505431696887e-05, "loss": 0.0817, "step": 142000 }, { "epoch": 5.39, "learning_rate": 1.730406147091109e-05, "loss": 0.0822, "step": 142500 }, { "epoch": 5.41, "learning_rate": 1.729459858435217e-05, "loss": 0.0851, "step": 143000 }, { "epoch": 5.43, "learning_rate": 1.7285135697793257e-05, "loss": 0.0818, "step": 143500 }, { "epoch": 5.45, "learning_rate": 1.727567281123434e-05, "loss": 0.083, "step": 144000 }, { "epoch": 5.47, "learning_rate": 1.7266228850448544e-05, "loss": 0.0833, "step": 144500 }, { "epoch": 5.49, "learning_rate": 1.7256765963889624e-05, "loss": 0.0818, "step": 145000 }, { "epoch": 5.51, "learning_rate": 1.724730307733071e-05, "loss": 0.0833, "step": 145500 }, { "epoch": 5.53, "learning_rate": 1.7237840190771795e-05, "loss": 0.0842, "step": 146000 }, { "epoch": 5.55, "learning_rate": 1.722837730421288e-05, "loss": 0.0814, "step": 146500 }, { "epoch": 5.56, "learning_rate": 1.7218914417653962e-05, "loss": 0.0806, "step": 147000 }, { "epoch": 5.58, "learning_rate": 1.7209470456868165e-05, "loss": 0.0837, "step": 147500 }, { "epoch": 5.6, "learning_rate": 1.720000757030925e-05, "loss": 0.0842, "step": 148000 }, { "epoch": 5.62, "learning_rate": 1.7190544683750333e-05, "loss": 0.0822, "step": 148500 }, { "epoch": 5.64, "learning_rate": 1.7181081797191416e-05, "loss": 0.0823, "step": 149000 }, { "epoch": 5.66, "learning_rate": 1.71716189106325e-05, "loss": 0.0874, "step": 149500 }, { "epoch": 5.68, "learning_rate": 1.7162156024073584e-05, "loss": 0.0832, "step": 150000 }, { "epoch": 5.7, "learning_rate": 1.7152712063287787e-05, "loss": 0.084, "step": 150500 }, { "epoch": 5.72, "learning_rate": 1.714324917672887e-05, "loss": 0.083, "step": 151000 }, { "epoch": 5.73, "learning_rate": 1.7133805215943073e-05, "loss": 0.0825, "step": 151500 }, { "epoch": 5.75, "learning_rate": 1.7124342329384157e-05, "loss": 0.0823, "step": 152000 }, { "epoch": 5.77, "learning_rate": 1.711487944282524e-05, "loss": 0.0829, "step": 152500 }, { "epoch": 5.79, "learning_rate": 1.7105416556266324e-05, "loss": 0.0833, "step": 153000 }, { "epoch": 5.81, "learning_rate": 1.7095953669707408e-05, "loss": 0.0828, "step": 153500 }, { "epoch": 5.83, "learning_rate": 1.708649078314849e-05, "loss": 0.081, "step": 154000 }, { "epoch": 5.85, "learning_rate": 1.707702789658958e-05, "loss": 0.0834, "step": 154500 }, { "epoch": 5.87, "learning_rate": 1.7067565010030662e-05, "loss": 0.0823, "step": 155000 }, { "epoch": 5.89, "learning_rate": 1.7058102123471746e-05, "loss": 0.0833, "step": 155500 }, { "epoch": 5.9, "learning_rate": 1.704863923691283e-05, "loss": 0.0836, "step": 156000 }, { "epoch": 5.92, "learning_rate": 1.7039176350353913e-05, "loss": 0.0836, "step": 156500 }, { "epoch": 5.94, "learning_rate": 1.7029713463794997e-05, "loss": 0.0829, "step": 157000 }, { "epoch": 5.96, "learning_rate": 1.702025057723608e-05, "loss": 0.0835, "step": 157500 }, { "epoch": 5.98, "learning_rate": 1.7010806616450284e-05, "loss": 0.0841, "step": 158000 }, { "epoch": 6.0, "learning_rate": 1.7001343729891367e-05, "loss": 0.0822, "step": 158500 }, { "epoch": 6.0, "eval_bleu": 91.8571, "eval_gen_len": 16.3677, "eval_loss": 0.0922790989279747, "eval_runtime": 1204.098, "eval_samples_per_second": 37.926, "eval_steps_per_second": 1.186, "step": 158514 }, { "epoch": 6.02, "learning_rate": 1.699188084333245e-05, "loss": 0.0756, "step": 159000 }, { "epoch": 6.04, "learning_rate": 1.6982417956773535e-05, "loss": 0.0749, "step": 159500 }, { "epoch": 6.06, "learning_rate": 1.6972955070214622e-05, "loss": 0.0736, "step": 160000 }, { "epoch": 6.08, "learning_rate": 1.6963492183655702e-05, "loss": 0.0743, "step": 160500 }, { "epoch": 6.09, "learning_rate": 1.6954048222869905e-05, "loss": 0.0743, "step": 161000 }, { "epoch": 6.11, "learning_rate": 1.6944604262084108e-05, "loss": 0.0748, "step": 161500 }, { "epoch": 6.13, "learning_rate": 1.693514137552519e-05, "loss": 0.0728, "step": 162000 }, { "epoch": 6.15, "learning_rate": 1.6925678488966275e-05, "loss": 0.0759, "step": 162500 }, { "epoch": 6.17, "learning_rate": 1.691621560240736e-05, "loss": 0.076, "step": 163000 }, { "epoch": 6.19, "learning_rate": 1.6906752715848446e-05, "loss": 0.0752, "step": 163500 }, { "epoch": 6.21, "learning_rate": 1.6897289829289526e-05, "loss": 0.0754, "step": 164000 }, { "epoch": 6.23, "learning_rate": 1.6887826942730613e-05, "loss": 0.0741, "step": 164500 }, { "epoch": 6.25, "learning_rate": 1.6878364056171697e-05, "loss": 0.0786, "step": 165000 }, { "epoch": 6.26, "learning_rate": 1.686890116961278e-05, "loss": 0.076, "step": 165500 }, { "epoch": 6.28, "learning_rate": 1.685945720882698e-05, "loss": 0.075, "step": 166000 }, { "epoch": 6.3, "learning_rate": 1.6850013248041183e-05, "loss": 0.0744, "step": 166500 }, { "epoch": 6.32, "learning_rate": 1.6840569287255386e-05, "loss": 0.0758, "step": 167000 }, { "epoch": 6.34, "learning_rate": 1.683110640069647e-05, "loss": 0.0756, "step": 167500 }, { "epoch": 6.36, "learning_rate": 1.6821643514137553e-05, "loss": 0.0748, "step": 168000 }, { "epoch": 6.38, "learning_rate": 1.6812180627578637e-05, "loss": 0.0776, "step": 168500 }, { "epoch": 6.4, "learning_rate": 1.6802717741019724e-05, "loss": 0.076, "step": 169000 }, { "epoch": 6.42, "learning_rate": 1.6793254854460804e-05, "loss": 0.0744, "step": 169500 }, { "epoch": 6.43, "learning_rate": 1.678379196790189e-05, "loss": 0.0757, "step": 170000 }, { "epoch": 6.45, "learning_rate": 1.6774329081342975e-05, "loss": 0.078, "step": 170500 }, { "epoch": 6.47, "learning_rate": 1.676486619478406e-05, "loss": 0.0772, "step": 171000 }, { "epoch": 6.49, "learning_rate": 1.6755403308225142e-05, "loss": 0.0763, "step": 171500 }, { "epoch": 6.51, "learning_rate": 1.6745940421666226e-05, "loss": 0.0734, "step": 172000 }, { "epoch": 6.53, "learning_rate": 1.673649646088043e-05, "loss": 0.0758, "step": 172500 }, { "epoch": 6.55, "learning_rate": 1.6727033574321513e-05, "loss": 0.0768, "step": 173000 }, { "epoch": 6.57, "learning_rate": 1.6717570687762596e-05, "loss": 0.0762, "step": 173500 }, { "epoch": 6.59, "learning_rate": 1.670810780120368e-05, "loss": 0.0774, "step": 174000 }, { "epoch": 6.61, "learning_rate": 1.6698644914644764e-05, "loss": 0.0744, "step": 174500 }, { "epoch": 6.62, "learning_rate": 1.6689200953858967e-05, "loss": 0.0775, "step": 175000 }, { "epoch": 6.64, "learning_rate": 1.667973806730005e-05, "loss": 0.0749, "step": 175500 }, { "epoch": 6.66, "learning_rate": 1.6670275180741134e-05, "loss": 0.0778, "step": 176000 }, { "epoch": 6.68, "learning_rate": 1.6660812294182218e-05, "loss": 0.0757, "step": 176500 }, { "epoch": 6.7, "learning_rate": 1.66513494076233e-05, "loss": 0.0751, "step": 177000 }, { "epoch": 6.72, "learning_rate": 1.664188652106439e-05, "loss": 0.0756, "step": 177500 }, { "epoch": 6.74, "learning_rate": 1.6632442560278588e-05, "loss": 0.0734, "step": 178000 }, { "epoch": 6.76, "learning_rate": 1.662297967371967e-05, "loss": 0.076, "step": 178500 }, { "epoch": 6.78, "learning_rate": 1.661351678716076e-05, "loss": 0.0754, "step": 179000 }, { "epoch": 6.79, "learning_rate": 1.6604053900601842e-05, "loss": 0.076, "step": 179500 }, { "epoch": 6.81, "learning_rate": 1.6594591014042926e-05, "loss": 0.0739, "step": 180000 }, { "epoch": 6.83, "learning_rate": 1.6585165979030245e-05, "loss": 0.0749, "step": 180500 }, { "epoch": 6.85, "learning_rate": 1.6575703092471328e-05, "loss": 0.0747, "step": 181000 }, { "epoch": 6.87, "learning_rate": 1.6566240205912415e-05, "loss": 0.0777, "step": 181500 }, { "epoch": 6.89, "learning_rate": 1.6556777319353496e-05, "loss": 0.0768, "step": 182000 }, { "epoch": 6.91, "learning_rate": 1.6547314432794583e-05, "loss": 0.0777, "step": 182500 }, { "epoch": 6.93, "learning_rate": 1.6537851546235666e-05, "loss": 0.0762, "step": 183000 }, { "epoch": 6.95, "learning_rate": 1.6528388659676747e-05, "loss": 0.0766, "step": 183500 }, { "epoch": 6.96, "learning_rate": 1.6518925773117834e-05, "loss": 0.0757, "step": 184000 }, { "epoch": 6.98, "learning_rate": 1.6509481812332037e-05, "loss": 0.0773, "step": 184500 }, { "epoch": 7.0, "eval_bleu": 92.0146, "eval_gen_len": 16.3778, "eval_loss": 0.0919041708111763, "eval_runtime": 1212.2042, "eval_samples_per_second": 37.673, "eval_steps_per_second": 1.178, "step": 184933 }, { "epoch": 7.0, "learning_rate": 1.650001892577312e-05, "loss": 0.0745, "step": 185000 }, { "epoch": 7.02, "learning_rate": 1.6490556039214204e-05, "loss": 0.0657, "step": 185500 }, { "epoch": 7.04, "learning_rate": 1.6481093152655288e-05, "loss": 0.0673, "step": 186000 }, { "epoch": 7.06, "learning_rate": 1.647163026609637e-05, "loss": 0.0688, "step": 186500 }, { "epoch": 7.08, "learning_rate": 1.6462167379537455e-05, "loss": 0.0674, "step": 187000 }, { "epoch": 7.1, "learning_rate": 1.645270449297854e-05, "loss": 0.0669, "step": 187500 }, { "epoch": 7.12, "learning_rate": 1.6443241606419626e-05, "loss": 0.07, "step": 188000 }, { "epoch": 7.14, "learning_rate": 1.6433778719860706e-05, "loss": 0.0683, "step": 188500 }, { "epoch": 7.15, "learning_rate": 1.642433475907491e-05, "loss": 0.0672, "step": 189000 }, { "epoch": 7.17, "learning_rate": 1.6414871872515993e-05, "loss": 0.0675, "step": 189500 }, { "epoch": 7.19, "learning_rate": 1.640540898595708e-05, "loss": 0.0688, "step": 190000 }, { "epoch": 7.21, "learning_rate": 1.639594609939816e-05, "loss": 0.0691, "step": 190500 }, { "epoch": 7.23, "learning_rate": 1.6386483212839247e-05, "loss": 0.0676, "step": 191000 }, { "epoch": 7.25, "learning_rate": 1.6377039252053447e-05, "loss": 0.0706, "step": 191500 }, { "epoch": 7.27, "learning_rate": 1.636757636549453e-05, "loss": 0.0701, "step": 192000 }, { "epoch": 7.29, "learning_rate": 1.6358113478935614e-05, "loss": 0.07, "step": 192500 }, { "epoch": 7.31, "learning_rate": 1.63486505923767e-05, "loss": 0.069, "step": 193000 }, { "epoch": 7.32, "learning_rate": 1.6339187705817785e-05, "loss": 0.068, "step": 193500 }, { "epoch": 7.34, "learning_rate": 1.632972481925887e-05, "loss": 0.0678, "step": 194000 }, { "epoch": 7.36, "learning_rate": 1.632028085847307e-05, "loss": 0.0701, "step": 194500 }, { "epoch": 7.38, "learning_rate": 1.6310817971914155e-05, "loss": 0.0702, "step": 195000 }, { "epoch": 7.4, "learning_rate": 1.630135508535524e-05, "loss": 0.0688, "step": 195500 }, { "epoch": 7.42, "learning_rate": 1.6291911124569438e-05, "loss": 0.0687, "step": 196000 }, { "epoch": 7.44, "learning_rate": 1.6282448238010525e-05, "loss": 0.0676, "step": 196500 }, { "epoch": 7.46, "learning_rate": 1.627298535145161e-05, "loss": 0.0697, "step": 197000 }, { "epoch": 7.48, "learning_rate": 1.6263522464892693e-05, "loss": 0.0686, "step": 197500 }, { "epoch": 7.49, "learning_rate": 1.6254059578333776e-05, "loss": 0.0683, "step": 198000 }, { "epoch": 7.51, "learning_rate": 1.624461561754798e-05, "loss": 0.0684, "step": 198500 }, { "epoch": 7.53, "learning_rate": 1.6235152730989063e-05, "loss": 0.0687, "step": 199000 }, { "epoch": 7.55, "learning_rate": 1.6225689844430147e-05, "loss": 0.0698, "step": 199500 }, { "epoch": 7.57, "learning_rate": 1.621622695787123e-05, "loss": 0.0689, "step": 200000 }, { "epoch": 7.59, "learning_rate": 1.6206764071312314e-05, "loss": 0.0717, "step": 200500 }, { "epoch": 7.61, "learning_rate": 1.6197301184753398e-05, "loss": 0.0706, "step": 201000 }, { "epoch": 7.63, "learning_rate": 1.61878572239676e-05, "loss": 0.0686, "step": 201500 }, { "epoch": 7.65, "learning_rate": 1.6178394337408684e-05, "loss": 0.0704, "step": 202000 }, { "epoch": 7.66, "learning_rate": 1.6168931450849768e-05, "loss": 0.0701, "step": 202500 }, { "epoch": 7.68, "learning_rate": 1.615946856429085e-05, "loss": 0.0711, "step": 203000 }, { "epoch": 7.7, "learning_rate": 1.6150005677731935e-05, "loss": 0.0697, "step": 203500 }, { "epoch": 7.72, "learning_rate": 1.6140542791173022e-05, "loss": 0.0698, "step": 204000 }, { "epoch": 7.74, "learning_rate": 1.6131079904614103e-05, "loss": 0.0731, "step": 204500 }, { "epoch": 7.76, "learning_rate": 1.612161701805519e-05, "loss": 0.0692, "step": 205000 }, { "epoch": 7.78, "learning_rate": 1.6112173057269393e-05, "loss": 0.0725, "step": 205500 }, { "epoch": 7.8, "learning_rate": 1.6102710170710473e-05, "loss": 0.0698, "step": 206000 }, { "epoch": 7.82, "learning_rate": 1.609324728415156e-05, "loss": 0.071, "step": 206500 }, { "epoch": 7.84, "learning_rate": 1.6083784397592644e-05, "loss": 0.0715, "step": 207000 }, { "epoch": 7.85, "learning_rate": 1.6074321511033727e-05, "loss": 0.0717, "step": 207500 }, { "epoch": 7.87, "learning_rate": 1.606485862447481e-05, "loss": 0.07, "step": 208000 }, { "epoch": 7.89, "learning_rate": 1.6055414663689014e-05, "loss": 0.0694, "step": 208500 }, { "epoch": 7.91, "learning_rate": 1.6045951777130098e-05, "loss": 0.0693, "step": 209000 }, { "epoch": 7.93, "learning_rate": 1.603648889057118e-05, "loss": 0.071, "step": 209500 }, { "epoch": 7.95, "learning_rate": 1.6027026004012265e-05, "loss": 0.0713, "step": 210000 }, { "epoch": 7.97, "learning_rate": 1.601756311745335e-05, "loss": 0.0701, "step": 210500 }, { "epoch": 7.99, "learning_rate": 1.6008100230894436e-05, "loss": 0.0705, "step": 211000 }, { "epoch": 8.0, "eval_bleu": 92.0134, "eval_gen_len": 16.388, "eval_loss": 0.09185712039470673, "eval_runtime": 1202.772, "eval_samples_per_second": 37.968, "eval_steps_per_second": 1.187, "step": 211352 }, { "epoch": 8.01, "learning_rate": 1.5998656270108635e-05, "loss": 0.0665, "step": 211500 }, { "epoch": 8.02, "learning_rate": 1.598919338354972e-05, "loss": 0.0607, "step": 212000 }, { "epoch": 8.04, "learning_rate": 1.5979730496990803e-05, "loss": 0.0613, "step": 212500 }, { "epoch": 8.06, "learning_rate": 1.5970267610431886e-05, "loss": 0.0611, "step": 213000 }, { "epoch": 8.08, "learning_rate": 1.596080472387297e-05, "loss": 0.0616, "step": 213500 }, { "epoch": 8.1, "learning_rate": 1.5951360763087173e-05, "loss": 0.0644, "step": 214000 }, { "epoch": 8.12, "learning_rate": 1.594189787652826e-05, "loss": 0.0605, "step": 214500 }, { "epoch": 8.14, "learning_rate": 1.593243498996934e-05, "loss": 0.063, "step": 215000 }, { "epoch": 8.16, "learning_rate": 1.5922972103410427e-05, "loss": 0.0635, "step": 215500 }, { "epoch": 8.18, "learning_rate": 1.591350921685151e-05, "loss": 0.0635, "step": 216000 }, { "epoch": 8.19, "learning_rate": 1.590406525606571e-05, "loss": 0.0634, "step": 216500 }, { "epoch": 8.21, "learning_rate": 1.5894602369506794e-05, "loss": 0.0641, "step": 217000 }, { "epoch": 8.23, "learning_rate": 1.588513948294788e-05, "loss": 0.0626, "step": 217500 }, { "epoch": 8.25, "learning_rate": 1.5875676596388965e-05, "loss": 0.0629, "step": 218000 }, { "epoch": 8.27, "learning_rate": 1.586621370983005e-05, "loss": 0.0646, "step": 218500 }, { "epoch": 8.29, "learning_rate": 1.5856750823271132e-05, "loss": 0.0638, "step": 219000 }, { "epoch": 8.31, "learning_rate": 1.5847287936712216e-05, "loss": 0.0637, "step": 219500 }, { "epoch": 8.33, "learning_rate": 1.5837843975926415e-05, "loss": 0.064, "step": 220000 }, { "epoch": 8.35, "learning_rate": 1.5828381089367502e-05, "loss": 0.0643, "step": 220500 }, { "epoch": 8.37, "learning_rate": 1.5818918202808586e-05, "loss": 0.0616, "step": 221000 }, { "epoch": 8.38, "learning_rate": 1.580945531624967e-05, "loss": 0.0633, "step": 221500 }, { "epoch": 8.4, "learning_rate": 1.5799992429690754e-05, "loss": 0.0625, "step": 222000 }, { "epoch": 8.42, "learning_rate": 1.5790529543131837e-05, "loss": 0.0636, "step": 222500 }, { "epoch": 8.44, "learning_rate": 1.578108558234604e-05, "loss": 0.0638, "step": 223000 }, { "epoch": 8.46, "learning_rate": 1.5771622695787124e-05, "loss": 0.0643, "step": 223500 }, { "epoch": 8.48, "learning_rate": 1.5762159809228207e-05, "loss": 0.0625, "step": 224000 }, { "epoch": 8.5, "learning_rate": 1.575269692266929e-05, "loss": 0.0624, "step": 224500 }, { "epoch": 8.52, "learning_rate": 1.5743252961883494e-05, "loss": 0.0623, "step": 225000 }, { "epoch": 8.54, "learning_rate": 1.5733790075324578e-05, "loss": 0.0649, "step": 225500 }, { "epoch": 8.55, "learning_rate": 1.572432718876566e-05, "loss": 0.0627, "step": 226000 }, { "epoch": 8.57, "learning_rate": 1.571486430220675e-05, "loss": 0.0639, "step": 226500 }, { "epoch": 8.59, "learning_rate": 1.570540141564783e-05, "loss": 0.0635, "step": 227000 }, { "epoch": 8.61, "learning_rate": 1.5695938529088916e-05, "loss": 0.062, "step": 227500 }, { "epoch": 8.63, "learning_rate": 1.568647564253e-05, "loss": 0.0683, "step": 228000 }, { "epoch": 8.65, "learning_rate": 1.5677031681744202e-05, "loss": 0.0632, "step": 228500 }, { "epoch": 8.67, "learning_rate": 1.5667568795185283e-05, "loss": 0.0648, "step": 229000 }, { "epoch": 8.69, "learning_rate": 1.565810590862637e-05, "loss": 0.0652, "step": 229500 }, { "epoch": 8.71, "learning_rate": 1.5648643022067453e-05, "loss": 0.0653, "step": 230000 }, { "epoch": 8.72, "learning_rate": 1.5639199061281653e-05, "loss": 0.0638, "step": 230500 }, { "epoch": 8.74, "learning_rate": 1.562973617472274e-05, "loss": 0.0655, "step": 231000 }, { "epoch": 8.76, "learning_rate": 1.5620273288163824e-05, "loss": 0.063, "step": 231500 }, { "epoch": 8.78, "learning_rate": 1.5610810401604907e-05, "loss": 0.0647, "step": 232000 }, { "epoch": 8.8, "learning_rate": 1.560134751504599e-05, "loss": 0.0629, "step": 232500 }, { "epoch": 8.82, "learning_rate": 1.5591884628487075e-05, "loss": 0.063, "step": 233000 }, { "epoch": 8.84, "learning_rate": 1.558242174192816e-05, "loss": 0.0634, "step": 233500 }, { "epoch": 8.86, "learning_rate": 1.5572958855369242e-05, "loss": 0.0642, "step": 234000 }, { "epoch": 8.88, "learning_rate": 1.5563495968810326e-05, "loss": 0.0637, "step": 234500 }, { "epoch": 8.9, "learning_rate": 1.5554033082251413e-05, "loss": 0.0648, "step": 235000 }, { "epoch": 8.91, "learning_rate": 1.5544589121465616e-05, "loss": 0.0647, "step": 235500 }, { "epoch": 8.93, "learning_rate": 1.5535126234906696e-05, "loss": 0.0636, "step": 236000 }, { "epoch": 8.95, "learning_rate": 1.552566334834778e-05, "loss": 0.0648, "step": 236500 }, { "epoch": 8.97, "learning_rate": 1.5516200461788867e-05, "loss": 0.0654, "step": 237000 }, { "epoch": 8.99, "learning_rate": 1.5506737575229947e-05, "loss": 0.0662, "step": 237500 }, { "epoch": 9.0, "eval_bleu": 92.1314, "eval_gen_len": 16.3862, "eval_loss": 0.09200727194547653, "eval_runtime": 1201.1498, "eval_samples_per_second": 38.019, "eval_steps_per_second": 1.189, "step": 237771 }, { "epoch": 9.01, "learning_rate": 1.5497274688671034e-05, "loss": 0.0541, "step": 238000 }, { "epoch": 9.03, "learning_rate": 1.5487811802112118e-05, "loss": 0.0553, "step": 238500 }, { "epoch": 9.05, "learning_rate": 1.54783489155532e-05, "loss": 0.0573, "step": 239000 }, { "epoch": 9.07, "learning_rate": 1.5468886028994285e-05, "loss": 0.0581, "step": 239500 }, { "epoch": 9.08, "learning_rate": 1.545942314243537e-05, "loss": 0.0559, "step": 240000 }, { "epoch": 9.1, "learning_rate": 1.5449979181649572e-05, "loss": 0.0567, "step": 240500 }, { "epoch": 9.12, "learning_rate": 1.5440516295090656e-05, "loss": 0.0572, "step": 241000 }, { "epoch": 9.14, "learning_rate": 1.543105340853174e-05, "loss": 0.0561, "step": 241500 }, { "epoch": 9.16, "learning_rate": 1.5421590521972823e-05, "loss": 0.0564, "step": 242000 }, { "epoch": 9.18, "learning_rate": 1.5412127635413907e-05, "loss": 0.0562, "step": 242500 }, { "epoch": 9.2, "learning_rate": 1.540266474885499e-05, "loss": 0.0568, "step": 243000 }, { "epoch": 9.22, "learning_rate": 1.5393201862296077e-05, "loss": 0.0591, "step": 243500 }, { "epoch": 9.24, "learning_rate": 1.538375790151028e-05, "loss": 0.0578, "step": 244000 }, { "epoch": 9.25, "learning_rate": 1.537429501495136e-05, "loss": 0.0567, "step": 244500 }, { "epoch": 9.27, "learning_rate": 1.5364832128392448e-05, "loss": 0.058, "step": 245000 }, { "epoch": 9.29, "learning_rate": 1.535536924183353e-05, "loss": 0.0561, "step": 245500 }, { "epoch": 9.31, "learning_rate": 1.534592528104773e-05, "loss": 0.0589, "step": 246000 }, { "epoch": 9.33, "learning_rate": 1.5336462394488814e-05, "loss": 0.057, "step": 246500 }, { "epoch": 9.35, "learning_rate": 1.53269995079299e-05, "loss": 0.0577, "step": 247000 }, { "epoch": 9.37, "learning_rate": 1.5317536621370985e-05, "loss": 0.0557, "step": 247500 }, { "epoch": 9.39, "learning_rate": 1.530807373481207e-05, "loss": 0.0586, "step": 248000 }, { "epoch": 9.41, "learning_rate": 1.5298610848253153e-05, "loss": 0.059, "step": 248500 }, { "epoch": 9.43, "learning_rate": 1.5289166887467355e-05, "loss": 0.0598, "step": 249000 }, { "epoch": 9.44, "learning_rate": 1.5279722926681558e-05, "loss": 0.0585, "step": 249500 }, { "epoch": 9.46, "learning_rate": 1.527026004012264e-05, "loss": 0.0589, "step": 250000 }, { "epoch": 9.48, "learning_rate": 1.5260797153563726e-05, "loss": 0.0579, "step": 250500 }, { "epoch": 9.5, "learning_rate": 1.525133426700481e-05, "loss": 0.0577, "step": 251000 }, { "epoch": 9.52, "learning_rate": 1.5241871380445891e-05, "loss": 0.0577, "step": 251500 }, { "epoch": 9.54, "learning_rate": 1.5232408493886977e-05, "loss": 0.0587, "step": 252000 }, { "epoch": 9.56, "learning_rate": 1.522294560732806e-05, "loss": 0.0596, "step": 252500 }, { "epoch": 9.58, "learning_rate": 1.5213501646542263e-05, "loss": 0.0609, "step": 253000 }, { "epoch": 9.6, "learning_rate": 1.5204038759983347e-05, "loss": 0.0572, "step": 253500 }, { "epoch": 9.61, "learning_rate": 1.519457587342443e-05, "loss": 0.0581, "step": 254000 }, { "epoch": 9.63, "learning_rate": 1.5185112986865516e-05, "loss": 0.0591, "step": 254500 }, { "epoch": 9.65, "learning_rate": 1.5175650100306598e-05, "loss": 0.0606, "step": 255000 }, { "epoch": 9.67, "learning_rate": 1.5166187213747683e-05, "loss": 0.0594, "step": 255500 }, { "epoch": 9.69, "learning_rate": 1.5156724327188767e-05, "loss": 0.0603, "step": 256000 }, { "epoch": 9.71, "learning_rate": 1.514726144062985e-05, "loss": 0.06, "step": 256500 }, { "epoch": 9.73, "learning_rate": 1.5137798554070935e-05, "loss": 0.0585, "step": 257000 }, { "epoch": 9.75, "learning_rate": 1.512833566751202e-05, "loss": 0.058, "step": 257500 }, { "epoch": 9.77, "learning_rate": 1.5118891706726223e-05, "loss": 0.0595, "step": 258000 }, { "epoch": 9.78, "learning_rate": 1.5109428820167305e-05, "loss": 0.061, "step": 258500 }, { "epoch": 9.8, "learning_rate": 1.5099965933608388e-05, "loss": 0.0589, "step": 259000 }, { "epoch": 9.82, "learning_rate": 1.5090503047049474e-05, "loss": 0.0592, "step": 259500 }, { "epoch": 9.84, "learning_rate": 1.5081040160490556e-05, "loss": 0.0596, "step": 260000 }, { "epoch": 9.86, "learning_rate": 1.5071577273931641e-05, "loss": 0.0595, "step": 260500 }, { "epoch": 9.88, "learning_rate": 1.5062114387372727e-05, "loss": 0.0594, "step": 261000 }, { "epoch": 9.9, "learning_rate": 1.5052670426586928e-05, "loss": 0.0579, "step": 261500 }, { "epoch": 9.92, "learning_rate": 1.5043207540028011e-05, "loss": 0.0598, "step": 262000 }, { "epoch": 9.94, "learning_rate": 1.5033744653469095e-05, "loss": 0.06, "step": 262500 }, { "epoch": 9.95, "learning_rate": 1.502428176691018e-05, "loss": 0.0574, "step": 263000 }, { "epoch": 9.97, "learning_rate": 1.5014818880351263e-05, "loss": 0.0582, "step": 263500 }, { "epoch": 9.99, "learning_rate": 1.5005374919565465e-05, "loss": 0.0588, "step": 264000 }, { "epoch": 10.0, "eval_bleu": 92.2075, "eval_gen_len": 16.4288, "eval_loss": 0.09403391927480698, "eval_runtime": 1204.1417, "eval_samples_per_second": 37.925, "eval_steps_per_second": 1.186, "step": 264190 }, { "epoch": 10.01, "learning_rate": 1.499591203300655e-05, "loss": 0.0548, "step": 264500 }, { "epoch": 10.03, "learning_rate": 1.4986449146447634e-05, "loss": 0.0504, "step": 265000 }, { "epoch": 10.05, "learning_rate": 1.4976986259888716e-05, "loss": 0.0518, "step": 265500 }, { "epoch": 10.07, "learning_rate": 1.4967523373329802e-05, "loss": 0.052, "step": 266000 }, { "epoch": 10.09, "learning_rate": 1.4958060486770887e-05, "loss": 0.0529, "step": 266500 }, { "epoch": 10.11, "learning_rate": 1.494859760021197e-05, "loss": 0.0519, "step": 267000 }, { "epoch": 10.13, "learning_rate": 1.493917256519929e-05, "loss": 0.0516, "step": 267500 }, { "epoch": 10.14, "learning_rate": 1.4929709678640373e-05, "loss": 0.0532, "step": 268000 }, { "epoch": 10.16, "learning_rate": 1.4920246792081459e-05, "loss": 0.0513, "step": 268500 }, { "epoch": 10.18, "learning_rate": 1.491078390552254e-05, "loss": 0.053, "step": 269000 }, { "epoch": 10.2, "learning_rate": 1.4901321018963626e-05, "loss": 0.0528, "step": 269500 }, { "epoch": 10.22, "learning_rate": 1.4891858132404711e-05, "loss": 0.0504, "step": 270000 }, { "epoch": 10.24, "learning_rate": 1.488241417161891e-05, "loss": 0.0539, "step": 270500 }, { "epoch": 10.26, "learning_rate": 1.4872951285059996e-05, "loss": 0.0541, "step": 271000 }, { "epoch": 10.28, "learning_rate": 1.486348839850108e-05, "loss": 0.0516, "step": 271500 }, { "epoch": 10.3, "learning_rate": 1.4854025511942165e-05, "loss": 0.0532, "step": 272000 }, { "epoch": 10.31, "learning_rate": 1.4844562625383247e-05, "loss": 0.0511, "step": 272500 }, { "epoch": 10.33, "learning_rate": 1.4835099738824333e-05, "loss": 0.0525, "step": 273000 }, { "epoch": 10.35, "learning_rate": 1.4825636852265416e-05, "loss": 0.0539, "step": 273500 }, { "epoch": 10.37, "learning_rate": 1.48161739657065e-05, "loss": 0.0522, "step": 274000 }, { "epoch": 10.39, "learning_rate": 1.4806711079147584e-05, "loss": 0.0536, "step": 274500 }, { "epoch": 10.41, "learning_rate": 1.4797248192588669e-05, "loss": 0.0536, "step": 275000 }, { "epoch": 10.43, "learning_rate": 1.4787785306029751e-05, "loss": 0.0527, "step": 275500 }, { "epoch": 10.45, "learning_rate": 1.4778322419470837e-05, "loss": 0.053, "step": 276000 }, { "epoch": 10.47, "learning_rate": 1.4768859532911922e-05, "loss": 0.0527, "step": 276500 }, { "epoch": 10.48, "learning_rate": 1.4759415572126123e-05, "loss": 0.0547, "step": 277000 }, { "epoch": 10.5, "learning_rate": 1.4749952685567207e-05, "loss": 0.054, "step": 277500 }, { "epoch": 10.52, "learning_rate": 1.474048979900829e-05, "loss": 0.0541, "step": 278000 }, { "epoch": 10.54, "learning_rate": 1.473106476399561e-05, "loss": 0.055, "step": 278500 }, { "epoch": 10.56, "learning_rate": 1.4721601877436696e-05, "loss": 0.0546, "step": 279000 }, { "epoch": 10.58, "learning_rate": 1.4712138990877778e-05, "loss": 0.0522, "step": 279500 }, { "epoch": 10.6, "learning_rate": 1.4702676104318863e-05, "loss": 0.0536, "step": 280000 }, { "epoch": 10.62, "learning_rate": 1.4693232143533065e-05, "loss": 0.0531, "step": 280500 }, { "epoch": 10.64, "learning_rate": 1.468376925697415e-05, "loss": 0.0557, "step": 281000 }, { "epoch": 10.66, "learning_rate": 1.4674306370415232e-05, "loss": 0.0557, "step": 281500 }, { "epoch": 10.67, "learning_rate": 1.4664843483856317e-05, "loss": 0.0535, "step": 282000 }, { "epoch": 10.69, "learning_rate": 1.4655380597297401e-05, "loss": 0.0535, "step": 282500 }, { "epoch": 10.71, "learning_rate": 1.4645917710738485e-05, "loss": 0.0532, "step": 283000 }, { "epoch": 10.73, "learning_rate": 1.4636473749952686e-05, "loss": 0.0541, "step": 283500 }, { "epoch": 10.75, "learning_rate": 1.4627010863393771e-05, "loss": 0.0542, "step": 284000 }, { "epoch": 10.77, "learning_rate": 1.4617547976834853e-05, "loss": 0.054, "step": 284500 }, { "epoch": 10.79, "learning_rate": 1.4608085090275939e-05, "loss": 0.052, "step": 285000 }, { "epoch": 10.81, "learning_rate": 1.4598622203717024e-05, "loss": 0.0537, "step": 285500 }, { "epoch": 10.83, "learning_rate": 1.4589159317158108e-05, "loss": 0.0558, "step": 286000 }, { "epoch": 10.84, "learning_rate": 1.4579696430599191e-05, "loss": 0.0558, "step": 286500 }, { "epoch": 10.86, "learning_rate": 1.4570233544040275e-05, "loss": 0.0545, "step": 287000 }, { "epoch": 10.88, "learning_rate": 1.4560789583254478e-05, "loss": 0.0544, "step": 287500 }, { "epoch": 10.9, "learning_rate": 1.455134562246868e-05, "loss": 0.0539, "step": 288000 }, { "epoch": 10.92, "learning_rate": 1.4541882735909763e-05, "loss": 0.0548, "step": 288500 }, { "epoch": 10.94, "learning_rate": 1.4532419849350846e-05, "loss": 0.0524, "step": 289000 }, { "epoch": 10.96, "learning_rate": 1.4522956962791932e-05, "loss": 0.0546, "step": 289500 }, { "epoch": 10.98, "learning_rate": 1.4513494076233014e-05, "loss": 0.0552, "step": 290000 }, { "epoch": 11.0, "learning_rate": 1.45040311896741e-05, "loss": 0.0556, "step": 290500 }, { "epoch": 11.0, "eval_bleu": 92.2181, "eval_gen_len": 16.4826, "eval_loss": 0.09591283649206161, "eval_runtime": 1213.601, "eval_samples_per_second": 37.629, "eval_steps_per_second": 1.177, "step": 290609 }, { "epoch": 11.01, "learning_rate": 1.4494568303115185e-05, "loss": 0.0494, "step": 291000 }, { "epoch": 11.03, "learning_rate": 1.4485105416556267e-05, "loss": 0.0457, "step": 291500 }, { "epoch": 11.05, "learning_rate": 1.447566145577047e-05, "loss": 0.0474, "step": 292000 }, { "epoch": 11.07, "learning_rate": 1.4466198569211553e-05, "loss": 0.0492, "step": 292500 }, { "epoch": 11.09, "learning_rate": 1.4456735682652639e-05, "loss": 0.0467, "step": 293000 }, { "epoch": 11.11, "learning_rate": 1.444727279609372e-05, "loss": 0.0489, "step": 293500 }, { "epoch": 11.13, "learning_rate": 1.4437809909534806e-05, "loss": 0.0478, "step": 294000 }, { "epoch": 11.15, "learning_rate": 1.442834702297589e-05, "loss": 0.0472, "step": 294500 }, { "epoch": 11.17, "learning_rate": 1.4418903062190092e-05, "loss": 0.0475, "step": 295000 }, { "epoch": 11.19, "learning_rate": 1.4409459101404294e-05, "loss": 0.0484, "step": 295500 }, { "epoch": 11.2, "learning_rate": 1.4399996214845377e-05, "loss": 0.0487, "step": 296000 }, { "epoch": 11.22, "learning_rate": 1.4390533328286463e-05, "loss": 0.0472, "step": 296500 }, { "epoch": 11.24, "learning_rate": 1.4381070441727545e-05, "loss": 0.0473, "step": 297000 }, { "epoch": 11.26, "learning_rate": 1.437160755516863e-05, "loss": 0.0465, "step": 297500 }, { "epoch": 11.28, "learning_rate": 1.4362144668609714e-05, "loss": 0.0477, "step": 298000 }, { "epoch": 11.3, "learning_rate": 1.4352681782050797e-05, "loss": 0.0478, "step": 298500 }, { "epoch": 11.32, "learning_rate": 1.4343218895491881e-05, "loss": 0.0478, "step": 299000 }, { "epoch": 11.34, "learning_rate": 1.4333756008932967e-05, "loss": 0.0489, "step": 299500 }, { "epoch": 11.36, "learning_rate": 1.4324293122374052e-05, "loss": 0.0479, "step": 300000 }, { "epoch": 11.37, "learning_rate": 1.4314849161588251e-05, "loss": 0.0482, "step": 300500 }, { "epoch": 11.39, "learning_rate": 1.4305386275029337e-05, "loss": 0.0478, "step": 301000 }, { "epoch": 11.41, "learning_rate": 1.429592338847042e-05, "loss": 0.0488, "step": 301500 }, { "epoch": 11.43, "learning_rate": 1.4286479427684623e-05, "loss": 0.0487, "step": 302000 }, { "epoch": 11.45, "learning_rate": 1.4277016541125705e-05, "loss": 0.0488, "step": 302500 }, { "epoch": 11.47, "learning_rate": 1.426755365456679e-05, "loss": 0.0496, "step": 303000 }, { "epoch": 11.49, "learning_rate": 1.4258090768007874e-05, "loss": 0.0492, "step": 303500 }, { "epoch": 11.51, "learning_rate": 1.4248627881448958e-05, "loss": 0.0494, "step": 304000 }, { "epoch": 11.53, "learning_rate": 1.4239164994890042e-05, "loss": 0.0499, "step": 304500 }, { "epoch": 11.54, "learning_rate": 1.4229702108331127e-05, "loss": 0.0492, "step": 305000 }, { "epoch": 11.56, "learning_rate": 1.4220239221772209e-05, "loss": 0.0485, "step": 305500 }, { "epoch": 11.58, "learning_rate": 1.4210776335213295e-05, "loss": 0.0492, "step": 306000 }, { "epoch": 11.6, "learning_rate": 1.4201332374427497e-05, "loss": 0.0494, "step": 306500 }, { "epoch": 11.62, "learning_rate": 1.4191869487868581e-05, "loss": 0.0501, "step": 307000 }, { "epoch": 11.64, "learning_rate": 1.4182406601309665e-05, "loss": 0.0499, "step": 307500 }, { "epoch": 11.66, "learning_rate": 1.4172943714750748e-05, "loss": 0.0507, "step": 308000 }, { "epoch": 11.68, "learning_rate": 1.4163480828191834e-05, "loss": 0.0493, "step": 308500 }, { "epoch": 11.7, "learning_rate": 1.4154017941632916e-05, "loss": 0.0499, "step": 309000 }, { "epoch": 11.72, "learning_rate": 1.4144555055074001e-05, "loss": 0.0506, "step": 309500 }, { "epoch": 11.73, "learning_rate": 1.4135092168515085e-05, "loss": 0.0506, "step": 310000 }, { "epoch": 11.75, "learning_rate": 1.4125648207729288e-05, "loss": 0.0492, "step": 310500 }, { "epoch": 11.77, "learning_rate": 1.4116204246943487e-05, "loss": 0.0485, "step": 311000 }, { "epoch": 11.79, "learning_rate": 1.4106741360384573e-05, "loss": 0.0509, "step": 311500 }, { "epoch": 11.81, "learning_rate": 1.4097297399598775e-05, "loss": 0.0497, "step": 312000 }, { "epoch": 11.83, "learning_rate": 1.4087834513039859e-05, "loss": 0.0492, "step": 312500 }, { "epoch": 11.85, "learning_rate": 1.4078371626480943e-05, "loss": 0.05, "step": 313000 }, { "epoch": 11.87, "learning_rate": 1.4068908739922026e-05, "loss": 0.0507, "step": 313500 }, { "epoch": 11.89, "learning_rate": 1.4059445853363112e-05, "loss": 0.0508, "step": 314000 }, { "epoch": 11.9, "learning_rate": 1.4049982966804194e-05, "loss": 0.0506, "step": 314500 }, { "epoch": 11.92, "learning_rate": 1.404052008024528e-05, "loss": 0.0488, "step": 315000 }, { "epoch": 11.94, "learning_rate": 1.4031057193686363e-05, "loss": 0.0501, "step": 315500 }, { "epoch": 11.96, "learning_rate": 1.4021594307127447e-05, "loss": 0.048, "step": 316000 }, { "epoch": 11.98, "learning_rate": 1.401215034634165e-05, "loss": 0.0519, "step": 316500 }, { "epoch": 12.0, "learning_rate": 1.4002687459782733e-05, "loss": 0.049, "step": 317000 }, { "epoch": 12.0, "eval_bleu": 92.2414, "eval_gen_len": 16.4298, "eval_loss": 0.09784110635519028, "eval_runtime": 1192.5959, "eval_samples_per_second": 38.292, "eval_steps_per_second": 1.197, "step": 317028 }, { "epoch": 12.02, "learning_rate": 1.3993224573223819e-05, "loss": 0.0424, "step": 317500 }, { "epoch": 12.04, "learning_rate": 1.3983780612438018e-05, "loss": 0.0414, "step": 318000 }, { "epoch": 12.06, "learning_rate": 1.3974317725879103e-05, "loss": 0.0424, "step": 318500 }, { "epoch": 12.07, "learning_rate": 1.3964854839320187e-05, "loss": 0.0422, "step": 319000 }, { "epoch": 12.09, "learning_rate": 1.3955391952761272e-05, "loss": 0.0433, "step": 319500 }, { "epoch": 12.11, "learning_rate": 1.3945929066202354e-05, "loss": 0.0429, "step": 320000 }, { "epoch": 12.13, "learning_rate": 1.393646617964344e-05, "loss": 0.0433, "step": 320500 }, { "epoch": 12.15, "learning_rate": 1.3927003293084525e-05, "loss": 0.044, "step": 321000 }, { "epoch": 12.17, "learning_rate": 1.3917540406525607e-05, "loss": 0.0443, "step": 321500 }, { "epoch": 12.19, "learning_rate": 1.3908077519966691e-05, "loss": 0.0436, "step": 322000 }, { "epoch": 12.21, "learning_rate": 1.3898614633407776e-05, "loss": 0.0436, "step": 322500 }, { "epoch": 12.23, "learning_rate": 1.3889151746848858e-05, "loss": 0.0435, "step": 323000 }, { "epoch": 12.24, "learning_rate": 1.3879688860289944e-05, "loss": 0.0437, "step": 323500 }, { "epoch": 12.26, "learning_rate": 1.3870244899504147e-05, "loss": 0.0461, "step": 324000 }, { "epoch": 12.28, "learning_rate": 1.386078201294523e-05, "loss": 0.0442, "step": 324500 }, { "epoch": 12.3, "learning_rate": 1.3851319126386314e-05, "loss": 0.0447, "step": 325000 }, { "epoch": 12.32, "learning_rate": 1.3841856239827398e-05, "loss": 0.0457, "step": 325500 }, { "epoch": 12.34, "learning_rate": 1.38324122790416e-05, "loss": 0.0436, "step": 326000 }, { "epoch": 12.36, "learning_rate": 1.3822949392482682e-05, "loss": 0.0445, "step": 326500 }, { "epoch": 12.38, "learning_rate": 1.3813486505923768e-05, "loss": 0.0456, "step": 327000 }, { "epoch": 12.4, "learning_rate": 1.3804023619364853e-05, "loss": 0.0452, "step": 327500 }, { "epoch": 12.42, "learning_rate": 1.3794560732805937e-05, "loss": 0.0456, "step": 328000 }, { "epoch": 12.43, "learning_rate": 1.3785116772020138e-05, "loss": 0.0446, "step": 328500 }, { "epoch": 12.45, "learning_rate": 1.3775653885461222e-05, "loss": 0.0451, "step": 329000 }, { "epoch": 12.47, "learning_rate": 1.3766190998902307e-05, "loss": 0.0457, "step": 329500 }, { "epoch": 12.49, "learning_rate": 1.3756728112343389e-05, "loss": 0.0448, "step": 330000 }, { "epoch": 12.51, "learning_rate": 1.3747265225784475e-05, "loss": 0.0446, "step": 330500 }, { "epoch": 12.53, "learning_rate": 1.3737840190771795e-05, "loss": 0.0455, "step": 331000 }, { "epoch": 12.55, "learning_rate": 1.3728377304212878e-05, "loss": 0.0449, "step": 331500 }, { "epoch": 12.57, "learning_rate": 1.371891441765396e-05, "loss": 0.044, "step": 332000 }, { "epoch": 12.59, "learning_rate": 1.3709451531095046e-05, "loss": 0.0445, "step": 332500 }, { "epoch": 12.6, "learning_rate": 1.3699988644536131e-05, "loss": 0.0454, "step": 333000 }, { "epoch": 12.62, "learning_rate": 1.3690525757977215e-05, "loss": 0.0455, "step": 333500 }, { "epoch": 12.64, "learning_rate": 1.3681062871418299e-05, "loss": 0.0453, "step": 334000 }, { "epoch": 12.66, "learning_rate": 1.36716189106325e-05, "loss": 0.0456, "step": 334500 }, { "epoch": 12.68, "learning_rate": 1.3662156024073585e-05, "loss": 0.0447, "step": 335000 }, { "epoch": 12.7, "learning_rate": 1.3652693137514667e-05, "loss": 0.0442, "step": 335500 }, { "epoch": 12.72, "learning_rate": 1.3643230250955753e-05, "loss": 0.0455, "step": 336000 }, { "epoch": 12.74, "learning_rate": 1.3633786290169955e-05, "loss": 0.0449, "step": 336500 }, { "epoch": 12.76, "learning_rate": 1.3624323403611039e-05, "loss": 0.0446, "step": 337000 }, { "epoch": 12.77, "learning_rate": 1.3614860517052123e-05, "loss": 0.0451, "step": 337500 }, { "epoch": 12.79, "learning_rate": 1.3605397630493206e-05, "loss": 0.0448, "step": 338000 }, { "epoch": 12.81, "learning_rate": 1.3595934743934292e-05, "loss": 0.0454, "step": 338500 }, { "epoch": 12.83, "learning_rate": 1.3586490783148495e-05, "loss": 0.0464, "step": 339000 }, { "epoch": 12.85, "learning_rate": 1.3577027896589577e-05, "loss": 0.0458, "step": 339500 }, { "epoch": 12.87, "learning_rate": 1.356756501003066e-05, "loss": 0.0449, "step": 340000 }, { "epoch": 12.89, "learning_rate": 1.3558102123471746e-05, "loss": 0.0459, "step": 340500 }, { "epoch": 12.91, "learning_rate": 1.3548639236912828e-05, "loss": 0.0443, "step": 341000 }, { "epoch": 12.93, "learning_rate": 1.3539176350353913e-05, "loss": 0.0458, "step": 341500 }, { "epoch": 12.95, "learning_rate": 1.3529713463794999e-05, "loss": 0.0463, "step": 342000 }, { "epoch": 12.96, "learning_rate": 1.352025057723608e-05, "loss": 0.0468, "step": 342500 }, { "epoch": 12.98, "learning_rate": 1.3510806616450283e-05, "loss": 0.045, "step": 343000 }, { "epoch": 13.0, "eval_bleu": 92.2161, "eval_gen_len": 16.443, "eval_loss": 0.09937400370836258, "eval_runtime": 1185.3348, "eval_samples_per_second": 38.527, "eval_steps_per_second": 1.205, "step": 343447 }, { "epoch": 13.0, "learning_rate": 1.3501343729891367e-05, "loss": 0.0445, "step": 343500 }, { "epoch": 13.02, "learning_rate": 1.3491880843332452e-05, "loss": 0.0381, "step": 344000 }, { "epoch": 13.04, "learning_rate": 1.3482417956773534e-05, "loss": 0.0397, "step": 344500 }, { "epoch": 13.06, "learning_rate": 1.3472973995987737e-05, "loss": 0.0385, "step": 345000 }, { "epoch": 13.08, "learning_rate": 1.3463511109428823e-05, "loss": 0.0393, "step": 345500 }, { "epoch": 13.1, "learning_rate": 1.3454067148643024e-05, "loss": 0.0392, "step": 346000 }, { "epoch": 13.12, "learning_rate": 1.3444604262084107e-05, "loss": 0.0385, "step": 346500 }, { "epoch": 13.13, "learning_rate": 1.3435141375525191e-05, "loss": 0.0403, "step": 347000 }, { "epoch": 13.15, "learning_rate": 1.3425697414739394e-05, "loss": 0.039, "step": 347500 }, { "epoch": 13.17, "learning_rate": 1.3416234528180476e-05, "loss": 0.0402, "step": 348000 }, { "epoch": 13.19, "learning_rate": 1.3406771641621561e-05, "loss": 0.0397, "step": 348500 }, { "epoch": 13.21, "learning_rate": 1.3397308755062645e-05, "loss": 0.0405, "step": 349000 }, { "epoch": 13.23, "learning_rate": 1.338784586850373e-05, "loss": 0.0407, "step": 349500 }, { "epoch": 13.25, "learning_rate": 1.3378382981944812e-05, "loss": 0.04, "step": 350000 }, { "epoch": 13.27, "learning_rate": 1.3368920095385898e-05, "loss": 0.0395, "step": 350500 }, { "epoch": 13.29, "learning_rate": 1.3359457208826983e-05, "loss": 0.0393, "step": 351000 }, { "epoch": 13.3, "learning_rate": 1.3349994322268065e-05, "loss": 0.0404, "step": 351500 }, { "epoch": 13.32, "learning_rate": 1.334053143570915e-05, "loss": 0.0401, "step": 352000 }, { "epoch": 13.34, "learning_rate": 1.3331106400696469e-05, "loss": 0.0404, "step": 352500 }, { "epoch": 13.36, "learning_rate": 1.3321643514137554e-05, "loss": 0.041, "step": 353000 }, { "epoch": 13.38, "learning_rate": 1.3312180627578637e-05, "loss": 0.0418, "step": 353500 }, { "epoch": 13.4, "learning_rate": 1.3302717741019722e-05, "loss": 0.0402, "step": 354000 }, { "epoch": 13.42, "learning_rate": 1.3293254854460806e-05, "loss": 0.0405, "step": 354500 }, { "epoch": 13.44, "learning_rate": 1.328379196790189e-05, "loss": 0.0415, "step": 355000 }, { "epoch": 13.46, "learning_rate": 1.3274348007116092e-05, "loss": 0.0415, "step": 355500 }, { "epoch": 13.48, "learning_rate": 1.3264885120557176e-05, "loss": 0.0406, "step": 356000 }, { "epoch": 13.49, "learning_rate": 1.3255422233998261e-05, "loss": 0.0409, "step": 356500 }, { "epoch": 13.51, "learning_rate": 1.3245959347439343e-05, "loss": 0.0404, "step": 357000 }, { "epoch": 13.53, "learning_rate": 1.3236496460880429e-05, "loss": 0.0405, "step": 357500 }, { "epoch": 13.55, "learning_rate": 1.3227033574321512e-05, "loss": 0.0419, "step": 358000 }, { "epoch": 13.57, "learning_rate": 1.3217570687762596e-05, "loss": 0.0396, "step": 358500 }, { "epoch": 13.59, "learning_rate": 1.320810780120368e-05, "loss": 0.0409, "step": 359000 }, { "epoch": 13.61, "learning_rate": 1.3198644914644765e-05, "loss": 0.0419, "step": 359500 }, { "epoch": 13.63, "learning_rate": 1.3189182028085847e-05, "loss": 0.0407, "step": 360000 }, { "epoch": 13.65, "learning_rate": 1.3179719141526932e-05, "loss": 0.0413, "step": 360500 }, { "epoch": 13.66, "learning_rate": 1.3170256254968016e-05, "loss": 0.041, "step": 361000 }, { "epoch": 13.68, "learning_rate": 1.3160812294182219e-05, "loss": 0.0416, "step": 361500 }, { "epoch": 13.7, "learning_rate": 1.3151349407623301e-05, "loss": 0.0415, "step": 362000 }, { "epoch": 13.72, "learning_rate": 1.3141905446837504e-05, "loss": 0.0426, "step": 362500 }, { "epoch": 13.74, "learning_rate": 1.313244256027859e-05, "loss": 0.0418, "step": 363000 }, { "epoch": 13.76, "learning_rate": 1.3122979673719673e-05, "loss": 0.0414, "step": 363500 }, { "epoch": 13.78, "learning_rate": 1.3113535712933874e-05, "loss": 0.0431, "step": 364000 }, { "epoch": 13.8, "learning_rate": 1.3104072826374958e-05, "loss": 0.0402, "step": 364500 }, { "epoch": 13.82, "learning_rate": 1.3094609939816043e-05, "loss": 0.0417, "step": 365000 }, { "epoch": 13.83, "learning_rate": 1.3085147053257125e-05, "loss": 0.0405, "step": 365500 }, { "epoch": 13.85, "learning_rate": 1.307568416669821e-05, "loss": 0.0416, "step": 366000 }, { "epoch": 13.87, "learning_rate": 1.3066221280139296e-05, "loss": 0.0429, "step": 366500 }, { "epoch": 13.89, "learning_rate": 1.305675839358038e-05, "loss": 0.0408, "step": 367000 }, { "epoch": 13.91, "learning_rate": 1.3047295507021462e-05, "loss": 0.0425, "step": 367500 }, { "epoch": 13.93, "learning_rate": 1.3037832620462547e-05, "loss": 0.0414, "step": 368000 }, { "epoch": 13.95, "learning_rate": 1.3028369733903632e-05, "loss": 0.0427, "step": 368500 }, { "epoch": 13.97, "learning_rate": 1.3018906847344714e-05, "loss": 0.0413, "step": 369000 }, { "epoch": 13.99, "learning_rate": 1.30094439607858e-05, "loss": 0.0418, "step": 369500 }, { "epoch": 14.0, "eval_bleu": 92.2123, "eval_gen_len": 16.4615, "eval_loss": 0.10214965045452118, "eval_runtime": 1189.3745, "eval_samples_per_second": 38.396, "eval_steps_per_second": 1.201, "step": 369866 }, { "epoch": 14.01, "learning_rate": 1.2999981074226883e-05, "loss": 0.0396, "step": 370000 }, { "epoch": 14.02, "learning_rate": 1.2990537113441085e-05, "loss": 0.0336, "step": 370500 }, { "epoch": 14.04, "learning_rate": 1.2981074226882168e-05, "loss": 0.0348, "step": 371000 }, { "epoch": 14.06, "learning_rate": 1.2971630266096371e-05, "loss": 0.0358, "step": 371500 }, { "epoch": 14.08, "learning_rate": 1.2962167379537456e-05, "loss": 0.0359, "step": 372000 }, { "epoch": 14.1, "learning_rate": 1.2952704492978538e-05, "loss": 0.0358, "step": 372500 }, { "epoch": 14.12, "learning_rate": 1.2943241606419624e-05, "loss": 0.0351, "step": 373000 }, { "epoch": 14.14, "learning_rate": 1.2933778719860708e-05, "loss": 0.0362, "step": 373500 }, { "epoch": 14.16, "learning_rate": 1.292431583330179e-05, "loss": 0.0356, "step": 374000 }, { "epoch": 14.18, "learning_rate": 1.2914871872515992e-05, "loss": 0.0359, "step": 374500 }, { "epoch": 14.19, "learning_rate": 1.2905408985957078e-05, "loss": 0.0366, "step": 375000 }, { "epoch": 14.21, "learning_rate": 1.2895946099398161e-05, "loss": 0.035, "step": 375500 }, { "epoch": 14.23, "learning_rate": 1.2886483212839245e-05, "loss": 0.0363, "step": 376000 }, { "epoch": 14.25, "learning_rate": 1.2877020326280329e-05, "loss": 0.0374, "step": 376500 }, { "epoch": 14.27, "learning_rate": 1.2867576365494532e-05, "loss": 0.0369, "step": 377000 }, { "epoch": 14.29, "learning_rate": 1.2858113478935617e-05, "loss": 0.0353, "step": 377500 }, { "epoch": 14.31, "learning_rate": 1.2848650592376699e-05, "loss": 0.0366, "step": 378000 }, { "epoch": 14.33, "learning_rate": 1.2839187705817784e-05, "loss": 0.0355, "step": 378500 }, { "epoch": 14.35, "learning_rate": 1.2829724819258868e-05, "loss": 0.0365, "step": 379000 }, { "epoch": 14.36, "learning_rate": 1.2820261932699952e-05, "loss": 0.0371, "step": 379500 }, { "epoch": 14.38, "learning_rate": 1.2810799046141036e-05, "loss": 0.0371, "step": 380000 }, { "epoch": 14.4, "learning_rate": 1.2801336159582121e-05, "loss": 0.0366, "step": 380500 }, { "epoch": 14.42, "learning_rate": 1.2791873273023203e-05, "loss": 0.0371, "step": 381000 }, { "epoch": 14.44, "learning_rate": 1.2782429312237406e-05, "loss": 0.0371, "step": 381500 }, { "epoch": 14.46, "learning_rate": 1.277296642567849e-05, "loss": 0.0362, "step": 382000 }, { "epoch": 14.48, "learning_rate": 1.2763503539119575e-05, "loss": 0.0365, "step": 382500 }, { "epoch": 14.5, "learning_rate": 1.2754059578333774e-05, "loss": 0.037, "step": 383000 }, { "epoch": 14.52, "learning_rate": 1.274459669177486e-05, "loss": 0.0375, "step": 383500 }, { "epoch": 14.53, "learning_rate": 1.2735133805215945e-05, "loss": 0.0372, "step": 384000 }, { "epoch": 14.55, "learning_rate": 1.2725670918657027e-05, "loss": 0.0373, "step": 384500 }, { "epoch": 14.57, "learning_rate": 1.2716208032098112e-05, "loss": 0.0363, "step": 385000 }, { "epoch": 14.59, "learning_rate": 1.2706745145539196e-05, "loss": 0.0374, "step": 385500 }, { "epoch": 14.61, "learning_rate": 1.2697282258980282e-05, "loss": 0.0373, "step": 386000 }, { "epoch": 14.63, "learning_rate": 1.2687838298194481e-05, "loss": 0.0381, "step": 386500 }, { "epoch": 14.65, "learning_rate": 1.2678375411635566e-05, "loss": 0.0383, "step": 387000 }, { "epoch": 14.67, "learning_rate": 1.266891252507665e-05, "loss": 0.0369, "step": 387500 }, { "epoch": 14.69, "learning_rate": 1.2659449638517734e-05, "loss": 0.0385, "step": 388000 }, { "epoch": 14.71, "learning_rate": 1.2649986751958817e-05, "loss": 0.0384, "step": 388500 }, { "epoch": 14.72, "learning_rate": 1.2640523865399903e-05, "loss": 0.0391, "step": 389000 }, { "epoch": 14.74, "learning_rate": 1.2631060978840988e-05, "loss": 0.0379, "step": 389500 }, { "epoch": 14.76, "learning_rate": 1.262159809228207e-05, "loss": 0.0376, "step": 390000 }, { "epoch": 14.78, "learning_rate": 1.2612135205723156e-05, "loss": 0.0383, "step": 390500 }, { "epoch": 14.8, "learning_rate": 1.260267231916424e-05, "loss": 0.0378, "step": 391000 }, { "epoch": 14.82, "learning_rate": 1.2593209432605323e-05, "loss": 0.0372, "step": 391500 }, { "epoch": 14.84, "learning_rate": 1.2583746546046407e-05, "loss": 0.0375, "step": 392000 }, { "epoch": 14.86, "learning_rate": 1.2574283659487492e-05, "loss": 0.0375, "step": 392500 }, { "epoch": 14.88, "learning_rate": 1.2564839698701693e-05, "loss": 0.0379, "step": 393000 }, { "epoch": 14.89, "learning_rate": 1.2555395737915894e-05, "loss": 0.0393, "step": 393500 }, { "epoch": 14.91, "learning_rate": 1.2545932851356978e-05, "loss": 0.0381, "step": 394000 }, { "epoch": 14.93, "learning_rate": 1.2536488890571181e-05, "loss": 0.0382, "step": 394500 }, { "epoch": 14.95, "learning_rate": 1.2527026004012266e-05, "loss": 0.0392, "step": 395000 }, { "epoch": 14.97, "learning_rate": 1.2517563117453348e-05, "loss": 0.0375, "step": 395500 }, { "epoch": 14.99, "learning_rate": 1.2508100230894434e-05, "loss": 0.0389, "step": 396000 }, { "epoch": 15.0, "eval_bleu": 92.1099, "eval_gen_len": 16.4498, "eval_loss": 0.10496072471141815, "eval_runtime": 1187.4741, "eval_samples_per_second": 38.457, "eval_steps_per_second": 1.203, "step": 396285 }, { "epoch": 15.01, "learning_rate": 1.2498637344335517e-05, "loss": 0.0357, "step": 396500 }, { "epoch": 15.03, "learning_rate": 1.2489174457776601e-05, "loss": 0.0318, "step": 397000 }, { "epoch": 15.05, "learning_rate": 1.2479711571217685e-05, "loss": 0.0329, "step": 397500 }, { "epoch": 15.06, "learning_rate": 1.247024868465877e-05, "loss": 0.0329, "step": 398000 }, { "epoch": 15.08, "learning_rate": 1.2460785798099852e-05, "loss": 0.0331, "step": 398500 }, { "epoch": 15.1, "learning_rate": 1.2451322911540938e-05, "loss": 0.0338, "step": 399000 }, { "epoch": 15.12, "learning_rate": 1.2441860024982021e-05, "loss": 0.0319, "step": 399500 }, { "epoch": 15.14, "learning_rate": 1.2432397138423105e-05, "loss": 0.0337, "step": 400000 }, { "epoch": 15.16, "learning_rate": 1.2422934251864189e-05, "loss": 0.0329, "step": 400500 }, { "epoch": 15.18, "learning_rate": 1.2413490291078391e-05, "loss": 0.0326, "step": 401000 }, { "epoch": 15.2, "learning_rate": 1.2404046330292594e-05, "loss": 0.0325, "step": 401500 }, { "epoch": 15.22, "learning_rate": 1.2394583443733676e-05, "loss": 0.0332, "step": 402000 }, { "epoch": 15.24, "learning_rate": 1.2385120557174762e-05, "loss": 0.0343, "step": 402500 }, { "epoch": 15.25, "learning_rate": 1.2375657670615845e-05, "loss": 0.0324, "step": 403000 }, { "epoch": 15.27, "learning_rate": 1.236619478405693e-05, "loss": 0.0331, "step": 403500 }, { "epoch": 15.29, "learning_rate": 1.2356731897498013e-05, "loss": 0.0318, "step": 404000 }, { "epoch": 15.31, "learning_rate": 1.2347269010939098e-05, "loss": 0.0317, "step": 404500 }, { "epoch": 15.33, "learning_rate": 1.2337806124380184e-05, "loss": 0.0333, "step": 405000 }, { "epoch": 15.35, "learning_rate": 1.2328343237821266e-05, "loss": 0.0346, "step": 405500 }, { "epoch": 15.37, "learning_rate": 1.231888035126235e-05, "loss": 0.0334, "step": 406000 }, { "epoch": 15.39, "learning_rate": 1.2309436390476552e-05, "loss": 0.0331, "step": 406500 }, { "epoch": 15.41, "learning_rate": 1.2299973503917637e-05, "loss": 0.0342, "step": 407000 }, { "epoch": 15.42, "learning_rate": 1.229051061735872e-05, "loss": 0.0338, "step": 407500 }, { "epoch": 15.44, "learning_rate": 1.2281047730799805e-05, "loss": 0.0331, "step": 408000 }, { "epoch": 15.46, "learning_rate": 1.2271603770014006e-05, "loss": 0.0343, "step": 408500 }, { "epoch": 15.48, "learning_rate": 1.226214088345509e-05, "loss": 0.0345, "step": 409000 }, { "epoch": 15.5, "learning_rate": 1.2252677996896173e-05, "loss": 0.0337, "step": 409500 }, { "epoch": 15.52, "learning_rate": 1.2243215110337259e-05, "loss": 0.0343, "step": 410000 }, { "epoch": 15.54, "learning_rate": 1.223375222377834e-05, "loss": 0.0335, "step": 410500 }, { "epoch": 15.56, "learning_rate": 1.2224289337219426e-05, "loss": 0.0335, "step": 411000 }, { "epoch": 15.58, "learning_rate": 1.2214845376433629e-05, "loss": 0.0345, "step": 411500 }, { "epoch": 15.59, "learning_rate": 1.220540141564783e-05, "loss": 0.0337, "step": 412000 }, { "epoch": 15.61, "learning_rate": 1.2195938529088914e-05, "loss": 0.0339, "step": 412500 }, { "epoch": 15.63, "learning_rate": 1.2186475642529997e-05, "loss": 0.0359, "step": 413000 }, { "epoch": 15.65, "learning_rate": 1.2177012755971083e-05, "loss": 0.0329, "step": 413500 }, { "epoch": 15.67, "learning_rate": 1.2167549869412167e-05, "loss": 0.0349, "step": 414000 }, { "epoch": 15.69, "learning_rate": 1.215808698285325e-05, "loss": 0.0341, "step": 414500 }, { "epoch": 15.71, "learning_rate": 1.2148624096294334e-05, "loss": 0.0343, "step": 415000 }, { "epoch": 15.73, "learning_rate": 1.213916120973542e-05, "loss": 0.0343, "step": 415500 }, { "epoch": 15.75, "learning_rate": 1.2129698323176501e-05, "loss": 0.0337, "step": 416000 }, { "epoch": 15.77, "learning_rate": 1.2120235436617587e-05, "loss": 0.0344, "step": 416500 }, { "epoch": 15.78, "learning_rate": 1.2110772550058672e-05, "loss": 0.0341, "step": 417000 }, { "epoch": 15.8, "learning_rate": 1.2101309663499754e-05, "loss": 0.0341, "step": 417500 }, { "epoch": 15.82, "learning_rate": 1.2091865702713957e-05, "loss": 0.0341, "step": 418000 }, { "epoch": 15.84, "learning_rate": 1.208240281615504e-05, "loss": 0.0351, "step": 418500 }, { "epoch": 15.86, "learning_rate": 1.2072939929596126e-05, "loss": 0.0339, "step": 419000 }, { "epoch": 15.88, "learning_rate": 1.2063477043037208e-05, "loss": 0.035, "step": 419500 }, { "epoch": 15.9, "learning_rate": 1.2054014156478293e-05, "loss": 0.0343, "step": 420000 }, { "epoch": 15.92, "learning_rate": 1.2044551269919377e-05, "loss": 0.034, "step": 420500 }, { "epoch": 15.94, "learning_rate": 1.203510730913358e-05, "loss": 0.0346, "step": 421000 }, { "epoch": 15.95, "learning_rate": 1.2025644422574662e-05, "loss": 0.0352, "step": 421500 }, { "epoch": 15.97, "learning_rate": 1.2016200461788865e-05, "loss": 0.0341, "step": 422000 }, { "epoch": 15.99, "learning_rate": 1.200673757522995e-05, "loss": 0.0337, "step": 422500 }, { "epoch": 16.0, "eval_bleu": 92.2303, "eval_gen_len": 16.4514, "eval_loss": 0.10698259621858597, "eval_runtime": 1187.5586, "eval_samples_per_second": 38.455, "eval_steps_per_second": 1.202, "step": 422704 }, { "epoch": 16.01, "learning_rate": 1.1997274688671032e-05, "loss": 0.0312, "step": 423000 }, { "epoch": 16.03, "learning_rate": 1.1987811802112118e-05, "loss": 0.0283, "step": 423500 }, { "epoch": 16.05, "learning_rate": 1.1978348915553201e-05, "loss": 0.0278, "step": 424000 }, { "epoch": 16.07, "learning_rate": 1.1968886028994285e-05, "loss": 0.0298, "step": 424500 }, { "epoch": 16.09, "learning_rate": 1.1959423142435369e-05, "loss": 0.0289, "step": 425000 }, { "epoch": 16.11, "learning_rate": 1.1949960255876454e-05, "loss": 0.0286, "step": 425500 }, { "epoch": 16.12, "learning_rate": 1.1940497369317538e-05, "loss": 0.0287, "step": 426000 }, { "epoch": 16.14, "learning_rate": 1.1931053408531739e-05, "loss": 0.0298, "step": 426500 }, { "epoch": 16.16, "learning_rate": 1.1921590521972823e-05, "loss": 0.0292, "step": 427000 }, { "epoch": 16.18, "learning_rate": 1.1912127635413908e-05, "loss": 0.0295, "step": 427500 }, { "epoch": 16.2, "learning_rate": 1.190266474885499e-05, "loss": 0.0307, "step": 428000 }, { "epoch": 16.22, "learning_rate": 1.1893201862296075e-05, "loss": 0.0291, "step": 428500 }, { "epoch": 16.24, "learning_rate": 1.1883757901510278e-05, "loss": 0.0309, "step": 429000 }, { "epoch": 16.26, "learning_rate": 1.1874295014951362e-05, "loss": 0.0301, "step": 429500 }, { "epoch": 16.28, "learning_rate": 1.1864832128392446e-05, "loss": 0.0298, "step": 430000 }, { "epoch": 16.3, "learning_rate": 1.185536924183353e-05, "loss": 0.0302, "step": 430500 }, { "epoch": 16.31, "learning_rate": 1.1845925281047732e-05, "loss": 0.0303, "step": 431000 }, { "epoch": 16.33, "learning_rate": 1.1836462394488817e-05, "loss": 0.0309, "step": 431500 }, { "epoch": 16.35, "learning_rate": 1.18269995079299e-05, "loss": 0.0295, "step": 432000 }, { "epoch": 16.37, "learning_rate": 1.1817536621370985e-05, "loss": 0.0311, "step": 432500 }, { "epoch": 16.39, "learning_rate": 1.1808073734812069e-05, "loss": 0.0306, "step": 433000 }, { "epoch": 16.41, "learning_rate": 1.179861084825315e-05, "loss": 0.0305, "step": 433500 }, { "epoch": 16.43, "learning_rate": 1.1789147961694236e-05, "loss": 0.0307, "step": 434000 }, { "epoch": 16.45, "learning_rate": 1.1779704000908439e-05, "loss": 0.0307, "step": 434500 }, { "epoch": 16.47, "learning_rate": 1.1770241114349522e-05, "loss": 0.0315, "step": 435000 }, { "epoch": 16.48, "learning_rate": 1.1760778227790606e-05, "loss": 0.0305, "step": 435500 }, { "epoch": 16.5, "learning_rate": 1.175131534123169e-05, "loss": 0.0312, "step": 436000 }, { "epoch": 16.52, "learning_rate": 1.1741852454672775e-05, "loss": 0.0311, "step": 436500 }, { "epoch": 16.54, "learning_rate": 1.1732389568113857e-05, "loss": 0.0313, "step": 437000 }, { "epoch": 16.56, "learning_rate": 1.172294560732806e-05, "loss": 0.0311, "step": 437500 }, { "epoch": 16.58, "learning_rate": 1.1713501646542263e-05, "loss": 0.0317, "step": 438000 }, { "epoch": 16.6, "learning_rate": 1.1704038759983347e-05, "loss": 0.031, "step": 438500 }, { "epoch": 16.62, "learning_rate": 1.169457587342443e-05, "loss": 0.0298, "step": 439000 }, { "epoch": 16.64, "learning_rate": 1.1685112986865514e-05, "loss": 0.0304, "step": 439500 }, { "epoch": 16.65, "learning_rate": 1.16756501003066e-05, "loss": 0.0311, "step": 440000 }, { "epoch": 16.67, "learning_rate": 1.1666187213747681e-05, "loss": 0.0305, "step": 440500 }, { "epoch": 16.69, "learning_rate": 1.1656724327188767e-05, "loss": 0.0312, "step": 441000 }, { "epoch": 16.71, "learning_rate": 1.164726144062985e-05, "loss": 0.031, "step": 441500 }, { "epoch": 16.73, "learning_rate": 1.1637798554070934e-05, "loss": 0.0301, "step": 442000 }, { "epoch": 16.75, "learning_rate": 1.1628335667512018e-05, "loss": 0.0325, "step": 442500 }, { "epoch": 16.77, "learning_rate": 1.1618872780953103e-05, "loss": 0.0305, "step": 443000 }, { "epoch": 16.79, "learning_rate": 1.1609409894394189e-05, "loss": 0.0315, "step": 443500 }, { "epoch": 16.81, "learning_rate": 1.159994700783527e-05, "loss": 0.0308, "step": 444000 }, { "epoch": 16.83, "learning_rate": 1.1590503047049473e-05, "loss": 0.0315, "step": 444500 }, { "epoch": 16.84, "learning_rate": 1.1581059086263675e-05, "loss": 0.0315, "step": 445000 }, { "epoch": 16.86, "learning_rate": 1.1571615125477877e-05, "loss": 0.032, "step": 445500 }, { "epoch": 16.88, "learning_rate": 1.156215223891896e-05, "loss": 0.0313, "step": 446000 }, { "epoch": 16.9, "learning_rate": 1.1552689352360045e-05, "loss": 0.0303, "step": 446500 }, { "epoch": 16.92, "learning_rate": 1.154322646580113e-05, "loss": 0.031, "step": 447000 }, { "epoch": 16.94, "learning_rate": 1.1533763579242212e-05, "loss": 0.0317, "step": 447500 }, { "epoch": 16.96, "learning_rate": 1.1524300692683298e-05, "loss": 0.0325, "step": 448000 }, { "epoch": 16.98, "learning_rate": 1.1514837806124381e-05, "loss": 0.0315, "step": 448500 }, { "epoch": 17.0, "learning_rate": 1.1505374919565467e-05, "loss": 0.032, "step": 449000 }, { "epoch": 17.0, "eval_bleu": 92.1868, "eval_gen_len": 16.4864, "eval_loss": 0.10941721498966217, "eval_runtime": 1187.2938, "eval_samples_per_second": 38.463, "eval_steps_per_second": 1.203, "step": 449123 }, { "epoch": 17.01, "learning_rate": 1.1495912033006549e-05, "loss": 0.027, "step": 449500 }, { "epoch": 17.03, "learning_rate": 1.1486468072220751e-05, "loss": 0.0272, "step": 450000 }, { "epoch": 17.05, "learning_rate": 1.1477005185661835e-05, "loss": 0.0265, "step": 450500 }, { "epoch": 17.07, "learning_rate": 1.1467542299102919e-05, "loss": 0.0257, "step": 451000 }, { "epoch": 17.09, "learning_rate": 1.1458079412544003e-05, "loss": 0.0264, "step": 451500 }, { "epoch": 17.11, "learning_rate": 1.1448616525985088e-05, "loss": 0.0268, "step": 452000 }, { "epoch": 17.13, "learning_rate": 1.143915363942617e-05, "loss": 0.027, "step": 452500 }, { "epoch": 17.15, "learning_rate": 1.1429690752867255e-05, "loss": 0.0268, "step": 453000 }, { "epoch": 17.17, "learning_rate": 1.1420246792081458e-05, "loss": 0.0271, "step": 453500 }, { "epoch": 17.18, "learning_rate": 1.1410783905522542e-05, "loss": 0.0275, "step": 454000 }, { "epoch": 17.2, "learning_rate": 1.1401321018963626e-05, "loss": 0.0263, "step": 454500 }, { "epoch": 17.22, "learning_rate": 1.139185813240471e-05, "loss": 0.0266, "step": 455000 }, { "epoch": 17.24, "learning_rate": 1.1382395245845795e-05, "loss": 0.0279, "step": 455500 }, { "epoch": 17.26, "learning_rate": 1.1372932359286877e-05, "loss": 0.0266, "step": 456000 }, { "epoch": 17.28, "learning_rate": 1.1363469472727962e-05, "loss": 0.027, "step": 456500 }, { "epoch": 17.3, "learning_rate": 1.1354006586169046e-05, "loss": 0.0273, "step": 457000 }, { "epoch": 17.32, "learning_rate": 1.1344562625383249e-05, "loss": 0.0283, "step": 457500 }, { "epoch": 17.34, "learning_rate": 1.133509973882433e-05, "loss": 0.0267, "step": 458000 }, { "epoch": 17.35, "learning_rate": 1.1325655778038533e-05, "loss": 0.0272, "step": 458500 }, { "epoch": 17.37, "learning_rate": 1.1316192891479619e-05, "loss": 0.028, "step": 459000 }, { "epoch": 17.39, "learning_rate": 1.130674893069382e-05, "loss": 0.0273, "step": 459500 }, { "epoch": 17.41, "learning_rate": 1.1297286044134904e-05, "loss": 0.0268, "step": 460000 }, { "epoch": 17.43, "learning_rate": 1.1287823157575987e-05, "loss": 0.0276, "step": 460500 }, { "epoch": 17.45, "learning_rate": 1.1278360271017073e-05, "loss": 0.0272, "step": 461000 }, { "epoch": 17.47, "learning_rate": 1.1268897384458155e-05, "loss": 0.0271, "step": 461500 }, { "epoch": 17.49, "learning_rate": 1.125943449789924e-05, "loss": 0.029, "step": 462000 }, { "epoch": 17.51, "learning_rate": 1.1249971611340324e-05, "loss": 0.0279, "step": 462500 }, { "epoch": 17.53, "learning_rate": 1.124050872478141e-05, "loss": 0.0288, "step": 463000 }, { "epoch": 17.54, "learning_rate": 1.1231045838222491e-05, "loss": 0.0286, "step": 463500 }, { "epoch": 17.56, "learning_rate": 1.1221601877436694e-05, "loss": 0.0284, "step": 464000 }, { "epoch": 17.58, "learning_rate": 1.121213899087778e-05, "loss": 0.0274, "step": 464500 }, { "epoch": 17.6, "learning_rate": 1.1202676104318861e-05, "loss": 0.0278, "step": 465000 }, { "epoch": 17.62, "learning_rate": 1.1193213217759947e-05, "loss": 0.0285, "step": 465500 }, { "epoch": 17.64, "learning_rate": 1.118375033120103e-05, "loss": 0.0281, "step": 466000 }, { "epoch": 17.66, "learning_rate": 1.1174287444642114e-05, "loss": 0.0277, "step": 466500 }, { "epoch": 17.68, "learning_rate": 1.1164843483856315e-05, "loss": 0.028, "step": 467000 }, { "epoch": 17.7, "learning_rate": 1.11553805972974e-05, "loss": 0.0275, "step": 467500 }, { "epoch": 17.71, "learning_rate": 1.1145936636511603e-05, "loss": 0.0278, "step": 468000 }, { "epoch": 17.73, "learning_rate": 1.1136473749952685e-05, "loss": 0.0287, "step": 468500 }, { "epoch": 17.75, "learning_rate": 1.1127010863393771e-05, "loss": 0.0289, "step": 469000 }, { "epoch": 17.77, "learning_rate": 1.1117547976834855e-05, "loss": 0.0279, "step": 469500 }, { "epoch": 17.79, "learning_rate": 1.110808509027594e-05, "loss": 0.0279, "step": 470000 }, { "epoch": 17.81, "learning_rate": 1.1098622203717022e-05, "loss": 0.0296, "step": 470500 }, { "epoch": 17.83, "learning_rate": 1.1089159317158107e-05, "loss": 0.029, "step": 471000 }, { "epoch": 17.85, "learning_rate": 1.1079715356372308e-05, "loss": 0.0293, "step": 471500 }, { "epoch": 17.87, "learning_rate": 1.1070252469813392e-05, "loss": 0.0275, "step": 472000 }, { "epoch": 17.88, "learning_rate": 1.1060789583254476e-05, "loss": 0.0286, "step": 472500 }, { "epoch": 17.9, "learning_rate": 1.1051326696695561e-05, "loss": 0.0285, "step": 473000 }, { "epoch": 17.92, "learning_rate": 1.1041863810136647e-05, "loss": 0.0285, "step": 473500 }, { "epoch": 17.94, "learning_rate": 1.1032400923577729e-05, "loss": 0.0288, "step": 474000 }, { "epoch": 17.96, "learning_rate": 1.1022938037018814e-05, "loss": 0.0291, "step": 474500 }, { "epoch": 17.98, "learning_rate": 1.1013475150459898e-05, "loss": 0.0291, "step": 475000 }, { "epoch": 18.0, "learning_rate": 1.1004031189674099e-05, "loss": 0.0302, "step": 475500 }, { "epoch": 18.0, "eval_bleu": 92.1969, "eval_gen_len": 16.4745, "eval_loss": 0.11121129244565964, "eval_runtime": 1185.9896, "eval_samples_per_second": 38.505, "eval_steps_per_second": 1.204, "step": 475542 }, { "epoch": 18.02, "learning_rate": 1.0994568303115183e-05, "loss": 0.0234, "step": 476000 }, { "epoch": 18.04, "learning_rate": 1.0985105416556268e-05, "loss": 0.0245, "step": 476500 }, { "epoch": 18.06, "learning_rate": 1.0975642529997352e-05, "loss": 0.0241, "step": 477000 }, { "epoch": 18.07, "learning_rate": 1.0966179643438435e-05, "loss": 0.0248, "step": 477500 }, { "epoch": 18.09, "learning_rate": 1.0956716756879519e-05, "loss": 0.0242, "step": 478000 }, { "epoch": 18.11, "learning_rate": 1.0947272796093722e-05, "loss": 0.0247, "step": 478500 }, { "epoch": 18.13, "learning_rate": 1.0937809909534804e-05, "loss": 0.0243, "step": 479000 }, { "epoch": 18.15, "learning_rate": 1.092834702297589e-05, "loss": 0.0238, "step": 479500 }, { "epoch": 18.17, "learning_rate": 1.0918884136416975e-05, "loss": 0.0249, "step": 480000 }, { "epoch": 18.19, "learning_rate": 1.0909440175631176e-05, "loss": 0.0251, "step": 480500 }, { "epoch": 18.21, "learning_rate": 1.089997728907226e-05, "loss": 0.0243, "step": 481000 }, { "epoch": 18.23, "learning_rate": 1.0890514402513343e-05, "loss": 0.0245, "step": 481500 }, { "epoch": 18.24, "learning_rate": 1.0881051515954429e-05, "loss": 0.0248, "step": 482000 }, { "epoch": 18.26, "learning_rate": 1.0871607555168628e-05, "loss": 0.0241, "step": 482500 }, { "epoch": 18.28, "learning_rate": 1.0862144668609713e-05, "loss": 0.0242, "step": 483000 }, { "epoch": 18.3, "learning_rate": 1.0852681782050799e-05, "loss": 0.0251, "step": 483500 }, { "epoch": 18.32, "learning_rate": 1.0843218895491882e-05, "loss": 0.025, "step": 484000 }, { "epoch": 18.34, "learning_rate": 1.0833756008932964e-05, "loss": 0.0246, "step": 484500 }, { "epoch": 18.36, "learning_rate": 1.082429312237405e-05, "loss": 0.0255, "step": 485000 }, { "epoch": 18.38, "learning_rate": 1.0814830235815135e-05, "loss": 0.0252, "step": 485500 }, { "epoch": 18.4, "learning_rate": 1.0805367349256217e-05, "loss": 0.0257, "step": 486000 }, { "epoch": 18.41, "learning_rate": 1.0795904462697303e-05, "loss": 0.0249, "step": 486500 }, { "epoch": 18.43, "learning_rate": 1.0786460501911504e-05, "loss": 0.0248, "step": 487000 }, { "epoch": 18.45, "learning_rate": 1.0776997615352589e-05, "loss": 0.025, "step": 487500 }, { "epoch": 18.47, "learning_rate": 1.0767553654566789e-05, "loss": 0.0258, "step": 488000 }, { "epoch": 18.49, "learning_rate": 1.0758090768007874e-05, "loss": 0.0249, "step": 488500 }, { "epoch": 18.51, "learning_rate": 1.074862788144896e-05, "loss": 0.0242, "step": 489000 }, { "epoch": 18.53, "learning_rate": 1.0739164994890041e-05, "loss": 0.0248, "step": 489500 }, { "epoch": 18.55, "learning_rate": 1.0729702108331127e-05, "loss": 0.026, "step": 490000 }, { "epoch": 18.57, "learning_rate": 1.0720258147545328e-05, "loss": 0.0254, "step": 490500 }, { "epoch": 18.59, "learning_rate": 1.0710795260986413e-05, "loss": 0.025, "step": 491000 }, { "epoch": 18.6, "learning_rate": 1.0701332374427495e-05, "loss": 0.0253, "step": 491500 }, { "epoch": 18.62, "learning_rate": 1.069186948786858e-05, "loss": 0.0255, "step": 492000 }, { "epoch": 18.64, "learning_rate": 1.0682406601309664e-05, "loss": 0.0263, "step": 492500 }, { "epoch": 18.66, "learning_rate": 1.0672962640523867e-05, "loss": 0.0254, "step": 493000 }, { "epoch": 18.68, "learning_rate": 1.0663499753964949e-05, "loss": 0.0255, "step": 493500 }, { "epoch": 18.7, "learning_rate": 1.0654055793179152e-05, "loss": 0.0255, "step": 494000 }, { "epoch": 18.72, "learning_rate": 1.0644592906620237e-05, "loss": 0.0266, "step": 494500 }, { "epoch": 18.74, "learning_rate": 1.063513002006132e-05, "loss": 0.0256, "step": 495000 }, { "epoch": 18.76, "learning_rate": 1.0625667133502405e-05, "loss": 0.0244, "step": 495500 }, { "epoch": 18.77, "learning_rate": 1.0616204246943488e-05, "loss": 0.0262, "step": 496000 }, { "epoch": 18.79, "learning_rate": 1.0606741360384572e-05, "loss": 0.0252, "step": 496500 }, { "epoch": 18.81, "learning_rate": 1.0597278473825656e-05, "loss": 0.0266, "step": 497000 }, { "epoch": 18.83, "learning_rate": 1.0587815587266741e-05, "loss": 0.0251, "step": 497500 }, { "epoch": 18.85, "learning_rate": 1.0578352700707825e-05, "loss": 0.0259, "step": 498000 }, { "epoch": 18.87, "learning_rate": 1.0568889814148909e-05, "loss": 0.0267, "step": 498500 }, { "epoch": 18.89, "learning_rate": 1.055944585336311e-05, "loss": 0.0254, "step": 499000 }, { "epoch": 18.91, "learning_rate": 1.0549982966804195e-05, "loss": 0.0254, "step": 499500 }, { "epoch": 18.93, "learning_rate": 1.0540539006018398e-05, "loss": 0.0262, "step": 500000 }, { "epoch": 18.94, "learning_rate": 1.053107611945948e-05, "loss": 0.0266, "step": 500500 }, { "epoch": 18.96, "learning_rate": 1.0521613232900565e-05, "loss": 0.0263, "step": 501000 }, { "epoch": 18.98, "learning_rate": 1.0512150346341649e-05, "loss": 0.0263, "step": 501500 }, { "epoch": 19.0, "eval_bleu": 92.283, "eval_gen_len": 16.4824, "eval_loss": 0.11453192681074142, "eval_runtime": 1203.1736, "eval_samples_per_second": 37.955, "eval_steps_per_second": 1.187, "step": 501961 }, { "epoch": 19.0, "learning_rate": 1.0502687459782733e-05, "loss": 0.0262, "step": 502000 }, { "epoch": 19.02, "learning_rate": 1.0493224573223816e-05, "loss": 0.0214, "step": 502500 }, { "epoch": 19.04, "learning_rate": 1.0483761686664902e-05, "loss": 0.0221, "step": 503000 }, { "epoch": 19.06, "learning_rate": 1.0474298800105984e-05, "loss": 0.0222, "step": 503500 }, { "epoch": 19.08, "learning_rate": 1.046483591354707e-05, "loss": 0.022, "step": 504000 }, { "epoch": 19.1, "learning_rate": 1.0455391952761272e-05, "loss": 0.0223, "step": 504500 }, { "epoch": 19.12, "learning_rate": 1.0445929066202356e-05, "loss": 0.0215, "step": 505000 }, { "epoch": 19.13, "learning_rate": 1.0436466179643438e-05, "loss": 0.0219, "step": 505500 }, { "epoch": 19.15, "learning_rate": 1.042702221885764e-05, "loss": 0.0223, "step": 506000 }, { "epoch": 19.17, "learning_rate": 1.0417559332298726e-05, "loss": 0.0217, "step": 506500 }, { "epoch": 19.19, "learning_rate": 1.040809644573981e-05, "loss": 0.0221, "step": 507000 }, { "epoch": 19.21, "learning_rate": 1.0398633559180893e-05, "loss": 0.0216, "step": 507500 }, { "epoch": 19.23, "learning_rate": 1.0389170672621977e-05, "loss": 0.0226, "step": 508000 }, { "epoch": 19.25, "learning_rate": 1.037972671183618e-05, "loss": 0.0228, "step": 508500 }, { "epoch": 19.27, "learning_rate": 1.0370263825277262e-05, "loss": 0.0226, "step": 509000 }, { "epoch": 19.29, "learning_rate": 1.0360800938718347e-05, "loss": 0.0228, "step": 509500 }, { "epoch": 19.3, "learning_rate": 1.0351338052159433e-05, "loss": 0.0223, "step": 510000 }, { "epoch": 19.32, "learning_rate": 1.0341875165600515e-05, "loss": 0.0222, "step": 510500 }, { "epoch": 19.34, "learning_rate": 1.0332431204814717e-05, "loss": 0.0222, "step": 511000 }, { "epoch": 19.36, "learning_rate": 1.0322968318255801e-05, "loss": 0.0224, "step": 511500 }, { "epoch": 19.38, "learning_rate": 1.0313505431696887e-05, "loss": 0.0225, "step": 512000 }, { "epoch": 19.4, "learning_rate": 1.0304042545137969e-05, "loss": 0.0226, "step": 512500 }, { "epoch": 19.42, "learning_rate": 1.0294579658579054e-05, "loss": 0.0235, "step": 513000 }, { "epoch": 19.44, "learning_rate": 1.0285116772020138e-05, "loss": 0.0229, "step": 513500 }, { "epoch": 19.46, "learning_rate": 1.0275653885461221e-05, "loss": 0.0232, "step": 514000 }, { "epoch": 19.47, "learning_rate": 1.0266190998902305e-05, "loss": 0.0226, "step": 514500 }, { "epoch": 19.49, "learning_rate": 1.025672811234339e-05, "loss": 0.023, "step": 515000 }, { "epoch": 19.51, "learning_rate": 1.0247265225784476e-05, "loss": 0.0227, "step": 515500 }, { "epoch": 19.53, "learning_rate": 1.0237802339225558e-05, "loss": 0.0234, "step": 516000 }, { "epoch": 19.55, "learning_rate": 1.022835837843976e-05, "loss": 0.0247, "step": 516500 }, { "epoch": 19.57, "learning_rate": 1.0218895491880844e-05, "loss": 0.0227, "step": 517000 }, { "epoch": 19.59, "learning_rate": 1.0209432605321928e-05, "loss": 0.0248, "step": 517500 }, { "epoch": 19.61, "learning_rate": 1.0199969718763012e-05, "loss": 0.0222, "step": 518000 }, { "epoch": 19.63, "learning_rate": 1.0190506832204097e-05, "loss": 0.0237, "step": 518500 }, { "epoch": 19.64, "learning_rate": 1.018104394564518e-05, "loss": 0.0232, "step": 519000 }, { "epoch": 19.66, "learning_rate": 1.0171581059086265e-05, "loss": 0.0232, "step": 519500 }, { "epoch": 19.68, "learning_rate": 1.0162118172527348e-05, "loss": 0.0227, "step": 520000 }, { "epoch": 19.7, "learning_rate": 1.0152655285968434e-05, "loss": 0.0237, "step": 520500 }, { "epoch": 19.72, "learning_rate": 1.0143230250955754e-05, "loss": 0.0232, "step": 521000 }, { "epoch": 19.74, "learning_rate": 1.0133767364396836e-05, "loss": 0.0228, "step": 521500 }, { "epoch": 19.76, "learning_rate": 1.0124304477837921e-05, "loss": 0.0241, "step": 522000 }, { "epoch": 19.78, "learning_rate": 1.0114841591279005e-05, "loss": 0.0236, "step": 522500 }, { "epoch": 19.8, "learning_rate": 1.0105378704720089e-05, "loss": 0.0234, "step": 523000 }, { "epoch": 19.82, "learning_rate": 1.009593474393429e-05, "loss": 0.0245, "step": 523500 }, { "epoch": 19.83, "learning_rate": 1.0086471857375375e-05, "loss": 0.0234, "step": 524000 }, { "epoch": 19.85, "learning_rate": 1.0077008970816457e-05, "loss": 0.0234, "step": 524500 }, { "epoch": 19.87, "learning_rate": 1.0067546084257543e-05, "loss": 0.0231, "step": 525000 }, { "epoch": 19.89, "learning_rate": 1.0058083197698626e-05, "loss": 0.0222, "step": 525500 }, { "epoch": 19.91, "learning_rate": 1.0048620311139712e-05, "loss": 0.0233, "step": 526000 }, { "epoch": 19.93, "learning_rate": 1.0039157424580794e-05, "loss": 0.0238, "step": 526500 }, { "epoch": 19.95, "learning_rate": 1.0029694538021879e-05, "loss": 0.0238, "step": 527000 }, { "epoch": 19.97, "learning_rate": 1.0020231651462964e-05, "loss": 0.0237, "step": 527500 }, { "epoch": 19.99, "learning_rate": 1.0010787690677164e-05, "loss": 0.0233, "step": 528000 }, { "epoch": 20.0, "eval_bleu": 92.2202, "eval_gen_len": 16.5063, "eval_loss": 0.11753202229738235, "eval_runtime": 1202.9336, "eval_samples_per_second": 37.963, "eval_steps_per_second": 1.187, "step": 528380 }, { "epoch": 20.0, "learning_rate": 1.000132480411825e-05, "loss": 0.022, "step": 528500 }, { "epoch": 20.02, "learning_rate": 9.991861917559333e-06, "loss": 0.0201, "step": 529000 }, { "epoch": 20.04, "learning_rate": 9.982399031000417e-06, "loss": 0.0198, "step": 529500 }, { "epoch": 20.06, "learning_rate": 9.97295507021462e-06, "loss": 0.0198, "step": 530000 }, { "epoch": 20.08, "learning_rate": 9.96351110942882e-06, "loss": 0.0202, "step": 530500 }, { "epoch": 20.1, "learning_rate": 9.954048222869906e-06, "loss": 0.0198, "step": 531000 }, { "epoch": 20.12, "learning_rate": 9.94458533631099e-06, "loss": 0.02, "step": 531500 }, { "epoch": 20.14, "learning_rate": 9.93514137552519e-06, "loss": 0.0201, "step": 532000 }, { "epoch": 20.16, "learning_rate": 9.925678488966274e-06, "loss": 0.0201, "step": 532500 }, { "epoch": 20.17, "learning_rate": 9.91621560240736e-06, "loss": 0.0203, "step": 533000 }, { "epoch": 20.19, "learning_rate": 9.906752715848444e-06, "loss": 0.021, "step": 533500 }, { "epoch": 20.21, "learning_rate": 9.897289829289527e-06, "loss": 0.0207, "step": 534000 }, { "epoch": 20.23, "learning_rate": 9.887826942730611e-06, "loss": 0.0206, "step": 534500 }, { "epoch": 20.25, "learning_rate": 9.878364056171695e-06, "loss": 0.0205, "step": 535000 }, { "epoch": 20.27, "learning_rate": 9.868901169612778e-06, "loss": 0.0203, "step": 535500 }, { "epoch": 20.29, "learning_rate": 9.859438283053864e-06, "loss": 0.02, "step": 536000 }, { "epoch": 20.31, "learning_rate": 9.849975396494947e-06, "loss": 0.0201, "step": 536500 }, { "epoch": 20.33, "learning_rate": 9.840512509936033e-06, "loss": 0.0209, "step": 537000 }, { "epoch": 20.35, "learning_rate": 9.831049623377117e-06, "loss": 0.0202, "step": 537500 }, { "epoch": 20.36, "learning_rate": 9.821605662591318e-06, "loss": 0.0196, "step": 538000 }, { "epoch": 20.38, "learning_rate": 9.812161701805519e-06, "loss": 0.0206, "step": 538500 }, { "epoch": 20.4, "learning_rate": 9.802698815246604e-06, "loss": 0.0212, "step": 539000 }, { "epoch": 20.42, "learning_rate": 9.793235928687688e-06, "loss": 0.0204, "step": 539500 }, { "epoch": 20.44, "learning_rate": 9.783773042128772e-06, "loss": 0.0199, "step": 540000 }, { "epoch": 20.46, "learning_rate": 9.774310155569855e-06, "loss": 0.0213, "step": 540500 }, { "epoch": 20.48, "learning_rate": 9.764866194784058e-06, "loss": 0.0206, "step": 541000 }, { "epoch": 20.5, "learning_rate": 9.755403308225142e-06, "loss": 0.0205, "step": 541500 }, { "epoch": 20.52, "learning_rate": 9.745940421666225e-06, "loss": 0.0208, "step": 542000 }, { "epoch": 20.53, "learning_rate": 9.73647753510731e-06, "loss": 0.0212, "step": 542500 }, { "epoch": 20.55, "learning_rate": 9.727014648548395e-06, "loss": 0.0206, "step": 543000 }, { "epoch": 20.57, "learning_rate": 9.717570687762596e-06, "loss": 0.0214, "step": 543500 }, { "epoch": 20.59, "learning_rate": 9.70810780120368e-06, "loss": 0.0207, "step": 544000 }, { "epoch": 20.61, "learning_rate": 9.698644914644763e-06, "loss": 0.021, "step": 544500 }, { "epoch": 20.63, "learning_rate": 9.689182028085848e-06, "loss": 0.0215, "step": 545000 }, { "epoch": 20.65, "learning_rate": 9.679719141526932e-06, "loss": 0.0214, "step": 545500 }, { "epoch": 20.67, "learning_rate": 9.670275180741135e-06, "loss": 0.0206, "step": 546000 }, { "epoch": 20.69, "learning_rate": 9.660812294182219e-06, "loss": 0.0208, "step": 546500 }, { "epoch": 20.7, "learning_rate": 9.65136833339642e-06, "loss": 0.0213, "step": 547000 }, { "epoch": 20.72, "learning_rate": 9.641905446837503e-06, "loss": 0.021, "step": 547500 }, { "epoch": 20.74, "learning_rate": 9.632442560278589e-06, "loss": 0.0213, "step": 548000 }, { "epoch": 20.76, "learning_rate": 9.622979673719673e-06, "loss": 0.0206, "step": 548500 }, { "epoch": 20.78, "learning_rate": 9.613516787160756e-06, "loss": 0.0215, "step": 549000 }, { "epoch": 20.8, "learning_rate": 9.60405390060184e-06, "loss": 0.0217, "step": 549500 }, { "epoch": 20.82, "learning_rate": 9.594591014042924e-06, "loss": 0.0216, "step": 550000 }, { "epoch": 20.84, "learning_rate": 9.585128127484007e-06, "loss": 0.021, "step": 550500 }, { "epoch": 20.86, "learning_rate": 9.575665240925093e-06, "loss": 0.0216, "step": 551000 }, { "epoch": 20.88, "learning_rate": 9.566202354366176e-06, "loss": 0.0216, "step": 551500 }, { "epoch": 20.89, "learning_rate": 9.556739467807262e-06, "loss": 0.0214, "step": 552000 }, { "epoch": 20.91, "learning_rate": 9.547295507021463e-06, "loss": 0.0214, "step": 552500 }, { "epoch": 20.93, "learning_rate": 9.537832620462547e-06, "loss": 0.0219, "step": 553000 }, { "epoch": 20.95, "learning_rate": 9.528388659676748e-06, "loss": 0.0223, "step": 553500 }, { "epoch": 20.97, "learning_rate": 9.518925773117833e-06, "loss": 0.0208, "step": 554000 }, { "epoch": 20.99, "learning_rate": 9.509462886558917e-06, "loss": 0.0219, "step": 554500 }, { "epoch": 21.0, "eval_bleu": 92.2963, "eval_gen_len": 16.4984, "eval_loss": 0.11960500478744507, "eval_runtime": 1195.7203, "eval_samples_per_second": 38.192, "eval_steps_per_second": 1.194, "step": 554799 }, { "epoch": 21.01, "learning_rate": 9.5e-06, "loss": 0.0197, "step": 555000 }, { "epoch": 21.03, "learning_rate": 9.490537113441086e-06, "loss": 0.0184, "step": 555500 }, { "epoch": 21.05, "learning_rate": 9.481074226882168e-06, "loss": 0.0185, "step": 556000 }, { "epoch": 21.06, "learning_rate": 9.471611340323253e-06, "loss": 0.0183, "step": 556500 }, { "epoch": 21.08, "learning_rate": 9.462148453764337e-06, "loss": 0.0184, "step": 557000 }, { "epoch": 21.1, "learning_rate": 9.45268556720542e-06, "loss": 0.0183, "step": 557500 }, { "epoch": 21.12, "learning_rate": 9.443222680646506e-06, "loss": 0.0179, "step": 558000 }, { "epoch": 21.14, "learning_rate": 9.43375979408759e-06, "loss": 0.0186, "step": 558500 }, { "epoch": 21.16, "learning_rate": 9.424315833301791e-06, "loss": 0.0184, "step": 559000 }, { "epoch": 21.18, "learning_rate": 9.414871872515992e-06, "loss": 0.0184, "step": 559500 }, { "epoch": 21.2, "learning_rate": 9.405408985957077e-06, "loss": 0.0184, "step": 560000 }, { "epoch": 21.22, "learning_rate": 9.395946099398161e-06, "loss": 0.0185, "step": 560500 }, { "epoch": 21.23, "learning_rate": 9.386483212839247e-06, "loss": 0.0184, "step": 561000 }, { "epoch": 21.25, "learning_rate": 9.37702032628033e-06, "loss": 0.0178, "step": 561500 }, { "epoch": 21.27, "learning_rate": 9.367557439721412e-06, "loss": 0.0189, "step": 562000 }, { "epoch": 21.29, "learning_rate": 9.358094553162498e-06, "loss": 0.0189, "step": 562500 }, { "epoch": 21.31, "learning_rate": 9.348631666603581e-06, "loss": 0.0185, "step": 563000 }, { "epoch": 21.33, "learning_rate": 9.339168780044665e-06, "loss": 0.0189, "step": 563500 }, { "epoch": 21.35, "learning_rate": 9.329724819258868e-06, "loss": 0.018, "step": 564000 }, { "epoch": 21.37, "learning_rate": 9.320261932699952e-06, "loss": 0.0183, "step": 564500 }, { "epoch": 21.39, "learning_rate": 9.310817971914153e-06, "loss": 0.0188, "step": 565000 }, { "epoch": 21.41, "learning_rate": 9.301355085355236e-06, "loss": 0.0193, "step": 565500 }, { "epoch": 21.42, "learning_rate": 9.291892198796322e-06, "loss": 0.0187, "step": 566000 }, { "epoch": 21.44, "learning_rate": 9.282429312237405e-06, "loss": 0.0195, "step": 566500 }, { "epoch": 21.46, "learning_rate": 9.27296642567849e-06, "loss": 0.0191, "step": 567000 }, { "epoch": 21.48, "learning_rate": 9.263503539119575e-06, "loss": 0.0182, "step": 567500 }, { "epoch": 21.5, "learning_rate": 9.254040652560658e-06, "loss": 0.0196, "step": 568000 }, { "epoch": 21.52, "learning_rate": 9.244577766001742e-06, "loss": 0.0189, "step": 568500 }, { "epoch": 21.54, "learning_rate": 9.235114879442826e-06, "loss": 0.019, "step": 569000 }, { "epoch": 21.56, "learning_rate": 9.225651992883911e-06, "loss": 0.0191, "step": 569500 }, { "epoch": 21.58, "learning_rate": 9.216189106324995e-06, "loss": 0.0192, "step": 570000 }, { "epoch": 21.59, "learning_rate": 9.206745145539196e-06, "loss": 0.0197, "step": 570500 }, { "epoch": 21.61, "learning_rate": 9.19728225898028e-06, "loss": 0.0184, "step": 571000 }, { "epoch": 21.63, "learning_rate": 9.187819372421363e-06, "loss": 0.019, "step": 571500 }, { "epoch": 21.65, "learning_rate": 9.178375411635566e-06, "loss": 0.0197, "step": 572000 }, { "epoch": 21.67, "learning_rate": 9.16891252507665e-06, "loss": 0.019, "step": 572500 }, { "epoch": 21.69, "learning_rate": 9.159449638517735e-06, "loss": 0.0186, "step": 573000 }, { "epoch": 21.71, "learning_rate": 9.149986751958819e-06, "loss": 0.0186, "step": 573500 }, { "epoch": 21.73, "learning_rate": 9.140523865399903e-06, "loss": 0.0184, "step": 574000 }, { "epoch": 21.75, "learning_rate": 9.131060978840986e-06, "loss": 0.0196, "step": 574500 }, { "epoch": 21.76, "learning_rate": 9.121617018055189e-06, "loss": 0.0197, "step": 575000 }, { "epoch": 21.78, "learning_rate": 9.112154131496273e-06, "loss": 0.0188, "step": 575500 }, { "epoch": 21.8, "learning_rate": 9.102691244937356e-06, "loss": 0.0196, "step": 576000 }, { "epoch": 21.82, "learning_rate": 9.09322835837844e-06, "loss": 0.0188, "step": 576500 }, { "epoch": 21.84, "learning_rate": 9.083765471819524e-06, "loss": 0.0194, "step": 577000 }, { "epoch": 21.86, "learning_rate": 9.074302585260608e-06, "loss": 0.0195, "step": 577500 }, { "epoch": 21.88, "learning_rate": 9.064839698701693e-06, "loss": 0.0199, "step": 578000 }, { "epoch": 21.9, "learning_rate": 9.055376812142777e-06, "loss": 0.0188, "step": 578500 }, { "epoch": 21.92, "learning_rate": 9.045913925583862e-06, "loss": 0.0196, "step": 579000 }, { "epoch": 21.93, "learning_rate": 9.036451039024946e-06, "loss": 0.0195, "step": 579500 }, { "epoch": 21.95, "learning_rate": 9.026988152466028e-06, "loss": 0.0202, "step": 580000 }, { "epoch": 21.97, "learning_rate": 9.017525265907113e-06, "loss": 0.0189, "step": 580500 }, { "epoch": 21.99, "learning_rate": 9.008062379348197e-06, "loss": 0.0196, "step": 581000 }, { "epoch": 22.0, "eval_bleu": 92.2659, "eval_gen_len": 16.477, "eval_loss": 0.12201742082834244, "eval_runtime": 1201.2438, "eval_samples_per_second": 38.016, "eval_steps_per_second": 1.189, "step": 581218 }, { "epoch": 22.01, "learning_rate": 8.998637344335517e-06, "loss": 0.0182, "step": 581500 }, { "epoch": 22.03, "learning_rate": 8.9891744577766e-06, "loss": 0.0163, "step": 582000 }, { "epoch": 22.05, "learning_rate": 8.979730496990804e-06, "loss": 0.0169, "step": 582500 }, { "epoch": 22.07, "learning_rate": 8.970267610431887e-06, "loss": 0.0162, "step": 583000 }, { "epoch": 22.09, "learning_rate": 8.960804723872971e-06, "loss": 0.0167, "step": 583500 }, { "epoch": 22.11, "learning_rate": 8.951341837314055e-06, "loss": 0.016, "step": 584000 }, { "epoch": 22.12, "learning_rate": 8.94187895075514e-06, "loss": 0.0163, "step": 584500 }, { "epoch": 22.14, "learning_rate": 8.932416064196224e-06, "loss": 0.0165, "step": 585000 }, { "epoch": 22.16, "learning_rate": 8.922953177637307e-06, "loss": 0.0173, "step": 585500 }, { "epoch": 22.18, "learning_rate": 8.913490291078391e-06, "loss": 0.0166, "step": 586000 }, { "epoch": 22.2, "learning_rate": 8.904027404519475e-06, "loss": 0.0172, "step": 586500 }, { "epoch": 22.22, "learning_rate": 8.894564517960559e-06, "loss": 0.0171, "step": 587000 }, { "epoch": 22.24, "learning_rate": 8.885101631401644e-06, "loss": 0.0167, "step": 587500 }, { "epoch": 22.26, "learning_rate": 8.875638744842728e-06, "loss": 0.0169, "step": 588000 }, { "epoch": 22.28, "learning_rate": 8.86619478405693e-06, "loss": 0.0175, "step": 588500 }, { "epoch": 22.29, "learning_rate": 8.856731897498012e-06, "loss": 0.0161, "step": 589000 }, { "epoch": 22.31, "learning_rate": 8.847269010939098e-06, "loss": 0.0167, "step": 589500 }, { "epoch": 22.33, "learning_rate": 8.837806124380182e-06, "loss": 0.0171, "step": 590000 }, { "epoch": 22.35, "learning_rate": 8.828362163594384e-06, "loss": 0.0175, "step": 590500 }, { "epoch": 22.37, "learning_rate": 8.818899277035468e-06, "loss": 0.0166, "step": 591000 }, { "epoch": 22.39, "learning_rate": 8.809455316249669e-06, "loss": 0.0164, "step": 591500 }, { "epoch": 22.41, "learning_rate": 8.799992429690753e-06, "loss": 0.0172, "step": 592000 }, { "epoch": 22.43, "learning_rate": 8.790529543131837e-06, "loss": 0.0176, "step": 592500 }, { "epoch": 22.45, "learning_rate": 8.781066656572922e-06, "loss": 0.0175, "step": 593000 }, { "epoch": 22.46, "learning_rate": 8.771622695787123e-06, "loss": 0.0172, "step": 593500 }, { "epoch": 22.48, "learning_rate": 8.762159809228208e-06, "loss": 0.0177, "step": 594000 }, { "epoch": 22.5, "learning_rate": 8.752696922669292e-06, "loss": 0.0173, "step": 594500 }, { "epoch": 22.52, "learning_rate": 8.743234036110376e-06, "loss": 0.0167, "step": 595000 }, { "epoch": 22.54, "learning_rate": 8.73377114955146e-06, "loss": 0.0173, "step": 595500 }, { "epoch": 22.56, "learning_rate": 8.72434611453878e-06, "loss": 0.0173, "step": 596000 }, { "epoch": 22.58, "learning_rate": 8.714883227979863e-06, "loss": 0.0178, "step": 596500 }, { "epoch": 22.6, "learning_rate": 8.705420341420949e-06, "loss": 0.0176, "step": 597000 }, { "epoch": 22.62, "learning_rate": 8.695957454862033e-06, "loss": 0.017, "step": 597500 }, { "epoch": 22.64, "learning_rate": 8.686494568303116e-06, "loss": 0.0175, "step": 598000 }, { "epoch": 22.65, "learning_rate": 8.6770316817442e-06, "loss": 0.0171, "step": 598500 }, { "epoch": 22.67, "learning_rate": 8.667568795185284e-06, "loss": 0.0181, "step": 599000 }, { "epoch": 22.69, "learning_rate": 8.658105908626369e-06, "loss": 0.0175, "step": 599500 }, { "epoch": 22.71, "learning_rate": 8.648643022067453e-06, "loss": 0.0175, "step": 600000 }, { "epoch": 22.73, "learning_rate": 8.639180135508536e-06, "loss": 0.0185, "step": 600500 }, { "epoch": 22.75, "learning_rate": 8.629736174722738e-06, "loss": 0.0179, "step": 601000 }, { "epoch": 22.77, "learning_rate": 8.620273288163821e-06, "loss": 0.018, "step": 601500 }, { "epoch": 22.79, "learning_rate": 8.610810401604907e-06, "loss": 0.0172, "step": 602000 }, { "epoch": 22.81, "learning_rate": 8.60134751504599e-06, "loss": 0.0176, "step": 602500 }, { "epoch": 22.82, "learning_rate": 8.591903554260193e-06, "loss": 0.0177, "step": 603000 }, { "epoch": 22.84, "learning_rate": 8.582440667701277e-06, "loss": 0.0171, "step": 603500 }, { "epoch": 22.86, "learning_rate": 8.57297778114236e-06, "loss": 0.0176, "step": 604000 }, { "epoch": 22.88, "learning_rate": 8.563514894583444e-06, "loss": 0.0176, "step": 604500 }, { "epoch": 22.9, "learning_rate": 8.554052008024528e-06, "loss": 0.0177, "step": 605000 }, { "epoch": 22.92, "learning_rate": 8.544589121465613e-06, "loss": 0.0176, "step": 605500 }, { "epoch": 22.94, "learning_rate": 8.535126234906697e-06, "loss": 0.018, "step": 606000 }, { "epoch": 22.96, "learning_rate": 8.52566334834778e-06, "loss": 0.0179, "step": 606500 }, { "epoch": 22.98, "learning_rate": 8.516200461788864e-06, "loss": 0.0173, "step": 607000 }, { "epoch": 22.99, "learning_rate": 8.506737575229948e-06, "loss": 0.0181, "step": 607500 }, { "epoch": 23.0, "eval_bleu": 92.2975, "eval_gen_len": 16.4935, "eval_loss": 0.12451058626174927, "eval_runtime": 1203.5654, "eval_samples_per_second": 37.943, "eval_steps_per_second": 1.186, "step": 607637 }, { "epoch": 23.01, "learning_rate": 8.497312540217268e-06, "loss": 0.0158, "step": 608000 }, { "epoch": 23.03, "learning_rate": 8.487849653658352e-06, "loss": 0.0151, "step": 608500 }, { "epoch": 23.05, "learning_rate": 8.478386767099437e-06, "loss": 0.0156, "step": 609000 }, { "epoch": 23.07, "learning_rate": 8.468923880540521e-06, "loss": 0.0154, "step": 609500 }, { "epoch": 23.09, "learning_rate": 8.459460993981605e-06, "loss": 0.0149, "step": 610000 }, { "epoch": 23.11, "learning_rate": 8.449998107422689e-06, "loss": 0.0154, "step": 610500 }, { "epoch": 23.13, "learning_rate": 8.440535220863772e-06, "loss": 0.0157, "step": 611000 }, { "epoch": 23.15, "learning_rate": 8.431072334304858e-06, "loss": 0.0157, "step": 611500 }, { "epoch": 23.17, "learning_rate": 8.421609447745941e-06, "loss": 0.0151, "step": 612000 }, { "epoch": 23.18, "learning_rate": 8.412146561187025e-06, "loss": 0.0149, "step": 612500 }, { "epoch": 23.2, "learning_rate": 8.402683674628109e-06, "loss": 0.0151, "step": 613000 }, { "epoch": 23.22, "learning_rate": 8.393220788069192e-06, "loss": 0.0151, "step": 613500 }, { "epoch": 23.24, "learning_rate": 8.383757901510278e-06, "loss": 0.0154, "step": 614000 }, { "epoch": 23.26, "learning_rate": 8.374332866497598e-06, "loss": 0.0156, "step": 614500 }, { "epoch": 23.28, "learning_rate": 8.364869979938682e-06, "loss": 0.0157, "step": 615000 }, { "epoch": 23.3, "learning_rate": 8.355407093379765e-06, "loss": 0.0158, "step": 615500 }, { "epoch": 23.32, "learning_rate": 8.345944206820849e-06, "loss": 0.0156, "step": 616000 }, { "epoch": 23.34, "learning_rate": 8.336481320261933e-06, "loss": 0.0152, "step": 616500 }, { "epoch": 23.35, "learning_rate": 8.327037359476136e-06, "loss": 0.0153, "step": 617000 }, { "epoch": 23.37, "learning_rate": 8.31757447291722e-06, "loss": 0.0154, "step": 617500 }, { "epoch": 23.39, "learning_rate": 8.308111586358305e-06, "loss": 0.0159, "step": 618000 }, { "epoch": 23.41, "learning_rate": 8.298648699799388e-06, "loss": 0.0154, "step": 618500 }, { "epoch": 23.43, "learning_rate": 8.28918581324047e-06, "loss": 0.0156, "step": 619000 }, { "epoch": 23.45, "learning_rate": 8.279722926681556e-06, "loss": 0.0155, "step": 619500 }, { "epoch": 23.47, "learning_rate": 8.27026004012264e-06, "loss": 0.0157, "step": 620000 }, { "epoch": 23.49, "learning_rate": 8.260797153563723e-06, "loss": 0.016, "step": 620500 }, { "epoch": 23.51, "learning_rate": 8.251353192777926e-06, "loss": 0.016, "step": 621000 }, { "epoch": 23.52, "learning_rate": 8.24189030621901e-06, "loss": 0.0161, "step": 621500 }, { "epoch": 23.54, "learning_rate": 8.232427419660093e-06, "loss": 0.0158, "step": 622000 }, { "epoch": 23.56, "learning_rate": 8.222964533101177e-06, "loss": 0.0157, "step": 622500 }, { "epoch": 23.58, "learning_rate": 8.213501646542262e-06, "loss": 0.017, "step": 623000 }, { "epoch": 23.6, "learning_rate": 8.204057685756464e-06, "loss": 0.0157, "step": 623500 }, { "epoch": 23.62, "learning_rate": 8.194594799197549e-06, "loss": 0.0167, "step": 624000 }, { "epoch": 23.64, "learning_rate": 8.185131912638633e-06, "loss": 0.0156, "step": 624500 }, { "epoch": 23.66, "learning_rate": 8.175669026079716e-06, "loss": 0.0162, "step": 625000 }, { "epoch": 23.68, "learning_rate": 8.1662061395208e-06, "loss": 0.0164, "step": 625500 }, { "epoch": 23.7, "learning_rate": 8.156762178735001e-06, "loss": 0.0165, "step": 626000 }, { "epoch": 23.71, "learning_rate": 8.147299292176087e-06, "loss": 0.0157, "step": 626500 }, { "epoch": 23.73, "learning_rate": 8.13783640561717e-06, "loss": 0.0164, "step": 627000 }, { "epoch": 23.75, "learning_rate": 8.128373519058254e-06, "loss": 0.0164, "step": 627500 }, { "epoch": 23.77, "learning_rate": 8.118910632499338e-06, "loss": 0.0158, "step": 628000 }, { "epoch": 23.79, "learning_rate": 8.10946667171354e-06, "loss": 0.0157, "step": 628500 }, { "epoch": 23.81, "learning_rate": 8.100022710927742e-06, "loss": 0.0161, "step": 629000 }, { "epoch": 23.83, "learning_rate": 8.090578750141944e-06, "loss": 0.0168, "step": 629500 }, { "epoch": 23.85, "learning_rate": 8.081115863583028e-06, "loss": 0.0152, "step": 630000 }, { "epoch": 23.87, "learning_rate": 8.071652977024112e-06, "loss": 0.0162, "step": 630500 }, { "epoch": 23.88, "learning_rate": 8.062190090465195e-06, "loss": 0.0158, "step": 631000 }, { "epoch": 23.9, "learning_rate": 8.05272720390628e-06, "loss": 0.0164, "step": 631500 }, { "epoch": 23.92, "learning_rate": 8.043264317347365e-06, "loss": 0.0166, "step": 632000 }, { "epoch": 23.94, "learning_rate": 8.033801430788448e-06, "loss": 0.0152, "step": 632500 }, { "epoch": 23.96, "learning_rate": 8.024338544229534e-06, "loss": 0.0164, "step": 633000 }, { "epoch": 23.98, "learning_rate": 8.014875657670617e-06, "loss": 0.0166, "step": 633500 }, { "epoch": 24.0, "learning_rate": 8.0054127711117e-06, "loss": 0.0161, "step": 634000 }, { "epoch": 24.0, "eval_bleu": 92.2619, "eval_gen_len": 16.5368, "eval_loss": 0.12691813707351685, "eval_runtime": 1199.0212, "eval_samples_per_second": 38.087, "eval_steps_per_second": 1.191, "step": 634056 }, { "epoch": 24.02, "learning_rate": 7.995949884552785e-06, "loss": 0.0141, "step": 634500 }, { "epoch": 24.04, "learning_rate": 7.986486997993868e-06, "loss": 0.013, "step": 635000 }, { "epoch": 24.05, "learning_rate": 7.977024111434952e-06, "loss": 0.0136, "step": 635500 }, { "epoch": 24.07, "learning_rate": 7.967580150649155e-06, "loss": 0.0136, "step": 636000 }, { "epoch": 24.09, "learning_rate": 7.958117264090239e-06, "loss": 0.014, "step": 636500 }, { "epoch": 24.11, "learning_rate": 7.948654377531322e-06, "loss": 0.0136, "step": 637000 }, { "epoch": 24.13, "learning_rate": 7.939191490972406e-06, "loss": 0.0139, "step": 637500 }, { "epoch": 24.15, "learning_rate": 7.929747530186609e-06, "loss": 0.0146, "step": 638000 }, { "epoch": 24.17, "learning_rate": 7.92030356940081e-06, "loss": 0.0142, "step": 638500 }, { "epoch": 24.19, "learning_rate": 7.910840682841895e-06, "loss": 0.0135, "step": 639000 }, { "epoch": 24.21, "learning_rate": 7.901377796282979e-06, "loss": 0.0137, "step": 639500 }, { "epoch": 24.22, "learning_rate": 7.891914909724063e-06, "loss": 0.0145, "step": 640000 }, { "epoch": 24.24, "learning_rate": 7.882452023165146e-06, "loss": 0.0142, "step": 640500 }, { "epoch": 24.26, "learning_rate": 7.87300806237935e-06, "loss": 0.0146, "step": 641000 }, { "epoch": 24.28, "learning_rate": 7.863545175820433e-06, "loss": 0.0136, "step": 641500 }, { "epoch": 24.3, "learning_rate": 7.854082289261517e-06, "loss": 0.0141, "step": 642000 }, { "epoch": 24.32, "learning_rate": 7.844619402702602e-06, "loss": 0.0142, "step": 642500 }, { "epoch": 24.34, "learning_rate": 7.835156516143684e-06, "loss": 0.0149, "step": 643000 }, { "epoch": 24.36, "learning_rate": 7.82569362958477e-06, "loss": 0.0149, "step": 643500 }, { "epoch": 24.38, "learning_rate": 7.816230743025853e-06, "loss": 0.0141, "step": 644000 }, { "epoch": 24.4, "learning_rate": 7.806767856466937e-06, "loss": 0.0148, "step": 644500 }, { "epoch": 24.41, "learning_rate": 7.797304969908022e-06, "loss": 0.0143, "step": 645000 }, { "epoch": 24.43, "learning_rate": 7.787842083349106e-06, "loss": 0.0139, "step": 645500 }, { "epoch": 24.45, "learning_rate": 7.77837919679019e-06, "loss": 0.0143, "step": 646000 }, { "epoch": 24.47, "learning_rate": 7.76893523600439e-06, "loss": 0.0139, "step": 646500 }, { "epoch": 24.49, "learning_rate": 7.759472349445476e-06, "loss": 0.0147, "step": 647000 }, { "epoch": 24.51, "learning_rate": 7.75000946288656e-06, "loss": 0.0148, "step": 647500 }, { "epoch": 24.53, "learning_rate": 7.740546576327644e-06, "loss": 0.0156, "step": 648000 }, { "epoch": 24.55, "learning_rate": 7.731083689768727e-06, "loss": 0.0146, "step": 648500 }, { "epoch": 24.57, "learning_rate": 7.721658654756047e-06, "loss": 0.0145, "step": 649000 }, { "epoch": 24.58, "learning_rate": 7.712195768197131e-06, "loss": 0.0147, "step": 649500 }, { "epoch": 24.6, "learning_rate": 7.702732881638215e-06, "loss": 0.0144, "step": 650000 }, { "epoch": 24.62, "learning_rate": 7.6932699950793e-06, "loss": 0.0145, "step": 650500 }, { "epoch": 24.64, "learning_rate": 7.683807108520384e-06, "loss": 0.0144, "step": 651000 }, { "epoch": 24.66, "learning_rate": 7.674363147734585e-06, "loss": 0.0145, "step": 651500 }, { "epoch": 24.68, "learning_rate": 7.664900261175669e-06, "loss": 0.0152, "step": 652000 }, { "epoch": 24.7, "learning_rate": 7.655437374616752e-06, "loss": 0.0146, "step": 652500 }, { "epoch": 24.72, "learning_rate": 7.645974488057838e-06, "loss": 0.014, "step": 653000 }, { "epoch": 24.74, "learning_rate": 7.636511601498922e-06, "loss": 0.0151, "step": 653500 }, { "epoch": 24.75, "learning_rate": 7.6270676407131235e-06, "loss": 0.0144, "step": 654000 }, { "epoch": 24.77, "learning_rate": 7.617604754154207e-06, "loss": 0.0149, "step": 654500 }, { "epoch": 24.79, "learning_rate": 7.608141867595293e-06, "loss": 0.0152, "step": 655000 }, { "epoch": 24.81, "learning_rate": 7.598678981036376e-06, "loss": 0.0147, "step": 655500 }, { "epoch": 24.83, "learning_rate": 7.589216094477459e-06, "loss": 0.0136, "step": 656000 }, { "epoch": 24.85, "learning_rate": 7.579753207918545e-06, "loss": 0.0145, "step": 656500 }, { "epoch": 24.87, "learning_rate": 7.570309247132746e-06, "loss": 0.0149, "step": 657000 }, { "epoch": 24.89, "learning_rate": 7.56084636057383e-06, "loss": 0.0157, "step": 657500 }, { "epoch": 24.91, "learning_rate": 7.551383474014914e-06, "loss": 0.0146, "step": 658000 }, { "epoch": 24.93, "learning_rate": 7.5419205874559985e-06, "loss": 0.0147, "step": 658500 }, { "epoch": 24.94, "learning_rate": 7.5324766266702e-06, "loss": 0.0153, "step": 659000 }, { "epoch": 24.96, "learning_rate": 7.523013740111285e-06, "loss": 0.0149, "step": 659500 }, { "epoch": 24.98, "learning_rate": 7.513550853552368e-06, "loss": 0.0153, "step": 660000 }, { "epoch": 25.0, "eval_bleu": 92.2799, "eval_gen_len": 16.4882, "eval_loss": 0.1280273050069809, "eval_runtime": 1203.4771, "eval_samples_per_second": 37.946, "eval_steps_per_second": 1.187, "step": 660475 }, { "epoch": 25.0, "learning_rate": 7.5040879669934515e-06, "loss": 0.0147, "step": 660500 }, { "epoch": 25.02, "learning_rate": 7.494625080434537e-06, "loss": 0.0129, "step": 661000 }, { "epoch": 25.04, "learning_rate": 7.485162193875621e-06, "loss": 0.0131, "step": 661500 }, { "epoch": 25.06, "learning_rate": 7.475699307316705e-06, "loss": 0.0123, "step": 662000 }, { "epoch": 25.08, "learning_rate": 7.466236420757789e-06, "loss": 0.0127, "step": 662500 }, { "epoch": 25.1, "learning_rate": 7.456792459971991e-06, "loss": 0.0124, "step": 663000 }, { "epoch": 25.11, "learning_rate": 7.447348499186192e-06, "loss": 0.0122, "step": 663500 }, { "epoch": 25.13, "learning_rate": 7.437885612627277e-06, "loss": 0.0133, "step": 664000 }, { "epoch": 25.15, "learning_rate": 7.42842272606836e-06, "loss": 0.0125, "step": 664500 }, { "epoch": 25.17, "learning_rate": 7.418959839509444e-06, "loss": 0.0132, "step": 665000 }, { "epoch": 25.19, "learning_rate": 7.409496952950529e-06, "loss": 0.013, "step": 665500 }, { "epoch": 25.21, "learning_rate": 7.40005299216473e-06, "loss": 0.0126, "step": 666000 }, { "epoch": 25.23, "learning_rate": 7.390590105605815e-06, "loss": 0.0133, "step": 666500 }, { "epoch": 25.25, "learning_rate": 7.381127219046899e-06, "loss": 0.0134, "step": 667000 }, { "epoch": 25.27, "learning_rate": 7.371664332487982e-06, "loss": 0.0129, "step": 667500 }, { "epoch": 25.28, "learning_rate": 7.362201445929067e-06, "loss": 0.0129, "step": 668000 }, { "epoch": 25.3, "learning_rate": 7.3527385593701506e-06, "loss": 0.0132, "step": 668500 }, { "epoch": 25.32, "learning_rate": 7.343275672811235e-06, "loss": 0.0131, "step": 669000 }, { "epoch": 25.34, "learning_rate": 7.333812786252319e-06, "loss": 0.0133, "step": 669500 }, { "epoch": 25.36, "learning_rate": 7.3243498996934025e-06, "loss": 0.013, "step": 670000 }, { "epoch": 25.38, "learning_rate": 7.314905938907605e-06, "loss": 0.0139, "step": 670500 }, { "epoch": 25.4, "learning_rate": 7.305461978121807e-06, "loss": 0.0131, "step": 671000 }, { "epoch": 25.42, "learning_rate": 7.295999091562891e-06, "loss": 0.0132, "step": 671500 }, { "epoch": 25.44, "learning_rate": 7.286536205003975e-06, "loss": 0.013, "step": 672000 }, { "epoch": 25.46, "learning_rate": 7.277073318445059e-06, "loss": 0.0136, "step": 672500 }, { "epoch": 25.47, "learning_rate": 7.267610431886143e-06, "loss": 0.0134, "step": 673000 }, { "epoch": 25.49, "learning_rate": 7.2581475453272275e-06, "loss": 0.013, "step": 673500 }, { "epoch": 25.51, "learning_rate": 7.2487035845414286e-06, "loss": 0.013, "step": 674000 }, { "epoch": 25.53, "learning_rate": 7.239240697982514e-06, "loss": 0.0139, "step": 674500 }, { "epoch": 25.55, "learning_rate": 7.229777811423598e-06, "loss": 0.0128, "step": 675000 }, { "epoch": 25.57, "learning_rate": 7.2203149248646805e-06, "loss": 0.0137, "step": 675500 }, { "epoch": 25.59, "learning_rate": 7.210852038305766e-06, "loss": 0.013, "step": 676000 }, { "epoch": 25.61, "learning_rate": 7.20138915174685e-06, "loss": 0.0127, "step": 676500 }, { "epoch": 25.63, "learning_rate": 7.191926265187934e-06, "loss": 0.0126, "step": 677000 }, { "epoch": 25.64, "learning_rate": 7.182463378629018e-06, "loss": 0.014, "step": 677500 }, { "epoch": 25.66, "learning_rate": 7.1730004920701016e-06, "loss": 0.0133, "step": 678000 }, { "epoch": 25.68, "learning_rate": 7.163537605511186e-06, "loss": 0.0135, "step": 678500 }, { "epoch": 25.7, "learning_rate": 7.154112570498506e-06, "loss": 0.014, "step": 679000 }, { "epoch": 25.72, "learning_rate": 7.144649683939589e-06, "loss": 0.0137, "step": 679500 }, { "epoch": 25.74, "learning_rate": 7.135186797380673e-06, "loss": 0.0136, "step": 680000 }, { "epoch": 25.76, "learning_rate": 7.125723910821758e-06, "loss": 0.0135, "step": 680500 }, { "epoch": 25.78, "learning_rate": 7.116261024262842e-06, "loss": 0.0138, "step": 681000 }, { "epoch": 25.8, "learning_rate": 7.106798137703925e-06, "loss": 0.0135, "step": 681500 }, { "epoch": 25.81, "learning_rate": 7.09733525114501e-06, "loss": 0.0133, "step": 682000 }, { "epoch": 25.83, "learning_rate": 7.087872364586094e-06, "loss": 0.0133, "step": 682500 }, { "epoch": 25.85, "learning_rate": 7.0784094780271785e-06, "loss": 0.0135, "step": 683000 }, { "epoch": 25.87, "learning_rate": 7.068946591468262e-06, "loss": 0.013, "step": 683500 }, { "epoch": 25.89, "learning_rate": 7.059502630682464e-06, "loss": 0.0134, "step": 684000 }, { "epoch": 25.91, "learning_rate": 7.050039744123548e-06, "loss": 0.0142, "step": 684500 }, { "epoch": 25.93, "learning_rate": 7.0405768575646315e-06, "loss": 0.0135, "step": 685000 }, { "epoch": 25.95, "learning_rate": 7.031113971005716e-06, "loss": 0.0135, "step": 685500 }, { "epoch": 25.97, "learning_rate": 7.0216510844468e-06, "loss": 0.0139, "step": 686000 }, { "epoch": 25.99, "learning_rate": 7.012188197887884e-06, "loss": 0.0142, "step": 686500 }, { "epoch": 26.0, "eval_bleu": 92.3179, "eval_gen_len": 16.4738, "eval_loss": 0.13104189932346344, "eval_runtime": 1208.6929, "eval_samples_per_second": 37.782, "eval_steps_per_second": 1.181, "step": 686894 }, { "epoch": 26.0, "learning_rate": 7.002725311328968e-06, "loss": 0.0136, "step": 687000 }, { "epoch": 26.02, "learning_rate": 6.993262424770052e-06, "loss": 0.0117, "step": 687500 }, { "epoch": 26.04, "learning_rate": 6.9838184639842545e-06, "loss": 0.0114, "step": 688000 }, { "epoch": 26.06, "learning_rate": 6.974355577425338e-06, "loss": 0.012, "step": 688500 }, { "epoch": 26.08, "learning_rate": 6.964892690866423e-06, "loss": 0.0118, "step": 689000 }, { "epoch": 26.1, "learning_rate": 6.955448730080624e-06, "loss": 0.0118, "step": 689500 }, { "epoch": 26.12, "learning_rate": 6.945985843521708e-06, "loss": 0.012, "step": 690000 }, { "epoch": 26.14, "learning_rate": 6.936522956962792e-06, "loss": 0.012, "step": 690500 }, { "epoch": 26.16, "learning_rate": 6.927060070403877e-06, "loss": 0.0116, "step": 691000 }, { "epoch": 26.17, "learning_rate": 6.91759718384496e-06, "loss": 0.0122, "step": 691500 }, { "epoch": 26.19, "learning_rate": 6.908134297286044e-06, "loss": 0.0127, "step": 692000 }, { "epoch": 26.21, "learning_rate": 6.8986714107271295e-06, "loss": 0.0115, "step": 692500 }, { "epoch": 26.23, "learning_rate": 6.889208524168212e-06, "loss": 0.0122, "step": 693000 }, { "epoch": 26.25, "learning_rate": 6.879764563382415e-06, "loss": 0.0125, "step": 693500 }, { "epoch": 26.27, "learning_rate": 6.870301676823499e-06, "loss": 0.012, "step": 694000 }, { "epoch": 26.29, "learning_rate": 6.8608387902645825e-06, "loss": 0.0121, "step": 694500 }, { "epoch": 26.31, "learning_rate": 6.851375903705667e-06, "loss": 0.0113, "step": 695000 }, { "epoch": 26.33, "learning_rate": 6.841931942919868e-06, "loss": 0.013, "step": 695500 }, { "epoch": 26.34, "learning_rate": 6.832469056360953e-06, "loss": 0.012, "step": 696000 }, { "epoch": 26.36, "learning_rate": 6.823006169802036e-06, "loss": 0.0124, "step": 696500 }, { "epoch": 26.38, "learning_rate": 6.813543283243122e-06, "loss": 0.0126, "step": 697000 }, { "epoch": 26.4, "learning_rate": 6.804080396684205e-06, "loss": 0.0116, "step": 697500 }, { "epoch": 26.42, "learning_rate": 6.794617510125288e-06, "loss": 0.0121, "step": 698000 }, { "epoch": 26.44, "learning_rate": 6.785154623566374e-06, "loss": 0.0126, "step": 698500 }, { "epoch": 26.46, "learning_rate": 6.7756917370074575e-06, "loss": 0.0114, "step": 699000 }, { "epoch": 26.48, "learning_rate": 6.766247776221659e-06, "loss": 0.012, "step": 699500 }, { "epoch": 26.5, "learning_rate": 6.756784889662743e-06, "loss": 0.0122, "step": 700000 }, { "epoch": 26.52, "learning_rate": 6.747322003103828e-06, "loss": 0.0124, "step": 700500 }, { "epoch": 26.53, "learning_rate": 6.737859116544911e-06, "loss": 0.012, "step": 701000 }, { "epoch": 26.55, "learning_rate": 6.728415155759114e-06, "loss": 0.0123, "step": 701500 }, { "epoch": 26.57, "learning_rate": 6.718952269200197e-06, "loss": 0.0128, "step": 702000 }, { "epoch": 26.59, "learning_rate": 6.709489382641281e-06, "loss": 0.0129, "step": 702500 }, { "epoch": 26.61, "learning_rate": 6.7000454218554835e-06, "loss": 0.0123, "step": 703000 }, { "epoch": 26.63, "learning_rate": 6.690582535296567e-06, "loss": 0.0122, "step": 703500 }, { "epoch": 26.65, "learning_rate": 6.681119648737652e-06, "loss": 0.0126, "step": 704000 }, { "epoch": 26.67, "learning_rate": 6.6716567621787355e-06, "loss": 0.0119, "step": 704500 }, { "epoch": 26.69, "learning_rate": 6.66219387561982e-06, "loss": 0.0123, "step": 705000 }, { "epoch": 26.7, "learning_rate": 6.652730989060904e-06, "loss": 0.0125, "step": 705500 }, { "epoch": 26.72, "learning_rate": 6.643268102501987e-06, "loss": 0.0124, "step": 706000 }, { "epoch": 26.74, "learning_rate": 6.633824141716189e-06, "loss": 0.0124, "step": 706500 }, { "epoch": 26.76, "learning_rate": 6.624361255157273e-06, "loss": 0.0121, "step": 707000 }, { "epoch": 26.78, "learning_rate": 6.6148983685983585e-06, "loss": 0.0129, "step": 707500 }, { "epoch": 26.8, "learning_rate": 6.605435482039442e-06, "loss": 0.0126, "step": 708000 }, { "epoch": 26.82, "learning_rate": 6.595991521253644e-06, "loss": 0.0127, "step": 708500 }, { "epoch": 26.84, "learning_rate": 6.586528634694728e-06, "loss": 0.0126, "step": 709000 }, { "epoch": 26.86, "learning_rate": 6.5770657481358115e-06, "loss": 0.0125, "step": 709500 }, { "epoch": 26.87, "learning_rate": 6.567602861576896e-06, "loss": 0.0129, "step": 710000 }, { "epoch": 26.89, "learning_rate": 6.55813997501798e-06, "loss": 0.012, "step": 710500 }, { "epoch": 26.91, "learning_rate": 6.548696014232182e-06, "loss": 0.0126, "step": 711000 }, { "epoch": 26.93, "learning_rate": 6.539233127673265e-06, "loss": 0.0121, "step": 711500 }, { "epoch": 26.95, "learning_rate": 6.529770241114351e-06, "loss": 0.0124, "step": 712000 }, { "epoch": 26.97, "learning_rate": 6.520307354555434e-06, "loss": 0.0127, "step": 712500 }, { "epoch": 26.99, "learning_rate": 6.510844467996517e-06, "loss": 0.0126, "step": 713000 }, { "epoch": 27.0, "eval_bleu": 92.2702, "eval_gen_len": 16.4554, "eval_loss": 0.13306637108325958, "eval_runtime": 1206.8893, "eval_samples_per_second": 37.839, "eval_steps_per_second": 1.183, "step": 713313 }, { "epoch": 27.01, "learning_rate": 6.501381581437603e-06, "loss": 0.0107, "step": 713500 }, { "epoch": 27.03, "learning_rate": 6.4919186948786865e-06, "loss": 0.0108, "step": 714000 }, { "epoch": 27.04, "learning_rate": 6.482455808319771e-06, "loss": 0.0112, "step": 714500 }, { "epoch": 27.06, "learning_rate": 6.472992921760855e-06, "loss": 0.0114, "step": 715000 }, { "epoch": 27.08, "learning_rate": 6.463530035201938e-06, "loss": 0.011, "step": 715500 }, { "epoch": 27.1, "learning_rate": 6.45408607441614e-06, "loss": 0.0113, "step": 716000 }, { "epoch": 27.12, "learning_rate": 6.444623187857224e-06, "loss": 0.0114, "step": 716500 }, { "epoch": 27.14, "learning_rate": 6.435179227071426e-06, "loss": 0.011, "step": 717000 }, { "epoch": 27.16, "learning_rate": 6.42571634051251e-06, "loss": 0.0111, "step": 717500 }, { "epoch": 27.18, "learning_rate": 6.416253453953595e-06, "loss": 0.0115, "step": 718000 }, { "epoch": 27.2, "learning_rate": 6.406790567394679e-06, "loss": 0.0111, "step": 718500 }, { "epoch": 27.22, "learning_rate": 6.397327680835763e-06, "loss": 0.0112, "step": 719000 }, { "epoch": 27.23, "learning_rate": 6.387864794276847e-06, "loss": 0.0109, "step": 719500 }, { "epoch": 27.25, "learning_rate": 6.378401907717931e-06, "loss": 0.0106, "step": 720000 }, { "epoch": 27.27, "learning_rate": 6.368939021159015e-06, "loss": 0.0114, "step": 720500 }, { "epoch": 27.29, "learning_rate": 6.359476134600099e-06, "loss": 0.0113, "step": 721000 }, { "epoch": 27.31, "learning_rate": 6.350013248041183e-06, "loss": 0.011, "step": 721500 }, { "epoch": 27.33, "learning_rate": 6.340550361482267e-06, "loss": 0.0114, "step": 722000 }, { "epoch": 27.35, "learning_rate": 6.331106400696468e-06, "loss": 0.0112, "step": 722500 }, { "epoch": 27.37, "learning_rate": 6.321643514137553e-06, "loss": 0.0111, "step": 723000 }, { "epoch": 27.39, "learning_rate": 6.312180627578637e-06, "loss": 0.0113, "step": 723500 }, { "epoch": 27.4, "learning_rate": 6.302736666792839e-06, "loss": 0.0112, "step": 724000 }, { "epoch": 27.42, "learning_rate": 6.293273780233923e-06, "loss": 0.0113, "step": 724500 }, { "epoch": 27.44, "learning_rate": 6.283810893675008e-06, "loss": 0.0117, "step": 725000 }, { "epoch": 27.46, "learning_rate": 6.274348007116091e-06, "loss": 0.0112, "step": 725500 }, { "epoch": 27.48, "learning_rate": 6.264885120557175e-06, "loss": 0.0108, "step": 726000 }, { "epoch": 27.5, "learning_rate": 6.25542223399826e-06, "loss": 0.0117, "step": 726500 }, { "epoch": 27.52, "learning_rate": 6.245959347439343e-06, "loss": 0.0112, "step": 727000 }, { "epoch": 27.54, "learning_rate": 6.236496460880428e-06, "loss": 0.0114, "step": 727500 }, { "epoch": 27.56, "learning_rate": 6.2270335743215116e-06, "loss": 0.0115, "step": 728000 }, { "epoch": 27.57, "learning_rate": 6.217570687762595e-06, "loss": 0.0113, "step": 728500 }, { "epoch": 27.59, "learning_rate": 6.20810780120368e-06, "loss": 0.012, "step": 729000 }, { "epoch": 27.61, "learning_rate": 6.1986449146447635e-06, "loss": 0.0105, "step": 729500 }, { "epoch": 27.63, "learning_rate": 6.189200953858966e-06, "loss": 0.0126, "step": 730000 }, { "epoch": 27.65, "learning_rate": 6.179756993073167e-06, "loss": 0.0116, "step": 730500 }, { "epoch": 27.67, "learning_rate": 6.170313032287369e-06, "loss": 0.0114, "step": 731000 }, { "epoch": 27.69, "learning_rate": 6.160850145728453e-06, "loss": 0.0113, "step": 731500 }, { "epoch": 27.71, "learning_rate": 6.151387259169538e-06, "loss": 0.012, "step": 732000 }, { "epoch": 27.73, "learning_rate": 6.141924372610621e-06, "loss": 0.011, "step": 732500 }, { "epoch": 27.75, "learning_rate": 6.132461486051706e-06, "loss": 0.012, "step": 733000 }, { "epoch": 27.76, "learning_rate": 6.1229985994927896e-06, "loss": 0.0115, "step": 733500 }, { "epoch": 27.78, "learning_rate": 6.113535712933873e-06, "loss": 0.0113, "step": 734000 }, { "epoch": 27.8, "learning_rate": 6.104072826374959e-06, "loss": 0.0114, "step": 734500 }, { "epoch": 27.82, "learning_rate": 6.0946099398160415e-06, "loss": 0.0117, "step": 735000 }, { "epoch": 27.84, "learning_rate": 6.085165979030244e-06, "loss": 0.0117, "step": 735500 }, { "epoch": 27.86, "learning_rate": 6.075703092471328e-06, "loss": 0.0112, "step": 736000 }, { "epoch": 27.88, "learning_rate": 6.066240205912412e-06, "loss": 0.0121, "step": 736500 }, { "epoch": 27.9, "learning_rate": 6.056777319353496e-06, "loss": 0.0115, "step": 737000 }, { "epoch": 27.92, "learning_rate": 6.04731443279458e-06, "loss": 0.0118, "step": 737500 }, { "epoch": 27.93, "learning_rate": 6.0378515462356645e-06, "loss": 0.0118, "step": 738000 }, { "epoch": 27.95, "learning_rate": 6.028407585449866e-06, "loss": 0.0117, "step": 738500 }, { "epoch": 27.97, "learning_rate": 6.01894469889095e-06, "loss": 0.0116, "step": 739000 }, { "epoch": 27.99, "learning_rate": 6.009481812332034e-06, "loss": 0.012, "step": 739500 }, { "epoch": 28.0, "eval_bleu": 92.2664, "eval_gen_len": 16.4506, "eval_loss": 0.1342269480228424, "eval_runtime": 1200.216, "eval_samples_per_second": 38.049, "eval_steps_per_second": 1.19, "step": 739732 }, { "epoch": 28.01, "learning_rate": 6.0000189257731176e-06, "loss": 0.0107, "step": 740000 }, { "epoch": 28.03, "learning_rate": 5.990556039214203e-06, "loss": 0.0101, "step": 740500 }, { "epoch": 28.05, "learning_rate": 5.981112078428404e-06, "loss": 0.0104, "step": 741000 }, { "epoch": 28.07, "learning_rate": 5.971649191869489e-06, "loss": 0.0101, "step": 741500 }, { "epoch": 28.09, "learning_rate": 5.962186305310572e-06, "loss": 0.0102, "step": 742000 }, { "epoch": 28.1, "learning_rate": 5.952723418751657e-06, "loss": 0.0107, "step": 742500 }, { "epoch": 28.12, "learning_rate": 5.9432605321927405e-06, "loss": 0.0103, "step": 743000 }, { "epoch": 28.14, "learning_rate": 5.933797645633824e-06, "loss": 0.0102, "step": 743500 }, { "epoch": 28.16, "learning_rate": 5.924334759074909e-06, "loss": 0.0104, "step": 744000 }, { "epoch": 28.18, "learning_rate": 5.9148718725159925e-06, "loss": 0.0101, "step": 744500 }, { "epoch": 28.2, "learning_rate": 5.905408985957077e-06, "loss": 0.0102, "step": 745000 }, { "epoch": 28.22, "learning_rate": 5.895946099398161e-06, "loss": 0.0101, "step": 745500 }, { "epoch": 28.24, "learning_rate": 5.8865021386123635e-06, "loss": 0.0103, "step": 746000 }, { "epoch": 28.26, "learning_rate": 5.877039252053447e-06, "loss": 0.0105, "step": 746500 }, { "epoch": 28.28, "learning_rate": 5.867576365494531e-06, "loss": 0.0104, "step": 747000 }, { "epoch": 28.29, "learning_rate": 5.8581134789356155e-06, "loss": 0.0102, "step": 747500 }, { "epoch": 28.31, "learning_rate": 5.848650592376699e-06, "loss": 0.0099, "step": 748000 }, { "epoch": 28.33, "learning_rate": 5.839187705817783e-06, "loss": 0.0107, "step": 748500 }, { "epoch": 28.35, "learning_rate": 5.829743745031985e-06, "loss": 0.0108, "step": 749000 }, { "epoch": 28.37, "learning_rate": 5.8202808584730685e-06, "loss": 0.0106, "step": 749500 }, { "epoch": 28.39, "learning_rate": 5.810855823460389e-06, "loss": 0.01, "step": 750000 }, { "epoch": 28.41, "learning_rate": 5.801392936901473e-06, "loss": 0.0105, "step": 750500 }, { "epoch": 28.43, "learning_rate": 5.791930050342557e-06, "loss": 0.0107, "step": 751000 }, { "epoch": 28.45, "learning_rate": 5.782467163783641e-06, "loss": 0.0108, "step": 751500 }, { "epoch": 28.46, "learning_rate": 5.773004277224725e-06, "loss": 0.0106, "step": 752000 }, { "epoch": 28.48, "learning_rate": 5.763541390665809e-06, "loss": 0.0107, "step": 752500 }, { "epoch": 28.5, "learning_rate": 5.7540785041068935e-06, "loss": 0.0107, "step": 753000 }, { "epoch": 28.52, "learning_rate": 5.744615617547977e-06, "loss": 0.0106, "step": 753500 }, { "epoch": 28.54, "learning_rate": 5.73517165676218e-06, "loss": 0.0109, "step": 754000 }, { "epoch": 28.56, "learning_rate": 5.725708770203263e-06, "loss": 0.0106, "step": 754500 }, { "epoch": 28.58, "learning_rate": 5.7162458836443465e-06, "loss": 0.0109, "step": 755000 }, { "epoch": 28.6, "learning_rate": 5.706782997085432e-06, "loss": 0.0103, "step": 755500 }, { "epoch": 28.62, "learning_rate": 5.697320110526516e-06, "loss": 0.0109, "step": 756000 }, { "epoch": 28.63, "learning_rate": 5.6878572239676e-06, "loss": 0.0102, "step": 756500 }, { "epoch": 28.65, "learning_rate": 5.678394337408684e-06, "loss": 0.0107, "step": 757000 }, { "epoch": 28.67, "learning_rate": 5.668931450849768e-06, "loss": 0.0113, "step": 757500 }, { "epoch": 28.69, "learning_rate": 5.659468564290852e-06, "loss": 0.011, "step": 758000 }, { "epoch": 28.71, "learning_rate": 5.650005677731936e-06, "loss": 0.0106, "step": 758500 }, { "epoch": 28.73, "learning_rate": 5.640561716946138e-06, "loss": 0.0105, "step": 759000 }, { "epoch": 28.75, "learning_rate": 5.6310988303872215e-06, "loss": 0.0106, "step": 759500 }, { "epoch": 28.77, "learning_rate": 5.621635943828306e-06, "loss": 0.0107, "step": 760000 }, { "epoch": 28.79, "learning_rate": 5.612191983042507e-06, "loss": 0.0108, "step": 760500 }, { "epoch": 28.81, "learning_rate": 5.6027290964835925e-06, "loss": 0.0103, "step": 761000 }, { "epoch": 28.82, "learning_rate": 5.593266209924676e-06, "loss": 0.0102, "step": 761500 }, { "epoch": 28.84, "learning_rate": 5.58380332336576e-06, "loss": 0.0104, "step": 762000 }, { "epoch": 28.86, "learning_rate": 5.5743404368068445e-06, "loss": 0.0113, "step": 762500 }, { "epoch": 28.88, "learning_rate": 5.564877550247928e-06, "loss": 0.011, "step": 763000 }, { "epoch": 28.9, "learning_rate": 5.555414663689012e-06, "loss": 0.0108, "step": 763500 }, { "epoch": 28.92, "learning_rate": 5.5459517771300964e-06, "loss": 0.011, "step": 764000 }, { "epoch": 28.94, "learning_rate": 5.536526742117417e-06, "loss": 0.0106, "step": 764500 }, { "epoch": 28.96, "learning_rate": 5.527082781331618e-06, "loss": 0.0107, "step": 765000 }, { "epoch": 28.98, "learning_rate": 5.517619894772702e-06, "loss": 0.011, "step": 765500 }, { "epoch": 28.99, "learning_rate": 5.508157008213786e-06, "loss": 0.0112, "step": 766000 }, { "epoch": 29.0, "eval_bleu": 92.3004, "eval_gen_len": 16.4833, "eval_loss": 0.13662399351596832, "eval_runtime": 1207.6101, "eval_samples_per_second": 37.816, "eval_steps_per_second": 1.183, "step": 766151 }, { "epoch": 29.01, "learning_rate": 5.4986941216548705e-06, "loss": 0.0097, "step": 766500 }, { "epoch": 29.03, "learning_rate": 5.489231235095954e-06, "loss": 0.0093, "step": 767000 }, { "epoch": 29.05, "learning_rate": 5.479768348537038e-06, "loss": 0.0096, "step": 767500 }, { "epoch": 29.07, "learning_rate": 5.4703054619781225e-06, "loss": 0.0095, "step": 768000 }, { "epoch": 29.09, "learning_rate": 5.460842575419206e-06, "loss": 0.0093, "step": 768500 }, { "epoch": 29.11, "learning_rate": 5.45137968886029e-06, "loss": 0.0098, "step": 769000 }, { "epoch": 29.13, "learning_rate": 5.4419168023013744e-06, "loss": 0.0094, "step": 769500 }, { "epoch": 29.15, "learning_rate": 5.432453915742458e-06, "loss": 0.0094, "step": 770000 }, { "epoch": 29.16, "learning_rate": 5.422991029183543e-06, "loss": 0.0098, "step": 770500 }, { "epoch": 29.18, "learning_rate": 5.413547068397745e-06, "loss": 0.0098, "step": 771000 }, { "epoch": 29.2, "learning_rate": 5.404084181838829e-06, "loss": 0.0098, "step": 771500 }, { "epoch": 29.22, "learning_rate": 5.394621295279913e-06, "loss": 0.0099, "step": 772000 }, { "epoch": 29.24, "learning_rate": 5.385158408720997e-06, "loss": 0.0094, "step": 772500 }, { "epoch": 29.26, "learning_rate": 5.3757144479351985e-06, "loss": 0.0097, "step": 773000 }, { "epoch": 29.28, "learning_rate": 5.366251561376282e-06, "loss": 0.0096, "step": 773500 }, { "epoch": 29.3, "learning_rate": 5.356788674817367e-06, "loss": 0.0096, "step": 774000 }, { "epoch": 29.32, "learning_rate": 5.3473257882584505e-06, "loss": 0.0097, "step": 774500 }, { "epoch": 29.33, "learning_rate": 5.337881827472653e-06, "loss": 0.0101, "step": 775000 }, { "epoch": 29.35, "learning_rate": 5.328418940913737e-06, "loss": 0.0098, "step": 775500 }, { "epoch": 29.37, "learning_rate": 5.3189560543548215e-06, "loss": 0.0099, "step": 776000 }, { "epoch": 29.39, "learning_rate": 5.309493167795905e-06, "loss": 0.0101, "step": 776500 }, { "epoch": 29.41, "learning_rate": 5.300030281236989e-06, "loss": 0.0099, "step": 777000 }, { "epoch": 29.43, "learning_rate": 5.2905673946780735e-06, "loss": 0.0099, "step": 777500 }, { "epoch": 29.45, "learning_rate": 5.281123433892275e-06, "loss": 0.01, "step": 778000 }, { "epoch": 29.47, "learning_rate": 5.271660547333359e-06, "loss": 0.0099, "step": 778500 }, { "epoch": 29.49, "learning_rate": 5.262197660774443e-06, "loss": 0.0101, "step": 779000 }, { "epoch": 29.51, "learning_rate": 5.252734774215527e-06, "loss": 0.0097, "step": 779500 }, { "epoch": 29.52, "learning_rate": 5.243271887656611e-06, "loss": 0.0097, "step": 780000 }, { "epoch": 29.54, "learning_rate": 5.233827926870814e-06, "loss": 0.0099, "step": 780500 }, { "epoch": 29.56, "learning_rate": 5.224365040311898e-06, "loss": 0.0101, "step": 781000 }, { "epoch": 29.58, "learning_rate": 5.214902153752981e-06, "loss": 0.0098, "step": 781500 }, { "epoch": 29.6, "learning_rate": 5.205439267194066e-06, "loss": 0.0101, "step": 782000 }, { "epoch": 29.62, "learning_rate": 5.1959763806351495e-06, "loss": 0.0095, "step": 782500 }, { "epoch": 29.64, "learning_rate": 5.186513494076233e-06, "loss": 0.0101, "step": 783000 }, { "epoch": 29.66, "learning_rate": 5.177050607517318e-06, "loss": 0.0097, "step": 783500 }, { "epoch": 29.68, "learning_rate": 5.167606646731519e-06, "loss": 0.0099, "step": 784000 }, { "epoch": 29.69, "learning_rate": 5.1581437601726034e-06, "loss": 0.0099, "step": 784500 }, { "epoch": 29.71, "learning_rate": 5.148680873613687e-06, "loss": 0.0103, "step": 785000 }, { "epoch": 29.73, "learning_rate": 5.139217987054772e-06, "loss": 0.0101, "step": 785500 }, { "epoch": 29.75, "learning_rate": 5.129755100495855e-06, "loss": 0.0104, "step": 786000 }, { "epoch": 29.77, "learning_rate": 5.120292213936939e-06, "loss": 0.0097, "step": 786500 }, { "epoch": 29.79, "learning_rate": 5.110848253151142e-06, "loss": 0.0097, "step": 787000 }, { "epoch": 29.81, "learning_rate": 5.101385366592226e-06, "loss": 0.0102, "step": 787500 }, { "epoch": 29.83, "learning_rate": 5.09192248003331e-06, "loss": 0.0102, "step": 788000 }, { "epoch": 29.85, "learning_rate": 5.082459593474394e-06, "loss": 0.01, "step": 788500 }, { "epoch": 29.86, "learning_rate": 5.072996706915478e-06, "loss": 0.0098, "step": 789000 }, { "epoch": 29.88, "learning_rate": 5.063533820356562e-06, "loss": 0.0104, "step": 789500 }, { "epoch": 29.9, "learning_rate": 5.054070933797646e-06, "loss": 0.01, "step": 790000 }, { "epoch": 29.92, "learning_rate": 5.04460804723873e-06, "loss": 0.0103, "step": 790500 }, { "epoch": 29.94, "learning_rate": 5.035145160679814e-06, "loss": 0.0099, "step": 791000 }, { "epoch": 29.96, "learning_rate": 5.025682274120898e-06, "loss": 0.0096, "step": 791500 }, { "epoch": 29.98, "learning_rate": 5.016219387561982e-06, "loss": 0.0095, "step": 792000 }, { "epoch": 30.0, "learning_rate": 5.006756501003066e-06, "loss": 0.0101, "step": 792500 }, { "epoch": 30.0, "eval_bleu": 92.2957, "eval_gen_len": 16.4943, "eval_loss": 0.13711272180080414, "eval_runtime": 1176.4369, "eval_samples_per_second": 38.818, "eval_steps_per_second": 1.214, "step": 792570 }, { "epoch": 30.02, "learning_rate": 4.997331465990386e-06, "loss": 0.0092, "step": 793000 }, { "epoch": 30.04, "learning_rate": 4.98786857943147e-06, "loss": 0.0088, "step": 793500 }, { "epoch": 30.05, "learning_rate": 4.9784056928725544e-06, "loss": 0.0087, "step": 794000 }, { "epoch": 30.07, "learning_rate": 4.968942806313639e-06, "loss": 0.0087, "step": 794500 }, { "epoch": 30.09, "learning_rate": 4.95949884552784e-06, "loss": 0.0088, "step": 795000 }, { "epoch": 30.11, "learning_rate": 4.950035958968925e-06, "loss": 0.0093, "step": 795500 }, { "epoch": 30.13, "learning_rate": 4.9405919981831266e-06, "loss": 0.0089, "step": 796000 }, { "epoch": 30.15, "learning_rate": 4.93112911162421e-06, "loss": 0.0085, "step": 796500 }, { "epoch": 30.17, "learning_rate": 4.921666225065294e-06, "loss": 0.009, "step": 797000 }, { "epoch": 30.19, "learning_rate": 4.912222264279497e-06, "loss": 0.0091, "step": 797500 }, { "epoch": 30.21, "learning_rate": 4.9027593777205805e-06, "loss": 0.0087, "step": 798000 }, { "epoch": 30.22, "learning_rate": 4.893296491161664e-06, "loss": 0.0093, "step": 798500 }, { "epoch": 30.24, "learning_rate": 4.883833604602749e-06, "loss": 0.0091, "step": 799000 }, { "epoch": 30.26, "learning_rate": 4.874370718043832e-06, "loss": 0.0088, "step": 799500 }, { "epoch": 30.28, "learning_rate": 4.864907831484916e-06, "loss": 0.0086, "step": 800000 }, { "epoch": 30.3, "learning_rate": 4.855444944926001e-06, "loss": 0.0091, "step": 800500 }, { "epoch": 30.32, "learning_rate": 4.846000984140203e-06, "loss": 0.0093, "step": 801000 }, { "epoch": 30.34, "learning_rate": 4.836538097581286e-06, "loss": 0.0093, "step": 801500 }, { "epoch": 30.36, "learning_rate": 4.827075211022371e-06, "loss": 0.009, "step": 802000 }, { "epoch": 30.38, "learning_rate": 4.8176123244634546e-06, "loss": 0.0096, "step": 802500 }, { "epoch": 30.39, "learning_rate": 4.8081683636776565e-06, "loss": 0.0092, "step": 803000 }, { "epoch": 30.41, "learning_rate": 4.798705477118741e-06, "loss": 0.0091, "step": 803500 }, { "epoch": 30.43, "learning_rate": 4.789242590559825e-06, "loss": 0.0089, "step": 804000 }, { "epoch": 30.45, "learning_rate": 4.7797797040009085e-06, "loss": 0.009, "step": 804500 }, { "epoch": 30.47, "learning_rate": 4.770316817441993e-06, "loss": 0.0093, "step": 805000 }, { "epoch": 30.49, "learning_rate": 4.760853930883077e-06, "loss": 0.0093, "step": 805500 }, { "epoch": 30.51, "learning_rate": 4.751391044324161e-06, "loss": 0.0091, "step": 806000 }, { "epoch": 30.53, "learning_rate": 4.741928157765245e-06, "loss": 0.009, "step": 806500 }, { "epoch": 30.55, "learning_rate": 4.732484196979447e-06, "loss": 0.0091, "step": 807000 }, { "epoch": 30.57, "learning_rate": 4.723021310420531e-06, "loss": 0.0094, "step": 807500 }, { "epoch": 30.58, "learning_rate": 4.713558423861615e-06, "loss": 0.0095, "step": 808000 }, { "epoch": 30.6, "learning_rate": 4.704095537302699e-06, "loss": 0.0094, "step": 808500 }, { "epoch": 30.62, "learning_rate": 4.694632650743783e-06, "loss": 0.0092, "step": 809000 }, { "epoch": 30.64, "learning_rate": 4.685169764184868e-06, "loss": 0.0092, "step": 809500 }, { "epoch": 30.66, "learning_rate": 4.675706877625951e-06, "loss": 0.0095, "step": 810000 }, { "epoch": 30.68, "learning_rate": 4.666243991067035e-06, "loss": 0.0096, "step": 810500 }, { "epoch": 30.7, "learning_rate": 4.65678110450812e-06, "loss": 0.0095, "step": 811000 }, { "epoch": 30.72, "learning_rate": 4.647318217949204e-06, "loss": 0.0092, "step": 811500 }, { "epoch": 30.74, "learning_rate": 4.637855331390287e-06, "loss": 0.009, "step": 812000 }, { "epoch": 30.75, "learning_rate": 4.628392444831372e-06, "loss": 0.0096, "step": 812500 }, { "epoch": 30.77, "learning_rate": 4.6189295582724556e-06, "loss": 0.009, "step": 813000 }, { "epoch": 30.79, "learning_rate": 4.60946667171354e-06, "loss": 0.0093, "step": 813500 }, { "epoch": 30.81, "learning_rate": 4.600022710927742e-06, "loss": 0.0089, "step": 814000 }, { "epoch": 30.83, "learning_rate": 4.590578750141943e-06, "loss": 0.0096, "step": 814500 }, { "epoch": 30.85, "learning_rate": 4.581115863583028e-06, "loss": 0.0091, "step": 815000 }, { "epoch": 30.87, "learning_rate": 4.57167190279723e-06, "loss": 0.0091, "step": 815500 }, { "epoch": 30.89, "learning_rate": 4.562209016238313e-06, "loss": 0.0093, "step": 816000 }, { "epoch": 30.91, "learning_rate": 4.552746129679398e-06, "loss": 0.0089, "step": 816500 }, { "epoch": 30.92, "learning_rate": 4.5432832431204825e-06, "loss": 0.0095, "step": 817000 }, { "epoch": 30.94, "learning_rate": 4.533820356561565e-06, "loss": 0.0094, "step": 817500 }, { "epoch": 30.96, "learning_rate": 4.52435747000265e-06, "loss": 0.0096, "step": 818000 }, { "epoch": 30.98, "learning_rate": 4.514894583443734e-06, "loss": 0.0095, "step": 818500 }, { "epoch": 31.0, "eval_bleu": 92.3047, "eval_gen_len": 16.4694, "eval_loss": 0.13931500911712646, "eval_runtime": 1172.6751, "eval_samples_per_second": 38.943, "eval_steps_per_second": 1.218, "step": 818989 }, { "epoch": 31.0, "learning_rate": 4.505431696884818e-06, "loss": 0.0097, "step": 819000 }, { "epoch": 31.02, "learning_rate": 4.495968810325902e-06, "loss": 0.0083, "step": 819500 }, { "epoch": 31.04, "learning_rate": 4.486524849540105e-06, "loss": 0.0081, "step": 820000 }, { "epoch": 31.06, "learning_rate": 4.477061962981188e-06, "loss": 0.0083, "step": 820500 }, { "epoch": 31.08, "learning_rate": 4.467599076422272e-06, "loss": 0.0085, "step": 821000 }, { "epoch": 31.1, "learning_rate": 4.4581361898633566e-06, "loss": 0.0083, "step": 821500 }, { "epoch": 31.11, "learning_rate": 4.44867330330444e-06, "loss": 0.0086, "step": 822000 }, { "epoch": 31.13, "learning_rate": 4.439210416745525e-06, "loss": 0.0084, "step": 822500 }, { "epoch": 31.15, "learning_rate": 4.4297475301866085e-06, "loss": 0.0083, "step": 823000 }, { "epoch": 31.17, "learning_rate": 4.420284643627692e-06, "loss": 0.0084, "step": 823500 }, { "epoch": 31.19, "learning_rate": 4.410840682841894e-06, "loss": 0.0086, "step": 824000 }, { "epoch": 31.21, "learning_rate": 4.401377796282979e-06, "loss": 0.0085, "step": 824500 }, { "epoch": 31.23, "learning_rate": 4.391914909724062e-06, "loss": 0.0089, "step": 825000 }, { "epoch": 31.25, "learning_rate": 4.382452023165147e-06, "loss": 0.0087, "step": 825500 }, { "epoch": 31.27, "learning_rate": 4.372989136606231e-06, "loss": 0.0083, "step": 826000 }, { "epoch": 31.28, "learning_rate": 4.363545175820433e-06, "loss": 0.0082, "step": 826500 }, { "epoch": 31.3, "learning_rate": 4.354082289261516e-06, "loss": 0.0081, "step": 827000 }, { "epoch": 31.32, "learning_rate": 4.344619402702601e-06, "loss": 0.0087, "step": 827500 }, { "epoch": 31.34, "learning_rate": 4.3351565161436846e-06, "loss": 0.0081, "step": 828000 }, { "epoch": 31.36, "learning_rate": 4.325693629584769e-06, "loss": 0.0085, "step": 828500 }, { "epoch": 31.38, "learning_rate": 4.316249668798971e-06, "loss": 0.0085, "step": 829000 }, { "epoch": 31.4, "learning_rate": 4.306786782240055e-06, "loss": 0.0085, "step": 829500 }, { "epoch": 31.42, "learning_rate": 4.297342821454257e-06, "loss": 0.0094, "step": 830000 }, { "epoch": 31.44, "learning_rate": 4.287879934895341e-06, "loss": 0.0089, "step": 830500 }, { "epoch": 31.45, "learning_rate": 4.278417048336425e-06, "loss": 0.0089, "step": 831000 }, { "epoch": 31.47, "learning_rate": 4.268954161777509e-06, "loss": 0.0086, "step": 831500 }, { "epoch": 31.49, "learning_rate": 4.259491275218593e-06, "loss": 0.0085, "step": 832000 }, { "epoch": 31.51, "learning_rate": 4.250047314432794e-06, "loss": 0.0083, "step": 832500 }, { "epoch": 31.53, "learning_rate": 4.240584427873879e-06, "loss": 0.0092, "step": 833000 }, { "epoch": 31.55, "learning_rate": 4.231121541314963e-06, "loss": 0.0088, "step": 833500 }, { "epoch": 31.57, "learning_rate": 4.221658654756047e-06, "loss": 0.0086, "step": 834000 }, { "epoch": 31.59, "learning_rate": 4.212195768197131e-06, "loss": 0.008, "step": 834500 }, { "epoch": 31.61, "learning_rate": 4.202732881638215e-06, "loss": 0.009, "step": 835000 }, { "epoch": 31.62, "learning_rate": 4.193269995079299e-06, "loss": 0.0083, "step": 835500 }, { "epoch": 31.64, "learning_rate": 4.183844960066619e-06, "loss": 0.0088, "step": 836000 }, { "epoch": 31.66, "learning_rate": 4.174382073507703e-06, "loss": 0.0085, "step": 836500 }, { "epoch": 31.68, "learning_rate": 4.164919186948787e-06, "loss": 0.0084, "step": 837000 }, { "epoch": 31.7, "learning_rate": 4.1554752261629895e-06, "loss": 0.0082, "step": 837500 }, { "epoch": 31.72, "learning_rate": 4.146012339604073e-06, "loss": 0.0087, "step": 838000 }, { "epoch": 31.74, "learning_rate": 4.136549453045157e-06, "loss": 0.0085, "step": 838500 }, { "epoch": 31.76, "learning_rate": 4.127086566486241e-06, "loss": 0.0085, "step": 839000 }, { "epoch": 31.78, "learning_rate": 4.117623679927326e-06, "loss": 0.0089, "step": 839500 }, { "epoch": 31.8, "learning_rate": 4.108160793368409e-06, "loss": 0.0089, "step": 840000 }, { "epoch": 31.81, "learning_rate": 4.098697906809493e-06, "loss": 0.0083, "step": 840500 }, { "epoch": 31.83, "learning_rate": 4.089235020250578e-06, "loss": 0.0087, "step": 841000 }, { "epoch": 31.85, "learning_rate": 4.079772133691662e-06, "loss": 0.0084, "step": 841500 }, { "epoch": 31.87, "learning_rate": 4.070309247132745e-06, "loss": 0.009, "step": 842000 }, { "epoch": 31.89, "learning_rate": 4.06084636057383e-06, "loss": 0.0091, "step": 842500 }, { "epoch": 31.91, "learning_rate": 4.0513834740149136e-06, "loss": 0.0088, "step": 843000 }, { "epoch": 31.93, "learning_rate": 4.041920587455998e-06, "loss": 0.0086, "step": 843500 }, { "epoch": 31.95, "learning_rate": 4.032457700897082e-06, "loss": 0.009, "step": 844000 }, { "epoch": 31.97, "learning_rate": 4.0229948143381655e-06, "loss": 0.0089, "step": 844500 }, { "epoch": 31.98, "learning_rate": 4.01353192777925e-06, "loss": 0.0091, "step": 845000 }, { "epoch": 32.0, "eval_bleu": 92.3903, "eval_gen_len": 16.4963, "eval_loss": 0.14042149484157562, "eval_runtime": 1210.5356, "eval_samples_per_second": 37.725, "eval_steps_per_second": 1.18, "step": 845408 }, { "epoch": 32.0, "learning_rate": 4.004087966993452e-06, "loss": 0.0085, "step": 845500 }, { "epoch": 32.02, "learning_rate": 3.994625080434536e-06, "loss": 0.0076, "step": 846000 }, { "epoch": 32.04, "learning_rate": 3.98516219387562e-06, "loss": 0.0079, "step": 846500 }, { "epoch": 32.06, "learning_rate": 3.975699307316705e-06, "loss": 0.008, "step": 847000 }, { "epoch": 32.08, "learning_rate": 3.966236420757788e-06, "loss": 0.0077, "step": 847500 }, { "epoch": 32.1, "learning_rate": 3.956773534198872e-06, "loss": 0.0079, "step": 848000 }, { "epoch": 32.12, "learning_rate": 3.947310647639957e-06, "loss": 0.0078, "step": 848500 }, { "epoch": 32.14, "learning_rate": 3.937866686854158e-06, "loss": 0.0078, "step": 849000 }, { "epoch": 32.15, "learning_rate": 3.928403800295242e-06, "loss": 0.0077, "step": 849500 }, { "epoch": 32.17, "learning_rate": 3.918940913736327e-06, "loss": 0.0075, "step": 850000 }, { "epoch": 32.19, "learning_rate": 3.909478027177411e-06, "loss": 0.008, "step": 850500 }, { "epoch": 32.21, "learning_rate": 3.900034066391613e-06, "loss": 0.0081, "step": 851000 }, { "epoch": 32.23, "learning_rate": 3.890571179832696e-06, "loss": 0.0083, "step": 851500 }, { "epoch": 32.25, "learning_rate": 3.88110829327378e-06, "loss": 0.0079, "step": 852000 }, { "epoch": 32.27, "learning_rate": 3.8716454067148646e-06, "loss": 0.0083, "step": 852500 }, { "epoch": 32.29, "learning_rate": 3.862182520155949e-06, "loss": 0.0079, "step": 853000 }, { "epoch": 32.31, "learning_rate": 3.852719633597033e-06, "loss": 0.0077, "step": 853500 }, { "epoch": 32.33, "learning_rate": 3.8432567470381165e-06, "loss": 0.0081, "step": 854000 }, { "epoch": 32.34, "learning_rate": 3.833793860479201e-06, "loss": 0.0083, "step": 854500 }, { "epoch": 32.36, "learning_rate": 3.82436882546652e-06, "loss": 0.0079, "step": 855000 }, { "epoch": 32.38, "learning_rate": 3.814905938907605e-06, "loss": 0.0082, "step": 855500 }, { "epoch": 32.4, "learning_rate": 3.8054430523486887e-06, "loss": 0.0076, "step": 856000 }, { "epoch": 32.42, "learning_rate": 3.7959801657897728e-06, "loss": 0.0082, "step": 856500 }, { "epoch": 32.44, "learning_rate": 3.786517279230857e-06, "loss": 0.0082, "step": 857000 }, { "epoch": 32.46, "learning_rate": 3.777054392671941e-06, "loss": 0.0084, "step": 857500 }, { "epoch": 32.48, "learning_rate": 3.767610431886143e-06, "loss": 0.0083, "step": 858000 }, { "epoch": 32.5, "learning_rate": 3.758147545327227e-06, "loss": 0.0079, "step": 858500 }, { "epoch": 32.51, "learning_rate": 3.7486846587683112e-06, "loss": 0.0082, "step": 859000 }, { "epoch": 32.53, "learning_rate": 3.739221772209395e-06, "loss": 0.0079, "step": 859500 }, { "epoch": 32.55, "learning_rate": 3.729758885650479e-06, "loss": 0.0078, "step": 860000 }, { "epoch": 32.57, "learning_rate": 3.720295999091563e-06, "loss": 0.0077, "step": 860500 }, { "epoch": 32.59, "learning_rate": 3.7108331125326473e-06, "loss": 0.0083, "step": 861000 }, { "epoch": 32.61, "learning_rate": 3.701370225973731e-06, "loss": 0.0085, "step": 861500 }, { "epoch": 32.63, "learning_rate": 3.6919262651879334e-06, "loss": 0.008, "step": 862000 }, { "epoch": 32.65, "learning_rate": 3.682463378629017e-06, "loss": 0.0078, "step": 862500 }, { "epoch": 32.67, "learning_rate": 3.673000492070101e-06, "loss": 0.0082, "step": 863000 }, { "epoch": 32.68, "learning_rate": 3.6635376055111853e-06, "loss": 0.0082, "step": 863500 }, { "epoch": 32.7, "learning_rate": 3.6540936447253873e-06, "loss": 0.008, "step": 864000 }, { "epoch": 32.72, "learning_rate": 3.6446307581664714e-06, "loss": 0.0085, "step": 864500 }, { "epoch": 32.74, "learning_rate": 3.6351867973806733e-06, "loss": 0.0084, "step": 865000 }, { "epoch": 32.76, "learning_rate": 3.6257239108217575e-06, "loss": 0.0081, "step": 865500 }, { "epoch": 32.78, "learning_rate": 3.6162610242628416e-06, "loss": 0.0081, "step": 866000 }, { "epoch": 32.8, "learning_rate": 3.6067981377039257e-06, "loss": 0.0085, "step": 866500 }, { "epoch": 32.82, "learning_rate": 3.5973352511450094e-06, "loss": 0.0084, "step": 867000 }, { "epoch": 32.84, "learning_rate": 3.5878723645860935e-06, "loss": 0.0079, "step": 867500 }, { "epoch": 32.86, "learning_rate": 3.5784094780271777e-06, "loss": 0.0083, "step": 868000 }, { "epoch": 32.87, "learning_rate": 3.568946591468262e-06, "loss": 0.0082, "step": 868500 }, { "epoch": 32.89, "learning_rate": 3.5594837049093455e-06, "loss": 0.0084, "step": 869000 }, { "epoch": 32.91, "learning_rate": 3.5500208183504296e-06, "loss": 0.0083, "step": 869500 }, { "epoch": 32.93, "learning_rate": 3.540557931791514e-06, "loss": 0.0083, "step": 870000 }, { "epoch": 32.95, "learning_rate": 3.5310950452325983e-06, "loss": 0.0078, "step": 870500 }, { "epoch": 32.97, "learning_rate": 3.5216321586736824e-06, "loss": 0.0079, "step": 871000 }, { "epoch": 32.99, "learning_rate": 3.512188197887884e-06, "loss": 0.0083, "step": 871500 }, { "epoch": 33.0, "eval_bleu": 92.367, "eval_gen_len": 16.4617, "eval_loss": 0.14141865074634552, "eval_runtime": 1174.1308, "eval_samples_per_second": 38.894, "eval_steps_per_second": 1.216, "step": 871827 }, { "epoch": 33.01, "learning_rate": 3.5027253113289685e-06, "loss": 0.008, "step": 872000 }, { "epoch": 33.03, "learning_rate": 3.4932624247700518e-06, "loss": 0.0078, "step": 872500 }, { "epoch": 33.04, "learning_rate": 3.4837995382111363e-06, "loss": 0.0073, "step": 873000 }, { "epoch": 33.06, "learning_rate": 3.4743366516522204e-06, "loss": 0.0072, "step": 873500 }, { "epoch": 33.08, "learning_rate": 3.4648737650933046e-06, "loss": 0.0077, "step": 874000 }, { "epoch": 33.1, "learning_rate": 3.455429804307506e-06, "loss": 0.0076, "step": 874500 }, { "epoch": 33.12, "learning_rate": 3.445985843521708e-06, "loss": 0.0075, "step": 875000 }, { "epoch": 33.14, "learning_rate": 3.436522956962792e-06, "loss": 0.0071, "step": 875500 }, { "epoch": 33.16, "learning_rate": 3.4270600704038763e-06, "loss": 0.0073, "step": 876000 }, { "epoch": 33.18, "learning_rate": 3.41759718384496e-06, "loss": 0.0071, "step": 876500 }, { "epoch": 33.2, "learning_rate": 3.408134297286044e-06, "loss": 0.0073, "step": 877000 }, { "epoch": 33.21, "learning_rate": 3.3986714107271287e-06, "loss": 0.0075, "step": 877500 }, { "epoch": 33.23, "learning_rate": 3.389208524168213e-06, "loss": 0.0076, "step": 878000 }, { "epoch": 33.25, "learning_rate": 3.3797645633824143e-06, "loss": 0.0072, "step": 878500 }, { "epoch": 33.27, "learning_rate": 3.3703016768234984e-06, "loss": 0.0075, "step": 879000 }, { "epoch": 33.29, "learning_rate": 3.360838790264583e-06, "loss": 0.0075, "step": 879500 }, { "epoch": 33.31, "learning_rate": 3.3513759037056663e-06, "loss": 0.0078, "step": 880000 }, { "epoch": 33.33, "learning_rate": 3.341913017146751e-06, "loss": 0.0075, "step": 880500 }, { "epoch": 33.35, "learning_rate": 3.3324690563609523e-06, "loss": 0.0076, "step": 881000 }, { "epoch": 33.37, "learning_rate": 3.3230061698020365e-06, "loss": 0.0075, "step": 881500 }, { "epoch": 33.39, "learning_rate": 3.3135432832431206e-06, "loss": 0.0074, "step": 882000 }, { "epoch": 33.4, "learning_rate": 3.304080396684205e-06, "loss": 0.008, "step": 882500 }, { "epoch": 33.42, "learning_rate": 3.2946364358984067e-06, "loss": 0.0078, "step": 883000 }, { "epoch": 33.44, "learning_rate": 3.285173549339491e-06, "loss": 0.0074, "step": 883500 }, { "epoch": 33.46, "learning_rate": 3.2757106627805745e-06, "loss": 0.0077, "step": 884000 }, { "epoch": 33.48, "learning_rate": 3.266266701994777e-06, "loss": 0.0076, "step": 884500 }, { "epoch": 33.5, "learning_rate": 3.2568038154358606e-06, "loss": 0.007, "step": 885000 }, { "epoch": 33.52, "learning_rate": 3.2473409288769447e-06, "loss": 0.0078, "step": 885500 }, { "epoch": 33.54, "learning_rate": 3.237878042318029e-06, "loss": 0.0075, "step": 886000 }, { "epoch": 33.56, "learning_rate": 3.228415155759113e-06, "loss": 0.0075, "step": 886500 }, { "epoch": 33.57, "learning_rate": 3.2189522692001975e-06, "loss": 0.008, "step": 887000 }, { "epoch": 33.59, "learning_rate": 3.2094893826412808e-06, "loss": 0.0075, "step": 887500 }, { "epoch": 33.61, "learning_rate": 3.2000264960823653e-06, "loss": 0.0077, "step": 888000 }, { "epoch": 33.63, "learning_rate": 3.1905636095234494e-06, "loss": 0.0078, "step": 888500 }, { "epoch": 33.65, "learning_rate": 3.1811007229645336e-06, "loss": 0.0072, "step": 889000 }, { "epoch": 33.67, "learning_rate": 3.1716378364056173e-06, "loss": 0.0074, "step": 889500 }, { "epoch": 33.69, "learning_rate": 3.1621749498467014e-06, "loss": 0.0077, "step": 890000 }, { "epoch": 33.71, "learning_rate": 3.1527309890609033e-06, "loss": 0.0077, "step": 890500 }, { "epoch": 33.73, "learning_rate": 3.1432681025019875e-06, "loss": 0.0076, "step": 891000 }, { "epoch": 33.74, "learning_rate": 3.1338052159430716e-06, "loss": 0.0078, "step": 891500 }, { "epoch": 33.76, "learning_rate": 3.124361255157273e-06, "loss": 0.0079, "step": 892000 }, { "epoch": 33.78, "learning_rate": 3.1148983685983577e-06, "loss": 0.0075, "step": 892500 }, { "epoch": 33.8, "learning_rate": 3.1054354820394418e-06, "loss": 0.0077, "step": 893000 }, { "epoch": 33.82, "learning_rate": 3.095972595480526e-06, "loss": 0.0075, "step": 893500 }, { "epoch": 33.84, "learning_rate": 3.0865286346947274e-06, "loss": 0.0077, "step": 894000 }, { "epoch": 33.86, "learning_rate": 3.0770846739089294e-06, "loss": 0.0077, "step": 894500 }, { "epoch": 33.88, "learning_rate": 3.0676217873500135e-06, "loss": 0.0076, "step": 895000 }, { "epoch": 33.9, "learning_rate": 3.0581589007910976e-06, "loss": 0.0077, "step": 895500 }, { "epoch": 33.91, "learning_rate": 3.0486960142321813e-06, "loss": 0.0082, "step": 896000 }, { "epoch": 33.93, "learning_rate": 3.0392331276732655e-06, "loss": 0.0078, "step": 896500 }, { "epoch": 33.95, "learning_rate": 3.02977024111435e-06, "loss": 0.0078, "step": 897000 }, { "epoch": 33.97, "learning_rate": 3.020307354555434e-06, "loss": 0.0076, "step": 897500 }, { "epoch": 33.99, "learning_rate": 3.010844467996518e-06, "loss": 0.0079, "step": 898000 }, { "epoch": 34.0, "eval_bleu": 92.3356, "eval_gen_len": 16.4683, "eval_loss": 0.14310364425182343, "eval_runtime": 1172.1921, "eval_samples_per_second": 38.959, "eval_steps_per_second": 1.218, "step": 898246 }, { "epoch": 34.01, "learning_rate": 3.001381581437602e-06, "loss": 0.0078, "step": 898500 }, { "epoch": 34.03, "learning_rate": 2.991918694878686e-06, "loss": 0.0074, "step": 899000 }, { "epoch": 34.05, "learning_rate": 2.9824747340928876e-06, "loss": 0.0069, "step": 899500 }, { "epoch": 34.07, "learning_rate": 2.97303077330709e-06, "loss": 0.0072, "step": 900000 }, { "epoch": 34.09, "learning_rate": 2.9635678867481737e-06, "loss": 0.0072, "step": 900500 }, { "epoch": 34.1, "learning_rate": 2.954105000189258e-06, "loss": 0.0073, "step": 901000 }, { "epoch": 34.12, "learning_rate": 2.944642113630342e-06, "loss": 0.0071, "step": 901500 }, { "epoch": 34.14, "learning_rate": 2.9351792270714265e-06, "loss": 0.007, "step": 902000 }, { "epoch": 34.16, "learning_rate": 2.9257163405125098e-06, "loss": 0.0072, "step": 902500 }, { "epoch": 34.18, "learning_rate": 2.916272379726712e-06, "loss": 0.0076, "step": 903000 }, { "epoch": 34.2, "learning_rate": 2.906809493167796e-06, "loss": 0.0071, "step": 903500 }, { "epoch": 34.22, "learning_rate": 2.89734660660888e-06, "loss": 0.0071, "step": 904000 }, { "epoch": 34.24, "learning_rate": 2.8878837200499645e-06, "loss": 0.007, "step": 904500 }, { "epoch": 34.26, "learning_rate": 2.8784208334910486e-06, "loss": 0.0071, "step": 905000 }, { "epoch": 34.27, "learning_rate": 2.8689579469321323e-06, "loss": 0.0073, "step": 905500 }, { "epoch": 34.29, "learning_rate": 2.8594950603732165e-06, "loss": 0.0068, "step": 906000 }, { "epoch": 34.31, "learning_rate": 2.8500321738143006e-06, "loss": 0.0069, "step": 906500 }, { "epoch": 34.33, "learning_rate": 2.8405692872553847e-06, "loss": 0.0074, "step": 907000 }, { "epoch": 34.35, "learning_rate": 2.831106400696469e-06, "loss": 0.0072, "step": 907500 }, { "epoch": 34.37, "learning_rate": 2.8216624399106708e-06, "loss": 0.0072, "step": 908000 }, { "epoch": 34.39, "learning_rate": 2.812199553351755e-06, "loss": 0.0076, "step": 908500 }, { "epoch": 34.41, "learning_rate": 2.8027366667928386e-06, "loss": 0.0076, "step": 909000 }, { "epoch": 34.43, "learning_rate": 2.7932737802339227e-06, "loss": 0.0073, "step": 909500 }, { "epoch": 34.44, "learning_rate": 2.783810893675007e-06, "loss": 0.0071, "step": 910000 }, { "epoch": 34.46, "learning_rate": 2.774348007116091e-06, "loss": 0.0075, "step": 910500 }, { "epoch": 34.48, "learning_rate": 2.7648851205571747e-06, "loss": 0.0072, "step": 911000 }, { "epoch": 34.5, "learning_rate": 2.755422233998259e-06, "loss": 0.0072, "step": 911500 }, { "epoch": 34.52, "learning_rate": 2.745959347439343e-06, "loss": 0.0075, "step": 912000 }, { "epoch": 34.54, "learning_rate": 2.736515386653545e-06, "loss": 0.0075, "step": 912500 }, { "epoch": 34.56, "learning_rate": 2.727052500094629e-06, "loss": 0.007, "step": 913000 }, { "epoch": 34.58, "learning_rate": 2.717589613535713e-06, "loss": 0.0072, "step": 913500 }, { "epoch": 34.6, "learning_rate": 2.7081267269767973e-06, "loss": 0.0069, "step": 914000 }, { "epoch": 34.62, "learning_rate": 2.698663840417881e-06, "loss": 0.0068, "step": 914500 }, { "epoch": 34.63, "learning_rate": 2.6892198796320833e-06, "loss": 0.0074, "step": 915000 }, { "epoch": 34.65, "learning_rate": 2.679756993073167e-06, "loss": 0.0075, "step": 915500 }, { "epoch": 34.67, "learning_rate": 2.670294106514251e-06, "loss": 0.0072, "step": 916000 }, { "epoch": 34.69, "learning_rate": 2.6608312199553353e-06, "loss": 0.0072, "step": 916500 }, { "epoch": 34.71, "learning_rate": 2.65136833339642e-06, "loss": 0.0074, "step": 917000 }, { "epoch": 34.73, "learning_rate": 2.6419243726106214e-06, "loss": 0.0069, "step": 917500 }, { "epoch": 34.75, "learning_rate": 2.6324614860517055e-06, "loss": 0.0071, "step": 918000 }, { "epoch": 34.77, "learning_rate": 2.6230175252659074e-06, "loss": 0.0075, "step": 918500 }, { "epoch": 34.79, "learning_rate": 2.6135546387069916e-06, "loss": 0.0074, "step": 919000 }, { "epoch": 34.8, "learning_rate": 2.6040917521480753e-06, "loss": 0.0071, "step": 919500 }, { "epoch": 34.82, "learning_rate": 2.5946288655891594e-06, "loss": 0.0071, "step": 920000 }, { "epoch": 34.84, "learning_rate": 2.5851849048033617e-06, "loss": 0.0075, "step": 920500 }, { "epoch": 34.86, "learning_rate": 2.5757220182444454e-06, "loss": 0.0071, "step": 921000 }, { "epoch": 34.88, "learning_rate": 2.5662591316855296e-06, "loss": 0.0073, "step": 921500 }, { "epoch": 34.9, "learning_rate": 2.5567962451266137e-06, "loss": 0.0073, "step": 922000 }, { "epoch": 34.92, "learning_rate": 2.547333358567698e-06, "loss": 0.0071, "step": 922500 }, { "epoch": 34.94, "learning_rate": 2.5378704720087815e-06, "loss": 0.0072, "step": 923000 }, { "epoch": 34.96, "learning_rate": 2.5284075854498657e-06, "loss": 0.0076, "step": 923500 }, { "epoch": 34.97, "learning_rate": 2.5189446988909498e-06, "loss": 0.0073, "step": 924000 }, { "epoch": 34.99, "learning_rate": 2.5095007381051517e-06, "loss": 0.0071, "step": 924500 }, { "epoch": 35.0, "eval_bleu": 92.3866, "eval_gen_len": 16.4826, "eval_loss": 0.14408156275749207, "eval_runtime": 1173.9215, "eval_samples_per_second": 38.901, "eval_steps_per_second": 1.216, "step": 924665 }, { "epoch": 35.01, "learning_rate": 2.500037851546236e-06, "loss": 0.007, "step": 925000 }, { "epoch": 35.03, "learning_rate": 2.49057496498732e-06, "loss": 0.0068, "step": 925500 }, { "epoch": 35.05, "learning_rate": 2.481112078428404e-06, "loss": 0.0068, "step": 926000 }, { "epoch": 35.07, "learning_rate": 2.4716491918694882e-06, "loss": 0.0064, "step": 926500 }, { "epoch": 35.09, "learning_rate": 2.462186305310572e-06, "loss": 0.0068, "step": 927000 }, { "epoch": 35.11, "learning_rate": 2.4527234187516565e-06, "loss": 0.0072, "step": 927500 }, { "epoch": 35.13, "learning_rate": 2.44326053219274e-06, "loss": 0.0068, "step": 928000 }, { "epoch": 35.15, "learning_rate": 2.4337976456338243e-06, "loss": 0.0066, "step": 928500 }, { "epoch": 35.16, "learning_rate": 2.4243347590749084e-06, "loss": 0.0068, "step": 929000 }, { "epoch": 35.18, "learning_rate": 2.4148718725159926e-06, "loss": 0.0065, "step": 929500 }, { "epoch": 35.2, "learning_rate": 2.4054279117301945e-06, "loss": 0.007, "step": 930000 }, { "epoch": 35.22, "learning_rate": 2.3959839509443964e-06, "loss": 0.0071, "step": 930500 }, { "epoch": 35.24, "learning_rate": 2.38652106438548e-06, "loss": 0.0068, "step": 931000 }, { "epoch": 35.26, "learning_rate": 2.3770581778265643e-06, "loss": 0.0073, "step": 931500 }, { "epoch": 35.28, "learning_rate": 2.3675952912676484e-06, "loss": 0.0064, "step": 932000 }, { "epoch": 35.3, "learning_rate": 2.3581324047087325e-06, "loss": 0.0069, "step": 932500 }, { "epoch": 35.32, "learning_rate": 2.3486695181498166e-06, "loss": 0.007, "step": 933000 }, { "epoch": 35.33, "learning_rate": 2.3392066315909008e-06, "loss": 0.0064, "step": 933500 }, { "epoch": 35.35, "learning_rate": 2.3297626708051027e-06, "loss": 0.0068, "step": 934000 }, { "epoch": 35.37, "learning_rate": 2.3202997842461864e-06, "loss": 0.0067, "step": 934500 }, { "epoch": 35.39, "learning_rate": 2.310836897687271e-06, "loss": 0.0067, "step": 935000 }, { "epoch": 35.41, "learning_rate": 2.3013740111283547e-06, "loss": 0.0066, "step": 935500 }, { "epoch": 35.43, "learning_rate": 2.291911124569439e-06, "loss": 0.007, "step": 936000 }, { "epoch": 35.45, "learning_rate": 2.2824671637836407e-06, "loss": 0.0068, "step": 936500 }, { "epoch": 35.47, "learning_rate": 2.273004277224725e-06, "loss": 0.0065, "step": 937000 }, { "epoch": 35.49, "learning_rate": 2.263541390665809e-06, "loss": 0.0067, "step": 937500 }, { "epoch": 35.5, "learning_rate": 2.254078504106893e-06, "loss": 0.0071, "step": 938000 }, { "epoch": 35.52, "learning_rate": 2.244615617547977e-06, "loss": 0.0067, "step": 938500 }, { "epoch": 35.54, "learning_rate": 2.235152730989061e-06, "loss": 0.007, "step": 939000 }, { "epoch": 35.56, "learning_rate": 2.225708770203263e-06, "loss": 0.0066, "step": 939500 }, { "epoch": 35.58, "learning_rate": 2.216245883644347e-06, "loss": 0.0072, "step": 940000 }, { "epoch": 35.6, "learning_rate": 2.206782997085431e-06, "loss": 0.0067, "step": 940500 }, { "epoch": 35.62, "learning_rate": 2.1973201105265153e-06, "loss": 0.0072, "step": 941000 }, { "epoch": 35.64, "learning_rate": 2.1878572239675994e-06, "loss": 0.0065, "step": 941500 }, { "epoch": 35.66, "learning_rate": 2.178413263181801e-06, "loss": 0.0072, "step": 942000 }, { "epoch": 35.68, "learning_rate": 2.1689503766228855e-06, "loss": 0.007, "step": 942500 }, { "epoch": 35.69, "learning_rate": 2.159487490063969e-06, "loss": 0.0066, "step": 943000 }, { "epoch": 35.71, "learning_rate": 2.1500246035050533e-06, "loss": 0.0071, "step": 943500 }, { "epoch": 35.73, "learning_rate": 2.1405806427192552e-06, "loss": 0.0068, "step": 944000 }, { "epoch": 35.75, "learning_rate": 2.1311177561603394e-06, "loss": 0.0065, "step": 944500 }, { "epoch": 35.77, "learning_rate": 2.1216548696014235e-06, "loss": 0.007, "step": 945000 }, { "epoch": 35.79, "learning_rate": 2.1121919830425076e-06, "loss": 0.0066, "step": 945500 }, { "epoch": 35.81, "learning_rate": 2.1027290964835913e-06, "loss": 0.0066, "step": 946000 }, { "epoch": 35.83, "learning_rate": 2.0932662099246754e-06, "loss": 0.0069, "step": 946500 }, { "epoch": 35.85, "learning_rate": 2.08380332336576e-06, "loss": 0.0073, "step": 947000 }, { "epoch": 35.86, "learning_rate": 2.0743404368068437e-06, "loss": 0.0067, "step": 947500 }, { "epoch": 35.88, "learning_rate": 2.064877550247928e-06, "loss": 0.0075, "step": 948000 }, { "epoch": 35.9, "learning_rate": 2.055414663689012e-06, "loss": 0.0074, "step": 948500 }, { "epoch": 35.92, "learning_rate": 2.045951777130096e-06, "loss": 0.0069, "step": 949000 }, { "epoch": 35.94, "learning_rate": 2.0364888905711798e-06, "loss": 0.0068, "step": 949500 }, { "epoch": 35.96, "learning_rate": 2.027044929785382e-06, "loss": 0.007, "step": 950000 }, { "epoch": 35.98, "learning_rate": 2.017582043226466e-06, "loss": 0.0067, "step": 950500 }, { "epoch": 36.0, "learning_rate": 2.00811915666755e-06, "loss": 0.0068, "step": 951000 }, { "epoch": 36.0, "eval_bleu": 92.3662, "eval_gen_len": 16.471, "eval_loss": 0.14456987380981445, "eval_runtime": 1176.3281, "eval_samples_per_second": 38.822, "eval_steps_per_second": 1.214, "step": 951084 }, { "epoch": 36.02, "learning_rate": 1.998656270108634e-06, "loss": 0.0065, "step": 951500 }, { "epoch": 36.03, "learning_rate": 1.9891933835497182e-06, "loss": 0.0065, "step": 952000 }, { "epoch": 36.05, "learning_rate": 1.979730496990802e-06, "loss": 0.0064, "step": 952500 }, { "epoch": 36.07, "learning_rate": 1.9702676104318865e-06, "loss": 0.0068, "step": 953000 }, { "epoch": 36.09, "learning_rate": 1.9608047238729706e-06, "loss": 0.0068, "step": 953500 }, { "epoch": 36.11, "learning_rate": 1.9513418373140543e-06, "loss": 0.0065, "step": 954000 }, { "epoch": 36.13, "learning_rate": 1.9418978765282562e-06, "loss": 0.0064, "step": 954500 }, { "epoch": 36.15, "learning_rate": 1.9324349899693404e-06, "loss": 0.0067, "step": 955000 }, { "epoch": 36.17, "learning_rate": 1.9229721034104245e-06, "loss": 0.0063, "step": 955500 }, { "epoch": 36.19, "learning_rate": 1.9135092168515086e-06, "loss": 0.0064, "step": 956000 }, { "epoch": 36.21, "learning_rate": 1.9040463302925927e-06, "loss": 0.0063, "step": 956500 }, { "epoch": 36.22, "learning_rate": 1.8945834437336767e-06, "loss": 0.0066, "step": 957000 }, { "epoch": 36.24, "learning_rate": 1.8851205571747608e-06, "loss": 0.0062, "step": 957500 }, { "epoch": 36.26, "learning_rate": 1.8756765963889625e-06, "loss": 0.0062, "step": 958000 }, { "epoch": 36.28, "learning_rate": 1.8662137098300466e-06, "loss": 0.0063, "step": 958500 }, { "epoch": 36.3, "learning_rate": 1.8567508232711306e-06, "loss": 0.0067, "step": 959000 }, { "epoch": 36.32, "learning_rate": 1.8472879367122149e-06, "loss": 0.0067, "step": 959500 }, { "epoch": 36.34, "learning_rate": 1.837825050153299e-06, "loss": 0.0062, "step": 960000 }, { "epoch": 36.36, "learning_rate": 1.828362163594383e-06, "loss": 0.0072, "step": 960500 }, { "epoch": 36.38, "learning_rate": 1.818899277035467e-06, "loss": 0.0068, "step": 961000 }, { "epoch": 36.39, "learning_rate": 1.809455316249669e-06, "loss": 0.0063, "step": 961500 }, { "epoch": 36.41, "learning_rate": 1.7999924296907531e-06, "loss": 0.0064, "step": 962000 }, { "epoch": 36.43, "learning_rate": 1.7905484689049549e-06, "loss": 0.0064, "step": 962500 }, { "epoch": 36.45, "learning_rate": 1.781085582346039e-06, "loss": 0.0066, "step": 963000 }, { "epoch": 36.47, "learning_rate": 1.771622695787123e-06, "loss": 0.0072, "step": 963500 }, { "epoch": 36.49, "learning_rate": 1.7621598092282072e-06, "loss": 0.0066, "step": 964000 }, { "epoch": 36.51, "learning_rate": 1.7526969226692912e-06, "loss": 0.0071, "step": 964500 }, { "epoch": 36.53, "learning_rate": 1.7432340361103753e-06, "loss": 0.0066, "step": 965000 }, { "epoch": 36.55, "learning_rate": 1.7337711495514592e-06, "loss": 0.0069, "step": 965500 }, { "epoch": 36.56, "learning_rate": 1.7243082629925433e-06, "loss": 0.0068, "step": 966000 }, { "epoch": 36.58, "learning_rate": 1.7148453764336277e-06, "loss": 0.0065, "step": 966500 }, { "epoch": 36.6, "learning_rate": 1.7053824898747116e-06, "loss": 0.0064, "step": 967000 }, { "epoch": 36.62, "learning_rate": 1.6959385290889135e-06, "loss": 0.0063, "step": 967500 }, { "epoch": 36.64, "learning_rate": 1.6864756425299974e-06, "loss": 0.0063, "step": 968000 }, { "epoch": 36.66, "learning_rate": 1.6770127559710816e-06, "loss": 0.0068, "step": 968500 }, { "epoch": 36.68, "learning_rate": 1.6675498694121655e-06, "loss": 0.0061, "step": 969000 }, { "epoch": 36.7, "learning_rate": 1.6580869828532498e-06, "loss": 0.0062, "step": 969500 }, { "epoch": 36.72, "learning_rate": 1.6486240962943337e-06, "loss": 0.0063, "step": 970000 }, { "epoch": 36.73, "learning_rate": 1.6391612097354178e-06, "loss": 0.0066, "step": 970500 }, { "epoch": 36.75, "learning_rate": 1.6296983231765018e-06, "loss": 0.0062, "step": 971000 }, { "epoch": 36.77, "learning_rate": 1.6202354366175859e-06, "loss": 0.007, "step": 971500 }, { "epoch": 36.79, "learning_rate": 1.6108104016049056e-06, "loss": 0.0065, "step": 972000 }, { "epoch": 36.81, "learning_rate": 1.6013475150459898e-06, "loss": 0.0071, "step": 972500 }, { "epoch": 36.83, "learning_rate": 1.5918846284870737e-06, "loss": 0.0066, "step": 973000 }, { "epoch": 36.85, "learning_rate": 1.5824217419281578e-06, "loss": 0.0064, "step": 973500 }, { "epoch": 36.87, "learning_rate": 1.5729588553692421e-06, "loss": 0.0064, "step": 974000 }, { "epoch": 36.89, "learning_rate": 1.563495968810326e-06, "loss": 0.0065, "step": 974500 }, { "epoch": 36.91, "learning_rate": 1.5540330822514102e-06, "loss": 0.0065, "step": 975000 }, { "epoch": 36.92, "learning_rate": 1.544570195692494e-06, "loss": 0.0066, "step": 975500 }, { "epoch": 36.94, "learning_rate": 1.5351073091335782e-06, "loss": 0.0072, "step": 976000 }, { "epoch": 36.96, "learning_rate": 1.5256633483477802e-06, "loss": 0.0066, "step": 976500 }, { "epoch": 36.98, "learning_rate": 1.5162004617888643e-06, "loss": 0.0063, "step": 977000 }, { "epoch": 37.0, "learning_rate": 1.5067375752299482e-06, "loss": 0.0066, "step": 977500 }, { "epoch": 37.0, "eval_bleu": 92.3884, "eval_gen_len": 16.4703, "eval_loss": 0.145610511302948, "eval_runtime": 1252.6827, "eval_samples_per_second": 36.455, "eval_steps_per_second": 1.14, "step": 977503 }, { "epoch": 37.02, "learning_rate": 1.4972746886710323e-06, "loss": 0.0061, "step": 978000 }, { "epoch": 37.04, "learning_rate": 1.4878118021121163e-06, "loss": 0.0061, "step": 978500 }, { "epoch": 37.06, "learning_rate": 1.4783678413263184e-06, "loss": 0.0061, "step": 979000 }, { "epoch": 37.08, "learning_rate": 1.4689049547674023e-06, "loss": 0.0062, "step": 979500 }, { "epoch": 37.09, "learning_rate": 1.4594420682084864e-06, "loss": 0.0063, "step": 980000 }, { "epoch": 37.11, "learning_rate": 1.4499791816495706e-06, "loss": 0.0063, "step": 980500 }, { "epoch": 37.13, "learning_rate": 1.4405162950906545e-06, "loss": 0.0063, "step": 981000 }, { "epoch": 37.15, "learning_rate": 1.4310534085317388e-06, "loss": 0.0065, "step": 981500 }, { "epoch": 37.17, "learning_rate": 1.4215905219728227e-06, "loss": 0.006, "step": 982000 }, { "epoch": 37.19, "learning_rate": 1.4121276354139069e-06, "loss": 0.0064, "step": 982500 }, { "epoch": 37.21, "learning_rate": 1.4027026004012264e-06, "loss": 0.0065, "step": 983000 }, { "epoch": 37.23, "learning_rate": 1.3932586396154286e-06, "loss": 0.006, "step": 983500 }, { "epoch": 37.25, "learning_rate": 1.3837957530565125e-06, "loss": 0.0062, "step": 984000 }, { "epoch": 37.26, "learning_rate": 1.3743328664975966e-06, "loss": 0.0063, "step": 984500 }, { "epoch": 37.28, "learning_rate": 1.3648699799386805e-06, "loss": 0.0058, "step": 985000 }, { "epoch": 37.3, "learning_rate": 1.3554070933797647e-06, "loss": 0.006, "step": 985500 }, { "epoch": 37.32, "learning_rate": 1.3459442068208486e-06, "loss": 0.0065, "step": 986000 }, { "epoch": 37.34, "learning_rate": 1.336481320261933e-06, "loss": 0.0063, "step": 986500 }, { "epoch": 37.36, "learning_rate": 1.3270184337030168e-06, "loss": 0.0063, "step": 987000 }, { "epoch": 37.38, "learning_rate": 1.317555547144101e-06, "loss": 0.0061, "step": 987500 }, { "epoch": 37.4, "learning_rate": 1.3081115863583027e-06, "loss": 0.0064, "step": 988000 }, { "epoch": 37.42, "learning_rate": 1.2986486997993868e-06, "loss": 0.0065, "step": 988500 }, { "epoch": 37.44, "learning_rate": 1.2891858132404711e-06, "loss": 0.0062, "step": 989000 }, { "epoch": 37.45, "learning_rate": 1.279722926681555e-06, "loss": 0.0061, "step": 989500 }, { "epoch": 37.47, "learning_rate": 1.270278965895757e-06, "loss": 0.0067, "step": 990000 }, { "epoch": 37.49, "learning_rate": 1.260816079336841e-06, "loss": 0.0058, "step": 990500 }, { "epoch": 37.51, "learning_rate": 1.2513531927779253e-06, "loss": 0.007, "step": 991000 }, { "epoch": 37.53, "learning_rate": 1.2418903062190092e-06, "loss": 0.0066, "step": 991500 }, { "epoch": 37.55, "learning_rate": 1.232446345433211e-06, "loss": 0.0064, "step": 992000 }, { "epoch": 37.57, "learning_rate": 1.2229834588742952e-06, "loss": 0.0064, "step": 992500 }, { "epoch": 37.59, "learning_rate": 1.2135205723153792e-06, "loss": 0.0061, "step": 993000 }, { "epoch": 37.61, "learning_rate": 1.204076611529581e-06, "loss": 0.0064, "step": 993500 }, { "epoch": 37.62, "learning_rate": 1.1946137249706652e-06, "loss": 0.0063, "step": 994000 }, { "epoch": 37.64, "learning_rate": 1.1851508384117491e-06, "loss": 0.0062, "step": 994500 }, { "epoch": 37.66, "learning_rate": 1.1756879518528333e-06, "loss": 0.006, "step": 995000 }, { "epoch": 37.68, "learning_rate": 1.1662250652939174e-06, "loss": 0.0058, "step": 995500 }, { "epoch": 37.7, "learning_rate": 1.1567621787350013e-06, "loss": 0.0062, "step": 996000 }, { "epoch": 37.72, "learning_rate": 1.1472992921760854e-06, "loss": 0.0063, "step": 996500 }, { "epoch": 37.74, "learning_rate": 1.1378364056171696e-06, "loss": 0.0061, "step": 997000 }, { "epoch": 37.76, "learning_rate": 1.1283735190582535e-06, "loss": 0.0065, "step": 997500 }, { "epoch": 37.78, "learning_rate": 1.1189106324993376e-06, "loss": 0.0063, "step": 998000 }, { "epoch": 37.79, "learning_rate": 1.1094477459404217e-06, "loss": 0.0062, "step": 998500 }, { "epoch": 37.81, "learning_rate": 1.0999848593815058e-06, "loss": 0.0062, "step": 999000 }, { "epoch": 37.83, "learning_rate": 1.0905408985957078e-06, "loss": 0.0062, "step": 999500 }, { "epoch": 37.85, "learning_rate": 1.081078012036792e-06, "loss": 0.0062, "step": 1000000 }, { "epoch": 37.87, "learning_rate": 1.0716151254778758e-06, "loss": 0.0062, "step": 1000500 }, { "epoch": 37.89, "learning_rate": 1.06215223891896e-06, "loss": 0.0063, "step": 1001000 }, { "epoch": 37.91, "learning_rate": 1.052689352360044e-06, "loss": 0.0062, "step": 1001500 }, { "epoch": 37.93, "learning_rate": 1.043226465801128e-06, "loss": 0.0066, "step": 1002000 }, { "epoch": 37.95, "learning_rate": 1.0337635792422121e-06, "loss": 0.0065, "step": 1002500 }, { "epoch": 37.97, "learning_rate": 1.0243006926832962e-06, "loss": 0.0063, "step": 1003000 }, { "epoch": 37.98, "learning_rate": 1.0148378061243802e-06, "loss": 0.0065, "step": 1003500 }, { "epoch": 38.0, "eval_bleu": 92.4009, "eval_gen_len": 16.4752, "eval_loss": 0.14630930125713348, "eval_runtime": 1235.7047, "eval_samples_per_second": 36.956, "eval_steps_per_second": 1.156, "step": 1003922 }, { "epoch": 38.0, "learning_rate": 1.005393845338582e-06, "loss": 0.0061, "step": 1004000 }, { "epoch": 38.02, "learning_rate": 9.959309587796662e-07, "loss": 0.0061, "step": 1004500 }, { "epoch": 38.04, "learning_rate": 9.864680722207504e-07, "loss": 0.006, "step": 1005000 }, { "epoch": 38.06, "learning_rate": 9.770051856618345e-07, "loss": 0.0062, "step": 1005500 }, { "epoch": 38.08, "learning_rate": 9.675422991029184e-07, "loss": 0.0064, "step": 1006000 }, { "epoch": 38.1, "learning_rate": 9.580983383171203e-07, "loss": 0.006, "step": 1006500 }, { "epoch": 38.12, "learning_rate": 9.486354517582045e-07, "loss": 0.0058, "step": 1007000 }, { "epoch": 38.14, "learning_rate": 9.391725651992885e-07, "loss": 0.0061, "step": 1007500 }, { "epoch": 38.15, "learning_rate": 9.297096786403725e-07, "loss": 0.0061, "step": 1008000 }, { "epoch": 38.17, "learning_rate": 9.202467920814566e-07, "loss": 0.0063, "step": 1008500 }, { "epoch": 38.19, "learning_rate": 9.107839055225406e-07, "loss": 0.0062, "step": 1009000 }, { "epoch": 38.21, "learning_rate": 9.013399447367426e-07, "loss": 0.0059, "step": 1009500 }, { "epoch": 38.23, "learning_rate": 8.918959839509444e-07, "loss": 0.0061, "step": 1010000 }, { "epoch": 38.25, "learning_rate": 8.824330973920286e-07, "loss": 0.0066, "step": 1010500 }, { "epoch": 38.27, "learning_rate": 8.729702108331126e-07, "loss": 0.0057, "step": 1011000 }, { "epoch": 38.29, "learning_rate": 8.635073242741966e-07, "loss": 0.0065, "step": 1011500 }, { "epoch": 38.31, "learning_rate": 8.540444377152807e-07, "loss": 0.0061, "step": 1012000 }, { "epoch": 38.32, "learning_rate": 8.445815511563648e-07, "loss": 0.0058, "step": 1012500 }, { "epoch": 38.34, "learning_rate": 8.351186645974489e-07, "loss": 0.0061, "step": 1013000 }, { "epoch": 38.36, "learning_rate": 8.25655778038533e-07, "loss": 0.006, "step": 1013500 }, { "epoch": 38.38, "learning_rate": 8.16192891479617e-07, "loss": 0.0059, "step": 1014000 }, { "epoch": 38.4, "learning_rate": 8.06730004920701e-07, "loss": 0.0061, "step": 1014500 }, { "epoch": 38.42, "learning_rate": 7.97286044134903e-07, "loss": 0.0058, "step": 1015000 }, { "epoch": 38.44, "learning_rate": 7.87823157575987e-07, "loss": 0.0059, "step": 1015500 }, { "epoch": 38.46, "learning_rate": 7.783602710170711e-07, "loss": 0.0056, "step": 1016000 }, { "epoch": 38.48, "learning_rate": 7.688973844581551e-07, "loss": 0.0059, "step": 1016500 }, { "epoch": 38.5, "learning_rate": 7.594344978992392e-07, "loss": 0.0057, "step": 1017000 }, { "epoch": 38.51, "learning_rate": 7.499905371134411e-07, "loss": 0.0064, "step": 1017500 }, { "epoch": 38.53, "learning_rate": 7.405276505545251e-07, "loss": 0.0064, "step": 1018000 }, { "epoch": 38.55, "learning_rate": 7.310836897687271e-07, "loss": 0.006, "step": 1018500 }, { "epoch": 38.57, "learning_rate": 7.21639728982929e-07, "loss": 0.0063, "step": 1019000 }, { "epoch": 38.59, "learning_rate": 7.12176842424013e-07, "loss": 0.0059, "step": 1019500 }, { "epoch": 38.61, "learning_rate": 7.027139558650971e-07, "loss": 0.006, "step": 1020000 }, { "epoch": 38.63, "learning_rate": 6.932510693061813e-07, "loss": 0.006, "step": 1020500 }, { "epoch": 38.65, "learning_rate": 6.837881827472653e-07, "loss": 0.0062, "step": 1021000 }, { "epoch": 38.67, "learning_rate": 6.743442219614673e-07, "loss": 0.0064, "step": 1021500 }, { "epoch": 38.68, "learning_rate": 6.648813354025513e-07, "loss": 0.0065, "step": 1022000 }, { "epoch": 38.7, "learning_rate": 6.554184488436353e-07, "loss": 0.0061, "step": 1022500 }, { "epoch": 38.72, "learning_rate": 6.459555622847194e-07, "loss": 0.0063, "step": 1023000 }, { "epoch": 38.74, "learning_rate": 6.364926757258034e-07, "loss": 0.0061, "step": 1023500 }, { "epoch": 38.76, "learning_rate": 6.270297891668875e-07, "loss": 0.0059, "step": 1024000 }, { "epoch": 38.78, "learning_rate": 6.175669026079716e-07, "loss": 0.0063, "step": 1024500 }, { "epoch": 38.8, "learning_rate": 6.081040160490557e-07, "loss": 0.0065, "step": 1025000 }, { "epoch": 38.82, "learning_rate": 5.986411294901397e-07, "loss": 0.0058, "step": 1025500 }, { "epoch": 38.84, "learning_rate": 5.891971687043417e-07, "loss": 0.0063, "step": 1026000 }, { "epoch": 38.85, "learning_rate": 5.797342821454257e-07, "loss": 0.0062, "step": 1026500 }, { "epoch": 38.87, "learning_rate": 5.702713955865097e-07, "loss": 0.0061, "step": 1027000 }, { "epoch": 38.89, "learning_rate": 5.608085090275938e-07, "loss": 0.0063, "step": 1027500 }, { "epoch": 38.91, "learning_rate": 5.513456224686779e-07, "loss": 0.0061, "step": 1028000 }, { "epoch": 38.93, "learning_rate": 5.41882735909762e-07, "loss": 0.0058, "step": 1028500 }, { "epoch": 38.95, "learning_rate": 5.32419849350846e-07, "loss": 0.0063, "step": 1029000 }, { "epoch": 38.97, "learning_rate": 5.229569627919301e-07, "loss": 0.0058, "step": 1029500 }, { "epoch": 38.99, "learning_rate": 5.13513002006132e-07, "loss": 0.0063, "step": 1030000 }, { "epoch": 39.0, "eval_bleu": 92.4327, "eval_gen_len": 16.4778, "eval_loss": 0.14681382477283478, "eval_runtime": 1461.2179, "eval_samples_per_second": 31.253, "eval_steps_per_second": 0.977, "step": 1030341 }, { "epoch": 39.01, "learning_rate": 5.040690412203339e-07, "loss": 0.0059, "step": 1030500 }, { "epoch": 39.02, "learning_rate": 4.946061546614179e-07, "loss": 0.006, "step": 1031000 }, { "epoch": 39.04, "learning_rate": 4.85143268102502e-07, "loss": 0.0059, "step": 1031500 }, { "epoch": 39.06, "learning_rate": 4.756803815435861e-07, "loss": 0.0061, "step": 1032000 }, { "epoch": 39.08, "learning_rate": 4.662174949846702e-07, "loss": 0.0063, "step": 1032500 }, { "epoch": 39.1, "learning_rate": 4.567546084257542e-07, "loss": 0.0059, "step": 1033000 }, { "epoch": 39.12, "learning_rate": 4.472917218668383e-07, "loss": 0.0058, "step": 1033500 }, { "epoch": 39.14, "learning_rate": 4.3782883530792237e-07, "loss": 0.0058, "step": 1034000 }, { "epoch": 39.16, "learning_rate": 4.283659487490064e-07, "loss": 0.0057, "step": 1034500 }, { "epoch": 39.18, "learning_rate": 4.189030621900905e-07, "loss": 0.0056, "step": 1035000 }, { "epoch": 39.2, "learning_rate": 4.094591014042924e-07, "loss": 0.0057, "step": 1035500 }, { "epoch": 39.21, "learning_rate": 3.999962148453765e-07, "loss": 0.0062, "step": 1036000 }, { "epoch": 39.23, "learning_rate": 3.9053332828646055e-07, "loss": 0.0057, "step": 1036500 }, { "epoch": 39.25, "learning_rate": 3.8107044172754457e-07, "loss": 0.0059, "step": 1037000 }, { "epoch": 39.27, "learning_rate": 3.7160755516862864e-07, "loss": 0.0062, "step": 1037500 }, { "epoch": 39.29, "learning_rate": 3.621446686097127e-07, "loss": 0.0056, "step": 1038000 }, { "epoch": 39.31, "learning_rate": 3.527007078239146e-07, "loss": 0.006, "step": 1038500 }, { "epoch": 39.33, "learning_rate": 3.4323782126499874e-07, "loss": 0.0058, "step": 1039000 }, { "epoch": 39.35, "learning_rate": 3.337749347060828e-07, "loss": 0.0056, "step": 1039500 }, { "epoch": 39.37, "learning_rate": 3.2431204814716683e-07, "loss": 0.0056, "step": 1040000 }, { "epoch": 39.38, "learning_rate": 3.148491615882509e-07, "loss": 0.006, "step": 1040500 }, { "epoch": 39.4, "learning_rate": 3.0538627502933497e-07, "loss": 0.006, "step": 1041000 }, { "epoch": 39.42, "learning_rate": 2.9592338847041904e-07, "loss": 0.0056, "step": 1041500 }, { "epoch": 39.44, "learning_rate": 2.8647942768462094e-07, "loss": 0.0061, "step": 1042000 }, { "epoch": 39.46, "learning_rate": 2.77016541125705e-07, "loss": 0.0057, "step": 1042500 }, { "epoch": 39.48, "learning_rate": 2.675536545667891e-07, "loss": 0.0061, "step": 1043000 }, { "epoch": 39.5, "learning_rate": 2.5809076800787315e-07, "loss": 0.006, "step": 1043500 }, { "epoch": 39.52, "learning_rate": 2.4862788144895723e-07, "loss": 0.0055, "step": 1044000 }, { "epoch": 39.54, "learning_rate": 2.391649948900413e-07, "loss": 0.006, "step": 1044500 }, { "epoch": 39.55, "learning_rate": 2.2970210833112534e-07, "loss": 0.0062, "step": 1045000 }, { "epoch": 39.57, "learning_rate": 2.2023922177220942e-07, "loss": 0.0064, "step": 1045500 }, { "epoch": 39.59, "learning_rate": 2.107763352132935e-07, "loss": 0.0058, "step": 1046000 }, { "epoch": 39.61, "learning_rate": 2.0131344865437753e-07, "loss": 0.0058, "step": 1046500 }, { "epoch": 39.63, "learning_rate": 1.9185056209546163e-07, "loss": 0.0058, "step": 1047000 }, { "epoch": 39.65, "learning_rate": 1.8242552708278135e-07, "loss": 0.0063, "step": 1047500 }, { "epoch": 39.67, "learning_rate": 1.7296264052386542e-07, "loss": 0.0063, "step": 1048000 }, { "epoch": 39.69, "learning_rate": 1.634997539649495e-07, "loss": 0.0058, "step": 1048500 }, { "epoch": 39.71, "learning_rate": 1.5403686740603357e-07, "loss": 0.0055, "step": 1049000 }, { "epoch": 39.73, "learning_rate": 1.445739808471176e-07, "loss": 0.0056, "step": 1049500 }, { "epoch": 39.74, "learning_rate": 1.3511109428820168e-07, "loss": 0.0058, "step": 1050000 }, { "epoch": 39.76, "learning_rate": 1.2566713350240358e-07, "loss": 0.0058, "step": 1050500 }, { "epoch": 39.78, "learning_rate": 1.1620424694348766e-07, "loss": 0.0061, "step": 1051000 }, { "epoch": 39.8, "learning_rate": 1.0674136038457171e-07, "loss": 0.0057, "step": 1051500 }, { "epoch": 39.82, "learning_rate": 9.727847382565578e-08, "loss": 0.006, "step": 1052000 }, { "epoch": 39.84, "learning_rate": 8.781558726673985e-08, "loss": 0.0057, "step": 1052500 }, { "epoch": 39.86, "learning_rate": 7.835270070782393e-08, "loss": 0.0059, "step": 1053000 }, { "epoch": 39.88, "learning_rate": 6.888981414890799e-08, "loss": 0.0058, "step": 1053500 }, { "epoch": 39.9, "learning_rate": 5.942692758999205e-08, "loss": 0.0057, "step": 1054000 }, { "epoch": 39.91, "learning_rate": 4.998296680419396e-08, "loss": 0.0059, "step": 1054500 }, { "epoch": 39.93, "learning_rate": 4.0520080245278024e-08, "loss": 0.0059, "step": 1055000 }, { "epoch": 39.95, "learning_rate": 3.105719368636209e-08, "loss": 0.0063, "step": 1055500 }, { "epoch": 39.97, "learning_rate": 2.1594307127446155e-08, "loss": 0.0057, "step": 1056000 }, { "epoch": 39.99, "learning_rate": 1.2131420568530226e-08, "loss": 0.0057, "step": 1056500 }, { "epoch": 40.0, "eval_bleu": 92.4356, "eval_gen_len": 16.4742, "eval_loss": 0.14701814949512482, "eval_runtime": 1304.0934, "eval_samples_per_second": 35.018, "eval_steps_per_second": 1.095, "step": 1056760 } ], "logging_steps": 500, "max_steps": 1056760, "num_input_tokens_seen": 0, "num_train_epochs": 40, "save_steps": 500, "total_flos": 2.5508178908682977e+18, "train_batch_size": 32, "trial_name": null, "trial_params": null }