diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,28414 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 6.999948237486413, + "global_step": 473312, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999260530052059e-05, + "loss": 2.919, + "step": 100 + }, + { + "epoch": 0.0, + "learning_rate": 4.9985210601041174e-05, + "loss": 2.7423, + "step": 200 + }, + { + "epoch": 0.0, + "learning_rate": 4.9977815901561766e-05, + "loss": 2.6872, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 4.997042120208235e-05, + "loss": 2.6637, + "step": 400 + }, + { + "epoch": 0.01, + "learning_rate": 4.996302650260294e-05, + "loss": 2.6088, + "step": 500 + }, + { + "epoch": 0.01, + "learning_rate": 4.995563180312352e-05, + "loss": 2.5799, + "step": 600 + }, + { + "epoch": 0.01, + "learning_rate": 4.994823710364411e-05, + "loss": 2.5585, + "step": 700 + }, + { + "epoch": 0.01, + "learning_rate": 4.9940842404164694e-05, + "loss": 2.5208, + "step": 800 + }, + { + "epoch": 0.01, + "learning_rate": 4.993344770468528e-05, + "loss": 2.5201, + "step": 900 + }, + { + "epoch": 0.01, + "learning_rate": 4.992605300520587e-05, + "loss": 2.4521, + "step": 1000 + }, + { + "epoch": 0.02, + "learning_rate": 4.991865830572646e-05, + "loss": 2.4482, + "step": 1100 + }, + { + "epoch": 0.02, + "learning_rate": 4.9911263606247044e-05, + "loss": 2.4095, + "step": 1200 + }, + { + "epoch": 0.02, + "learning_rate": 4.990386890676763e-05, + "loss": 2.4293, + "step": 1300 + }, + { + "epoch": 0.02, + "learning_rate": 4.989647420728822e-05, + "loss": 2.4321, + "step": 1400 + }, + { + "epoch": 0.02, + "learning_rate": 4.988907950780881e-05, + "loss": 2.3756, + "step": 1500 + }, + { + "epoch": 0.02, + "learning_rate": 4.988168480832939e-05, + "loss": 2.3572, + "step": 1600 + }, + { + "epoch": 0.03, + "learning_rate": 4.987429010884998e-05, + "loss": 2.3661, + "step": 1700 + }, + { + "epoch": 0.03, + "learning_rate": 4.9866895409370564e-05, + "loss": 2.3842, + "step": 1800 + }, + { + "epoch": 0.03, + "learning_rate": 4.9859500709891157e-05, + "loss": 2.3186, + "step": 1900 + }, + { + "epoch": 0.03, + "learning_rate": 4.9852106010411735e-05, + "loss": 2.3267, + "step": 2000 + }, + { + "epoch": 0.03, + "learning_rate": 4.984471131093232e-05, + "loss": 2.3331, + "step": 2100 + }, + { + "epoch": 0.03, + "learning_rate": 4.9837316611452914e-05, + "loss": 2.2617, + "step": 2200 + }, + { + "epoch": 0.03, + "learning_rate": 4.98299219119735e-05, + "loss": 2.3182, + "step": 2300 + }, + { + "epoch": 0.04, + "learning_rate": 4.9822527212494085e-05, + "loss": 2.3079, + "step": 2400 + }, + { + "epoch": 0.04, + "learning_rate": 4.981513251301467e-05, + "loss": 2.2781, + "step": 2500 + }, + { + "epoch": 0.04, + "learning_rate": 4.980773781353526e-05, + "loss": 2.2769, + "step": 2600 + }, + { + "epoch": 0.04, + "learning_rate": 4.980034311405585e-05, + "loss": 2.3029, + "step": 2700 + }, + { + "epoch": 0.04, + "learning_rate": 4.9792948414576434e-05, + "loss": 2.2748, + "step": 2800 + }, + { + "epoch": 0.04, + "learning_rate": 4.978555371509702e-05, + "loss": 2.2387, + "step": 2900 + }, + { + "epoch": 0.04, + "learning_rate": 4.977815901561761e-05, + "loss": 2.2111, + "step": 3000 + }, + { + "epoch": 0.05, + "learning_rate": 4.97707643161382e-05, + "loss": 2.2477, + "step": 3100 + }, + { + "epoch": 0.05, + "learning_rate": 4.9763369616658783e-05, + "loss": 2.229, + "step": 3200 + }, + { + "epoch": 0.05, + "learning_rate": 4.975597491717937e-05, + "loss": 2.256, + "step": 3300 + }, + { + "epoch": 0.05, + "learning_rate": 4.9748580217699955e-05, + "loss": 2.2034, + "step": 3400 + }, + { + "epoch": 0.05, + "learning_rate": 4.974118551822054e-05, + "loss": 2.241, + "step": 3500 + }, + { + "epoch": 0.05, + "learning_rate": 4.9733790818741126e-05, + "loss": 2.2128, + "step": 3600 + }, + { + "epoch": 0.05, + "learning_rate": 4.972639611926171e-05, + "loss": 2.2185, + "step": 3700 + }, + { + "epoch": 0.06, + "learning_rate": 4.9719001419782304e-05, + "loss": 2.2213, + "step": 3800 + }, + { + "epoch": 0.06, + "learning_rate": 4.971160672030289e-05, + "loss": 2.1884, + "step": 3900 + }, + { + "epoch": 0.06, + "learning_rate": 4.9704212020823475e-05, + "loss": 2.1957, + "step": 4000 + }, + { + "epoch": 0.06, + "learning_rate": 4.969681732134406e-05, + "loss": 2.204, + "step": 4100 + }, + { + "epoch": 0.06, + "learning_rate": 4.968949656885945e-05, + "loss": 2.1919, + "step": 4200 + }, + { + "epoch": 0.06, + "learning_rate": 4.9682101869380034e-05, + "loss": 2.1941, + "step": 4300 + }, + { + "epoch": 0.07, + "learning_rate": 4.967470716990062e-05, + "loss": 2.1981, + "step": 4400 + }, + { + "epoch": 0.07, + "learning_rate": 4.9667312470421205e-05, + "loss": 2.1546, + "step": 4500 + }, + { + "epoch": 0.07, + "learning_rate": 4.965991777094179e-05, + "loss": 2.1156, + "step": 4600 + }, + { + "epoch": 0.07, + "learning_rate": 4.9652523071462376e-05, + "loss": 2.142, + "step": 4700 + }, + { + "epoch": 0.07, + "learning_rate": 4.964512837198296e-05, + "loss": 2.1557, + "step": 4800 + }, + { + "epoch": 0.07, + "learning_rate": 4.963773367250355e-05, + "loss": 2.1273, + "step": 4900 + }, + { + "epoch": 0.07, + "learning_rate": 4.963033897302414e-05, + "loss": 2.1309, + "step": 5000 + }, + { + "epoch": 0.08, + "learning_rate": 4.9622944273544726e-05, + "loss": 2.1652, + "step": 5100 + }, + { + "epoch": 0.08, + "learning_rate": 4.961554957406531e-05, + "loss": 2.149, + "step": 5200 + }, + { + "epoch": 0.08, + "learning_rate": 4.96081548745859e-05, + "loss": 2.1456, + "step": 5300 + }, + { + "epoch": 0.08, + "learning_rate": 4.960076017510649e-05, + "loss": 2.1594, + "step": 5400 + }, + { + "epoch": 0.08, + "learning_rate": 4.9593365475627075e-05, + "loss": 2.1398, + "step": 5500 + }, + { + "epoch": 0.08, + "learning_rate": 4.958597077614766e-05, + "loss": 2.1148, + "step": 5600 + }, + { + "epoch": 0.08, + "learning_rate": 4.9578576076668246e-05, + "loss": 2.1583, + "step": 5700 + }, + { + "epoch": 0.09, + "learning_rate": 4.957118137718884e-05, + "loss": 2.1114, + "step": 5800 + }, + { + "epoch": 0.09, + "learning_rate": 4.9563786677709424e-05, + "loss": 2.1155, + "step": 5900 + }, + { + "epoch": 0.09, + "learning_rate": 4.955639197823e-05, + "loss": 2.1098, + "step": 6000 + }, + { + "epoch": 0.09, + "learning_rate": 4.954899727875059e-05, + "loss": 2.0925, + "step": 6100 + }, + { + "epoch": 0.09, + "learning_rate": 4.9541676526265976e-05, + "loss": 2.1266, + "step": 6200 + }, + { + "epoch": 0.09, + "learning_rate": 4.953428182678656e-05, + "loss": 2.1477, + "step": 6300 + }, + { + "epoch": 0.09, + "learning_rate": 4.952688712730715e-05, + "loss": 2.1214, + "step": 6400 + }, + { + "epoch": 0.1, + "learning_rate": 4.951949242782773e-05, + "loss": 2.093, + "step": 6500 + }, + { + "epoch": 0.1, + "learning_rate": 4.9512097728348325e-05, + "loss": 2.0955, + "step": 6600 + }, + { + "epoch": 0.1, + "learning_rate": 4.950470302886891e-05, + "loss": 2.0927, + "step": 6700 + }, + { + "epoch": 0.1, + "learning_rate": 4.94973083293895e-05, + "loss": 2.0823, + "step": 6800 + }, + { + "epoch": 0.1, + "learning_rate": 4.948991362991008e-05, + "loss": 2.0801, + "step": 6900 + }, + { + "epoch": 0.1, + "learning_rate": 4.948251893043067e-05, + "loss": 2.1013, + "step": 7000 + }, + { + "epoch": 0.11, + "learning_rate": 4.947512423095126e-05, + "loss": 2.0966, + "step": 7100 + }, + { + "epoch": 0.11, + "learning_rate": 4.9467729531471846e-05, + "loss": 2.0613, + "step": 7200 + }, + { + "epoch": 0.11, + "learning_rate": 4.9460334831992425e-05, + "loss": 2.063, + "step": 7300 + }, + { + "epoch": 0.11, + "learning_rate": 4.945294013251302e-05, + "loss": 2.0941, + "step": 7400 + }, + { + "epoch": 0.11, + "learning_rate": 4.94455454330336e-05, + "loss": 2.0996, + "step": 7500 + }, + { + "epoch": 0.11, + "learning_rate": 4.943815073355419e-05, + "loss": 2.0831, + "step": 7600 + }, + { + "epoch": 0.11, + "learning_rate": 4.9430756034074774e-05, + "loss": 2.0941, + "step": 7700 + }, + { + "epoch": 0.12, + "learning_rate": 4.942336133459537e-05, + "loss": 2.1278, + "step": 7800 + }, + { + "epoch": 0.12, + "learning_rate": 4.941596663511595e-05, + "loss": 2.0738, + "step": 7900 + }, + { + "epoch": 0.12, + "learning_rate": 4.940857193563654e-05, + "loss": 2.0666, + "step": 8000 + }, + { + "epoch": 0.12, + "learning_rate": 4.9401177236157124e-05, + "loss": 2.0527, + "step": 8100 + }, + { + "epoch": 0.12, + "learning_rate": 4.9393782536677716e-05, + "loss": 2.0532, + "step": 8200 + }, + { + "epoch": 0.12, + "learning_rate": 4.93863878371983e-05, + "loss": 2.0297, + "step": 8300 + }, + { + "epoch": 0.12, + "learning_rate": 4.937899313771889e-05, + "loss": 2.0484, + "step": 8400 + }, + { + "epoch": 0.13, + "learning_rate": 4.937159843823947e-05, + "loss": 2.0632, + "step": 8500 + }, + { + "epoch": 0.13, + "learning_rate": 4.936420373876006e-05, + "loss": 2.0324, + "step": 8600 + }, + { + "epoch": 0.13, + "learning_rate": 4.935688298627544e-05, + "loss": 2.0349, + "step": 8700 + }, + { + "epoch": 0.13, + "learning_rate": 4.9349488286796025e-05, + "loss": 2.0879, + "step": 8800 + }, + { + "epoch": 0.13, + "learning_rate": 4.934209358731661e-05, + "loss": 2.0388, + "step": 8900 + }, + { + "epoch": 0.13, + "learning_rate": 4.93346988878372e-05, + "loss": 2.0342, + "step": 9000 + }, + { + "epoch": 0.13, + "learning_rate": 4.932730418835779e-05, + "loss": 2.0462, + "step": 9100 + }, + { + "epoch": 0.14, + "learning_rate": 4.9319909488878374e-05, + "loss": 2.0469, + "step": 9200 + }, + { + "epoch": 0.14, + "learning_rate": 4.931251478939896e-05, + "loss": 2.0628, + "step": 9300 + }, + { + "epoch": 0.14, + "learning_rate": 4.930512008991955e-05, + "loss": 2.0183, + "step": 9400 + }, + { + "epoch": 0.14, + "learning_rate": 4.929772539044014e-05, + "loss": 2.0261, + "step": 9500 + }, + { + "epoch": 0.14, + "learning_rate": 4.929033069096072e-05, + "loss": 2.0298, + "step": 9600 + }, + { + "epoch": 0.14, + "learning_rate": 4.928293599148131e-05, + "loss": 2.0427, + "step": 9700 + }, + { + "epoch": 0.14, + "learning_rate": 4.9275541292001895e-05, + "loss": 2.0566, + "step": 9800 + }, + { + "epoch": 0.15, + "learning_rate": 4.926814659252249e-05, + "loss": 1.998, + "step": 9900 + }, + { + "epoch": 0.15, + "learning_rate": 4.9260751893043066e-05, + "loss": 2.0508, + "step": 10000 + }, + { + "epoch": 0.15, + "learning_rate": 4.925335719356365e-05, + "loss": 1.9931, + "step": 10100 + }, + { + "epoch": 0.15, + "learning_rate": 4.9245962494084244e-05, + "loss": 2.04, + "step": 10200 + }, + { + "epoch": 0.15, + "learning_rate": 4.923856779460483e-05, + "loss": 1.9981, + "step": 10300 + }, + { + "epoch": 0.15, + "learning_rate": 4.9231173095125415e-05, + "loss": 2.0008, + "step": 10400 + }, + { + "epoch": 0.16, + "learning_rate": 4.9223778395646e-05, + "loss": 1.9967, + "step": 10500 + }, + { + "epoch": 0.16, + "learning_rate": 4.921638369616659e-05, + "loss": 2.013, + "step": 10600 + }, + { + "epoch": 0.16, + "learning_rate": 4.920898899668718e-05, + "loss": 2.0167, + "step": 10700 + }, + { + "epoch": 0.16, + "learning_rate": 4.9201594297207764e-05, + "loss": 2.012, + "step": 10800 + }, + { + "epoch": 0.16, + "learning_rate": 4.919419959772835e-05, + "loss": 1.9876, + "step": 10900 + }, + { + "epoch": 0.16, + "learning_rate": 4.918680489824894e-05, + "loss": 2.0142, + "step": 11000 + }, + { + "epoch": 0.16, + "learning_rate": 4.917941019876953e-05, + "loss": 1.9965, + "step": 11100 + }, + { + "epoch": 0.17, + "learning_rate": 4.917208944628491e-05, + "loss": 1.9778, + "step": 11200 + }, + { + "epoch": 0.17, + "learning_rate": 4.916469474680549e-05, + "loss": 2.0163, + "step": 11300 + }, + { + "epoch": 0.17, + "learning_rate": 4.915730004732608e-05, + "loss": 2.0398, + "step": 11400 + }, + { + "epoch": 0.17, + "learning_rate": 4.9149905347846666e-05, + "loss": 2.0177, + "step": 11500 + }, + { + "epoch": 0.17, + "learning_rate": 4.914251064836725e-05, + "loss": 1.9847, + "step": 11600 + }, + { + "epoch": 0.17, + "learning_rate": 4.913511594888784e-05, + "loss": 1.9609, + "step": 11700 + }, + { + "epoch": 0.17, + "learning_rate": 4.912772124940843e-05, + "loss": 1.9641, + "step": 11800 + }, + { + "epoch": 0.18, + "learning_rate": 4.9120326549929015e-05, + "loss": 2.0005, + "step": 11900 + }, + { + "epoch": 0.18, + "learning_rate": 4.91129318504496e-05, + "loss": 1.9991, + "step": 12000 + }, + { + "epoch": 0.18, + "learning_rate": 4.9105537150970186e-05, + "loss": 1.9912, + "step": 12100 + }, + { + "epoch": 0.18, + "learning_rate": 4.909814245149077e-05, + "loss": 1.987, + "step": 12200 + }, + { + "epoch": 0.18, + "learning_rate": 4.9090747752011364e-05, + "loss": 1.9887, + "step": 12300 + }, + { + "epoch": 0.18, + "learning_rate": 4.908335305253195e-05, + "loss": 1.9956, + "step": 12400 + }, + { + "epoch": 0.18, + "learning_rate": 4.9075958353052535e-05, + "loss": 1.9633, + "step": 12500 + }, + { + "epoch": 0.19, + "learning_rate": 4.906856365357312e-05, + "loss": 1.9802, + "step": 12600 + }, + { + "epoch": 0.19, + "learning_rate": 4.906116895409371e-05, + "loss": 2.0055, + "step": 12700 + }, + { + "epoch": 0.19, + "learning_rate": 4.905377425461429e-05, + "loss": 1.9626, + "step": 12800 + }, + { + "epoch": 0.19, + "learning_rate": 4.904637955513488e-05, + "loss": 1.9506, + "step": 12900 + }, + { + "epoch": 0.19, + "learning_rate": 4.903898485565547e-05, + "loss": 1.963, + "step": 13000 + }, + { + "epoch": 0.19, + "learning_rate": 4.9031590156176056e-05, + "loss": 1.9647, + "step": 13100 + }, + { + "epoch": 0.2, + "learning_rate": 4.9024269403691436e-05, + "loss": 1.9834, + "step": 13200 + }, + { + "epoch": 0.2, + "learning_rate": 4.901687470421202e-05, + "loss": 1.9763, + "step": 13300 + }, + { + "epoch": 0.2, + "learning_rate": 4.900948000473261e-05, + "loss": 1.971, + "step": 13400 + }, + { + "epoch": 0.2, + "learning_rate": 4.90020853052532e-05, + "loss": 1.9911, + "step": 13500 + }, + { + "epoch": 0.2, + "learning_rate": 4.8994690605773786e-05, + "loss": 1.9565, + "step": 13600 + }, + { + "epoch": 0.2, + "learning_rate": 4.898729590629437e-05, + "loss": 1.9361, + "step": 13700 + }, + { + "epoch": 0.2, + "learning_rate": 4.897990120681496e-05, + "loss": 1.9895, + "step": 13800 + }, + { + "epoch": 0.21, + "learning_rate": 4.897250650733554e-05, + "loss": 1.9424, + "step": 13900 + }, + { + "epoch": 0.21, + "learning_rate": 4.896511180785613e-05, + "loss": 1.9614, + "step": 14000 + }, + { + "epoch": 0.21, + "learning_rate": 4.8957717108376714e-05, + "loss": 1.9662, + "step": 14100 + }, + { + "epoch": 0.21, + "learning_rate": 4.8950322408897306e-05, + "loss": 2.0129, + "step": 14200 + }, + { + "epoch": 0.21, + "learning_rate": 4.894292770941789e-05, + "loss": 1.9652, + "step": 14300 + }, + { + "epoch": 0.21, + "learning_rate": 4.893553300993848e-05, + "loss": 1.9475, + "step": 14400 + }, + { + "epoch": 0.21, + "learning_rate": 4.892813831045906e-05, + "loss": 1.9473, + "step": 14500 + }, + { + "epoch": 0.22, + "learning_rate": 4.8920743610979656e-05, + "loss": 1.944, + "step": 14600 + }, + { + "epoch": 0.22, + "learning_rate": 4.891334891150024e-05, + "loss": 1.9409, + "step": 14700 + }, + { + "epoch": 0.22, + "learning_rate": 4.890595421202083e-05, + "loss": 1.949, + "step": 14800 + }, + { + "epoch": 0.22, + "learning_rate": 4.889855951254141e-05, + "loss": 1.9782, + "step": 14900 + }, + { + "epoch": 0.22, + "learning_rate": 4.8891164813062e-05, + "loss": 1.9485, + "step": 15000 + }, + { + "epoch": 0.22, + "learning_rate": 4.888377011358259e-05, + "loss": 1.9484, + "step": 15100 + }, + { + "epoch": 0.22, + "learning_rate": 4.887637541410317e-05, + "loss": 1.9551, + "step": 15200 + }, + { + "epoch": 0.23, + "learning_rate": 4.8868980714623755e-05, + "loss": 1.9261, + "step": 15300 + }, + { + "epoch": 0.23, + "learning_rate": 4.886158601514435e-05, + "loss": 1.9625, + "step": 15400 + }, + { + "epoch": 0.23, + "learning_rate": 4.885419131566493e-05, + "loss": 1.9619, + "step": 15500 + }, + { + "epoch": 0.23, + "learning_rate": 4.884679661618552e-05, + "loss": 1.9496, + "step": 15600 + }, + { + "epoch": 0.23, + "learning_rate": 4.8839401916706105e-05, + "loss": 1.975, + "step": 15700 + }, + { + "epoch": 0.23, + "learning_rate": 4.88320072172267e-05, + "loss": 1.9324, + "step": 15800 + }, + { + "epoch": 0.24, + "learning_rate": 4.882461251774728e-05, + "loss": 1.9643, + "step": 15900 + }, + { + "epoch": 0.24, + "learning_rate": 4.881721781826787e-05, + "loss": 1.9366, + "step": 16000 + }, + { + "epoch": 0.24, + "learning_rate": 4.8809823118788454e-05, + "loss": 1.934, + "step": 16100 + }, + { + "epoch": 0.24, + "learning_rate": 4.8802502366303834e-05, + "loss": 1.9236, + "step": 16200 + }, + { + "epoch": 0.24, + "learning_rate": 4.879510766682443e-05, + "loss": 1.9284, + "step": 16300 + }, + { + "epoch": 0.24, + "learning_rate": 4.878771296734501e-05, + "loss": 1.9677, + "step": 16400 + }, + { + "epoch": 0.24, + "learning_rate": 4.87803182678656e-05, + "loss": 1.9315, + "step": 16500 + }, + { + "epoch": 0.25, + "learning_rate": 4.8772923568386184e-05, + "loss": 1.9345, + "step": 16600 + }, + { + "epoch": 0.25, + "learning_rate": 4.876552886890677e-05, + "loss": 1.9373, + "step": 16700 + }, + { + "epoch": 0.25, + "learning_rate": 4.8758134169427355e-05, + "loss": 1.932, + "step": 16800 + }, + { + "epoch": 0.25, + "learning_rate": 4.875073946994794e-05, + "loss": 1.9039, + "step": 16900 + }, + { + "epoch": 0.25, + "learning_rate": 4.874334477046853e-05, + "loss": 1.9559, + "step": 17000 + }, + { + "epoch": 0.25, + "learning_rate": 4.873595007098912e-05, + "loss": 1.9104, + "step": 17100 + }, + { + "epoch": 0.25, + "learning_rate": 4.8728555371509704e-05, + "loss": 1.9314, + "step": 17200 + }, + { + "epoch": 0.26, + "learning_rate": 4.872116067203029e-05, + "loss": 1.9595, + "step": 17300 + }, + { + "epoch": 0.26, + "learning_rate": 4.8713765972550876e-05, + "loss": 1.9264, + "step": 17400 + }, + { + "epoch": 0.26, + "learning_rate": 4.870637127307147e-05, + "loss": 1.8972, + "step": 17500 + }, + { + "epoch": 0.26, + "learning_rate": 4.8698976573592054e-05, + "loss": 1.9434, + "step": 17600 + }, + { + "epoch": 0.26, + "learning_rate": 4.869158187411264e-05, + "loss": 1.9297, + "step": 17700 + }, + { + "epoch": 0.26, + "learning_rate": 4.8684187174633225e-05, + "loss": 1.9031, + "step": 17800 + }, + { + "epoch": 0.26, + "learning_rate": 4.867679247515381e-05, + "loss": 1.9358, + "step": 17900 + }, + { + "epoch": 0.27, + "learning_rate": 4.8669397775674396e-05, + "loss": 1.8736, + "step": 18000 + }, + { + "epoch": 0.27, + "learning_rate": 4.866200307619498e-05, + "loss": 1.9253, + "step": 18100 + }, + { + "epoch": 0.27, + "learning_rate": 4.8654608376715574e-05, + "loss": 1.9201, + "step": 18200 + }, + { + "epoch": 0.27, + "learning_rate": 4.864721367723616e-05, + "loss": 1.9401, + "step": 18300 + }, + { + "epoch": 0.27, + "learning_rate": 4.8639818977756745e-05, + "loss": 1.9467, + "step": 18400 + }, + { + "epoch": 0.27, + "learning_rate": 4.8632498225272126e-05, + "loss": 1.9439, + "step": 18500 + }, + { + "epoch": 0.28, + "learning_rate": 4.862510352579271e-05, + "loss": 1.9153, + "step": 18600 + }, + { + "epoch": 0.28, + "learning_rate": 4.8617708826313304e-05, + "loss": 1.9149, + "step": 18700 + }, + { + "epoch": 0.28, + "learning_rate": 4.861031412683389e-05, + "loss": 1.9285, + "step": 18800 + }, + { + "epoch": 0.28, + "learning_rate": 4.8602919427354475e-05, + "loss": 1.9138, + "step": 18900 + }, + { + "epoch": 0.28, + "learning_rate": 4.859552472787506e-05, + "loss": 1.9528, + "step": 19000 + }, + { + "epoch": 0.28, + "learning_rate": 4.858813002839565e-05, + "loss": 1.8888, + "step": 19100 + }, + { + "epoch": 0.28, + "learning_rate": 4.858073532891623e-05, + "loss": 1.9066, + "step": 19200 + }, + { + "epoch": 0.29, + "learning_rate": 4.857334062943682e-05, + "loss": 1.932, + "step": 19300 + }, + { + "epoch": 0.29, + "learning_rate": 4.856594592995741e-05, + "loss": 1.9112, + "step": 19400 + }, + { + "epoch": 0.29, + "learning_rate": 4.8558551230477996e-05, + "loss": 1.9011, + "step": 19500 + }, + { + "epoch": 0.29, + "learning_rate": 4.855115653099858e-05, + "loss": 1.9064, + "step": 19600 + }, + { + "epoch": 0.29, + "learning_rate": 4.854376183151917e-05, + "loss": 1.9445, + "step": 19700 + }, + { + "epoch": 0.29, + "learning_rate": 4.853636713203976e-05, + "loss": 1.9436, + "step": 19800 + }, + { + "epoch": 0.29, + "learning_rate": 4.8528972432560345e-05, + "loss": 1.8964, + "step": 19900 + }, + { + "epoch": 0.3, + "learning_rate": 4.852157773308093e-05, + "loss": 1.8949, + "step": 20000 + }, + { + "epoch": 0.3, + "learning_rate": 4.8514183033601516e-05, + "loss": 1.9179, + "step": 20100 + }, + { + "epoch": 0.3, + "learning_rate": 4.85067883341221e-05, + "loss": 1.9006, + "step": 20200 + }, + { + "epoch": 0.3, + "learning_rate": 4.8499393634642694e-05, + "loss": 1.8683, + "step": 20300 + }, + { + "epoch": 0.3, + "learning_rate": 4.849199893516328e-05, + "loss": 1.8887, + "step": 20400 + }, + { + "epoch": 0.3, + "learning_rate": 4.848460423568386e-05, + "loss": 1.8973, + "step": 20500 + }, + { + "epoch": 0.3, + "learning_rate": 4.847720953620445e-05, + "loss": 1.8831, + "step": 20600 + }, + { + "epoch": 0.31, + "learning_rate": 4.846981483672504e-05, + "loss": 1.8713, + "step": 20700 + }, + { + "epoch": 0.31, + "learning_rate": 4.846242013724562e-05, + "loss": 1.9004, + "step": 20800 + }, + { + "epoch": 0.31, + "learning_rate": 4.845502543776621e-05, + "loss": 1.8996, + "step": 20900 + }, + { + "epoch": 0.31, + "learning_rate": 4.84476307382868e-05, + "loss": 1.9057, + "step": 21000 + }, + { + "epoch": 0.31, + "learning_rate": 4.8440236038807386e-05, + "loss": 1.9165, + "step": 21100 + }, + { + "epoch": 0.31, + "learning_rate": 4.843284133932797e-05, + "loss": 1.885, + "step": 21200 + }, + { + "epoch": 0.32, + "learning_rate": 4.842544663984856e-05, + "loss": 1.8674, + "step": 21300 + }, + { + "epoch": 0.32, + "learning_rate": 4.841812588736394e-05, + "loss": 1.9026, + "step": 21400 + }, + { + "epoch": 0.32, + "learning_rate": 4.841073118788453e-05, + "loss": 1.8985, + "step": 21500 + }, + { + "epoch": 0.32, + "learning_rate": 4.8403336488405116e-05, + "loss": 1.8923, + "step": 21600 + }, + { + "epoch": 0.32, + "learning_rate": 4.83959417889257e-05, + "loss": 1.8978, + "step": 21700 + }, + { + "epoch": 0.32, + "learning_rate": 4.838854708944629e-05, + "loss": 1.8871, + "step": 21800 + }, + { + "epoch": 0.32, + "learning_rate": 4.838115238996687e-05, + "loss": 1.8718, + "step": 21900 + }, + { + "epoch": 0.33, + "learning_rate": 4.837375769048746e-05, + "loss": 1.9119, + "step": 22000 + }, + { + "epoch": 0.33, + "learning_rate": 4.8366362991008044e-05, + "loss": 1.8625, + "step": 22100 + }, + { + "epoch": 0.33, + "learning_rate": 4.835896829152864e-05, + "loss": 1.8834, + "step": 22200 + }, + { + "epoch": 0.33, + "learning_rate": 4.835157359204922e-05, + "loss": 1.8734, + "step": 22300 + }, + { + "epoch": 0.33, + "learning_rate": 4.834417889256981e-05, + "loss": 1.8839, + "step": 22400 + }, + { + "epoch": 0.33, + "learning_rate": 4.8336784193090394e-05, + "loss": 1.8718, + "step": 22500 + }, + { + "epoch": 0.33, + "learning_rate": 4.832938949361098e-05, + "loss": 1.9251, + "step": 22600 + }, + { + "epoch": 0.34, + "learning_rate": 4.832199479413157e-05, + "loss": 1.8665, + "step": 22700 + }, + { + "epoch": 0.34, + "learning_rate": 4.831460009465216e-05, + "loss": 1.8725, + "step": 22800 + }, + { + "epoch": 0.34, + "learning_rate": 4.830720539517274e-05, + "loss": 1.881, + "step": 22900 + }, + { + "epoch": 0.34, + "learning_rate": 4.829981069569333e-05, + "loss": 1.875, + "step": 23000 + }, + { + "epoch": 0.34, + "learning_rate": 4.829241599621392e-05, + "loss": 1.8611, + "step": 23100 + }, + { + "epoch": 0.34, + "learning_rate": 4.82850212967345e-05, + "loss": 1.8724, + "step": 23200 + }, + { + "epoch": 0.34, + "learning_rate": 4.8277626597255086e-05, + "loss": 1.8807, + "step": 23300 + }, + { + "epoch": 0.35, + "learning_rate": 4.827023189777568e-05, + "loss": 1.8552, + "step": 23400 + }, + { + "epoch": 0.35, + "learning_rate": 4.8262837198296264e-05, + "loss": 1.853, + "step": 23500 + }, + { + "epoch": 0.35, + "learning_rate": 4.825544249881685e-05, + "loss": 1.8351, + "step": 23600 + }, + { + "epoch": 0.35, + "learning_rate": 4.8248047799337435e-05, + "loss": 1.875, + "step": 23700 + }, + { + "epoch": 0.35, + "learning_rate": 4.824065309985803e-05, + "loss": 1.8807, + "step": 23800 + }, + { + "epoch": 0.35, + "learning_rate": 4.823325840037861e-05, + "loss": 1.8734, + "step": 23900 + }, + { + "epoch": 0.35, + "learning_rate": 4.82258637008992e-05, + "loss": 1.8642, + "step": 24000 + }, + { + "epoch": 0.36, + "learning_rate": 4.8218469001419784e-05, + "loss": 1.8599, + "step": 24100 + }, + { + "epoch": 0.36, + "learning_rate": 4.8211074301940377e-05, + "loss": 1.8656, + "step": 24200 + }, + { + "epoch": 0.36, + "learning_rate": 4.820367960246096e-05, + "loss": 1.8629, + "step": 24300 + }, + { + "epoch": 0.36, + "learning_rate": 4.819628490298155e-05, + "loss": 1.8598, + "step": 24400 + }, + { + "epoch": 0.36, + "learning_rate": 4.818889020350213e-05, + "loss": 1.8552, + "step": 24500 + }, + { + "epoch": 0.36, + "learning_rate": 4.818149550402272e-05, + "loss": 1.8565, + "step": 24600 + }, + { + "epoch": 0.37, + "learning_rate": 4.81741747515381e-05, + "loss": 1.8701, + "step": 24700 + }, + { + "epoch": 0.37, + "learning_rate": 4.8166780052058685e-05, + "loss": 1.8765, + "step": 24800 + }, + { + "epoch": 0.37, + "learning_rate": 4.815938535257927e-05, + "loss": 1.8726, + "step": 24900 + }, + { + "epoch": 0.37, + "learning_rate": 4.815199065309986e-05, + "loss": 1.8706, + "step": 25000 + }, + { + "epoch": 0.37, + "learning_rate": 4.814459595362045e-05, + "loss": 1.873, + "step": 25100 + }, + { + "epoch": 0.37, + "learning_rate": 4.8137201254141035e-05, + "loss": 1.8408, + "step": 25200 + }, + { + "epoch": 0.37, + "learning_rate": 4.812980655466162e-05, + "loss": 1.8744, + "step": 25300 + }, + { + "epoch": 0.38, + "learning_rate": 4.8122411855182206e-05, + "loss": 1.885, + "step": 25400 + }, + { + "epoch": 0.38, + "learning_rate": 4.81150171557028e-05, + "loss": 1.8794, + "step": 25500 + }, + { + "epoch": 0.38, + "learning_rate": 4.8107622456223384e-05, + "loss": 1.842, + "step": 25600 + }, + { + "epoch": 0.38, + "learning_rate": 4.810022775674397e-05, + "loss": 1.8753, + "step": 25700 + }, + { + "epoch": 0.38, + "learning_rate": 4.8092833057264555e-05, + "loss": 1.8326, + "step": 25800 + }, + { + "epoch": 0.38, + "learning_rate": 4.808543835778514e-05, + "loss": 1.8327, + "step": 25900 + }, + { + "epoch": 0.38, + "learning_rate": 4.8078043658305726e-05, + "loss": 1.8257, + "step": 26000 + }, + { + "epoch": 0.39, + "learning_rate": 4.807064895882631e-05, + "loss": 1.8652, + "step": 26100 + }, + { + "epoch": 0.39, + "learning_rate": 4.8063254259346904e-05, + "loss": 1.8509, + "step": 26200 + }, + { + "epoch": 0.39, + "learning_rate": 4.805585955986749e-05, + "loss": 1.8735, + "step": 26300 + }, + { + "epoch": 0.39, + "learning_rate": 4.8048464860388076e-05, + "loss": 1.8531, + "step": 26400 + }, + { + "epoch": 0.39, + "learning_rate": 4.804107016090866e-05, + "loss": 1.8617, + "step": 26500 + }, + { + "epoch": 0.39, + "learning_rate": 4.8033675461429254e-05, + "loss": 1.8583, + "step": 26600 + }, + { + "epoch": 0.39, + "learning_rate": 4.802628076194984e-05, + "loss": 1.8491, + "step": 26700 + }, + { + "epoch": 0.4, + "learning_rate": 4.8018886062470425e-05, + "loss": 1.8717, + "step": 26800 + }, + { + "epoch": 0.4, + "learning_rate": 4.8011565309985806e-05, + "loss": 1.8676, + "step": 26900 + }, + { + "epoch": 0.4, + "learning_rate": 4.800417061050639e-05, + "loss": 1.8635, + "step": 27000 + }, + { + "epoch": 0.4, + "learning_rate": 4.799677591102698e-05, + "loss": 1.8291, + "step": 27100 + }, + { + "epoch": 0.4, + "learning_rate": 4.798938121154756e-05, + "loss": 1.859, + "step": 27200 + }, + { + "epoch": 0.4, + "learning_rate": 4.798198651206815e-05, + "loss": 1.8158, + "step": 27300 + }, + { + "epoch": 0.41, + "learning_rate": 4.797459181258874e-05, + "loss": 1.8652, + "step": 27400 + }, + { + "epoch": 0.41, + "learning_rate": 4.7967197113109326e-05, + "loss": 1.8637, + "step": 27500 + }, + { + "epoch": 0.41, + "learning_rate": 4.795980241362991e-05, + "loss": 1.8458, + "step": 27600 + }, + { + "epoch": 0.41, + "learning_rate": 4.79524077141505e-05, + "loss": 1.8086, + "step": 27700 + }, + { + "epoch": 0.41, + "learning_rate": 4.794501301467108e-05, + "loss": 1.8499, + "step": 27800 + }, + { + "epoch": 0.41, + "learning_rate": 4.7937618315191675e-05, + "loss": 1.879, + "step": 27900 + }, + { + "epoch": 0.41, + "learning_rate": 4.793022361571226e-05, + "loss": 1.8575, + "step": 28000 + }, + { + "epoch": 0.42, + "learning_rate": 4.792282891623285e-05, + "loss": 1.8326, + "step": 28100 + }, + { + "epoch": 0.42, + "learning_rate": 4.791543421675343e-05, + "loss": 1.875, + "step": 28200 + }, + { + "epoch": 0.42, + "learning_rate": 4.7908039517274025e-05, + "loss": 1.8682, + "step": 28300 + }, + { + "epoch": 0.42, + "learning_rate": 4.790064481779461e-05, + "loss": 1.8373, + "step": 28400 + }, + { + "epoch": 0.42, + "learning_rate": 4.789325011831519e-05, + "loss": 1.7958, + "step": 28500 + }, + { + "epoch": 0.42, + "learning_rate": 4.788585541883578e-05, + "loss": 1.8596, + "step": 28600 + }, + { + "epoch": 0.42, + "learning_rate": 4.787846071935637e-05, + "loss": 1.8747, + "step": 28700 + }, + { + "epoch": 0.43, + "learning_rate": 4.787106601987695e-05, + "loss": 1.8629, + "step": 28800 + }, + { + "epoch": 0.43, + "learning_rate": 4.786367132039754e-05, + "loss": 1.8307, + "step": 28900 + }, + { + "epoch": 0.43, + "learning_rate": 4.785627662091813e-05, + "loss": 1.8578, + "step": 29000 + }, + { + "epoch": 0.43, + "learning_rate": 4.784888192143872e-05, + "loss": 1.8484, + "step": 29100 + }, + { + "epoch": 0.43, + "learning_rate": 4.78414872219593e-05, + "loss": 1.8428, + "step": 29200 + }, + { + "epoch": 0.43, + "learning_rate": 4.783409252247989e-05, + "loss": 1.8385, + "step": 29300 + }, + { + "epoch": 0.43, + "learning_rate": 4.7826697823000474e-05, + "loss": 1.8431, + "step": 29400 + }, + { + "epoch": 0.44, + "learning_rate": 4.7819303123521066e-05, + "loss": 1.8292, + "step": 29500 + }, + { + "epoch": 0.44, + "learning_rate": 4.781190842404165e-05, + "loss": 1.8305, + "step": 29600 + }, + { + "epoch": 0.44, + "learning_rate": 4.780451372456224e-05, + "loss": 1.8412, + "step": 29700 + }, + { + "epoch": 0.44, + "learning_rate": 4.779711902508282e-05, + "loss": 1.8244, + "step": 29800 + }, + { + "epoch": 0.44, + "learning_rate": 4.778972432560341e-05, + "loss": 1.8344, + "step": 29900 + }, + { + "epoch": 0.44, + "learning_rate": 4.7782329626123994e-05, + "loss": 1.8502, + "step": 30000 + }, + { + "epoch": 0.45, + "learning_rate": 4.777493492664458e-05, + "loss": 1.8605, + "step": 30100 + }, + { + "epoch": 0.45, + "learning_rate": 4.776754022716517e-05, + "loss": 1.8364, + "step": 30200 + }, + { + "epoch": 0.45, + "learning_rate": 4.776021947468055e-05, + "loss": 1.8621, + "step": 30300 + }, + { + "epoch": 0.45, + "learning_rate": 4.775282477520114e-05, + "loss": 1.8395, + "step": 30400 + }, + { + "epoch": 0.45, + "learning_rate": 4.7745430075721724e-05, + "loss": 1.7994, + "step": 30500 + }, + { + "epoch": 0.45, + "learning_rate": 4.773803537624231e-05, + "loss": 1.8275, + "step": 30600 + }, + { + "epoch": 0.45, + "learning_rate": 4.77306406767629e-05, + "loss": 1.8363, + "step": 30700 + }, + { + "epoch": 0.46, + "learning_rate": 4.772324597728349e-05, + "loss": 1.8223, + "step": 30800 + }, + { + "epoch": 0.46, + "learning_rate": 4.771585127780407e-05, + "loss": 1.8288, + "step": 30900 + }, + { + "epoch": 0.46, + "learning_rate": 4.770845657832466e-05, + "loss": 1.8547, + "step": 31000 + }, + { + "epoch": 0.46, + "learning_rate": 4.7701061878845245e-05, + "loss": 1.8582, + "step": 31100 + }, + { + "epoch": 0.46, + "learning_rate": 4.769366717936583e-05, + "loss": 1.8086, + "step": 31200 + }, + { + "epoch": 0.46, + "learning_rate": 4.7686272479886416e-05, + "loss": 1.8162, + "step": 31300 + }, + { + "epoch": 0.46, + "learning_rate": 4.767887778040701e-05, + "loss": 1.8085, + "step": 31400 + }, + { + "epoch": 0.47, + "learning_rate": 4.7671483080927594e-05, + "loss": 1.8304, + "step": 31500 + }, + { + "epoch": 0.47, + "learning_rate": 4.766408838144818e-05, + "loss": 1.8654, + "step": 31600 + }, + { + "epoch": 0.47, + "learning_rate": 4.7656693681968765e-05, + "loss": 1.8079, + "step": 31700 + }, + { + "epoch": 0.47, + "learning_rate": 4.764929898248936e-05, + "loss": 1.8224, + "step": 31800 + }, + { + "epoch": 0.47, + "learning_rate": 4.764190428300994e-05, + "loss": 1.8346, + "step": 31900 + }, + { + "epoch": 0.47, + "learning_rate": 4.763450958353053e-05, + "loss": 1.8433, + "step": 32000 + }, + { + "epoch": 0.47, + "learning_rate": 4.7627114884051115e-05, + "loss": 1.8315, + "step": 32100 + }, + { + "epoch": 0.48, + "learning_rate": 4.76197201845717e-05, + "loss": 1.8166, + "step": 32200 + }, + { + "epoch": 0.48, + "learning_rate": 4.761232548509229e-05, + "loss": 1.8255, + "step": 32300 + }, + { + "epoch": 0.48, + "learning_rate": 4.760493078561287e-05, + "loss": 1.7861, + "step": 32400 + }, + { + "epoch": 0.48, + "learning_rate": 4.759753608613346e-05, + "loss": 1.8063, + "step": 32500 + }, + { + "epoch": 0.48, + "learning_rate": 4.759014138665405e-05, + "loss": 1.8162, + "step": 32600 + }, + { + "epoch": 0.48, + "learning_rate": 4.758282063416943e-05, + "loss": 1.8133, + "step": 32700 + }, + { + "epoch": 0.49, + "learning_rate": 4.7575425934690016e-05, + "loss": 1.8252, + "step": 32800 + }, + { + "epoch": 0.49, + "learning_rate": 4.75680312352106e-05, + "loss": 1.8241, + "step": 32900 + }, + { + "epoch": 0.49, + "learning_rate": 4.756063653573119e-05, + "loss": 1.8034, + "step": 33000 + }, + { + "epoch": 0.49, + "learning_rate": 4.755324183625178e-05, + "loss": 1.8063, + "step": 33100 + }, + { + "epoch": 0.49, + "learning_rate": 4.7545847136772365e-05, + "loss": 1.846, + "step": 33200 + }, + { + "epoch": 0.49, + "learning_rate": 4.753845243729295e-05, + "loss": 1.8431, + "step": 33300 + }, + { + "epoch": 0.49, + "learning_rate": 4.7531057737813536e-05, + "loss": 1.8001, + "step": 33400 + }, + { + "epoch": 0.5, + "learning_rate": 4.752366303833413e-05, + "loss": 1.8041, + "step": 33500 + }, + { + "epoch": 0.5, + "learning_rate": 4.7516268338854714e-05, + "loss": 1.8031, + "step": 33600 + }, + { + "epoch": 0.5, + "learning_rate": 4.750887363937529e-05, + "loss": 1.8613, + "step": 33700 + }, + { + "epoch": 0.5, + "learning_rate": 4.7501478939895885e-05, + "loss": 1.8254, + "step": 33800 + }, + { + "epoch": 0.5, + "learning_rate": 4.749408424041647e-05, + "loss": 1.8084, + "step": 33900 + }, + { + "epoch": 0.5, + "learning_rate": 4.748668954093706e-05, + "loss": 1.8285, + "step": 34000 + }, + { + "epoch": 0.5, + "learning_rate": 4.747929484145764e-05, + "loss": 1.8077, + "step": 34100 + }, + { + "epoch": 0.51, + "learning_rate": 4.7471900141978235e-05, + "loss": 1.797, + "step": 34200 + }, + { + "epoch": 0.51, + "learning_rate": 4.746450544249882e-05, + "loss": 1.8165, + "step": 34300 + }, + { + "epoch": 0.51, + "learning_rate": 4.7457110743019406e-05, + "loss": 1.8253, + "step": 34400 + }, + { + "epoch": 0.51, + "learning_rate": 4.744971604353999e-05, + "loss": 1.8192, + "step": 34500 + }, + { + "epoch": 0.51, + "learning_rate": 4.744232134406058e-05, + "loss": 1.817, + "step": 34600 + }, + { + "epoch": 0.51, + "learning_rate": 4.743492664458117e-05, + "loss": 1.8394, + "step": 34700 + }, + { + "epoch": 0.51, + "learning_rate": 4.7427531945101755e-05, + "loss": 1.8201, + "step": 34800 + }, + { + "epoch": 0.52, + "learning_rate": 4.742013724562234e-05, + "loss": 1.8149, + "step": 34900 + }, + { + "epoch": 0.52, + "learning_rate": 4.741274254614293e-05, + "loss": 1.8006, + "step": 35000 + }, + { + "epoch": 0.52, + "learning_rate": 4.740534784666351e-05, + "loss": 1.8284, + "step": 35100 + }, + { + "epoch": 0.52, + "learning_rate": 4.73979531471841e-05, + "loss": 1.834, + "step": 35200 + }, + { + "epoch": 0.52, + "learning_rate": 4.7390558447704684e-05, + "loss": 1.8289, + "step": 35300 + }, + { + "epoch": 0.52, + "learning_rate": 4.7383163748225276e-05, + "loss": 1.776, + "step": 35400 + }, + { + "epoch": 0.53, + "learning_rate": 4.737576904874586e-05, + "loss": 1.7996, + "step": 35500 + }, + { + "epoch": 0.53, + "learning_rate": 4.736837434926645e-05, + "loss": 1.8297, + "step": 35600 + }, + { + "epoch": 0.53, + "learning_rate": 4.736105359678183e-05, + "loss": 1.8323, + "step": 35700 + }, + { + "epoch": 0.53, + "learning_rate": 4.735365889730241e-05, + "loss": 1.7792, + "step": 35800 + }, + { + "epoch": 0.53, + "learning_rate": 4.7346264197823006e-05, + "loss": 1.8141, + "step": 35900 + }, + { + "epoch": 0.53, + "learning_rate": 4.733886949834359e-05, + "loss": 1.8022, + "step": 36000 + }, + { + "epoch": 0.53, + "learning_rate": 4.733147479886418e-05, + "loss": 1.8013, + "step": 36100 + }, + { + "epoch": 0.54, + "learning_rate": 4.732408009938476e-05, + "loss": 1.8144, + "step": 36200 + }, + { + "epoch": 0.54, + "learning_rate": 4.7316685399905355e-05, + "loss": 1.8317, + "step": 36300 + }, + { + "epoch": 0.54, + "learning_rate": 4.7309290700425934e-05, + "loss": 1.7999, + "step": 36400 + }, + { + "epoch": 0.54, + "learning_rate": 4.730189600094652e-05, + "loss": 1.838, + "step": 36500 + }, + { + "epoch": 0.54, + "learning_rate": 4.729450130146711e-05, + "loss": 1.8051, + "step": 36600 + }, + { + "epoch": 0.54, + "learning_rate": 4.72871066019877e-05, + "loss": 1.828, + "step": 36700 + }, + { + "epoch": 0.54, + "learning_rate": 4.727971190250828e-05, + "loss": 1.7934, + "step": 36800 + }, + { + "epoch": 0.55, + "learning_rate": 4.727231720302887e-05, + "loss": 1.8061, + "step": 36900 + }, + { + "epoch": 0.55, + "learning_rate": 4.726492250354946e-05, + "loss": 1.8287, + "step": 37000 + }, + { + "epoch": 0.55, + "learning_rate": 4.725752780407005e-05, + "loss": 1.7928, + "step": 37100 + }, + { + "epoch": 0.55, + "learning_rate": 4.725013310459063e-05, + "loss": 1.797, + "step": 37200 + }, + { + "epoch": 0.55, + "learning_rate": 4.724273840511122e-05, + "loss": 1.8524, + "step": 37300 + }, + { + "epoch": 0.55, + "learning_rate": 4.7235343705631804e-05, + "loss": 1.7954, + "step": 37400 + }, + { + "epoch": 0.55, + "learning_rate": 4.7227949006152396e-05, + "loss": 1.8148, + "step": 37500 + }, + { + "epoch": 0.56, + "learning_rate": 4.722055430667298e-05, + "loss": 1.808, + "step": 37600 + }, + { + "epoch": 0.56, + "learning_rate": 4.721315960719356e-05, + "loss": 1.7973, + "step": 37700 + }, + { + "epoch": 0.56, + "learning_rate": 4.720576490771415e-05, + "loss": 1.8017, + "step": 37800 + }, + { + "epoch": 0.56, + "learning_rate": 4.719837020823474e-05, + "loss": 1.8122, + "step": 37900 + }, + { + "epoch": 0.56, + "learning_rate": 4.7190975508755325e-05, + "loss": 1.8046, + "step": 38000 + }, + { + "epoch": 0.56, + "learning_rate": 4.718358080927591e-05, + "loss": 1.7849, + "step": 38100 + }, + { + "epoch": 0.56, + "learning_rate": 4.71761861097965e-05, + "loss": 1.8101, + "step": 38200 + }, + { + "epoch": 0.57, + "learning_rate": 4.716879141031709e-05, + "loss": 1.7937, + "step": 38300 + }, + { + "epoch": 0.57, + "learning_rate": 4.7161396710837674e-05, + "loss": 1.8027, + "step": 38400 + }, + { + "epoch": 0.57, + "learning_rate": 4.715400201135826e-05, + "loss": 1.8289, + "step": 38500 + }, + { + "epoch": 0.57, + "learning_rate": 4.714660731187885e-05, + "loss": 1.7869, + "step": 38600 + }, + { + "epoch": 0.57, + "learning_rate": 4.713921261239944e-05, + "loss": 1.7739, + "step": 38700 + }, + { + "epoch": 0.57, + "learning_rate": 4.713181791292002e-05, + "loss": 1.7466, + "step": 38800 + }, + { + "epoch": 0.58, + "learning_rate": 4.712442321344061e-05, + "loss": 1.8165, + "step": 38900 + }, + { + "epoch": 0.58, + "learning_rate": 4.7117028513961194e-05, + "loss": 1.7868, + "step": 39000 + }, + { + "epoch": 0.58, + "learning_rate": 4.710963381448178e-05, + "loss": 1.7665, + "step": 39100 + }, + { + "epoch": 0.58, + "learning_rate": 4.710231306199716e-05, + "loss": 1.7795, + "step": 39200 + }, + { + "epoch": 0.58, + "learning_rate": 4.7094918362517746e-05, + "loss": 1.7745, + "step": 39300 + }, + { + "epoch": 0.58, + "learning_rate": 4.708752366303834e-05, + "loss": 1.7927, + "step": 39400 + }, + { + "epoch": 0.58, + "learning_rate": 4.7080128963558924e-05, + "loss": 1.7867, + "step": 39500 + }, + { + "epoch": 0.59, + "learning_rate": 4.707273426407951e-05, + "loss": 1.7866, + "step": 39600 + }, + { + "epoch": 0.59, + "learning_rate": 4.7065339564600095e-05, + "loss": 1.7803, + "step": 39700 + }, + { + "epoch": 0.59, + "learning_rate": 4.705794486512068e-05, + "loss": 1.7881, + "step": 39800 + }, + { + "epoch": 0.59, + "learning_rate": 4.7050550165641274e-05, + "loss": 1.7949, + "step": 39900 + }, + { + "epoch": 0.59, + "learning_rate": 4.704315546616186e-05, + "loss": 1.7869, + "step": 40000 + }, + { + "epoch": 0.59, + "learning_rate": 4.7035760766682445e-05, + "loss": 1.7688, + "step": 40100 + }, + { + "epoch": 0.59, + "learning_rate": 4.702836606720303e-05, + "loss": 1.8111, + "step": 40200 + }, + { + "epoch": 0.6, + "learning_rate": 4.7020971367723616e-05, + "loss": 1.7692, + "step": 40300 + }, + { + "epoch": 0.6, + "learning_rate": 4.70135766682442e-05, + "loss": 1.7678, + "step": 40400 + }, + { + "epoch": 0.6, + "learning_rate": 4.700618196876479e-05, + "loss": 1.7915, + "step": 40500 + }, + { + "epoch": 0.6, + "learning_rate": 4.699878726928538e-05, + "loss": 1.761, + "step": 40600 + }, + { + "epoch": 0.6, + "learning_rate": 4.6991392569805965e-05, + "loss": 1.7747, + "step": 40700 + }, + { + "epoch": 0.6, + "learning_rate": 4.698399787032655e-05, + "loss": 1.7642, + "step": 40800 + }, + { + "epoch": 0.6, + "learning_rate": 4.697660317084714e-05, + "loss": 1.7973, + "step": 40900 + }, + { + "epoch": 0.61, + "learning_rate": 4.696920847136773e-05, + "loss": 1.7942, + "step": 41000 + }, + { + "epoch": 0.61, + "learning_rate": 4.6961813771888315e-05, + "loss": 1.7908, + "step": 41100 + }, + { + "epoch": 0.61, + "learning_rate": 4.69544190724089e-05, + "loss": 1.7836, + "step": 41200 + }, + { + "epoch": 0.61, + "learning_rate": 4.6947024372929486e-05, + "loss": 1.803, + "step": 41300 + }, + { + "epoch": 0.61, + "learning_rate": 4.693962967345008e-05, + "loss": 1.7714, + "step": 41400 + }, + { + "epoch": 0.61, + "learning_rate": 4.6932234973970664e-05, + "loss": 1.7912, + "step": 41500 + }, + { + "epoch": 0.62, + "learning_rate": 4.692484027449125e-05, + "loss": 1.7617, + "step": 41600 + }, + { + "epoch": 0.62, + "learning_rate": 4.691744557501183e-05, + "loss": 1.7715, + "step": 41700 + }, + { + "epoch": 0.62, + "learning_rate": 4.6910124822527216e-05, + "loss": 1.7794, + "step": 41800 + }, + { + "epoch": 0.62, + "learning_rate": 4.69027301230478e-05, + "loss": 1.7629, + "step": 41900 + }, + { + "epoch": 0.62, + "learning_rate": 4.689533542356839e-05, + "loss": 1.7977, + "step": 42000 + }, + { + "epoch": 0.62, + "learning_rate": 4.688794072408897e-05, + "loss": 1.7731, + "step": 42100 + }, + { + "epoch": 0.62, + "learning_rate": 4.6880546024609565e-05, + "loss": 1.7539, + "step": 42200 + }, + { + "epoch": 0.63, + "learning_rate": 4.687315132513015e-05, + "loss": 1.788, + "step": 42300 + }, + { + "epoch": 0.63, + "learning_rate": 4.6865756625650736e-05, + "loss": 1.7804, + "step": 42400 + }, + { + "epoch": 0.63, + "learning_rate": 4.685836192617132e-05, + "loss": 1.7973, + "step": 42500 + }, + { + "epoch": 0.63, + "learning_rate": 4.685096722669191e-05, + "loss": 1.8139, + "step": 42600 + }, + { + "epoch": 0.63, + "learning_rate": 4.68435725272125e-05, + "loss": 1.7456, + "step": 42700 + }, + { + "epoch": 0.63, + "learning_rate": 4.6836177827733086e-05, + "loss": 1.7873, + "step": 42800 + }, + { + "epoch": 0.63, + "learning_rate": 4.682878312825367e-05, + "loss": 1.7842, + "step": 42900 + }, + { + "epoch": 0.64, + "learning_rate": 4.682138842877426e-05, + "loss": 1.7672, + "step": 43000 + }, + { + "epoch": 0.64, + "learning_rate": 4.681399372929484e-05, + "loss": 1.7784, + "step": 43100 + }, + { + "epoch": 0.64, + "learning_rate": 4.680659902981543e-05, + "loss": 1.7817, + "step": 43200 + }, + { + "epoch": 0.64, + "learning_rate": 4.6799204330336014e-05, + "loss": 1.7407, + "step": 43300 + }, + { + "epoch": 0.64, + "learning_rate": 4.6791809630856606e-05, + "loss": 1.7646, + "step": 43400 + }, + { + "epoch": 0.64, + "learning_rate": 4.678441493137719e-05, + "loss": 1.7657, + "step": 43500 + }, + { + "epoch": 0.64, + "learning_rate": 4.677702023189778e-05, + "loss": 1.7983, + "step": 43600 + }, + { + "epoch": 0.65, + "learning_rate": 4.676962553241836e-05, + "loss": 1.743, + "step": 43700 + }, + { + "epoch": 0.65, + "learning_rate": 4.6762230832938956e-05, + "loss": 1.8009, + "step": 43800 + }, + { + "epoch": 0.65, + "learning_rate": 4.6754910080454336e-05, + "loss": 1.8079, + "step": 43900 + }, + { + "epoch": 0.65, + "learning_rate": 4.674751538097492e-05, + "loss": 1.7845, + "step": 44000 + }, + { + "epoch": 0.65, + "learning_rate": 4.674012068149551e-05, + "loss": 1.7908, + "step": 44100 + }, + { + "epoch": 0.65, + "learning_rate": 4.673272598201609e-05, + "loss": 1.7645, + "step": 44200 + }, + { + "epoch": 0.66, + "learning_rate": 4.672533128253668e-05, + "loss": 1.792, + "step": 44300 + }, + { + "epoch": 0.66, + "learning_rate": 4.6717936583057264e-05, + "loss": 1.7845, + "step": 44400 + }, + { + "epoch": 0.66, + "learning_rate": 4.671054188357785e-05, + "loss": 1.7503, + "step": 44500 + }, + { + "epoch": 0.66, + "learning_rate": 4.670314718409844e-05, + "loss": 1.7454, + "step": 44600 + }, + { + "epoch": 0.66, + "learning_rate": 4.669575248461903e-05, + "loss": 1.7689, + "step": 44700 + }, + { + "epoch": 0.66, + "learning_rate": 4.6688357785139614e-05, + "loss": 1.7625, + "step": 44800 + }, + { + "epoch": 0.66, + "learning_rate": 4.66809630856602e-05, + "loss": 1.7851, + "step": 44900 + }, + { + "epoch": 0.67, + "learning_rate": 4.6673568386180785e-05, + "loss": 1.7418, + "step": 45000 + }, + { + "epoch": 0.67, + "learning_rate": 4.666617368670138e-05, + "loss": 1.7588, + "step": 45100 + }, + { + "epoch": 0.67, + "learning_rate": 4.665877898722196e-05, + "loss": 1.7847, + "step": 45200 + }, + { + "epoch": 0.67, + "learning_rate": 4.665138428774255e-05, + "loss": 1.7753, + "step": 45300 + }, + { + "epoch": 0.67, + "learning_rate": 4.6643989588263134e-05, + "loss": 1.782, + "step": 45400 + }, + { + "epoch": 0.67, + "learning_rate": 4.663659488878373e-05, + "loss": 1.7734, + "step": 45500 + }, + { + "epoch": 0.67, + "learning_rate": 4.6629200189304306e-05, + "loss": 1.765, + "step": 45600 + }, + { + "epoch": 0.68, + "learning_rate": 4.662180548982489e-05, + "loss": 1.7741, + "step": 45700 + }, + { + "epoch": 0.68, + "learning_rate": 4.6614410790345484e-05, + "loss": 1.766, + "step": 45800 + }, + { + "epoch": 0.68, + "learning_rate": 4.660701609086607e-05, + "loss": 1.7594, + "step": 45900 + }, + { + "epoch": 0.68, + "learning_rate": 4.6599621391386655e-05, + "loss": 1.781, + "step": 46000 + }, + { + "epoch": 0.68, + "learning_rate": 4.659222669190724e-05, + "loss": 1.7948, + "step": 46100 + }, + { + "epoch": 0.68, + "learning_rate": 4.658483199242783e-05, + "loss": 1.7626, + "step": 46200 + }, + { + "epoch": 0.68, + "learning_rate": 4.657743729294842e-05, + "loss": 1.7658, + "step": 46300 + }, + { + "epoch": 0.69, + "learning_rate": 4.6570042593469004e-05, + "loss": 1.7611, + "step": 46400 + }, + { + "epoch": 0.69, + "learning_rate": 4.656264789398959e-05, + "loss": 1.7607, + "step": 46500 + }, + { + "epoch": 0.69, + "learning_rate": 4.655532714150497e-05, + "loss": 1.7708, + "step": 46600 + }, + { + "epoch": 0.69, + "learning_rate": 4.654793244202556e-05, + "loss": 1.7282, + "step": 46700 + }, + { + "epoch": 0.69, + "learning_rate": 4.654053774254615e-05, + "loss": 1.7611, + "step": 46800 + }, + { + "epoch": 0.69, + "learning_rate": 4.653314304306673e-05, + "loss": 1.7682, + "step": 46900 + }, + { + "epoch": 0.7, + "learning_rate": 4.652574834358732e-05, + "loss": 1.7663, + "step": 47000 + }, + { + "epoch": 0.7, + "learning_rate": 4.6518353644107905e-05, + "loss": 1.7679, + "step": 47100 + }, + { + "epoch": 0.7, + "learning_rate": 4.651095894462849e-05, + "loss": 1.7655, + "step": 47200 + }, + { + "epoch": 0.7, + "learning_rate": 4.6503564245149076e-05, + "loss": 1.7753, + "step": 47300 + }, + { + "epoch": 0.7, + "learning_rate": 4.649616954566967e-05, + "loss": 1.7546, + "step": 47400 + }, + { + "epoch": 0.7, + "learning_rate": 4.6488774846190255e-05, + "loss": 1.7677, + "step": 47500 + }, + { + "epoch": 0.7, + "learning_rate": 4.648138014671084e-05, + "loss": 1.786, + "step": 47600 + }, + { + "epoch": 0.71, + "learning_rate": 4.6473985447231426e-05, + "loss": 1.7512, + "step": 47700 + }, + { + "epoch": 0.71, + "learning_rate": 4.646659074775201e-05, + "loss": 1.7708, + "step": 47800 + }, + { + "epoch": 0.71, + "learning_rate": 4.6459196048272604e-05, + "loss": 1.7607, + "step": 47900 + }, + { + "epoch": 0.71, + "learning_rate": 4.645180134879319e-05, + "loss": 1.78, + "step": 48000 + }, + { + "epoch": 0.71, + "learning_rate": 4.6444406649313775e-05, + "loss": 1.794, + "step": 48100 + }, + { + "epoch": 0.71, + "learning_rate": 4.643701194983436e-05, + "loss": 1.7604, + "step": 48200 + }, + { + "epoch": 0.71, + "learning_rate": 4.6429617250354946e-05, + "loss": 1.7383, + "step": 48300 + }, + { + "epoch": 0.72, + "learning_rate": 4.642222255087553e-05, + "loss": 1.7602, + "step": 48400 + }, + { + "epoch": 0.72, + "learning_rate": 4.641482785139612e-05, + "loss": 1.7424, + "step": 48500 + }, + { + "epoch": 0.72, + "learning_rate": 4.640743315191671e-05, + "loss": 1.7605, + "step": 48600 + }, + { + "epoch": 0.72, + "learning_rate": 4.640011239943209e-05, + "loss": 1.771, + "step": 48700 + }, + { + "epoch": 0.72, + "learning_rate": 4.6392717699952676e-05, + "loss": 1.7781, + "step": 48800 + }, + { + "epoch": 0.72, + "learning_rate": 4.638532300047326e-05, + "loss": 1.7859, + "step": 48900 + }, + { + "epoch": 0.72, + "learning_rate": 4.637792830099385e-05, + "loss": 1.7836, + "step": 49000 + }, + { + "epoch": 0.73, + "learning_rate": 4.637053360151444e-05, + "loss": 1.7654, + "step": 49100 + }, + { + "epoch": 0.73, + "learning_rate": 4.6363138902035026e-05, + "loss": 1.7866, + "step": 49200 + }, + { + "epoch": 0.73, + "learning_rate": 4.635574420255561e-05, + "loss": 1.7538, + "step": 49300 + }, + { + "epoch": 0.73, + "learning_rate": 4.63483495030762e-05, + "loss": 1.7573, + "step": 49400 + }, + { + "epoch": 0.73, + "learning_rate": 4.634095480359679e-05, + "loss": 1.7741, + "step": 49500 + }, + { + "epoch": 0.73, + "learning_rate": 4.633356010411737e-05, + "loss": 1.7288, + "step": 49600 + }, + { + "epoch": 0.74, + "learning_rate": 4.6326165404637954e-05, + "loss": 1.789, + "step": 49700 + }, + { + "epoch": 0.74, + "learning_rate": 4.6318770705158546e-05, + "loss": 1.7548, + "step": 49800 + }, + { + "epoch": 0.74, + "learning_rate": 4.631137600567913e-05, + "loss": 1.75, + "step": 49900 + }, + { + "epoch": 0.74, + "learning_rate": 4.630398130619972e-05, + "loss": 1.7316, + "step": 50000 + }, + { + "epoch": 0.74, + "learning_rate": 4.62965866067203e-05, + "loss": 1.7567, + "step": 50100 + }, + { + "epoch": 0.74, + "learning_rate": 4.628919190724089e-05, + "loss": 1.7842, + "step": 50200 + }, + { + "epoch": 0.74, + "learning_rate": 4.628179720776148e-05, + "loss": 1.7256, + "step": 50300 + }, + { + "epoch": 0.75, + "learning_rate": 4.627440250828207e-05, + "loss": 1.8103, + "step": 50400 + }, + { + "epoch": 0.75, + "learning_rate": 4.626700780880265e-05, + "loss": 1.753, + "step": 50500 + }, + { + "epoch": 0.75, + "learning_rate": 4.625961310932324e-05, + "loss": 1.7531, + "step": 50600 + }, + { + "epoch": 0.75, + "learning_rate": 4.6252292356838625e-05, + "loss": 1.7673, + "step": 50700 + }, + { + "epoch": 0.75, + "learning_rate": 4.624489765735921e-05, + "loss": 1.6973, + "step": 50800 + }, + { + "epoch": 0.75, + "learning_rate": 4.623750295787979e-05, + "loss": 1.75, + "step": 50900 + }, + { + "epoch": 0.75, + "learning_rate": 4.6230108258400375e-05, + "loss": 1.7644, + "step": 51000 + }, + { + "epoch": 0.76, + "learning_rate": 4.622271355892097e-05, + "loss": 1.7536, + "step": 51100 + }, + { + "epoch": 0.76, + "learning_rate": 4.6215318859441553e-05, + "loss": 1.7453, + "step": 51200 + }, + { + "epoch": 0.76, + "learning_rate": 4.620792415996214e-05, + "loss": 1.7729, + "step": 51300 + }, + { + "epoch": 0.76, + "learning_rate": 4.6200529460482725e-05, + "loss": 1.7514, + "step": 51400 + }, + { + "epoch": 0.76, + "learning_rate": 4.619313476100332e-05, + "loss": 1.7211, + "step": 51500 + }, + { + "epoch": 0.76, + "learning_rate": 4.61857400615239e-05, + "loss": 1.777, + "step": 51600 + }, + { + "epoch": 0.76, + "learning_rate": 4.617834536204449e-05, + "loss": 1.7453, + "step": 51700 + }, + { + "epoch": 0.77, + "learning_rate": 4.6170950662565074e-05, + "loss": 1.7488, + "step": 51800 + }, + { + "epoch": 0.77, + "learning_rate": 4.6163555963085666e-05, + "loss": 1.7438, + "step": 51900 + }, + { + "epoch": 0.77, + "learning_rate": 4.615616126360625e-05, + "loss": 1.7071, + "step": 52000 + }, + { + "epoch": 0.77, + "learning_rate": 4.614876656412684e-05, + "loss": 1.7346, + "step": 52100 + }, + { + "epoch": 0.77, + "learning_rate": 4.614137186464742e-05, + "loss": 1.7436, + "step": 52200 + }, + { + "epoch": 0.77, + "learning_rate": 4.613397716516801e-05, + "loss": 1.761, + "step": 52300 + }, + { + "epoch": 0.77, + "learning_rate": 4.6126582465688595e-05, + "loss": 1.7495, + "step": 52400 + }, + { + "epoch": 0.78, + "learning_rate": 4.611918776620918e-05, + "loss": 1.7501, + "step": 52500 + }, + { + "epoch": 0.78, + "learning_rate": 4.6111793066729766e-05, + "loss": 1.7356, + "step": 52600 + }, + { + "epoch": 0.78, + "learning_rate": 4.610439836725036e-05, + "loss": 1.7315, + "step": 52700 + }, + { + "epoch": 0.78, + "learning_rate": 4.6097003667770944e-05, + "loss": 1.7563, + "step": 52800 + }, + { + "epoch": 0.78, + "learning_rate": 4.608960896829153e-05, + "loss": 1.7512, + "step": 52900 + }, + { + "epoch": 0.78, + "learning_rate": 4.6082214268812115e-05, + "loss": 1.7517, + "step": 53000 + }, + { + "epoch": 0.79, + "learning_rate": 4.607481956933271e-05, + "loss": 1.7447, + "step": 53100 + }, + { + "epoch": 0.79, + "learning_rate": 4.606742486985329e-05, + "loss": 1.7181, + "step": 53200 + }, + { + "epoch": 0.79, + "learning_rate": 4.606003017037388e-05, + "loss": 1.7548, + "step": 53300 + }, + { + "epoch": 0.79, + "learning_rate": 4.6052635470894465e-05, + "loss": 1.7634, + "step": 53400 + }, + { + "epoch": 0.79, + "learning_rate": 4.604524077141506e-05, + "loss": 1.761, + "step": 53500 + }, + { + "epoch": 0.79, + "learning_rate": 4.6037846071935636e-05, + "loss": 1.7367, + "step": 53600 + }, + { + "epoch": 0.79, + "learning_rate": 4.603045137245622e-05, + "loss": 1.7466, + "step": 53700 + }, + { + "epoch": 0.8, + "learning_rate": 4.6023056672976814e-05, + "loss": 1.7266, + "step": 53800 + }, + { + "epoch": 0.8, + "learning_rate": 4.60156619734974e-05, + "loss": 1.7491, + "step": 53900 + }, + { + "epoch": 0.8, + "learning_rate": 4.6008267274017985e-05, + "loss": 1.7448, + "step": 54000 + }, + { + "epoch": 0.8, + "learning_rate": 4.600087257453857e-05, + "loss": 1.7238, + "step": 54100 + }, + { + "epoch": 0.8, + "learning_rate": 4.599347787505916e-05, + "loss": 1.7352, + "step": 54200 + }, + { + "epoch": 0.8, + "learning_rate": 4.598608317557975e-05, + "loss": 1.7386, + "step": 54300 + }, + { + "epoch": 0.8, + "learning_rate": 4.5978688476100334e-05, + "loss": 1.7512, + "step": 54400 + }, + { + "epoch": 0.81, + "learning_rate": 4.597129377662092e-05, + "loss": 1.7432, + "step": 54500 + }, + { + "epoch": 0.81, + "learning_rate": 4.5963899077141506e-05, + "loss": 1.7693, + "step": 54600 + }, + { + "epoch": 0.81, + "learning_rate": 4.59565043776621e-05, + "loss": 1.7302, + "step": 54700 + }, + { + "epoch": 0.81, + "learning_rate": 4.5949109678182684e-05, + "loss": 1.751, + "step": 54800 + }, + { + "epoch": 0.81, + "learning_rate": 4.594171497870326e-05, + "loss": 1.7279, + "step": 54900 + }, + { + "epoch": 0.81, + "learning_rate": 4.5934320279223855e-05, + "loss": 1.746, + "step": 55000 + }, + { + "epoch": 0.81, + "learning_rate": 4.592692557974444e-05, + "loss": 1.7433, + "step": 55100 + }, + { + "epoch": 0.82, + "learning_rate": 4.5919530880265026e-05, + "loss": 1.76, + "step": 55200 + }, + { + "epoch": 0.82, + "learning_rate": 4.591213618078561e-05, + "loss": 1.7278, + "step": 55300 + }, + { + "epoch": 0.82, + "learning_rate": 4.590481542830099e-05, + "loss": 1.7517, + "step": 55400 + }, + { + "epoch": 0.82, + "learning_rate": 4.5897420728821585e-05, + "loss": 1.7349, + "step": 55500 + }, + { + "epoch": 0.82, + "learning_rate": 4.589002602934217e-05, + "loss": 1.7422, + "step": 55600 + }, + { + "epoch": 0.82, + "learning_rate": 4.5882631329862756e-05, + "loss": 1.7348, + "step": 55700 + }, + { + "epoch": 0.83, + "learning_rate": 4.587523663038334e-05, + "loss": 1.7244, + "step": 55800 + }, + { + "epoch": 0.83, + "learning_rate": 4.5867841930903934e-05, + "loss": 1.7639, + "step": 55900 + }, + { + "epoch": 0.83, + "learning_rate": 4.586044723142452e-05, + "loss": 1.7663, + "step": 56000 + }, + { + "epoch": 0.83, + "learning_rate": 4.5853052531945105e-05, + "loss": 1.7351, + "step": 56100 + }, + { + "epoch": 0.83, + "learning_rate": 4.584565783246569e-05, + "loss": 1.719, + "step": 56200 + }, + { + "epoch": 0.83, + "learning_rate": 4.583826313298628e-05, + "loss": 1.7408, + "step": 56300 + }, + { + "epoch": 0.83, + "learning_rate": 4.583086843350686e-05, + "loss": 1.7307, + "step": 56400 + }, + { + "epoch": 0.84, + "learning_rate": 4.582347373402745e-05, + "loss": 1.7479, + "step": 56500 + }, + { + "epoch": 0.84, + "learning_rate": 4.581607903454804e-05, + "loss": 1.7718, + "step": 56600 + }, + { + "epoch": 0.84, + "learning_rate": 4.5808684335068626e-05, + "loss": 1.7378, + "step": 56700 + }, + { + "epoch": 0.84, + "learning_rate": 4.580128963558921e-05, + "loss": 1.741, + "step": 56800 + }, + { + "epoch": 0.84, + "learning_rate": 4.57938949361098e-05, + "loss": 1.7423, + "step": 56900 + }, + { + "epoch": 0.84, + "learning_rate": 4.578650023663038e-05, + "loss": 1.7297, + "step": 57000 + }, + { + "epoch": 0.84, + "learning_rate": 4.5779105537150975e-05, + "loss": 1.7669, + "step": 57100 + }, + { + "epoch": 0.85, + "learning_rate": 4.577171083767156e-05, + "loss": 1.7625, + "step": 57200 + }, + { + "epoch": 0.85, + "learning_rate": 4.576431613819215e-05, + "loss": 1.745, + "step": 57300 + }, + { + "epoch": 0.85, + "learning_rate": 4.575692143871273e-05, + "loss": 1.7359, + "step": 57400 + }, + { + "epoch": 0.85, + "learning_rate": 4.574952673923332e-05, + "loss": 1.724, + "step": 57500 + }, + { + "epoch": 0.85, + "learning_rate": 4.5742132039753904e-05, + "loss": 1.698, + "step": 57600 + }, + { + "epoch": 0.85, + "learning_rate": 4.573473734027449e-05, + "loss": 1.7201, + "step": 57700 + }, + { + "epoch": 0.85, + "learning_rate": 4.572734264079508e-05, + "loss": 1.75, + "step": 57800 + }, + { + "epoch": 0.86, + "learning_rate": 4.571994794131567e-05, + "loss": 1.7518, + "step": 57900 + }, + { + "epoch": 0.86, + "learning_rate": 4.571255324183625e-05, + "loss": 1.7491, + "step": 58000 + }, + { + "epoch": 0.86, + "learning_rate": 4.570515854235684e-05, + "loss": 1.7018, + "step": 58100 + }, + { + "epoch": 0.86, + "learning_rate": 4.569776384287743e-05, + "loss": 1.7489, + "step": 58200 + }, + { + "epoch": 0.86, + "learning_rate": 4.5690369143398017e-05, + "loss": 1.7427, + "step": 58300 + }, + { + "epoch": 0.86, + "learning_rate": 4.56830483909134e-05, + "loss": 1.748, + "step": 58400 + }, + { + "epoch": 0.87, + "learning_rate": 4.567565369143398e-05, + "loss": 1.7254, + "step": 58500 + }, + { + "epoch": 0.87, + "learning_rate": 4.566825899195457e-05, + "loss": 1.748, + "step": 58600 + }, + { + "epoch": 0.87, + "learning_rate": 4.566086429247516e-05, + "loss": 1.7413, + "step": 58700 + }, + { + "epoch": 0.87, + "learning_rate": 4.565346959299574e-05, + "loss": 1.7595, + "step": 58800 + }, + { + "epoch": 0.87, + "learning_rate": 4.5646074893516325e-05, + "loss": 1.7217, + "step": 58900 + }, + { + "epoch": 0.87, + "learning_rate": 4.563868019403692e-05, + "loss": 1.7287, + "step": 59000 + }, + { + "epoch": 0.87, + "learning_rate": 4.56312854945575e-05, + "loss": 1.7052, + "step": 59100 + }, + { + "epoch": 0.88, + "learning_rate": 4.562389079507809e-05, + "loss": 1.721, + "step": 59200 + }, + { + "epoch": 0.88, + "learning_rate": 4.5616496095598675e-05, + "loss": 1.7357, + "step": 59300 + }, + { + "epoch": 0.88, + "learning_rate": 4.560910139611927e-05, + "loss": 1.74, + "step": 59400 + }, + { + "epoch": 0.88, + "learning_rate": 4.560170669663985e-05, + "loss": 1.7229, + "step": 59500 + }, + { + "epoch": 0.88, + "learning_rate": 4.559431199716044e-05, + "loss": 1.7402, + "step": 59600 + }, + { + "epoch": 0.88, + "learning_rate": 4.5586917297681024e-05, + "loss": 1.7735, + "step": 59700 + }, + { + "epoch": 0.88, + "learning_rate": 4.557952259820161e-05, + "loss": 1.7196, + "step": 59800 + }, + { + "epoch": 0.89, + "learning_rate": 4.55721278987222e-05, + "loss": 1.7319, + "step": 59900 + }, + { + "epoch": 0.89, + "learning_rate": 4.556473319924279e-05, + "loss": 1.7132, + "step": 60000 + }, + { + "epoch": 0.89, + "learning_rate": 4.555733849976337e-05, + "loss": 1.737, + "step": 60100 + }, + { + "epoch": 0.89, + "learning_rate": 4.554994380028396e-05, + "loss": 1.7046, + "step": 60200 + }, + { + "epoch": 0.89, + "learning_rate": 4.5542549100804544e-05, + "loss": 1.7156, + "step": 60300 + }, + { + "epoch": 0.89, + "learning_rate": 4.553515440132513e-05, + "loss": 1.7484, + "step": 60400 + }, + { + "epoch": 0.89, + "learning_rate": 4.5527759701845716e-05, + "loss": 1.6991, + "step": 60500 + }, + { + "epoch": 0.9, + "learning_rate": 4.552036500236631e-05, + "loss": 1.7218, + "step": 60600 + }, + { + "epoch": 0.9, + "learning_rate": 4.5512970302886894e-05, + "loss": 1.7342, + "step": 60700 + }, + { + "epoch": 0.9, + "learning_rate": 4.5505649550402274e-05, + "loss": 1.7479, + "step": 60800 + }, + { + "epoch": 0.9, + "learning_rate": 4.549825485092286e-05, + "loss": 1.7051, + "step": 60900 + }, + { + "epoch": 0.9, + "learning_rate": 4.5490860151443446e-05, + "loss": 1.7051, + "step": 61000 + }, + { + "epoch": 0.9, + "learning_rate": 4.548346545196404e-05, + "loss": 1.7394, + "step": 61100 + }, + { + "epoch": 0.91, + "learning_rate": 4.5476070752484624e-05, + "loss": 1.7232, + "step": 61200 + }, + { + "epoch": 0.91, + "learning_rate": 4.546867605300521e-05, + "loss": 1.7034, + "step": 61300 + }, + { + "epoch": 0.91, + "learning_rate": 4.5461281353525795e-05, + "loss": 1.7138, + "step": 61400 + }, + { + "epoch": 0.91, + "learning_rate": 4.545388665404638e-05, + "loss": 1.728, + "step": 61500 + }, + { + "epoch": 0.91, + "learning_rate": 4.5446491954566966e-05, + "loss": 1.7295, + "step": 61600 + }, + { + "epoch": 0.91, + "learning_rate": 4.543909725508755e-05, + "loss": 1.7085, + "step": 61700 + }, + { + "epoch": 0.91, + "learning_rate": 4.5431702555608144e-05, + "loss": 1.7272, + "step": 61800 + }, + { + "epoch": 0.92, + "learning_rate": 4.542430785612873e-05, + "loss": 1.744, + "step": 61900 + }, + { + "epoch": 0.92, + "learning_rate": 4.5416913156649315e-05, + "loss": 1.739, + "step": 62000 + }, + { + "epoch": 0.92, + "learning_rate": 4.54095184571699e-05, + "loss": 1.7242, + "step": 62100 + }, + { + "epoch": 0.92, + "learning_rate": 4.540212375769049e-05, + "loss": 1.7257, + "step": 62200 + }, + { + "epoch": 0.92, + "learning_rate": 4.539472905821108e-05, + "loss": 1.7315, + "step": 62300 + }, + { + "epoch": 0.92, + "learning_rate": 4.5387334358731665e-05, + "loss": 1.7385, + "step": 62400 + }, + { + "epoch": 0.92, + "learning_rate": 4.537993965925225e-05, + "loss": 1.7208, + "step": 62500 + }, + { + "epoch": 0.93, + "learning_rate": 4.5372544959772836e-05, + "loss": 1.7236, + "step": 62600 + }, + { + "epoch": 0.93, + "learning_rate": 4.536515026029343e-05, + "loss": 1.7295, + "step": 62700 + }, + { + "epoch": 0.93, + "learning_rate": 4.535775556081401e-05, + "loss": 1.6798, + "step": 62800 + }, + { + "epoch": 0.93, + "learning_rate": 4.535036086133459e-05, + "loss": 1.7287, + "step": 62900 + }, + { + "epoch": 0.93, + "learning_rate": 4.5343040108849973e-05, + "loss": 1.7127, + "step": 63000 + }, + { + "epoch": 0.93, + "learning_rate": 4.5335645409370566e-05, + "loss": 1.7257, + "step": 63100 + }, + { + "epoch": 0.93, + "learning_rate": 4.532825070989115e-05, + "loss": 1.7089, + "step": 63200 + }, + { + "epoch": 0.94, + "learning_rate": 4.532085601041174e-05, + "loss": 1.7232, + "step": 63300 + }, + { + "epoch": 0.94, + "learning_rate": 4.531346131093232e-05, + "loss": 1.6997, + "step": 63400 + }, + { + "epoch": 0.94, + "learning_rate": 4.5306066611452915e-05, + "loss": 1.7407, + "step": 63500 + }, + { + "epoch": 0.94, + "learning_rate": 4.52986719119735e-05, + "loss": 1.7252, + "step": 63600 + }, + { + "epoch": 0.94, + "learning_rate": 4.5291277212494086e-05, + "loss": 1.7325, + "step": 63700 + }, + { + "epoch": 0.94, + "learning_rate": 4.528388251301467e-05, + "loss": 1.703, + "step": 63800 + }, + { + "epoch": 0.95, + "learning_rate": 4.5276487813535265e-05, + "loss": 1.7222, + "step": 63900 + }, + { + "epoch": 0.95, + "learning_rate": 4.526909311405585e-05, + "loss": 1.7445, + "step": 64000 + }, + { + "epoch": 0.95, + "learning_rate": 4.526169841457643e-05, + "loss": 1.7151, + "step": 64100 + }, + { + "epoch": 0.95, + "learning_rate": 4.525430371509702e-05, + "loss": 1.7084, + "step": 64200 + }, + { + "epoch": 0.95, + "learning_rate": 4.524690901561761e-05, + "loss": 1.7056, + "step": 64300 + }, + { + "epoch": 0.95, + "learning_rate": 4.523951431613819e-05, + "loss": 1.7038, + "step": 64400 + }, + { + "epoch": 0.95, + "learning_rate": 4.523219356365357e-05, + "loss": 1.7043, + "step": 64500 + }, + { + "epoch": 0.96, + "learning_rate": 4.522479886417416e-05, + "loss": 1.7337, + "step": 64600 + }, + { + "epoch": 0.96, + "learning_rate": 4.521740416469475e-05, + "loss": 1.7563, + "step": 64700 + }, + { + "epoch": 0.96, + "learning_rate": 4.521000946521534e-05, + "loss": 1.7321, + "step": 64800 + }, + { + "epoch": 0.96, + "learning_rate": 4.520261476573592e-05, + "loss": 1.6985, + "step": 64900 + }, + { + "epoch": 0.96, + "learning_rate": 4.519522006625651e-05, + "loss": 1.7249, + "step": 65000 + }, + { + "epoch": 0.96, + "learning_rate": 4.51878253667771e-05, + "loss": 1.7415, + "step": 65100 + }, + { + "epoch": 0.96, + "learning_rate": 4.5180430667297686e-05, + "loss": 1.7054, + "step": 65200 + }, + { + "epoch": 0.97, + "learning_rate": 4.517303596781827e-05, + "loss": 1.6725, + "step": 65300 + }, + { + "epoch": 0.97, + "learning_rate": 4.516564126833886e-05, + "loss": 1.6966, + "step": 65400 + }, + { + "epoch": 0.97, + "learning_rate": 4.515824656885944e-05, + "loss": 1.7048, + "step": 65500 + }, + { + "epoch": 0.97, + "learning_rate": 4.515085186938003e-05, + "loss": 1.7154, + "step": 65600 + }, + { + "epoch": 0.97, + "learning_rate": 4.5143457169900614e-05, + "loss": 1.6992, + "step": 65700 + }, + { + "epoch": 0.97, + "learning_rate": 4.51360624704212e-05, + "loss": 1.7224, + "step": 65800 + }, + { + "epoch": 0.97, + "learning_rate": 4.512866777094179e-05, + "loss": 1.6985, + "step": 65900 + }, + { + "epoch": 0.98, + "learning_rate": 4.512127307146238e-05, + "loss": 1.7052, + "step": 66000 + }, + { + "epoch": 0.98, + "learning_rate": 4.5113878371982964e-05, + "loss": 1.7244, + "step": 66100 + }, + { + "epoch": 0.98, + "learning_rate": 4.510648367250355e-05, + "loss": 1.7161, + "step": 66200 + }, + { + "epoch": 0.98, + "learning_rate": 4.509908897302414e-05, + "loss": 1.7339, + "step": 66300 + }, + { + "epoch": 0.98, + "learning_rate": 4.509169427354473e-05, + "loss": 1.7057, + "step": 66400 + }, + { + "epoch": 0.98, + "learning_rate": 4.508429957406531e-05, + "loss": 1.7268, + "step": 66500 + }, + { + "epoch": 0.98, + "learning_rate": 4.50769048745859e-05, + "loss": 1.6977, + "step": 66600 + }, + { + "epoch": 0.99, + "learning_rate": 4.506951017510649e-05, + "loss": 1.7667, + "step": 66700 + }, + { + "epoch": 0.99, + "learning_rate": 4.506211547562707e-05, + "loss": 1.7237, + "step": 66800 + }, + { + "epoch": 0.99, + "learning_rate": 4.5054720776147656e-05, + "loss": 1.6985, + "step": 66900 + }, + { + "epoch": 0.99, + "learning_rate": 4.504732607666825e-05, + "loss": 1.7174, + "step": 67000 + }, + { + "epoch": 0.99, + "learning_rate": 4.5039931377188834e-05, + "loss": 1.7205, + "step": 67100 + }, + { + "epoch": 0.99, + "learning_rate": 4.503253667770942e-05, + "loss": 1.7076, + "step": 67200 + }, + { + "epoch": 1.0, + "learning_rate": 4.5025141978230005e-05, + "loss": 1.6836, + "step": 67300 + }, + { + "epoch": 1.0, + "learning_rate": 4.501774727875059e-05, + "loss": 1.7388, + "step": 67400 + }, + { + "epoch": 1.0, + "learning_rate": 4.501035257927118e-05, + "loss": 1.7164, + "step": 67500 + }, + { + "epoch": 1.0, + "learning_rate": 4.500295787979177e-05, + "loss": 1.7383, + "step": 67600 + }, + { + "epoch": 1.0, + "learning_rate": 4.4995563180312354e-05, + "loss": 1.669, + "step": 67700 + }, + { + "epoch": 1.0, + "learning_rate": 4.498816848083294e-05, + "loss": 1.6484, + "step": 67800 + }, + { + "epoch": 1.0, + "learning_rate": 4.498077378135353e-05, + "loss": 1.6639, + "step": 67900 + }, + { + "epoch": 1.01, + "learning_rate": 4.497337908187412e-05, + "loss": 1.6289, + "step": 68000 + }, + { + "epoch": 1.01, + "learning_rate": 4.49659843823947e-05, + "loss": 1.6387, + "step": 68100 + }, + { + "epoch": 1.01, + "learning_rate": 4.495858968291529e-05, + "loss": 1.6487, + "step": 68200 + }, + { + "epoch": 1.01, + "learning_rate": 4.4951194983435875e-05, + "loss": 1.6556, + "step": 68300 + }, + { + "epoch": 1.01, + "learning_rate": 4.494380028395646e-05, + "loss": 1.6545, + "step": 68400 + }, + { + "epoch": 1.01, + "learning_rate": 4.4936405584477046e-05, + "loss": 1.6228, + "step": 68500 + }, + { + "epoch": 1.01, + "learning_rate": 4.492901088499764e-05, + "loss": 1.6589, + "step": 68600 + }, + { + "epoch": 1.02, + "learning_rate": 4.4921616185518224e-05, + "loss": 1.6365, + "step": 68700 + }, + { + "epoch": 1.02, + "learning_rate": 4.491422148603881e-05, + "loss": 1.6372, + "step": 68800 + }, + { + "epoch": 1.02, + "learning_rate": 4.4906826786559395e-05, + "loss": 1.654, + "step": 68900 + }, + { + "epoch": 1.02, + "learning_rate": 4.489943208707999e-05, + "loss": 1.6128, + "step": 69000 + }, + { + "epoch": 1.02, + "learning_rate": 4.4892037387600573e-05, + "loss": 1.6337, + "step": 69100 + }, + { + "epoch": 1.02, + "learning_rate": 4.4884716635115954e-05, + "loss": 1.6445, + "step": 69200 + }, + { + "epoch": 1.02, + "learning_rate": 4.487732193563654e-05, + "loss": 1.6732, + "step": 69300 + }, + { + "epoch": 1.03, + "learning_rate": 4.4869927236157125e-05, + "loss": 1.6443, + "step": 69400 + }, + { + "epoch": 1.03, + "learning_rate": 4.486253253667771e-05, + "loss": 1.6231, + "step": 69500 + }, + { + "epoch": 1.03, + "learning_rate": 4.4855137837198296e-05, + "loss": 1.6425, + "step": 69600 + }, + { + "epoch": 1.03, + "learning_rate": 4.484774313771888e-05, + "loss": 1.6342, + "step": 69700 + }, + { + "epoch": 1.03, + "learning_rate": 4.4840348438239475e-05, + "loss": 1.6311, + "step": 69800 + }, + { + "epoch": 1.03, + "learning_rate": 4.483295373876006e-05, + "loss": 1.6294, + "step": 69900 + }, + { + "epoch": 1.04, + "learning_rate": 4.4825559039280646e-05, + "loss": 1.6494, + "step": 70000 + }, + { + "epoch": 1.04, + "learning_rate": 4.481816433980123e-05, + "loss": 1.6323, + "step": 70100 + }, + { + "epoch": 1.04, + "learning_rate": 4.481076964032182e-05, + "loss": 1.6091, + "step": 70200 + }, + { + "epoch": 1.04, + "learning_rate": 4.480337494084241e-05, + "loss": 1.6803, + "step": 70300 + }, + { + "epoch": 1.04, + "learning_rate": 4.4795980241362995e-05, + "loss": 1.6503, + "step": 70400 + }, + { + "epoch": 1.04, + "learning_rate": 4.478858554188358e-05, + "loss": 1.6331, + "step": 70500 + }, + { + "epoch": 1.04, + "learning_rate": 4.4781190842404166e-05, + "loss": 1.6153, + "step": 70600 + }, + { + "epoch": 1.05, + "learning_rate": 4.477379614292475e-05, + "loss": 1.634, + "step": 70700 + }, + { + "epoch": 1.05, + "learning_rate": 4.476640144344534e-05, + "loss": 1.6496, + "step": 70800 + }, + { + "epoch": 1.05, + "learning_rate": 4.475900674396592e-05, + "loss": 1.6467, + "step": 70900 + }, + { + "epoch": 1.05, + "learning_rate": 4.4751612044486516e-05, + "loss": 1.646, + "step": 71000 + }, + { + "epoch": 1.05, + "learning_rate": 4.47442173450071e-05, + "loss": 1.6283, + "step": 71100 + }, + { + "epoch": 1.05, + "learning_rate": 4.473682264552769e-05, + "loss": 1.6491, + "step": 71200 + }, + { + "epoch": 1.05, + "learning_rate": 4.472942794604827e-05, + "loss": 1.6546, + "step": 71300 + }, + { + "epoch": 1.06, + "learning_rate": 4.4722033246568865e-05, + "loss": 1.658, + "step": 71400 + }, + { + "epoch": 1.06, + "learning_rate": 4.471463854708945e-05, + "loss": 1.6407, + "step": 71500 + }, + { + "epoch": 1.06, + "learning_rate": 4.4707243847610036e-05, + "loss": 1.6168, + "step": 71600 + }, + { + "epoch": 1.06, + "learning_rate": 4.469984914813062e-05, + "loss": 1.6646, + "step": 71700 + }, + { + "epoch": 1.06, + "learning_rate": 4.4692528395646e-05, + "loss": 1.6625, + "step": 71800 + }, + { + "epoch": 1.06, + "learning_rate": 4.4685133696166595e-05, + "loss": 1.6334, + "step": 71900 + }, + { + "epoch": 1.06, + "learning_rate": 4.467773899668718e-05, + "loss": 1.6648, + "step": 72000 + }, + { + "epoch": 1.07, + "learning_rate": 4.467034429720776e-05, + "loss": 1.6427, + "step": 72100 + }, + { + "epoch": 1.07, + "learning_rate": 4.466294959772835e-05, + "loss": 1.6329, + "step": 72200 + }, + { + "epoch": 1.07, + "learning_rate": 4.465555489824894e-05, + "loss": 1.6358, + "step": 72300 + }, + { + "epoch": 1.07, + "learning_rate": 4.464816019876952e-05, + "loss": 1.6472, + "step": 72400 + }, + { + "epoch": 1.07, + "learning_rate": 4.464076549929011e-05, + "loss": 1.6301, + "step": 72500 + }, + { + "epoch": 1.07, + "learning_rate": 4.4633370799810694e-05, + "loss": 1.6448, + "step": 72600 + }, + { + "epoch": 1.08, + "learning_rate": 4.462597610033129e-05, + "loss": 1.6268, + "step": 72700 + }, + { + "epoch": 1.08, + "learning_rate": 4.461858140085187e-05, + "loss": 1.6422, + "step": 72800 + }, + { + "epoch": 1.08, + "learning_rate": 4.461118670137246e-05, + "loss": 1.6508, + "step": 72900 + }, + { + "epoch": 1.08, + "learning_rate": 4.4603792001893044e-05, + "loss": 1.6502, + "step": 73000 + }, + { + "epoch": 1.08, + "learning_rate": 4.4596397302413636e-05, + "loss": 1.6549, + "step": 73100 + }, + { + "epoch": 1.08, + "learning_rate": 4.458900260293422e-05, + "loss": 1.6403, + "step": 73200 + }, + { + "epoch": 1.08, + "learning_rate": 4.458160790345481e-05, + "loss": 1.6211, + "step": 73300 + }, + { + "epoch": 1.09, + "learning_rate": 4.457421320397539e-05, + "loss": 1.6717, + "step": 73400 + }, + { + "epoch": 1.09, + "learning_rate": 4.456681850449598e-05, + "loss": 1.623, + "step": 73500 + }, + { + "epoch": 1.09, + "learning_rate": 4.4559423805016564e-05, + "loss": 1.636, + "step": 73600 + }, + { + "epoch": 1.09, + "learning_rate": 4.455202910553715e-05, + "loss": 1.653, + "step": 73700 + }, + { + "epoch": 1.09, + "learning_rate": 4.454463440605774e-05, + "loss": 1.6364, + "step": 73800 + }, + { + "epoch": 1.09, + "learning_rate": 4.453723970657833e-05, + "loss": 1.6604, + "step": 73900 + }, + { + "epoch": 1.09, + "learning_rate": 4.4529845007098914e-05, + "loss": 1.6192, + "step": 74000 + }, + { + "epoch": 1.1, + "learning_rate": 4.45224503076195e-05, + "loss": 1.6533, + "step": 74100 + }, + { + "epoch": 1.1, + "learning_rate": 4.451505560814009e-05, + "loss": 1.658, + "step": 74200 + }, + { + "epoch": 1.1, + "learning_rate": 4.450766090866068e-05, + "loss": 1.6628, + "step": 74300 + }, + { + "epoch": 1.1, + "learning_rate": 4.450026620918126e-05, + "loss": 1.6612, + "step": 74400 + }, + { + "epoch": 1.1, + "learning_rate": 4.449294545669664e-05, + "loss": 1.6311, + "step": 74500 + }, + { + "epoch": 1.1, + "learning_rate": 4.448555075721723e-05, + "loss": 1.6191, + "step": 74600 + }, + { + "epoch": 1.1, + "learning_rate": 4.4478156057737815e-05, + "loss": 1.6699, + "step": 74700 + }, + { + "epoch": 1.11, + "learning_rate": 4.44707613582584e-05, + "loss": 1.6596, + "step": 74800 + }, + { + "epoch": 1.11, + "learning_rate": 4.4463366658778986e-05, + "loss": 1.6416, + "step": 74900 + }, + { + "epoch": 1.11, + "learning_rate": 4.445597195929958e-05, + "loss": 1.658, + "step": 75000 + }, + { + "epoch": 1.11, + "learning_rate": 4.4448577259820164e-05, + "loss": 1.6409, + "step": 75100 + }, + { + "epoch": 1.11, + "learning_rate": 4.444118256034075e-05, + "loss": 1.6429, + "step": 75200 + }, + { + "epoch": 1.11, + "learning_rate": 4.4433787860861335e-05, + "loss": 1.62, + "step": 75300 + }, + { + "epoch": 1.12, + "learning_rate": 4.442639316138192e-05, + "loss": 1.6508, + "step": 75400 + }, + { + "epoch": 1.12, + "learning_rate": 4.441899846190251e-05, + "loss": 1.6689, + "step": 75500 + }, + { + "epoch": 1.12, + "learning_rate": 4.44116037624231e-05, + "loss": 1.6373, + "step": 75600 + }, + { + "epoch": 1.12, + "learning_rate": 4.4404209062943685e-05, + "loss": 1.6261, + "step": 75700 + }, + { + "epoch": 1.12, + "learning_rate": 4.439681436346427e-05, + "loss": 1.6451, + "step": 75800 + }, + { + "epoch": 1.12, + "learning_rate": 4.438941966398486e-05, + "loss": 1.6637, + "step": 75900 + }, + { + "epoch": 1.12, + "learning_rate": 4.438202496450544e-05, + "loss": 1.6368, + "step": 76000 + }, + { + "epoch": 1.13, + "learning_rate": 4.437463026502603e-05, + "loss": 1.6531, + "step": 76100 + }, + { + "epoch": 1.13, + "learning_rate": 4.436723556554662e-05, + "loss": 1.6539, + "step": 76200 + }, + { + "epoch": 1.13, + "learning_rate": 4.4359840866067205e-05, + "loss": 1.6337, + "step": 76300 + }, + { + "epoch": 1.13, + "learning_rate": 4.435244616658779e-05, + "loss": 1.6454, + "step": 76400 + }, + { + "epoch": 1.13, + "learning_rate": 4.434512541410317e-05, + "loss": 1.6357, + "step": 76500 + }, + { + "epoch": 1.13, + "learning_rate": 4.433773071462376e-05, + "loss": 1.6724, + "step": 76600 + }, + { + "epoch": 1.13, + "learning_rate": 4.433033601514435e-05, + "loss": 1.6366, + "step": 76700 + }, + { + "epoch": 1.14, + "learning_rate": 4.4322941315664935e-05, + "loss": 1.6473, + "step": 76800 + }, + { + "epoch": 1.14, + "learning_rate": 4.431554661618552e-05, + "loss": 1.6549, + "step": 76900 + }, + { + "epoch": 1.14, + "learning_rate": 4.4308151916706106e-05, + "loss": 1.6261, + "step": 77000 + }, + { + "epoch": 1.14, + "learning_rate": 4.43007572172267e-05, + "loss": 1.6345, + "step": 77100 + }, + { + "epoch": 1.14, + "learning_rate": 4.4293362517747284e-05, + "loss": 1.6534, + "step": 77200 + }, + { + "epoch": 1.14, + "learning_rate": 4.428596781826786e-05, + "loss": 1.6261, + "step": 77300 + }, + { + "epoch": 1.14, + "learning_rate": 4.4278573118788456e-05, + "loss": 1.6517, + "step": 77400 + }, + { + "epoch": 1.15, + "learning_rate": 4.427117841930904e-05, + "loss": 1.6511, + "step": 77500 + }, + { + "epoch": 1.15, + "learning_rate": 4.426378371982963e-05, + "loss": 1.6348, + "step": 77600 + }, + { + "epoch": 1.15, + "learning_rate": 4.425638902035021e-05, + "loss": 1.6357, + "step": 77700 + }, + { + "epoch": 1.15, + "learning_rate": 4.42489943208708e-05, + "loss": 1.6104, + "step": 77800 + }, + { + "epoch": 1.15, + "learning_rate": 4.424159962139139e-05, + "loss": 1.662, + "step": 77900 + }, + { + "epoch": 1.15, + "learning_rate": 4.4234204921911976e-05, + "loss": 1.6615, + "step": 78000 + }, + { + "epoch": 1.16, + "learning_rate": 4.422681022243256e-05, + "loss": 1.6471, + "step": 78100 + }, + { + "epoch": 1.16, + "learning_rate": 4.421941552295315e-05, + "loss": 1.6302, + "step": 78200 + }, + { + "epoch": 1.16, + "learning_rate": 4.421202082347374e-05, + "loss": 1.6192, + "step": 78300 + }, + { + "epoch": 1.16, + "learning_rate": 4.4204626123994325e-05, + "loss": 1.6047, + "step": 78400 + }, + { + "epoch": 1.16, + "learning_rate": 4.419723142451491e-05, + "loss": 1.6212, + "step": 78500 + }, + { + "epoch": 1.16, + "learning_rate": 4.418991067203029e-05, + "loss": 1.606, + "step": 78600 + }, + { + "epoch": 1.16, + "learning_rate": 4.418251597255088e-05, + "loss": 1.6442, + "step": 78700 + }, + { + "epoch": 1.17, + "learning_rate": 4.417512127307146e-05, + "loss": 1.6533, + "step": 78800 + }, + { + "epoch": 1.17, + "learning_rate": 4.416772657359205e-05, + "loss": 1.655, + "step": 78900 + }, + { + "epoch": 1.17, + "learning_rate": 4.4160331874112634e-05, + "loss": 1.6277, + "step": 79000 + }, + { + "epoch": 1.17, + "learning_rate": 4.4152937174633227e-05, + "loss": 1.6666, + "step": 79100 + }, + { + "epoch": 1.17, + "learning_rate": 4.414554247515381e-05, + "loss": 1.6253, + "step": 79200 + }, + { + "epoch": 1.17, + "learning_rate": 4.41381477756744e-05, + "loss": 1.6345, + "step": 79300 + }, + { + "epoch": 1.17, + "learning_rate": 4.4130753076194983e-05, + "loss": 1.6389, + "step": 79400 + }, + { + "epoch": 1.18, + "learning_rate": 4.4123358376715576e-05, + "loss": 1.6313, + "step": 79500 + }, + { + "epoch": 1.18, + "learning_rate": 4.411596367723616e-05, + "loss": 1.6361, + "step": 79600 + }, + { + "epoch": 1.18, + "learning_rate": 4.410856897775675e-05, + "loss": 1.6882, + "step": 79700 + }, + { + "epoch": 1.18, + "learning_rate": 4.410117427827733e-05, + "loss": 1.6232, + "step": 79800 + }, + { + "epoch": 1.18, + "learning_rate": 4.4093779578797925e-05, + "loss": 1.6481, + "step": 79900 + }, + { + "epoch": 1.18, + "learning_rate": 4.4086384879318504e-05, + "loss": 1.6444, + "step": 80000 + }, + { + "epoch": 1.18, + "learning_rate": 4.407899017983909e-05, + "loss": 1.6399, + "step": 80100 + }, + { + "epoch": 1.19, + "learning_rate": 4.4071595480359675e-05, + "loss": 1.6394, + "step": 80200 + }, + { + "epoch": 1.19, + "learning_rate": 4.406420078088027e-05, + "loss": 1.6524, + "step": 80300 + }, + { + "epoch": 1.19, + "learning_rate": 4.405680608140085e-05, + "loss": 1.6401, + "step": 80400 + }, + { + "epoch": 1.19, + "learning_rate": 4.404941138192144e-05, + "loss": 1.6188, + "step": 80500 + }, + { + "epoch": 1.19, + "learning_rate": 4.404209062943682e-05, + "loss": 1.6411, + "step": 80600 + }, + { + "epoch": 1.19, + "learning_rate": 4.403469592995741e-05, + "loss": 1.6328, + "step": 80700 + }, + { + "epoch": 1.19, + "learning_rate": 4.4027301230478e-05, + "loss": 1.595, + "step": 80800 + }, + { + "epoch": 1.2, + "learning_rate": 4.401990653099858e-05, + "loss": 1.6619, + "step": 80900 + }, + { + "epoch": 1.2, + "learning_rate": 4.401251183151917e-05, + "loss": 1.61, + "step": 81000 + }, + { + "epoch": 1.2, + "learning_rate": 4.400511713203976e-05, + "loss": 1.6605, + "step": 81100 + }, + { + "epoch": 1.2, + "learning_rate": 4.399772243256035e-05, + "loss": 1.6238, + "step": 81200 + }, + { + "epoch": 1.2, + "learning_rate": 4.3990327733080926e-05, + "loss": 1.6199, + "step": 81300 + }, + { + "epoch": 1.2, + "learning_rate": 4.398293303360151e-05, + "loss": 1.6419, + "step": 81400 + }, + { + "epoch": 1.21, + "learning_rate": 4.3975538334122104e-05, + "loss": 1.632, + "step": 81500 + }, + { + "epoch": 1.21, + "learning_rate": 4.396814363464269e-05, + "loss": 1.62, + "step": 81600 + }, + { + "epoch": 1.21, + "learning_rate": 4.3960748935163275e-05, + "loss": 1.6128, + "step": 81700 + }, + { + "epoch": 1.21, + "learning_rate": 4.395335423568386e-05, + "loss": 1.6094, + "step": 81800 + }, + { + "epoch": 1.21, + "learning_rate": 4.394595953620445e-05, + "loss": 1.6465, + "step": 81900 + }, + { + "epoch": 1.21, + "learning_rate": 4.393856483672504e-05, + "loss": 1.6343, + "step": 82000 + }, + { + "epoch": 1.21, + "learning_rate": 4.3931170137245624e-05, + "loss": 1.6188, + "step": 82100 + }, + { + "epoch": 1.22, + "learning_rate": 4.392377543776621e-05, + "loss": 1.6546, + "step": 82200 + }, + { + "epoch": 1.22, + "learning_rate": 4.39163807382868e-05, + "loss": 1.6277, + "step": 82300 + }, + { + "epoch": 1.22, + "learning_rate": 4.390898603880739e-05, + "loss": 1.635, + "step": 82400 + }, + { + "epoch": 1.22, + "learning_rate": 4.3901591339327974e-05, + "loss": 1.6634, + "step": 82500 + }, + { + "epoch": 1.22, + "learning_rate": 4.389419663984856e-05, + "loss": 1.6109, + "step": 82600 + }, + { + "epoch": 1.22, + "learning_rate": 4.3886801940369145e-05, + "loss": 1.6295, + "step": 82700 + }, + { + "epoch": 1.22, + "learning_rate": 4.387940724088973e-05, + "loss": 1.6304, + "step": 82800 + }, + { + "epoch": 1.23, + "learning_rate": 4.387208648840511e-05, + "loss": 1.6545, + "step": 82900 + }, + { + "epoch": 1.23, + "learning_rate": 4.38646917889257e-05, + "loss": 1.6472, + "step": 83000 + }, + { + "epoch": 1.23, + "learning_rate": 4.385729708944629e-05, + "loss": 1.6056, + "step": 83100 + }, + { + "epoch": 1.23, + "learning_rate": 4.3849902389966875e-05, + "loss": 1.6097, + "step": 83200 + }, + { + "epoch": 1.23, + "learning_rate": 4.384250769048746e-05, + "loss": 1.6203, + "step": 83300 + }, + { + "epoch": 1.23, + "learning_rate": 4.3835112991008046e-05, + "loss": 1.6279, + "step": 83400 + }, + { + "epoch": 1.23, + "learning_rate": 4.382771829152864e-05, + "loss": 1.634, + "step": 83500 + }, + { + "epoch": 1.24, + "learning_rate": 4.3820323592049224e-05, + "loss": 1.6468, + "step": 83600 + }, + { + "epoch": 1.24, + "learning_rate": 4.381292889256981e-05, + "loss": 1.632, + "step": 83700 + }, + { + "epoch": 1.24, + "learning_rate": 4.3805534193090395e-05, + "loss": 1.6681, + "step": 83800 + }, + { + "epoch": 1.24, + "learning_rate": 4.379813949361098e-05, + "loss": 1.647, + "step": 83900 + }, + { + "epoch": 1.24, + "learning_rate": 4.3790744794131567e-05, + "loss": 1.6719, + "step": 84000 + }, + { + "epoch": 1.24, + "learning_rate": 4.378335009465215e-05, + "loss": 1.6415, + "step": 84100 + }, + { + "epoch": 1.25, + "learning_rate": 4.377595539517274e-05, + "loss": 1.6047, + "step": 84200 + }, + { + "epoch": 1.25, + "learning_rate": 4.376856069569333e-05, + "loss": 1.6338, + "step": 84300 + }, + { + "epoch": 1.25, + "learning_rate": 4.3761165996213916e-05, + "loss": 1.6209, + "step": 84400 + }, + { + "epoch": 1.25, + "learning_rate": 4.37537712967345e-05, + "loss": 1.6684, + "step": 84500 + }, + { + "epoch": 1.25, + "learning_rate": 4.374637659725509e-05, + "loss": 1.6398, + "step": 84600 + }, + { + "epoch": 1.25, + "learning_rate": 4.373898189777568e-05, + "loss": 1.636, + "step": 84700 + }, + { + "epoch": 1.25, + "learning_rate": 4.3731587198296265e-05, + "loss": 1.6495, + "step": 84800 + }, + { + "epoch": 1.26, + "learning_rate": 4.372419249881685e-05, + "loss": 1.6281, + "step": 84900 + }, + { + "epoch": 1.26, + "learning_rate": 4.3716797799337437e-05, + "loss": 1.598, + "step": 85000 + }, + { + "epoch": 1.26, + "learning_rate": 4.370940309985803e-05, + "loss": 1.635, + "step": 85100 + }, + { + "epoch": 1.26, + "learning_rate": 4.3702008400378615e-05, + "loss": 1.6374, + "step": 85200 + }, + { + "epoch": 1.26, + "learning_rate": 4.3694613700899193e-05, + "loss": 1.6208, + "step": 85300 + }, + { + "epoch": 1.26, + "learning_rate": 4.3687292948414574e-05, + "loss": 1.6426, + "step": 85400 + }, + { + "epoch": 1.26, + "learning_rate": 4.3679898248935166e-05, + "loss": 1.6094, + "step": 85500 + }, + { + "epoch": 1.27, + "learning_rate": 4.367250354945575e-05, + "loss": 1.6125, + "step": 85600 + }, + { + "epoch": 1.27, + "learning_rate": 4.366510884997634e-05, + "loss": 1.6425, + "step": 85700 + }, + { + "epoch": 1.27, + "learning_rate": 4.365771415049692e-05, + "loss": 1.6346, + "step": 85800 + }, + { + "epoch": 1.27, + "learning_rate": 4.3650319451017516e-05, + "loss": 1.6576, + "step": 85900 + }, + { + "epoch": 1.27, + "learning_rate": 4.36429247515381e-05, + "loss": 1.6152, + "step": 86000 + }, + { + "epoch": 1.27, + "learning_rate": 4.363553005205869e-05, + "loss": 1.6333, + "step": 86100 + }, + { + "epoch": 1.27, + "learning_rate": 4.362813535257927e-05, + "loss": 1.6195, + "step": 86200 + }, + { + "epoch": 1.28, + "learning_rate": 4.3620740653099865e-05, + "loss": 1.6354, + "step": 86300 + }, + { + "epoch": 1.28, + "learning_rate": 4.361334595362045e-05, + "loss": 1.6393, + "step": 86400 + }, + { + "epoch": 1.28, + "learning_rate": 4.3605951254141036e-05, + "loss": 1.6292, + "step": 86500 + }, + { + "epoch": 1.28, + "learning_rate": 4.3598556554661615e-05, + "loss": 1.633, + "step": 86600 + }, + { + "epoch": 1.28, + "learning_rate": 4.359116185518221e-05, + "loss": 1.6108, + "step": 86700 + }, + { + "epoch": 1.28, + "learning_rate": 4.358376715570279e-05, + "loss": 1.6389, + "step": 86800 + }, + { + "epoch": 1.29, + "learning_rate": 4.357637245622338e-05, + "loss": 1.6415, + "step": 86900 + }, + { + "epoch": 1.29, + "learning_rate": 4.3568977756743964e-05, + "loss": 1.6349, + "step": 87000 + }, + { + "epoch": 1.29, + "learning_rate": 4.356158305726456e-05, + "loss": 1.6338, + "step": 87100 + }, + { + "epoch": 1.29, + "learning_rate": 4.355418835778514e-05, + "loss": 1.6118, + "step": 87200 + }, + { + "epoch": 1.29, + "learning_rate": 4.354679365830573e-05, + "loss": 1.6309, + "step": 87300 + }, + { + "epoch": 1.29, + "learning_rate": 4.3539398958826314e-05, + "loss": 1.6465, + "step": 87400 + }, + { + "epoch": 1.29, + "learning_rate": 4.3532004259346906e-05, + "loss": 1.6596, + "step": 87500 + }, + { + "epoch": 1.3, + "learning_rate": 4.352460955986749e-05, + "loss": 1.6285, + "step": 87600 + }, + { + "epoch": 1.3, + "learning_rate": 4.351721486038808e-05, + "loss": 1.6171, + "step": 87700 + }, + { + "epoch": 1.3, + "learning_rate": 4.350982016090866e-05, + "loss": 1.6266, + "step": 87800 + }, + { + "epoch": 1.3, + "learning_rate": 4.350242546142925e-05, + "loss": 1.6647, + "step": 87900 + }, + { + "epoch": 1.3, + "learning_rate": 4.3495030761949834e-05, + "loss": 1.6415, + "step": 88000 + }, + { + "epoch": 1.3, + "learning_rate": 4.348763606247042e-05, + "loss": 1.6301, + "step": 88100 + }, + { + "epoch": 1.3, + "learning_rate": 4.3480241362991006e-05, + "loss": 1.6248, + "step": 88200 + }, + { + "epoch": 1.31, + "learning_rate": 4.34728466635116e-05, + "loss": 1.6763, + "step": 88300 + }, + { + "epoch": 1.31, + "learning_rate": 4.3465451964032184e-05, + "loss": 1.6353, + "step": 88400 + }, + { + "epoch": 1.31, + "learning_rate": 4.3458131211547564e-05, + "loss": 1.6283, + "step": 88500 + }, + { + "epoch": 1.31, + "learning_rate": 4.345073651206815e-05, + "loss": 1.6197, + "step": 88600 + }, + { + "epoch": 1.31, + "learning_rate": 4.344334181258874e-05, + "loss": 1.6424, + "step": 88700 + }, + { + "epoch": 1.31, + "learning_rate": 4.343594711310933e-05, + "loss": 1.6498, + "step": 88800 + }, + { + "epoch": 1.31, + "learning_rate": 4.3428552413629913e-05, + "loss": 1.6237, + "step": 88900 + }, + { + "epoch": 1.32, + "learning_rate": 4.34211577141505e-05, + "loss": 1.6559, + "step": 89000 + }, + { + "epoch": 1.32, + "learning_rate": 4.341376301467109e-05, + "loss": 1.6505, + "step": 89100 + }, + { + "epoch": 1.32, + "learning_rate": 4.340636831519167e-05, + "loss": 1.617, + "step": 89200 + }, + { + "epoch": 1.32, + "learning_rate": 4.3398973615712256e-05, + "loss": 1.6224, + "step": 89300 + }, + { + "epoch": 1.32, + "learning_rate": 4.339157891623284e-05, + "loss": 1.6106, + "step": 89400 + }, + { + "epoch": 1.32, + "learning_rate": 4.3384184216753434e-05, + "loss": 1.6361, + "step": 89500 + }, + { + "epoch": 1.33, + "learning_rate": 4.337678951727402e-05, + "loss": 1.6356, + "step": 89600 + }, + { + "epoch": 1.33, + "learning_rate": 4.3369394817794605e-05, + "loss": 1.6164, + "step": 89700 + }, + { + "epoch": 1.33, + "learning_rate": 4.336200011831519e-05, + "loss": 1.633, + "step": 89800 + }, + { + "epoch": 1.33, + "learning_rate": 4.335460541883578e-05, + "loss": 1.6322, + "step": 89900 + }, + { + "epoch": 1.33, + "learning_rate": 4.334721071935637e-05, + "loss": 1.6373, + "step": 90000 + }, + { + "epoch": 1.33, + "learning_rate": 4.3339816019876955e-05, + "loss": 1.6194, + "step": 90100 + }, + { + "epoch": 1.33, + "learning_rate": 4.333242132039754e-05, + "loss": 1.659, + "step": 90200 + }, + { + "epoch": 1.34, + "learning_rate": 4.332502662091813e-05, + "loss": 1.6306, + "step": 90300 + }, + { + "epoch": 1.34, + "learning_rate": 4.331763192143872e-05, + "loss": 1.6437, + "step": 90400 + }, + { + "epoch": 1.34, + "learning_rate": 4.33102372219593e-05, + "loss": 1.6229, + "step": 90500 + }, + { + "epoch": 1.34, + "learning_rate": 4.330284252247988e-05, + "loss": 1.6333, + "step": 90600 + }, + { + "epoch": 1.34, + "learning_rate": 4.3295447823000475e-05, + "loss": 1.6032, + "step": 90700 + }, + { + "epoch": 1.34, + "learning_rate": 4.328805312352106e-05, + "loss": 1.6351, + "step": 90800 + }, + { + "epoch": 1.34, + "learning_rate": 4.3280658424041647e-05, + "loss": 1.6236, + "step": 90900 + }, + { + "epoch": 1.35, + "learning_rate": 4.327326372456223e-05, + "loss": 1.6266, + "step": 91000 + }, + { + "epoch": 1.35, + "learning_rate": 4.326594297207762e-05, + "loss": 1.6116, + "step": 91100 + }, + { + "epoch": 1.35, + "learning_rate": 4.3258548272598205e-05, + "loss": 1.6466, + "step": 91200 + }, + { + "epoch": 1.35, + "learning_rate": 4.325115357311879e-05, + "loss": 1.6218, + "step": 91300 + }, + { + "epoch": 1.35, + "learning_rate": 4.3243758873639376e-05, + "loss": 1.6277, + "step": 91400 + }, + { + "epoch": 1.35, + "learning_rate": 4.323636417415997e-05, + "loss": 1.6232, + "step": 91500 + }, + { + "epoch": 1.35, + "learning_rate": 4.3228969474680554e-05, + "loss": 1.6173, + "step": 91600 + }, + { + "epoch": 1.36, + "learning_rate": 4.322157477520114e-05, + "loss": 1.6286, + "step": 91700 + }, + { + "epoch": 1.36, + "learning_rate": 4.3214180075721726e-05, + "loss": 1.6258, + "step": 91800 + }, + { + "epoch": 1.36, + "learning_rate": 4.320678537624231e-05, + "loss": 1.591, + "step": 91900 + }, + { + "epoch": 1.36, + "learning_rate": 4.31993906767629e-05, + "loss": 1.5922, + "step": 92000 + }, + { + "epoch": 1.36, + "learning_rate": 4.319199597728348e-05, + "loss": 1.6444, + "step": 92100 + }, + { + "epoch": 1.36, + "learning_rate": 4.318460127780407e-05, + "loss": 1.6526, + "step": 92200 + }, + { + "epoch": 1.37, + "learning_rate": 4.317720657832466e-05, + "loss": 1.6265, + "step": 92300 + }, + { + "epoch": 1.37, + "learning_rate": 4.3169811878845246e-05, + "loss": 1.6333, + "step": 92400 + }, + { + "epoch": 1.37, + "learning_rate": 4.316241717936583e-05, + "loss": 1.6407, + "step": 92500 + }, + { + "epoch": 1.37, + "learning_rate": 4.315502247988642e-05, + "loss": 1.6282, + "step": 92600 + }, + { + "epoch": 1.37, + "learning_rate": 4.314762778040701e-05, + "loss": 1.6108, + "step": 92700 + }, + { + "epoch": 1.37, + "learning_rate": 4.3140233080927596e-05, + "loss": 1.6281, + "step": 92800 + }, + { + "epoch": 1.37, + "learning_rate": 4.313283838144818e-05, + "loss": 1.639, + "step": 92900 + }, + { + "epoch": 1.38, + "learning_rate": 4.312544368196877e-05, + "loss": 1.6396, + "step": 93000 + }, + { + "epoch": 1.38, + "learning_rate": 4.311804898248936e-05, + "loss": 1.6426, + "step": 93100 + }, + { + "epoch": 1.38, + "learning_rate": 4.311065428300994e-05, + "loss": 1.6028, + "step": 93200 + }, + { + "epoch": 1.38, + "learning_rate": 4.310333353052532e-05, + "loss": 1.6297, + "step": 93300 + }, + { + "epoch": 1.38, + "learning_rate": 4.3095938831045904e-05, + "loss": 1.6002, + "step": 93400 + }, + { + "epoch": 1.38, + "learning_rate": 4.30885441315665e-05, + "loss": 1.6076, + "step": 93500 + }, + { + "epoch": 1.38, + "learning_rate": 4.308114943208708e-05, + "loss": 1.6142, + "step": 93600 + }, + { + "epoch": 1.39, + "learning_rate": 4.307375473260767e-05, + "loss": 1.6157, + "step": 93700 + }, + { + "epoch": 1.39, + "learning_rate": 4.3066360033128254e-05, + "loss": 1.6288, + "step": 93800 + }, + { + "epoch": 1.39, + "learning_rate": 4.3058965333648846e-05, + "loss": 1.6287, + "step": 93900 + }, + { + "epoch": 1.39, + "learning_rate": 4.305157063416943e-05, + "loss": 1.6418, + "step": 94000 + }, + { + "epoch": 1.39, + "learning_rate": 4.304417593469002e-05, + "loss": 1.6308, + "step": 94100 + }, + { + "epoch": 1.39, + "learning_rate": 4.30367812352106e-05, + "loss": 1.6333, + "step": 94200 + }, + { + "epoch": 1.39, + "learning_rate": 4.3029386535731195e-05, + "loss": 1.6172, + "step": 94300 + }, + { + "epoch": 1.4, + "learning_rate": 4.302199183625178e-05, + "loss": 1.6401, + "step": 94400 + }, + { + "epoch": 1.4, + "learning_rate": 4.301459713677236e-05, + "loss": 1.639, + "step": 94500 + }, + { + "epoch": 1.4, + "learning_rate": 4.3007202437292945e-05, + "loss": 1.6379, + "step": 94600 + }, + { + "epoch": 1.4, + "learning_rate": 4.299980773781354e-05, + "loss": 1.648, + "step": 94700 + }, + { + "epoch": 1.4, + "learning_rate": 4.2992413038334123e-05, + "loss": 1.6262, + "step": 94800 + }, + { + "epoch": 1.4, + "learning_rate": 4.298501833885471e-05, + "loss": 1.6528, + "step": 94900 + }, + { + "epoch": 1.4, + "learning_rate": 4.2977623639375295e-05, + "loss": 1.5951, + "step": 95000 + }, + { + "epoch": 1.41, + "learning_rate": 4.297022893989589e-05, + "loss": 1.6108, + "step": 95100 + }, + { + "epoch": 1.41, + "learning_rate": 4.296283424041647e-05, + "loss": 1.6145, + "step": 95200 + }, + { + "epoch": 1.41, + "learning_rate": 4.295543954093706e-05, + "loss": 1.6635, + "step": 95300 + }, + { + "epoch": 1.41, + "learning_rate": 4.2948044841457644e-05, + "loss": 1.6269, + "step": 95400 + }, + { + "epoch": 1.41, + "learning_rate": 4.2940650141978236e-05, + "loss": 1.6338, + "step": 95500 + }, + { + "epoch": 1.41, + "learning_rate": 4.293325544249882e-05, + "loss": 1.6208, + "step": 95600 + }, + { + "epoch": 1.42, + "learning_rate": 4.292586074301941e-05, + "loss": 1.6224, + "step": 95700 + }, + { + "epoch": 1.42, + "learning_rate": 4.291853999053478e-05, + "loss": 1.6248, + "step": 95800 + }, + { + "epoch": 1.42, + "learning_rate": 4.2911145291055374e-05, + "loss": 1.5974, + "step": 95900 + }, + { + "epoch": 1.42, + "learning_rate": 4.290375059157596e-05, + "loss": 1.6579, + "step": 96000 + }, + { + "epoch": 1.42, + "learning_rate": 4.2896355892096545e-05, + "loss": 1.6457, + "step": 96100 + }, + { + "epoch": 1.42, + "learning_rate": 4.288896119261713e-05, + "loss": 1.6079, + "step": 96200 + }, + { + "epoch": 1.42, + "learning_rate": 4.288156649313772e-05, + "loss": 1.668, + "step": 96300 + }, + { + "epoch": 1.43, + "learning_rate": 4.287417179365831e-05, + "loss": 1.6228, + "step": 96400 + }, + { + "epoch": 1.43, + "learning_rate": 4.2866777094178894e-05, + "loss": 1.6389, + "step": 96500 + }, + { + "epoch": 1.43, + "learning_rate": 4.285938239469948e-05, + "loss": 1.6237, + "step": 96600 + }, + { + "epoch": 1.43, + "learning_rate": 4.285198769522007e-05, + "loss": 1.6382, + "step": 96700 + }, + { + "epoch": 1.43, + "learning_rate": 4.284459299574066e-05, + "loss": 1.6451, + "step": 96800 + }, + { + "epoch": 1.43, + "learning_rate": 4.2837198296261244e-05, + "loss": 1.6141, + "step": 96900 + }, + { + "epoch": 1.43, + "learning_rate": 4.282980359678183e-05, + "loss": 1.6399, + "step": 97000 + }, + { + "epoch": 1.44, + "learning_rate": 4.282240889730242e-05, + "loss": 1.596, + "step": 97100 + }, + { + "epoch": 1.44, + "learning_rate": 4.2815014197823e-05, + "loss": 1.6428, + "step": 97200 + }, + { + "epoch": 1.44, + "learning_rate": 4.2807619498343586e-05, + "loss": 1.6249, + "step": 97300 + }, + { + "epoch": 1.44, + "learning_rate": 4.280022479886417e-05, + "loss": 1.6077, + "step": 97400 + }, + { + "epoch": 1.44, + "learning_rate": 4.2792830099384764e-05, + "loss": 1.635, + "step": 97500 + }, + { + "epoch": 1.44, + "learning_rate": 4.278543539990535e-05, + "loss": 1.6368, + "step": 97600 + }, + { + "epoch": 1.44, + "learning_rate": 4.2778040700425936e-05, + "loss": 1.5985, + "step": 97700 + }, + { + "epoch": 1.45, + "learning_rate": 4.2770719947941316e-05, + "loss": 1.6455, + "step": 97800 + }, + { + "epoch": 1.45, + "learning_rate": 4.27633252484619e-05, + "loss": 1.6137, + "step": 97900 + }, + { + "epoch": 1.45, + "learning_rate": 4.2755930548982494e-05, + "loss": 1.6158, + "step": 98000 + }, + { + "epoch": 1.45, + "learning_rate": 4.274853584950308e-05, + "loss": 1.6094, + "step": 98100 + }, + { + "epoch": 1.45, + "learning_rate": 4.2741141150023665e-05, + "loss": 1.6193, + "step": 98200 + }, + { + "epoch": 1.45, + "learning_rate": 4.273374645054425e-05, + "loss": 1.6304, + "step": 98300 + }, + { + "epoch": 1.46, + "learning_rate": 4.2726351751064843e-05, + "loss": 1.6345, + "step": 98400 + }, + { + "epoch": 1.46, + "learning_rate": 4.271895705158542e-05, + "loss": 1.6217, + "step": 98500 + }, + { + "epoch": 1.46, + "learning_rate": 4.271156235210601e-05, + "loss": 1.6285, + "step": 98600 + }, + { + "epoch": 1.46, + "learning_rate": 4.27041676526266e-05, + "loss": 1.5898, + "step": 98700 + }, + { + "epoch": 1.46, + "learning_rate": 4.2696772953147186e-05, + "loss": 1.6426, + "step": 98800 + }, + { + "epoch": 1.46, + "learning_rate": 4.268937825366777e-05, + "loss": 1.5872, + "step": 98900 + }, + { + "epoch": 1.46, + "learning_rate": 4.268198355418836e-05, + "loss": 1.6302, + "step": 99000 + }, + { + "epoch": 1.47, + "learning_rate": 4.267458885470895e-05, + "loss": 1.6212, + "step": 99100 + }, + { + "epoch": 1.47, + "learning_rate": 4.2667194155229535e-05, + "loss": 1.6052, + "step": 99200 + }, + { + "epoch": 1.47, + "learning_rate": 4.265979945575012e-05, + "loss": 1.6149, + "step": 99300 + }, + { + "epoch": 1.47, + "learning_rate": 4.265240475627071e-05, + "loss": 1.6224, + "step": 99400 + }, + { + "epoch": 1.47, + "learning_rate": 4.26450100567913e-05, + "loss": 1.5973, + "step": 99500 + }, + { + "epoch": 1.47, + "learning_rate": 4.2637615357311885e-05, + "loss": 1.6388, + "step": 99600 + }, + { + "epoch": 1.47, + "learning_rate": 4.263022065783247e-05, + "loss": 1.6465, + "step": 99700 + }, + { + "epoch": 1.48, + "learning_rate": 4.262282595835305e-05, + "loss": 1.6219, + "step": 99800 + }, + { + "epoch": 1.48, + "learning_rate": 4.2615505205868436e-05, + "loss": 1.6281, + "step": 99900 + }, + { + "epoch": 1.48, + "learning_rate": 4.260811050638902e-05, + "loss": 1.6182, + "step": 100000 + }, + { + "epoch": 1.48, + "learning_rate": 4.260071580690961e-05, + "loss": 1.6147, + "step": 100100 + }, + { + "epoch": 1.48, + "learning_rate": 4.259332110743019e-05, + "loss": 1.5992, + "step": 100200 + }, + { + "epoch": 1.48, + "learning_rate": 4.2585926407950786e-05, + "loss": 1.6439, + "step": 100300 + }, + { + "epoch": 1.48, + "learning_rate": 4.257853170847137e-05, + "loss": 1.6332, + "step": 100400 + }, + { + "epoch": 1.49, + "learning_rate": 4.257113700899196e-05, + "loss": 1.6309, + "step": 100500 + }, + { + "epoch": 1.49, + "learning_rate": 4.256374230951254e-05, + "loss": 1.6421, + "step": 100600 + }, + { + "epoch": 1.49, + "learning_rate": 4.255634761003313e-05, + "loss": 1.6317, + "step": 100700 + }, + { + "epoch": 1.49, + "learning_rate": 4.254895291055372e-05, + "loss": 1.6046, + "step": 100800 + }, + { + "epoch": 1.49, + "learning_rate": 4.2541558211074306e-05, + "loss": 1.5878, + "step": 100900 + }, + { + "epoch": 1.49, + "learning_rate": 4.253416351159489e-05, + "loss": 1.6006, + "step": 101000 + }, + { + "epoch": 1.5, + "learning_rate": 4.252676881211548e-05, + "loss": 1.6034, + "step": 101100 + }, + { + "epoch": 1.5, + "learning_rate": 4.251937411263606e-05, + "loss": 1.6061, + "step": 101200 + }, + { + "epoch": 1.5, + "learning_rate": 4.251197941315665e-05, + "loss": 1.6392, + "step": 101300 + }, + { + "epoch": 1.5, + "learning_rate": 4.2504584713677235e-05, + "loss": 1.64, + "step": 101400 + }, + { + "epoch": 1.5, + "learning_rate": 4.249719001419783e-05, + "loss": 1.6063, + "step": 101500 + }, + { + "epoch": 1.5, + "learning_rate": 4.248979531471841e-05, + "loss": 1.6245, + "step": 101600 + }, + { + "epoch": 1.5, + "learning_rate": 4.2482400615239e-05, + "loss": 1.6393, + "step": 101700 + }, + { + "epoch": 1.51, + "learning_rate": 4.2475005915759584e-05, + "loss": 1.6135, + "step": 101800 + }, + { + "epoch": 1.51, + "learning_rate": 4.2467611216280176e-05, + "loss": 1.5999, + "step": 101900 + }, + { + "epoch": 1.51, + "learning_rate": 4.246021651680076e-05, + "loss": 1.6203, + "step": 102000 + }, + { + "epoch": 1.51, + "learning_rate": 4.245282181732135e-05, + "loss": 1.6123, + "step": 102100 + }, + { + "epoch": 1.51, + "learning_rate": 4.244542711784193e-05, + "loss": 1.6135, + "step": 102200 + }, + { + "epoch": 1.51, + "learning_rate": 4.243803241836252e-05, + "loss": 1.6087, + "step": 102300 + }, + { + "epoch": 1.51, + "learning_rate": 4.2430637718883104e-05, + "loss": 1.5978, + "step": 102400 + }, + { + "epoch": 1.52, + "learning_rate": 4.242324301940369e-05, + "loss": 1.6235, + "step": 102500 + }, + { + "epoch": 1.52, + "learning_rate": 4.2415848319924276e-05, + "loss": 1.6231, + "step": 102600 + }, + { + "epoch": 1.52, + "learning_rate": 4.240845362044487e-05, + "loss": 1.6129, + "step": 102700 + }, + { + "epoch": 1.52, + "learning_rate": 4.2401058920965454e-05, + "loss": 1.6083, + "step": 102800 + }, + { + "epoch": 1.52, + "learning_rate": 4.2393738168480834e-05, + "loss": 1.6398, + "step": 102900 + }, + { + "epoch": 1.52, + "learning_rate": 4.238634346900142e-05, + "loss": 1.6559, + "step": 103000 + }, + { + "epoch": 1.52, + "learning_rate": 4.2378948769522006e-05, + "loss": 1.6594, + "step": 103100 + }, + { + "epoch": 1.53, + "learning_rate": 4.23715540700426e-05, + "loss": 1.6223, + "step": 103200 + }, + { + "epoch": 1.53, + "learning_rate": 4.2364159370563184e-05, + "loss": 1.6015, + "step": 103300 + }, + { + "epoch": 1.53, + "learning_rate": 4.235676467108377e-05, + "loss": 1.6017, + "step": 103400 + }, + { + "epoch": 1.53, + "learning_rate": 4.2349369971604355e-05, + "loss": 1.591, + "step": 103500 + }, + { + "epoch": 1.53, + "learning_rate": 4.234197527212495e-05, + "loss": 1.6354, + "step": 103600 + }, + { + "epoch": 1.53, + "learning_rate": 4.233458057264553e-05, + "loss": 1.6536, + "step": 103700 + }, + { + "epoch": 1.54, + "learning_rate": 4.232718587316611e-05, + "loss": 1.6496, + "step": 103800 + }, + { + "epoch": 1.54, + "learning_rate": 4.2319791173686704e-05, + "loss": 1.6224, + "step": 103900 + }, + { + "epoch": 1.54, + "learning_rate": 4.231239647420729e-05, + "loss": 1.6042, + "step": 104000 + }, + { + "epoch": 1.54, + "learning_rate": 4.2305001774727875e-05, + "loss": 1.6002, + "step": 104100 + }, + { + "epoch": 1.54, + "learning_rate": 4.229760707524846e-05, + "loss": 1.6119, + "step": 104200 + }, + { + "epoch": 1.54, + "learning_rate": 4.2290212375769053e-05, + "loss": 1.6331, + "step": 104300 + }, + { + "epoch": 1.54, + "learning_rate": 4.228281767628964e-05, + "loss": 1.5944, + "step": 104400 + }, + { + "epoch": 1.55, + "learning_rate": 4.2275422976810225e-05, + "loss": 1.6276, + "step": 104500 + }, + { + "epoch": 1.55, + "learning_rate": 4.226802827733081e-05, + "loss": 1.6114, + "step": 104600 + }, + { + "epoch": 1.55, + "learning_rate": 4.22606335778514e-05, + "loss": 1.6055, + "step": 104700 + }, + { + "epoch": 1.55, + "learning_rate": 4.225323887837199e-05, + "loss": 1.6407, + "step": 104800 + }, + { + "epoch": 1.55, + "learning_rate": 4.2245844178892574e-05, + "loss": 1.6003, + "step": 104900 + }, + { + "epoch": 1.55, + "learning_rate": 4.223844947941316e-05, + "loss": 1.5993, + "step": 105000 + }, + { + "epoch": 1.55, + "learning_rate": 4.2231054779933745e-05, + "loss": 1.6179, + "step": 105100 + }, + { + "epoch": 1.56, + "learning_rate": 4.2223734027449126e-05, + "loss": 1.6285, + "step": 105200 + }, + { + "epoch": 1.56, + "learning_rate": 4.221633932796971e-05, + "loss": 1.6104, + "step": 105300 + }, + { + "epoch": 1.56, + "learning_rate": 4.22089446284903e-05, + "loss": 1.6088, + "step": 105400 + }, + { + "epoch": 1.56, + "learning_rate": 4.220154992901089e-05, + "loss": 1.5981, + "step": 105500 + }, + { + "epoch": 1.56, + "learning_rate": 4.2194155229531475e-05, + "loss": 1.6151, + "step": 105600 + }, + { + "epoch": 1.56, + "learning_rate": 4.218676053005206e-05, + "loss": 1.6077, + "step": 105700 + }, + { + "epoch": 1.56, + "learning_rate": 4.2179365830572646e-05, + "loss": 1.5926, + "step": 105800 + }, + { + "epoch": 1.57, + "learning_rate": 4.217197113109323e-05, + "loss": 1.625, + "step": 105900 + }, + { + "epoch": 1.57, + "learning_rate": 4.2164576431613824e-05, + "loss": 1.6096, + "step": 106000 + }, + { + "epoch": 1.57, + "learning_rate": 4.215718173213441e-05, + "loss": 1.6453, + "step": 106100 + }, + { + "epoch": 1.57, + "learning_rate": 4.2149787032654996e-05, + "loss": 1.6129, + "step": 106200 + }, + { + "epoch": 1.57, + "learning_rate": 4.214239233317558e-05, + "loss": 1.6151, + "step": 106300 + }, + { + "epoch": 1.57, + "learning_rate": 4.213499763369617e-05, + "loss": 1.6382, + "step": 106400 + }, + { + "epoch": 1.58, + "learning_rate": 4.212760293421675e-05, + "loss": 1.6186, + "step": 106500 + }, + { + "epoch": 1.58, + "learning_rate": 4.212020823473734e-05, + "loss": 1.6173, + "step": 106600 + }, + { + "epoch": 1.58, + "learning_rate": 4.211281353525793e-05, + "loss": 1.6034, + "step": 106700 + }, + { + "epoch": 1.58, + "learning_rate": 4.2105418835778516e-05, + "loss": 1.601, + "step": 106800 + }, + { + "epoch": 1.58, + "learning_rate": 4.20980241362991e-05, + "loss": 1.573, + "step": 106900 + }, + { + "epoch": 1.58, + "learning_rate": 4.209062943681969e-05, + "loss": 1.6017, + "step": 107000 + }, + { + "epoch": 1.58, + "learning_rate": 4.208323473734028e-05, + "loss": 1.6469, + "step": 107100 + }, + { + "epoch": 1.59, + "learning_rate": 4.2075840037860866e-05, + "loss": 1.6156, + "step": 107200 + }, + { + "epoch": 1.59, + "learning_rate": 4.206844533838145e-05, + "loss": 1.6119, + "step": 107300 + }, + { + "epoch": 1.59, + "learning_rate": 4.206105063890204e-05, + "loss": 1.5963, + "step": 107400 + }, + { + "epoch": 1.59, + "learning_rate": 4.205365593942262e-05, + "loss": 1.601, + "step": 107500 + }, + { + "epoch": 1.59, + "learning_rate": 4.2046261239943215e-05, + "loss": 1.6264, + "step": 107600 + }, + { + "epoch": 1.59, + "learning_rate": 4.2038866540463794e-05, + "loss": 1.6199, + "step": 107700 + }, + { + "epoch": 1.59, + "learning_rate": 4.203147184098438e-05, + "loss": 1.5711, + "step": 107800 + }, + { + "epoch": 1.6, + "learning_rate": 4.202407714150497e-05, + "loss": 1.5916, + "step": 107900 + }, + { + "epoch": 1.6, + "learning_rate": 4.201668244202556e-05, + "loss": 1.6458, + "step": 108000 + }, + { + "epoch": 1.6, + "learning_rate": 4.200928774254614e-05, + "loss": 1.6154, + "step": 108100 + }, + { + "epoch": 1.6, + "learning_rate": 4.200189304306673e-05, + "loss": 1.6247, + "step": 108200 + }, + { + "epoch": 1.6, + "learning_rate": 4.199449834358732e-05, + "loss": 1.6042, + "step": 108300 + }, + { + "epoch": 1.6, + "learning_rate": 4.198710364410791e-05, + "loss": 1.607, + "step": 108400 + }, + { + "epoch": 1.6, + "learning_rate": 4.197970894462849e-05, + "loss": 1.63, + "step": 108500 + }, + { + "epoch": 1.61, + "learning_rate": 4.197231424514908e-05, + "loss": 1.6201, + "step": 108600 + }, + { + "epoch": 1.61, + "learning_rate": 4.196491954566967e-05, + "loss": 1.6012, + "step": 108700 + }, + { + "epoch": 1.61, + "learning_rate": 4.195759879318505e-05, + "loss": 1.6356, + "step": 108800 + }, + { + "epoch": 1.61, + "learning_rate": 4.195020409370564e-05, + "loss": 1.6124, + "step": 108900 + }, + { + "epoch": 1.61, + "learning_rate": 4.1942809394226216e-05, + "loss": 1.6356, + "step": 109000 + }, + { + "epoch": 1.61, + "learning_rate": 4.193541469474681e-05, + "loss": 1.6144, + "step": 109100 + }, + { + "epoch": 1.61, + "learning_rate": 4.1928019995267394e-05, + "loss": 1.6298, + "step": 109200 + }, + { + "epoch": 1.62, + "learning_rate": 4.192062529578798e-05, + "loss": 1.6183, + "step": 109300 + }, + { + "epoch": 1.62, + "learning_rate": 4.1913230596308565e-05, + "loss": 1.5844, + "step": 109400 + }, + { + "epoch": 1.62, + "learning_rate": 4.190583589682916e-05, + "loss": 1.6193, + "step": 109500 + }, + { + "epoch": 1.62, + "learning_rate": 4.189844119734974e-05, + "loss": 1.6049, + "step": 109600 + }, + { + "epoch": 1.62, + "learning_rate": 4.189104649787033e-05, + "loss": 1.6099, + "step": 109700 + }, + { + "epoch": 1.62, + "learning_rate": 4.1883651798390914e-05, + "loss": 1.6278, + "step": 109800 + }, + { + "epoch": 1.63, + "learning_rate": 4.1876257098911507e-05, + "loss": 1.6018, + "step": 109900 + }, + { + "epoch": 1.63, + "learning_rate": 4.186886239943209e-05, + "loss": 1.6577, + "step": 110000 + }, + { + "epoch": 1.63, + "learning_rate": 4.186146769995268e-05, + "loss": 1.6181, + "step": 110100 + }, + { + "epoch": 1.63, + "learning_rate": 4.1854073000473264e-05, + "loss": 1.5939, + "step": 110200 + }, + { + "epoch": 1.63, + "learning_rate": 4.184667830099385e-05, + "loss": 1.6358, + "step": 110300 + }, + { + "epoch": 1.63, + "learning_rate": 4.1839283601514435e-05, + "loss": 1.6107, + "step": 110400 + }, + { + "epoch": 1.63, + "learning_rate": 4.183188890203502e-05, + "loss": 1.6319, + "step": 110500 + }, + { + "epoch": 1.64, + "learning_rate": 4.1824494202555606e-05, + "loss": 1.6506, + "step": 110600 + }, + { + "epoch": 1.64, + "learning_rate": 4.18170995030762e-05, + "loss": 1.6076, + "step": 110700 + }, + { + "epoch": 1.64, + "learning_rate": 4.1809704803596784e-05, + "loss": 1.6188, + "step": 110800 + }, + { + "epoch": 1.64, + "learning_rate": 4.180231010411737e-05, + "loss": 1.6039, + "step": 110900 + }, + { + "epoch": 1.64, + "learning_rate": 4.1794915404637955e-05, + "loss": 1.6352, + "step": 111000 + }, + { + "epoch": 1.64, + "learning_rate": 4.178752070515855e-05, + "loss": 1.6277, + "step": 111100 + }, + { + "epoch": 1.64, + "learning_rate": 4.1780126005679133e-05, + "loss": 1.6297, + "step": 111200 + }, + { + "epoch": 1.65, + "learning_rate": 4.177273130619972e-05, + "loss": 1.6275, + "step": 111300 + }, + { + "epoch": 1.65, + "learning_rate": 4.1765336606720305e-05, + "loss": 1.5865, + "step": 111400 + }, + { + "epoch": 1.65, + "learning_rate": 4.17579419072409e-05, + "loss": 1.6496, + "step": 111500 + }, + { + "epoch": 1.65, + "learning_rate": 4.175054720776148e-05, + "loss": 1.6375, + "step": 111600 + }, + { + "epoch": 1.65, + "learning_rate": 4.174315250828206e-05, + "loss": 1.6219, + "step": 111700 + }, + { + "epoch": 1.65, + "learning_rate": 4.173575780880265e-05, + "loss": 1.6049, + "step": 111800 + }, + { + "epoch": 1.65, + "learning_rate": 4.172836310932324e-05, + "loss": 1.6053, + "step": 111900 + }, + { + "epoch": 1.66, + "learning_rate": 4.1720968409843825e-05, + "loss": 1.5958, + "step": 112000 + }, + { + "epoch": 1.66, + "learning_rate": 4.171357371036441e-05, + "loss": 1.6279, + "step": 112100 + }, + { + "epoch": 1.66, + "learning_rate": 4.170625295787979e-05, + "loss": 1.5954, + "step": 112200 + }, + { + "epoch": 1.66, + "learning_rate": 4.1698858258400384e-05, + "loss": 1.6297, + "step": 112300 + }, + { + "epoch": 1.66, + "learning_rate": 4.169146355892097e-05, + "loss": 1.5992, + "step": 112400 + }, + { + "epoch": 1.66, + "learning_rate": 4.1684068859441555e-05, + "loss": 1.6119, + "step": 112500 + }, + { + "epoch": 1.67, + "learning_rate": 4.167667415996214e-05, + "loss": 1.6081, + "step": 112600 + }, + { + "epoch": 1.67, + "learning_rate": 4.1669279460482726e-05, + "loss": 1.5996, + "step": 112700 + }, + { + "epoch": 1.67, + "learning_rate": 4.166188476100332e-05, + "loss": 1.6175, + "step": 112800 + }, + { + "epoch": 1.67, + "learning_rate": 4.1654490061523904e-05, + "loss": 1.5972, + "step": 112900 + }, + { + "epoch": 1.67, + "learning_rate": 4.164709536204448e-05, + "loss": 1.6118, + "step": 113000 + }, + { + "epoch": 1.67, + "learning_rate": 4.1639700662565076e-05, + "loss": 1.5981, + "step": 113100 + }, + { + "epoch": 1.67, + "learning_rate": 4.163230596308566e-05, + "loss": 1.6277, + "step": 113200 + }, + { + "epoch": 1.68, + "learning_rate": 4.162491126360625e-05, + "loss": 1.6026, + "step": 113300 + }, + { + "epoch": 1.68, + "learning_rate": 4.161751656412683e-05, + "loss": 1.618, + "step": 113400 + }, + { + "epoch": 1.68, + "learning_rate": 4.1610121864647425e-05, + "loss": 1.6391, + "step": 113500 + }, + { + "epoch": 1.68, + "learning_rate": 4.160272716516801e-05, + "loss": 1.6182, + "step": 113600 + }, + { + "epoch": 1.68, + "learning_rate": 4.1595332465688596e-05, + "loss": 1.6186, + "step": 113700 + }, + { + "epoch": 1.68, + "learning_rate": 4.158793776620918e-05, + "loss": 1.6004, + "step": 113800 + }, + { + "epoch": 1.68, + "learning_rate": 4.1580543066729774e-05, + "loss": 1.6235, + "step": 113900 + }, + { + "epoch": 1.69, + "learning_rate": 4.157314836725036e-05, + "loss": 1.6156, + "step": 114000 + }, + { + "epoch": 1.69, + "learning_rate": 4.1565753667770946e-05, + "loss": 1.6116, + "step": 114100 + }, + { + "epoch": 1.69, + "learning_rate": 4.155835896829153e-05, + "loss": 1.6172, + "step": 114200 + }, + { + "epoch": 1.69, + "learning_rate": 4.155103821580691e-05, + "loss": 1.6125, + "step": 114300 + }, + { + "epoch": 1.69, + "learning_rate": 4.15436435163275e-05, + "loss": 1.63, + "step": 114400 + }, + { + "epoch": 1.69, + "learning_rate": 4.153624881684808e-05, + "loss": 1.6347, + "step": 114500 + }, + { + "epoch": 1.69, + "learning_rate": 4.152885411736867e-05, + "loss": 1.5803, + "step": 114600 + }, + { + "epoch": 1.7, + "learning_rate": 4.152145941788926e-05, + "loss": 1.6091, + "step": 114700 + }, + { + "epoch": 1.7, + "learning_rate": 4.151406471840985e-05, + "loss": 1.5889, + "step": 114800 + }, + { + "epoch": 1.7, + "learning_rate": 4.150667001893043e-05, + "loss": 1.6024, + "step": 114900 + }, + { + "epoch": 1.7, + "learning_rate": 4.149927531945102e-05, + "loss": 1.6139, + "step": 115000 + }, + { + "epoch": 1.7, + "learning_rate": 4.149188061997161e-05, + "loss": 1.6277, + "step": 115100 + }, + { + "epoch": 1.7, + "learning_rate": 4.1484485920492196e-05, + "loss": 1.6008, + "step": 115200 + }, + { + "epoch": 1.71, + "learning_rate": 4.147709122101278e-05, + "loss": 1.6125, + "step": 115300 + }, + { + "epoch": 1.71, + "learning_rate": 4.146969652153337e-05, + "loss": 1.6353, + "step": 115400 + }, + { + "epoch": 1.71, + "learning_rate": 4.146230182205395e-05, + "loss": 1.6403, + "step": 115500 + }, + { + "epoch": 1.71, + "learning_rate": 4.145490712257454e-05, + "loss": 1.605, + "step": 115600 + }, + { + "epoch": 1.71, + "learning_rate": 4.1447512423095124e-05, + "loss": 1.6449, + "step": 115700 + }, + { + "epoch": 1.71, + "learning_rate": 4.144011772361571e-05, + "loss": 1.5805, + "step": 115800 + }, + { + "epoch": 1.71, + "learning_rate": 4.14327230241363e-05, + "loss": 1.603, + "step": 115900 + }, + { + "epoch": 1.72, + "learning_rate": 4.142532832465689e-05, + "loss": 1.5695, + "step": 116000 + }, + { + "epoch": 1.72, + "learning_rate": 4.1417933625177474e-05, + "loss": 1.6149, + "step": 116100 + }, + { + "epoch": 1.72, + "learning_rate": 4.141053892569806e-05, + "loss": 1.6009, + "step": 116200 + }, + { + "epoch": 1.72, + "learning_rate": 4.140314422621865e-05, + "loss": 1.6232, + "step": 116300 + }, + { + "epoch": 1.72, + "learning_rate": 4.139582347373403e-05, + "loss": 1.6138, + "step": 116400 + }, + { + "epoch": 1.72, + "learning_rate": 4.138842877425462e-05, + "loss": 1.6044, + "step": 116500 + }, + { + "epoch": 1.72, + "learning_rate": 4.13810340747752e-05, + "loss": 1.6301, + "step": 116600 + }, + { + "epoch": 1.73, + "learning_rate": 4.137363937529579e-05, + "loss": 1.6139, + "step": 116700 + }, + { + "epoch": 1.73, + "learning_rate": 4.136624467581638e-05, + "loss": 1.6057, + "step": 116800 + }, + { + "epoch": 1.73, + "learning_rate": 4.135884997633697e-05, + "loss": 1.6101, + "step": 116900 + }, + { + "epoch": 1.73, + "learning_rate": 4.1351455276857546e-05, + "loss": 1.6063, + "step": 117000 + }, + { + "epoch": 1.73, + "learning_rate": 4.134406057737814e-05, + "loss": 1.5845, + "step": 117100 + }, + { + "epoch": 1.73, + "learning_rate": 4.1336665877898724e-05, + "loss": 1.6047, + "step": 117200 + }, + { + "epoch": 1.73, + "learning_rate": 4.132927117841931e-05, + "loss": 1.6, + "step": 117300 + }, + { + "epoch": 1.74, + "learning_rate": 4.1321876478939895e-05, + "loss": 1.6186, + "step": 117400 + }, + { + "epoch": 1.74, + "learning_rate": 4.131448177946049e-05, + "loss": 1.591, + "step": 117500 + }, + { + "epoch": 1.74, + "learning_rate": 4.130708707998107e-05, + "loss": 1.5918, + "step": 117600 + }, + { + "epoch": 1.74, + "learning_rate": 4.129969238050166e-05, + "loss": 1.6115, + "step": 117700 + }, + { + "epoch": 1.74, + "learning_rate": 4.1292297681022245e-05, + "loss": 1.6066, + "step": 117800 + }, + { + "epoch": 1.74, + "learning_rate": 4.128490298154283e-05, + "loss": 1.6162, + "step": 117900 + }, + { + "epoch": 1.75, + "learning_rate": 4.127750828206342e-05, + "loss": 1.5887, + "step": 118000 + }, + { + "epoch": 1.75, + "learning_rate": 4.127011358258401e-05, + "loss": 1.5795, + "step": 118100 + }, + { + "epoch": 1.75, + "learning_rate": 4.1262718883104594e-05, + "loss": 1.6066, + "step": 118200 + }, + { + "epoch": 1.75, + "learning_rate": 4.125532418362518e-05, + "loss": 1.6255, + "step": 118300 + }, + { + "epoch": 1.75, + "learning_rate": 4.1247929484145765e-05, + "loss": 1.6086, + "step": 118400 + }, + { + "epoch": 1.75, + "learning_rate": 4.1240608731661146e-05, + "loss": 1.6209, + "step": 118500 + }, + { + "epoch": 1.75, + "learning_rate": 4.123321403218173e-05, + "loss": 1.6299, + "step": 118600 + }, + { + "epoch": 1.76, + "learning_rate": 4.122581933270232e-05, + "loss": 1.6455, + "step": 118700 + }, + { + "epoch": 1.76, + "learning_rate": 4.121842463322291e-05, + "loss": 1.5975, + "step": 118800 + }, + { + "epoch": 1.76, + "learning_rate": 4.1211029933743495e-05, + "loss": 1.5845, + "step": 118900 + }, + { + "epoch": 1.76, + "learning_rate": 4.120363523426408e-05, + "loss": 1.598, + "step": 119000 + }, + { + "epoch": 1.76, + "learning_rate": 4.1196240534784666e-05, + "loss": 1.6076, + "step": 119100 + }, + { + "epoch": 1.76, + "learning_rate": 4.118884583530526e-05, + "loss": 1.6015, + "step": 119200 + }, + { + "epoch": 1.76, + "learning_rate": 4.1181451135825844e-05, + "loss": 1.6004, + "step": 119300 + }, + { + "epoch": 1.77, + "learning_rate": 4.117405643634643e-05, + "loss": 1.6358, + "step": 119400 + }, + { + "epoch": 1.77, + "learning_rate": 4.1166661736867015e-05, + "loss": 1.6061, + "step": 119500 + }, + { + "epoch": 1.77, + "learning_rate": 4.11592670373876e-05, + "loss": 1.5997, + "step": 119600 + }, + { + "epoch": 1.77, + "learning_rate": 4.115187233790819e-05, + "loss": 1.5813, + "step": 119700 + }, + { + "epoch": 1.77, + "learning_rate": 4.114447763842877e-05, + "loss": 1.5993, + "step": 119800 + }, + { + "epoch": 1.77, + "learning_rate": 4.1137082938949365e-05, + "loss": 1.5982, + "step": 119900 + }, + { + "epoch": 1.77, + "learning_rate": 4.112968823946995e-05, + "loss": 1.5928, + "step": 120000 + }, + { + "epoch": 1.78, + "learning_rate": 4.1122293539990536e-05, + "loss": 1.6137, + "step": 120100 + }, + { + "epoch": 1.78, + "learning_rate": 4.111489884051112e-05, + "loss": 1.6005, + "step": 120200 + }, + { + "epoch": 1.78, + "learning_rate": 4.1107504141031714e-05, + "loss": 1.5991, + "step": 120300 + }, + { + "epoch": 1.78, + "learning_rate": 4.11001094415523e-05, + "loss": 1.5959, + "step": 120400 + }, + { + "epoch": 1.78, + "learning_rate": 4.1092714742072885e-05, + "loss": 1.6012, + "step": 120500 + }, + { + "epoch": 1.78, + "learning_rate": 4.1085393989588266e-05, + "loss": 1.5821, + "step": 120600 + }, + { + "epoch": 1.79, + "learning_rate": 4.107799929010885e-05, + "loss": 1.6066, + "step": 120700 + }, + { + "epoch": 1.79, + "learning_rate": 4.1070604590629444e-05, + "loss": 1.6028, + "step": 120800 + }, + { + "epoch": 1.79, + "learning_rate": 4.106320989115002e-05, + "loss": 1.5734, + "step": 120900 + }, + { + "epoch": 1.79, + "learning_rate": 4.105581519167061e-05, + "loss": 1.5935, + "step": 121000 + }, + { + "epoch": 1.79, + "learning_rate": 4.1048420492191194e-05, + "loss": 1.6123, + "step": 121100 + }, + { + "epoch": 1.79, + "learning_rate": 4.1041025792711786e-05, + "loss": 1.5882, + "step": 121200 + }, + { + "epoch": 1.79, + "learning_rate": 4.103363109323237e-05, + "loss": 1.583, + "step": 121300 + }, + { + "epoch": 1.8, + "learning_rate": 4.102623639375296e-05, + "loss": 1.6131, + "step": 121400 + }, + { + "epoch": 1.8, + "learning_rate": 4.1018841694273543e-05, + "loss": 1.5695, + "step": 121500 + }, + { + "epoch": 1.8, + "learning_rate": 4.1011446994794136e-05, + "loss": 1.6164, + "step": 121600 + }, + { + "epoch": 1.8, + "learning_rate": 4.100405229531472e-05, + "loss": 1.6036, + "step": 121700 + }, + { + "epoch": 1.8, + "learning_rate": 4.099665759583531e-05, + "loss": 1.6041, + "step": 121800 + }, + { + "epoch": 1.8, + "learning_rate": 4.098926289635589e-05, + "loss": 1.5975, + "step": 121900 + }, + { + "epoch": 1.8, + "learning_rate": 4.0981868196876485e-05, + "loss": 1.6005, + "step": 122000 + }, + { + "epoch": 1.81, + "learning_rate": 4.097447349739707e-05, + "loss": 1.6059, + "step": 122100 + }, + { + "epoch": 1.81, + "learning_rate": 4.0967078797917656e-05, + "loss": 1.5917, + "step": 122200 + }, + { + "epoch": 1.81, + "learning_rate": 4.095968409843824e-05, + "loss": 1.5738, + "step": 122300 + }, + { + "epoch": 1.81, + "learning_rate": 4.095228939895883e-05, + "loss": 1.5641, + "step": 122400 + }, + { + "epoch": 1.81, + "learning_rate": 4.094489469947941e-05, + "loss": 1.5712, + "step": 122500 + }, + { + "epoch": 1.81, + "learning_rate": 4.09375e-05, + "loss": 1.5988, + "step": 122600 + }, + { + "epoch": 1.81, + "learning_rate": 4.093010530052059e-05, + "loss": 1.5891, + "step": 122700 + }, + { + "epoch": 1.82, + "learning_rate": 4.092278454803597e-05, + "loss": 1.6058, + "step": 122800 + }, + { + "epoch": 1.82, + "learning_rate": 4.091538984855656e-05, + "loss": 1.6177, + "step": 122900 + }, + { + "epoch": 1.82, + "learning_rate": 4.090799514907714e-05, + "loss": 1.6241, + "step": 123000 + }, + { + "epoch": 1.82, + "learning_rate": 4.090060044959773e-05, + "loss": 1.5881, + "step": 123100 + }, + { + "epoch": 1.82, + "learning_rate": 4.089320575011832e-05, + "loss": 1.5933, + "step": 123200 + }, + { + "epoch": 1.82, + "learning_rate": 4.088581105063891e-05, + "loss": 1.5864, + "step": 123300 + }, + { + "epoch": 1.82, + "learning_rate": 4.087841635115949e-05, + "loss": 1.6276, + "step": 123400 + }, + { + "epoch": 1.83, + "learning_rate": 4.087102165168008e-05, + "loss": 1.6104, + "step": 123500 + }, + { + "epoch": 1.83, + "learning_rate": 4.0863626952200664e-05, + "loss": 1.6178, + "step": 123600 + }, + { + "epoch": 1.83, + "learning_rate": 4.085623225272125e-05, + "loss": 1.6164, + "step": 123700 + }, + { + "epoch": 1.83, + "learning_rate": 4.0848837553241835e-05, + "loss": 1.6137, + "step": 123800 + }, + { + "epoch": 1.83, + "learning_rate": 4.084144285376242e-05, + "loss": 1.6, + "step": 123900 + }, + { + "epoch": 1.83, + "learning_rate": 4.083404815428301e-05, + "loss": 1.6108, + "step": 124000 + }, + { + "epoch": 1.84, + "learning_rate": 4.08266534548036e-05, + "loss": 1.62, + "step": 124100 + }, + { + "epoch": 1.84, + "learning_rate": 4.0819258755324184e-05, + "loss": 1.5989, + "step": 124200 + }, + { + "epoch": 1.84, + "learning_rate": 4.081186405584477e-05, + "loss": 1.5963, + "step": 124300 + }, + { + "epoch": 1.84, + "learning_rate": 4.080446935636536e-05, + "loss": 1.5965, + "step": 124400 + }, + { + "epoch": 1.84, + "learning_rate": 4.079707465688595e-05, + "loss": 1.6012, + "step": 124500 + }, + { + "epoch": 1.84, + "learning_rate": 4.0789679957406534e-05, + "loss": 1.6007, + "step": 124600 + }, + { + "epoch": 1.84, + "learning_rate": 4.078228525792712e-05, + "loss": 1.5601, + "step": 124700 + }, + { + "epoch": 1.85, + "learning_rate": 4.0774964505442507e-05, + "loss": 1.6455, + "step": 124800 + }, + { + "epoch": 1.85, + "learning_rate": 4.0767569805963085e-05, + "loss": 1.5978, + "step": 124900 + }, + { + "epoch": 1.85, + "learning_rate": 4.076017510648367e-05, + "loss": 1.6046, + "step": 125000 + }, + { + "epoch": 1.85, + "learning_rate": 4.075278040700426e-05, + "loss": 1.5961, + "step": 125100 + }, + { + "epoch": 1.85, + "learning_rate": 4.074538570752485e-05, + "loss": 1.6216, + "step": 125200 + }, + { + "epoch": 1.85, + "learning_rate": 4.0737991008045435e-05, + "loss": 1.615, + "step": 125300 + }, + { + "epoch": 1.85, + "learning_rate": 4.073059630856602e-05, + "loss": 1.5908, + "step": 125400 + }, + { + "epoch": 1.86, + "learning_rate": 4.0723201609086606e-05, + "loss": 1.5896, + "step": 125500 + }, + { + "epoch": 1.86, + "learning_rate": 4.07158069096072e-05, + "loss": 1.6085, + "step": 125600 + }, + { + "epoch": 1.86, + "learning_rate": 4.0708412210127784e-05, + "loss": 1.5954, + "step": 125700 + }, + { + "epoch": 1.86, + "learning_rate": 4.070101751064837e-05, + "loss": 1.5981, + "step": 125800 + }, + { + "epoch": 1.86, + "learning_rate": 4.0693622811168955e-05, + "loss": 1.599, + "step": 125900 + }, + { + "epoch": 1.86, + "learning_rate": 4.068622811168955e-05, + "loss": 1.5742, + "step": 126000 + }, + { + "epoch": 1.86, + "learning_rate": 4.067883341221013e-05, + "loss": 1.5918, + "step": 126100 + }, + { + "epoch": 1.87, + "learning_rate": 4.067143871273071e-05, + "loss": 1.6162, + "step": 126200 + }, + { + "epoch": 1.87, + "learning_rate": 4.06640440132513e-05, + "loss": 1.5793, + "step": 126300 + }, + { + "epoch": 1.87, + "learning_rate": 4.065664931377189e-05, + "loss": 1.5984, + "step": 126400 + }, + { + "epoch": 1.87, + "learning_rate": 4.0649254614292476e-05, + "loss": 1.5852, + "step": 126500 + }, + { + "epoch": 1.87, + "learning_rate": 4.064185991481306e-05, + "loss": 1.5996, + "step": 126600 + }, + { + "epoch": 1.87, + "learning_rate": 4.063446521533365e-05, + "loss": 1.5966, + "step": 126700 + }, + { + "epoch": 1.88, + "learning_rate": 4.062707051585424e-05, + "loss": 1.593, + "step": 126800 + }, + { + "epoch": 1.88, + "learning_rate": 4.0619675816374825e-05, + "loss": 1.5904, + "step": 126900 + }, + { + "epoch": 1.88, + "learning_rate": 4.061228111689541e-05, + "loss": 1.5995, + "step": 127000 + }, + { + "epoch": 1.88, + "learning_rate": 4.0604886417415996e-05, + "loss": 1.5983, + "step": 127100 + }, + { + "epoch": 1.88, + "learning_rate": 4.059749171793659e-05, + "loss": 1.6027, + "step": 127200 + }, + { + "epoch": 1.88, + "learning_rate": 4.0590097018457175e-05, + "loss": 1.5768, + "step": 127300 + }, + { + "epoch": 1.88, + "learning_rate": 4.058270231897776e-05, + "loss": 1.6083, + "step": 127400 + }, + { + "epoch": 1.89, + "learning_rate": 4.0575307619498346e-05, + "loss": 1.6082, + "step": 127500 + }, + { + "epoch": 1.89, + "learning_rate": 4.0567986867013726e-05, + "loss": 1.5841, + "step": 127600 + }, + { + "epoch": 1.89, + "learning_rate": 4.056059216753431e-05, + "loss": 1.617, + "step": 127700 + }, + { + "epoch": 1.89, + "learning_rate": 4.05531974680549e-05, + "loss": 1.6136, + "step": 127800 + }, + { + "epoch": 1.89, + "learning_rate": 4.054580276857548e-05, + "loss": 1.6089, + "step": 127900 + }, + { + "epoch": 1.89, + "learning_rate": 4.0538408069096076e-05, + "loss": 1.6092, + "step": 128000 + }, + { + "epoch": 1.89, + "learning_rate": 4.053101336961666e-05, + "loss": 1.6015, + "step": 128100 + }, + { + "epoch": 1.9, + "learning_rate": 4.052361867013725e-05, + "loss": 1.5771, + "step": 128200 + }, + { + "epoch": 1.9, + "learning_rate": 4.051622397065783e-05, + "loss": 1.5755, + "step": 128300 + }, + { + "epoch": 1.9, + "learning_rate": 4.0508829271178425e-05, + "loss": 1.5881, + "step": 128400 + }, + { + "epoch": 1.9, + "learning_rate": 4.050143457169901e-05, + "loss": 1.5942, + "step": 128500 + }, + { + "epoch": 1.9, + "learning_rate": 4.0494039872219596e-05, + "loss": 1.608, + "step": 128600 + }, + { + "epoch": 1.9, + "learning_rate": 4.048664517274018e-05, + "loss": 1.5872, + "step": 128700 + }, + { + "epoch": 1.9, + "learning_rate": 4.0479250473260774e-05, + "loss": 1.5956, + "step": 128800 + }, + { + "epoch": 1.91, + "learning_rate": 4.047185577378135e-05, + "loss": 1.5904, + "step": 128900 + }, + { + "epoch": 1.91, + "learning_rate": 4.046446107430194e-05, + "loss": 1.597, + "step": 129000 + }, + { + "epoch": 1.91, + "learning_rate": 4.0457066374822524e-05, + "loss": 1.5936, + "step": 129100 + }, + { + "epoch": 1.91, + "learning_rate": 4.044967167534312e-05, + "loss": 1.5797, + "step": 129200 + }, + { + "epoch": 1.91, + "learning_rate": 4.04422769758637e-05, + "loss": 1.6017, + "step": 129300 + }, + { + "epoch": 1.91, + "learning_rate": 4.043488227638429e-05, + "loss": 1.5854, + "step": 129400 + }, + { + "epoch": 1.92, + "learning_rate": 4.0427487576904874e-05, + "loss": 1.6013, + "step": 129500 + }, + { + "epoch": 1.92, + "learning_rate": 4.0420092877425466e-05, + "loss": 1.5885, + "step": 129600 + }, + { + "epoch": 1.92, + "learning_rate": 4.041269817794605e-05, + "loss": 1.58, + "step": 129700 + }, + { + "epoch": 1.92, + "learning_rate": 4.040530347846664e-05, + "loss": 1.5903, + "step": 129800 + }, + { + "epoch": 1.92, + "learning_rate": 4.039798272598202e-05, + "loss": 1.5957, + "step": 129900 + }, + { + "epoch": 1.92, + "learning_rate": 4.039058802650261e-05, + "loss": 1.6014, + "step": 130000 + }, + { + "epoch": 1.92, + "learning_rate": 4.0383193327023196e-05, + "loss": 1.5769, + "step": 130100 + }, + { + "epoch": 1.93, + "learning_rate": 4.0375798627543775e-05, + "loss": 1.5868, + "step": 130200 + }, + { + "epoch": 1.93, + "learning_rate": 4.036840392806436e-05, + "loss": 1.5793, + "step": 130300 + }, + { + "epoch": 1.93, + "learning_rate": 4.036100922858495e-05, + "loss": 1.5808, + "step": 130400 + }, + { + "epoch": 1.93, + "learning_rate": 4.035361452910554e-05, + "loss": 1.6049, + "step": 130500 + }, + { + "epoch": 1.93, + "learning_rate": 4.0346219829626124e-05, + "loss": 1.5859, + "step": 130600 + }, + { + "epoch": 1.93, + "learning_rate": 4.033882513014671e-05, + "loss": 1.5937, + "step": 130700 + }, + { + "epoch": 1.93, + "learning_rate": 4.03314304306673e-05, + "loss": 1.5681, + "step": 130800 + }, + { + "epoch": 1.94, + "learning_rate": 4.032403573118789e-05, + "loss": 1.5643, + "step": 130900 + }, + { + "epoch": 1.94, + "learning_rate": 4.0316641031708473e-05, + "loss": 1.5766, + "step": 131000 + }, + { + "epoch": 1.94, + "learning_rate": 4.030924633222906e-05, + "loss": 1.5856, + "step": 131100 + }, + { + "epoch": 1.94, + "learning_rate": 4.030185163274965e-05, + "loss": 1.5886, + "step": 131200 + }, + { + "epoch": 1.94, + "learning_rate": 4.029445693327024e-05, + "loss": 1.5961, + "step": 131300 + }, + { + "epoch": 1.94, + "learning_rate": 4.028706223379082e-05, + "loss": 1.5748, + "step": 131400 + }, + { + "epoch": 1.94, + "learning_rate": 4.02796675343114e-05, + "loss": 1.5699, + "step": 131500 + }, + { + "epoch": 1.95, + "learning_rate": 4.0272272834831994e-05, + "loss": 1.5609, + "step": 131600 + }, + { + "epoch": 1.95, + "learning_rate": 4.026487813535258e-05, + "loss": 1.5749, + "step": 131700 + }, + { + "epoch": 1.95, + "learning_rate": 4.0257483435873165e-05, + "loss": 1.6034, + "step": 131800 + }, + { + "epoch": 1.95, + "learning_rate": 4.025008873639375e-05, + "loss": 1.5878, + "step": 131900 + }, + { + "epoch": 1.95, + "learning_rate": 4.024276798390914e-05, + "loss": 1.578, + "step": 132000 + }, + { + "epoch": 1.95, + "learning_rate": 4.0235373284429724e-05, + "loss": 1.5826, + "step": 132100 + }, + { + "epoch": 1.96, + "learning_rate": 4.022797858495031e-05, + "loss": 1.583, + "step": 132200 + }, + { + "epoch": 1.96, + "learning_rate": 4.0220583885470895e-05, + "loss": 1.5857, + "step": 132300 + }, + { + "epoch": 1.96, + "learning_rate": 4.021318918599149e-05, + "loss": 1.5949, + "step": 132400 + }, + { + "epoch": 1.96, + "learning_rate": 4.020579448651207e-05, + "loss": 1.571, + "step": 132500 + }, + { + "epoch": 1.96, + "learning_rate": 4.019839978703266e-05, + "loss": 1.5904, + "step": 132600 + }, + { + "epoch": 1.96, + "learning_rate": 4.0191005087553244e-05, + "loss": 1.6172, + "step": 132700 + }, + { + "epoch": 1.96, + "learning_rate": 4.018361038807383e-05, + "loss": 1.6077, + "step": 132800 + }, + { + "epoch": 1.97, + "learning_rate": 4.0176215688594416e-05, + "loss": 1.5975, + "step": 132900 + }, + { + "epoch": 1.97, + "learning_rate": 4.0168820989115e-05, + "loss": 1.6193, + "step": 133000 + }, + { + "epoch": 1.97, + "learning_rate": 4.016142628963559e-05, + "loss": 1.6009, + "step": 133100 + }, + { + "epoch": 1.97, + "learning_rate": 4.015403159015618e-05, + "loss": 1.6021, + "step": 133200 + }, + { + "epoch": 1.97, + "learning_rate": 4.0146636890676765e-05, + "loss": 1.5894, + "step": 133300 + }, + { + "epoch": 1.97, + "learning_rate": 4.013924219119735e-05, + "loss": 1.5904, + "step": 133400 + }, + { + "epoch": 1.97, + "learning_rate": 4.0131847491717936e-05, + "loss": 1.602, + "step": 133500 + }, + { + "epoch": 1.98, + "learning_rate": 4.012445279223853e-05, + "loss": 1.6167, + "step": 133600 + }, + { + "epoch": 1.98, + "learning_rate": 4.0117058092759114e-05, + "loss": 1.5815, + "step": 133700 + }, + { + "epoch": 1.98, + "learning_rate": 4.01096633932797e-05, + "loss": 1.5991, + "step": 133800 + }, + { + "epoch": 1.98, + "learning_rate": 4.0102268693800286e-05, + "loss": 1.5706, + "step": 133900 + }, + { + "epoch": 1.98, + "learning_rate": 4.009487399432088e-05, + "loss": 1.5871, + "step": 134000 + }, + { + "epoch": 1.98, + "learning_rate": 4.008747929484146e-05, + "loss": 1.5782, + "step": 134100 + }, + { + "epoch": 1.98, + "learning_rate": 4.008008459536204e-05, + "loss": 1.5842, + "step": 134200 + }, + { + "epoch": 1.99, + "learning_rate": 4.007268989588263e-05, + "loss": 1.5758, + "step": 134300 + }, + { + "epoch": 1.99, + "learning_rate": 4.006529519640322e-05, + "loss": 1.6083, + "step": 134400 + }, + { + "epoch": 1.99, + "learning_rate": 4.0057900496923806e-05, + "loss": 1.5591, + "step": 134500 + }, + { + "epoch": 1.99, + "learning_rate": 4.005050579744439e-05, + "loss": 1.5663, + "step": 134600 + }, + { + "epoch": 1.99, + "learning_rate": 4.004318504495977e-05, + "loss": 1.6085, + "step": 134700 + }, + { + "epoch": 1.99, + "learning_rate": 4.0035790345480365e-05, + "loss": 1.5919, + "step": 134800 + }, + { + "epoch": 2.0, + "learning_rate": 4.002839564600095e-05, + "loss": 1.5955, + "step": 134900 + }, + { + "epoch": 2.0, + "learning_rate": 4.0021000946521536e-05, + "loss": 1.5813, + "step": 135000 + }, + { + "epoch": 2.0, + "learning_rate": 4.001360624704212e-05, + "loss": 1.5893, + "step": 135100 + }, + { + "epoch": 2.0, + "learning_rate": 4.0006211547562714e-05, + "loss": 1.5904, + "step": 135200 + }, + { + "epoch": 2.0, + "learning_rate": 3.99988168480833e-05, + "loss": 1.5645, + "step": 135300 + }, + { + "epoch": 2.0, + "learning_rate": 3.9991422148603885e-05, + "loss": 1.5418, + "step": 135400 + }, + { + "epoch": 2.0, + "learning_rate": 3.9984027449124464e-05, + "loss": 1.5324, + "step": 135500 + }, + { + "epoch": 2.01, + "learning_rate": 3.9976632749645057e-05, + "loss": 1.5107, + "step": 135600 + }, + { + "epoch": 2.01, + "learning_rate": 3.996923805016564e-05, + "loss": 1.512, + "step": 135700 + }, + { + "epoch": 2.01, + "learning_rate": 3.996184335068623e-05, + "loss": 1.5242, + "step": 135800 + }, + { + "epoch": 2.01, + "learning_rate": 3.9954448651206814e-05, + "loss": 1.4948, + "step": 135900 + }, + { + "epoch": 2.01, + "learning_rate": 3.9947053951727406e-05, + "loss": 1.5368, + "step": 136000 + }, + { + "epoch": 2.01, + "learning_rate": 3.993965925224799e-05, + "loss": 1.5353, + "step": 136100 + }, + { + "epoch": 2.01, + "learning_rate": 3.993226455276858e-05, + "loss": 1.5298, + "step": 136200 + }, + { + "epoch": 2.02, + "learning_rate": 3.992486985328916e-05, + "loss": 1.5413, + "step": 136300 + }, + { + "epoch": 2.02, + "learning_rate": 3.9917475153809755e-05, + "loss": 1.498, + "step": 136400 + }, + { + "epoch": 2.02, + "learning_rate": 3.991008045433034e-05, + "loss": 1.5026, + "step": 136500 + }, + { + "epoch": 2.02, + "learning_rate": 3.9902685754850927e-05, + "loss": 1.5059, + "step": 136600 + }, + { + "epoch": 2.02, + "learning_rate": 3.989529105537151e-05, + "loss": 1.5477, + "step": 136700 + }, + { + "epoch": 2.02, + "learning_rate": 3.98878963558921e-05, + "loss": 1.509, + "step": 136800 + }, + { + "epoch": 2.02, + "learning_rate": 3.9880501656412683e-05, + "loss": 1.5292, + "step": 136900 + }, + { + "epoch": 2.03, + "learning_rate": 3.987310695693327e-05, + "loss": 1.5119, + "step": 137000 + }, + { + "epoch": 2.03, + "learning_rate": 3.9865712257453855e-05, + "loss": 1.5227, + "step": 137100 + }, + { + "epoch": 2.03, + "learning_rate": 3.985831755797445e-05, + "loss": 1.5055, + "step": 137200 + }, + { + "epoch": 2.03, + "learning_rate": 3.985099680548983e-05, + "loss": 1.5252, + "step": 137300 + }, + { + "epoch": 2.03, + "learning_rate": 3.984360210601041e-05, + "loss": 1.5189, + "step": 137400 + }, + { + "epoch": 2.03, + "learning_rate": 3.9836207406531e-05, + "loss": 1.5386, + "step": 137500 + }, + { + "epoch": 2.04, + "learning_rate": 3.982881270705159e-05, + "loss": 1.5112, + "step": 137600 + }, + { + "epoch": 2.04, + "learning_rate": 3.982141800757218e-05, + "loss": 1.5152, + "step": 137700 + }, + { + "epoch": 2.04, + "learning_rate": 3.981402330809276e-05, + "loss": 1.5212, + "step": 137800 + }, + { + "epoch": 2.04, + "learning_rate": 3.980662860861335e-05, + "loss": 1.5273, + "step": 137900 + }, + { + "epoch": 2.04, + "learning_rate": 3.979923390913394e-05, + "loss": 1.518, + "step": 138000 + }, + { + "epoch": 2.04, + "learning_rate": 3.979183920965452e-05, + "loss": 1.4853, + "step": 138100 + }, + { + "epoch": 2.04, + "learning_rate": 3.9784444510175105e-05, + "loss": 1.5302, + "step": 138200 + }, + { + "epoch": 2.05, + "learning_rate": 3.977704981069569e-05, + "loss": 1.524, + "step": 138300 + }, + { + "epoch": 2.05, + "learning_rate": 3.976965511121628e-05, + "loss": 1.4986, + "step": 138400 + }, + { + "epoch": 2.05, + "learning_rate": 3.976226041173687e-05, + "loss": 1.5343, + "step": 138500 + }, + { + "epoch": 2.05, + "learning_rate": 3.9754865712257454e-05, + "loss": 1.5223, + "step": 138600 + }, + { + "epoch": 2.05, + "learning_rate": 3.974747101277804e-05, + "loss": 1.5207, + "step": 138700 + }, + { + "epoch": 2.05, + "learning_rate": 3.974007631329863e-05, + "loss": 1.5492, + "step": 138800 + }, + { + "epoch": 2.05, + "learning_rate": 3.973268161381922e-05, + "loss": 1.5124, + "step": 138900 + }, + { + "epoch": 2.06, + "learning_rate": 3.9725286914339804e-05, + "loss": 1.5268, + "step": 139000 + }, + { + "epoch": 2.06, + "learning_rate": 3.971789221486039e-05, + "loss": 1.507, + "step": 139100 + }, + { + "epoch": 2.06, + "learning_rate": 3.971049751538098e-05, + "loss": 1.5437, + "step": 139200 + }, + { + "epoch": 2.06, + "learning_rate": 3.970310281590157e-05, + "loss": 1.5095, + "step": 139300 + }, + { + "epoch": 2.06, + "learning_rate": 3.9695708116422146e-05, + "loss": 1.5011, + "step": 139400 + }, + { + "epoch": 2.06, + "learning_rate": 3.968838736393753e-05, + "loss": 1.5183, + "step": 139500 + }, + { + "epoch": 2.06, + "learning_rate": 3.968099266445812e-05, + "loss": 1.5071, + "step": 139600 + }, + { + "epoch": 2.07, + "learning_rate": 3.9673597964978705e-05, + "loss": 1.5277, + "step": 139700 + }, + { + "epoch": 2.07, + "learning_rate": 3.966620326549929e-05, + "loss": 1.5349, + "step": 139800 + }, + { + "epoch": 2.07, + "learning_rate": 3.9658808566019876e-05, + "loss": 1.5254, + "step": 139900 + }, + { + "epoch": 2.07, + "learning_rate": 3.965141386654047e-05, + "loss": 1.5267, + "step": 140000 + }, + { + "epoch": 2.07, + "learning_rate": 3.9644019167061054e-05, + "loss": 1.508, + "step": 140100 + }, + { + "epoch": 2.07, + "learning_rate": 3.963662446758164e-05, + "loss": 1.5463, + "step": 140200 + }, + { + "epoch": 2.07, + "learning_rate": 3.9629229768102225e-05, + "loss": 1.5292, + "step": 140300 + }, + { + "epoch": 2.08, + "learning_rate": 3.962183506862282e-05, + "loss": 1.5332, + "step": 140400 + }, + { + "epoch": 2.08, + "learning_rate": 3.9614440369143403e-05, + "loss": 1.5044, + "step": 140500 + }, + { + "epoch": 2.08, + "learning_rate": 3.960704566966399e-05, + "loss": 1.543, + "step": 140600 + }, + { + "epoch": 2.08, + "learning_rate": 3.9599650970184575e-05, + "loss": 1.5229, + "step": 140700 + }, + { + "epoch": 2.08, + "learning_rate": 3.959225627070516e-05, + "loss": 1.5348, + "step": 140800 + }, + { + "epoch": 2.08, + "learning_rate": 3.9584861571225746e-05, + "loss": 1.5165, + "step": 140900 + }, + { + "epoch": 2.09, + "learning_rate": 3.957746687174633e-05, + "loss": 1.5313, + "step": 141000 + }, + { + "epoch": 2.09, + "learning_rate": 3.957007217226692e-05, + "loss": 1.5268, + "step": 141100 + }, + { + "epoch": 2.09, + "learning_rate": 3.956267747278751e-05, + "loss": 1.5111, + "step": 141200 + }, + { + "epoch": 2.09, + "learning_rate": 3.9555282773308095e-05, + "loss": 1.5268, + "step": 141300 + }, + { + "epoch": 2.09, + "learning_rate": 3.954788807382868e-05, + "loss": 1.5399, + "step": 141400 + }, + { + "epoch": 2.09, + "learning_rate": 3.9540493374349267e-05, + "loss": 1.5195, + "step": 141500 + }, + { + "epoch": 2.09, + "learning_rate": 3.953309867486986e-05, + "loss": 1.5128, + "step": 141600 + }, + { + "epoch": 2.1, + "learning_rate": 3.9525703975390445e-05, + "loss": 1.5272, + "step": 141700 + }, + { + "epoch": 2.1, + "learning_rate": 3.9518383222905825e-05, + "loss": 1.5113, + "step": 141800 + }, + { + "epoch": 2.1, + "learning_rate": 3.951098852342641e-05, + "loss": 1.5051, + "step": 141900 + }, + { + "epoch": 2.1, + "learning_rate": 3.9503593823946996e-05, + "loss": 1.5356, + "step": 142000 + }, + { + "epoch": 2.1, + "learning_rate": 3.949619912446758e-05, + "loss": 1.5489, + "step": 142100 + }, + { + "epoch": 2.1, + "learning_rate": 3.948880442498817e-05, + "loss": 1.54, + "step": 142200 + }, + { + "epoch": 2.1, + "learning_rate": 3.948140972550875e-05, + "loss": 1.5468, + "step": 142300 + }, + { + "epoch": 2.11, + "learning_rate": 3.9474015026029346e-05, + "loss": 1.5041, + "step": 142400 + }, + { + "epoch": 2.11, + "learning_rate": 3.946662032654993e-05, + "loss": 1.4953, + "step": 142500 + }, + { + "epoch": 2.11, + "learning_rate": 3.945922562707052e-05, + "loss": 1.5088, + "step": 142600 + }, + { + "epoch": 2.11, + "learning_rate": 3.94518309275911e-05, + "loss": 1.5399, + "step": 142700 + }, + { + "epoch": 2.11, + "learning_rate": 3.9444436228111695e-05, + "loss": 1.5358, + "step": 142800 + }, + { + "epoch": 2.11, + "learning_rate": 3.943704152863228e-05, + "loss": 1.5313, + "step": 142900 + }, + { + "epoch": 2.11, + "learning_rate": 3.9429646829152866e-05, + "loss": 1.5396, + "step": 143000 + }, + { + "epoch": 2.12, + "learning_rate": 3.942225212967345e-05, + "loss": 1.5393, + "step": 143100 + }, + { + "epoch": 2.12, + "learning_rate": 3.941485743019404e-05, + "loss": 1.5284, + "step": 143200 + }, + { + "epoch": 2.12, + "learning_rate": 3.940746273071463e-05, + "loss": 1.5039, + "step": 143300 + }, + { + "epoch": 2.12, + "learning_rate": 3.940006803123521e-05, + "loss": 1.5301, + "step": 143400 + }, + { + "epoch": 2.12, + "learning_rate": 3.9392673331755795e-05, + "loss": 1.5297, + "step": 143500 + }, + { + "epoch": 2.12, + "learning_rate": 3.938527863227639e-05, + "loss": 1.534, + "step": 143600 + }, + { + "epoch": 2.13, + "learning_rate": 3.937788393279697e-05, + "loss": 1.5147, + "step": 143700 + }, + { + "epoch": 2.13, + "learning_rate": 3.937048923331756e-05, + "loss": 1.5064, + "step": 143800 + }, + { + "epoch": 2.13, + "learning_rate": 3.9363094533838144e-05, + "loss": 1.5557, + "step": 143900 + }, + { + "epoch": 2.13, + "learning_rate": 3.9355699834358736e-05, + "loss": 1.5135, + "step": 144000 + }, + { + "epoch": 2.13, + "learning_rate": 3.934830513487932e-05, + "loss": 1.5297, + "step": 144100 + }, + { + "epoch": 2.13, + "learning_rate": 3.934091043539991e-05, + "loss": 1.5096, + "step": 144200 + }, + { + "epoch": 2.13, + "learning_rate": 3.933351573592049e-05, + "loss": 1.5627, + "step": 144300 + }, + { + "epoch": 2.14, + "learning_rate": 3.9326194983435874e-05, + "loss": 1.5208, + "step": 144400 + }, + { + "epoch": 2.14, + "learning_rate": 3.9318800283956466e-05, + "loss": 1.5148, + "step": 144500 + }, + { + "epoch": 2.14, + "learning_rate": 3.931140558447705e-05, + "loss": 1.5257, + "step": 144600 + }, + { + "epoch": 2.14, + "learning_rate": 3.930401088499763e-05, + "loss": 1.5337, + "step": 144700 + }, + { + "epoch": 2.14, + "learning_rate": 3.929661618551822e-05, + "loss": 1.525, + "step": 144800 + }, + { + "epoch": 2.14, + "learning_rate": 3.928922148603881e-05, + "loss": 1.4842, + "step": 144900 + }, + { + "epoch": 2.14, + "learning_rate": 3.9281826786559394e-05, + "loss": 1.5046, + "step": 145000 + }, + { + "epoch": 2.15, + "learning_rate": 3.927443208707998e-05, + "loss": 1.5138, + "step": 145100 + }, + { + "epoch": 2.15, + "learning_rate": 3.926703738760057e-05, + "loss": 1.5022, + "step": 145200 + }, + { + "epoch": 2.15, + "learning_rate": 3.925964268812116e-05, + "loss": 1.5468, + "step": 145300 + }, + { + "epoch": 2.15, + "learning_rate": 3.9252247988641744e-05, + "loss": 1.5206, + "step": 145400 + }, + { + "epoch": 2.15, + "learning_rate": 3.924485328916233e-05, + "loss": 1.5115, + "step": 145500 + }, + { + "epoch": 2.15, + "learning_rate": 3.923745858968292e-05, + "loss": 1.5336, + "step": 145600 + }, + { + "epoch": 2.15, + "learning_rate": 3.923006389020351e-05, + "loss": 1.5051, + "step": 145700 + }, + { + "epoch": 2.16, + "learning_rate": 3.922266919072409e-05, + "loss": 1.5278, + "step": 145800 + }, + { + "epoch": 2.16, + "learning_rate": 3.921527449124468e-05, + "loss": 1.5284, + "step": 145900 + }, + { + "epoch": 2.16, + "learning_rate": 3.9207879791765264e-05, + "loss": 1.5171, + "step": 146000 + }, + { + "epoch": 2.16, + "learning_rate": 3.920048509228585e-05, + "loss": 1.5166, + "step": 146100 + }, + { + "epoch": 2.16, + "learning_rate": 3.9193090392806435e-05, + "loss": 1.5325, + "step": 146200 + }, + { + "epoch": 2.16, + "learning_rate": 3.918569569332702e-05, + "loss": 1.495, + "step": 146300 + }, + { + "epoch": 2.17, + "learning_rate": 3.9178300993847613e-05, + "loss": 1.5307, + "step": 146400 + }, + { + "epoch": 2.17, + "learning_rate": 3.91709062943682e-05, + "loss": 1.5433, + "step": 146500 + }, + { + "epoch": 2.17, + "learning_rate": 3.9163511594888785e-05, + "loss": 1.5535, + "step": 146600 + }, + { + "epoch": 2.17, + "learning_rate": 3.915611689540937e-05, + "loss": 1.5668, + "step": 146700 + }, + { + "epoch": 2.17, + "learning_rate": 3.914872219592996e-05, + "loss": 1.5206, + "step": 146800 + }, + { + "epoch": 2.17, + "learning_rate": 3.914132749645055e-05, + "loss": 1.5292, + "step": 146900 + }, + { + "epoch": 2.17, + "learning_rate": 3.9133932796971134e-05, + "loss": 1.5257, + "step": 147000 + }, + { + "epoch": 2.18, + "learning_rate": 3.912653809749172e-05, + "loss": 1.5159, + "step": 147100 + }, + { + "epoch": 2.18, + "learning_rate": 3.911914339801231e-05, + "loss": 1.5149, + "step": 147200 + }, + { + "epoch": 2.18, + "learning_rate": 3.91117486985329e-05, + "loss": 1.5172, + "step": 147300 + }, + { + "epoch": 2.18, + "learning_rate": 3.910435399905348e-05, + "loss": 1.516, + "step": 147400 + }, + { + "epoch": 2.18, + "learning_rate": 3.909695929957406e-05, + "loss": 1.5206, + "step": 147500 + }, + { + "epoch": 2.18, + "learning_rate": 3.9089564600094655e-05, + "loss": 1.5205, + "step": 147600 + }, + { + "epoch": 2.18, + "learning_rate": 3.908216990061524e-05, + "loss": 1.5496, + "step": 147700 + }, + { + "epoch": 2.19, + "learning_rate": 3.907484914813062e-05, + "loss": 1.5248, + "step": 147800 + }, + { + "epoch": 2.19, + "learning_rate": 3.9067454448651206e-05, + "loss": 1.5232, + "step": 147900 + }, + { + "epoch": 2.19, + "learning_rate": 3.90600597491718e-05, + "loss": 1.5097, + "step": 148000 + }, + { + "epoch": 2.19, + "learning_rate": 3.9052665049692384e-05, + "loss": 1.5178, + "step": 148100 + }, + { + "epoch": 2.19, + "learning_rate": 3.904527035021297e-05, + "loss": 1.4961, + "step": 148200 + }, + { + "epoch": 2.19, + "learning_rate": 3.9037875650733556e-05, + "loss": 1.5114, + "step": 148300 + }, + { + "epoch": 2.19, + "learning_rate": 3.903048095125414e-05, + "loss": 1.5442, + "step": 148400 + }, + { + "epoch": 2.2, + "learning_rate": 3.9023086251774734e-05, + "loss": 1.5203, + "step": 148500 + }, + { + "epoch": 2.2, + "learning_rate": 3.901569155229532e-05, + "loss": 1.5245, + "step": 148600 + }, + { + "epoch": 2.2, + "learning_rate": 3.90082968528159e-05, + "loss": 1.521, + "step": 148700 + }, + { + "epoch": 2.2, + "learning_rate": 3.900090215333649e-05, + "loss": 1.5147, + "step": 148800 + }, + { + "epoch": 2.2, + "learning_rate": 3.8993507453857076e-05, + "loss": 1.5165, + "step": 148900 + }, + { + "epoch": 2.2, + "learning_rate": 3.898611275437766e-05, + "loss": 1.5301, + "step": 149000 + }, + { + "epoch": 2.21, + "learning_rate": 3.897871805489825e-05, + "loss": 1.5131, + "step": 149100 + }, + { + "epoch": 2.21, + "learning_rate": 3.897132335541884e-05, + "loss": 1.5122, + "step": 149200 + }, + { + "epoch": 2.21, + "learning_rate": 3.8963928655939426e-05, + "loss": 1.5287, + "step": 149300 + }, + { + "epoch": 2.21, + "learning_rate": 3.895653395646001e-05, + "loss": 1.5202, + "step": 149400 + }, + { + "epoch": 2.21, + "learning_rate": 3.89491392569806e-05, + "loss": 1.5268, + "step": 149500 + }, + { + "epoch": 2.21, + "learning_rate": 3.894174455750119e-05, + "loss": 1.5686, + "step": 149600 + }, + { + "epoch": 2.21, + "learning_rate": 3.8934349858021775e-05, + "loss": 1.5052, + "step": 149700 + }, + { + "epoch": 2.22, + "learning_rate": 3.8927029105537155e-05, + "loss": 1.5105, + "step": 149800 + }, + { + "epoch": 2.22, + "learning_rate": 3.891963440605774e-05, + "loss": 1.5223, + "step": 149900 + }, + { + "epoch": 2.22, + "learning_rate": 3.891223970657833e-05, + "loss": 1.521, + "step": 150000 + }, + { + "epoch": 2.22, + "learning_rate": 3.890484500709891e-05, + "loss": 1.537, + "step": 150100 + }, + { + "epoch": 2.22, + "learning_rate": 3.88974503076195e-05, + "loss": 1.5392, + "step": 150200 + }, + { + "epoch": 2.22, + "learning_rate": 3.8890055608140084e-05, + "loss": 1.5224, + "step": 150300 + }, + { + "epoch": 2.22, + "learning_rate": 3.8882660908660676e-05, + "loss": 1.5096, + "step": 150400 + }, + { + "epoch": 2.23, + "learning_rate": 3.887526620918126e-05, + "loss": 1.5212, + "step": 150500 + }, + { + "epoch": 2.23, + "learning_rate": 3.886787150970185e-05, + "loss": 1.5359, + "step": 150600 + }, + { + "epoch": 2.23, + "learning_rate": 3.886047681022243e-05, + "loss": 1.5101, + "step": 150700 + }, + { + "epoch": 2.23, + "learning_rate": 3.8853082110743025e-05, + "loss": 1.5368, + "step": 150800 + }, + { + "epoch": 2.23, + "learning_rate": 3.884568741126361e-05, + "loss": 1.5273, + "step": 150900 + }, + { + "epoch": 2.23, + "learning_rate": 3.88382927117842e-05, + "loss": 1.5458, + "step": 151000 + }, + { + "epoch": 2.23, + "learning_rate": 3.883089801230478e-05, + "loss": 1.5151, + "step": 151100 + }, + { + "epoch": 2.24, + "learning_rate": 3.882350331282537e-05, + "loss": 1.5246, + "step": 151200 + }, + { + "epoch": 2.24, + "learning_rate": 3.8816108613345954e-05, + "loss": 1.5414, + "step": 151300 + }, + { + "epoch": 2.24, + "learning_rate": 3.880871391386654e-05, + "loss": 1.5416, + "step": 151400 + }, + { + "epoch": 2.24, + "learning_rate": 3.8801319214387125e-05, + "loss": 1.5138, + "step": 151500 + }, + { + "epoch": 2.24, + "learning_rate": 3.879392451490772e-05, + "loss": 1.5274, + "step": 151600 + }, + { + "epoch": 2.24, + "learning_rate": 3.87865298154283e-05, + "loss": 1.5137, + "step": 151700 + }, + { + "epoch": 2.25, + "learning_rate": 3.877913511594889e-05, + "loss": 1.5414, + "step": 151800 + }, + { + "epoch": 2.25, + "learning_rate": 3.8771740416469474e-05, + "loss": 1.5365, + "step": 151900 + }, + { + "epoch": 2.25, + "learning_rate": 3.8764345716990067e-05, + "loss": 1.5146, + "step": 152000 + }, + { + "epoch": 2.25, + "learning_rate": 3.875695101751065e-05, + "loss": 1.548, + "step": 152100 + }, + { + "epoch": 2.25, + "learning_rate": 3.874963026502603e-05, + "loss": 1.5208, + "step": 152200 + }, + { + "epoch": 2.25, + "learning_rate": 3.874223556554662e-05, + "loss": 1.5034, + "step": 152300 + }, + { + "epoch": 2.25, + "learning_rate": 3.8734840866067204e-05, + "loss": 1.5216, + "step": 152400 + }, + { + "epoch": 2.26, + "learning_rate": 3.8727446166587796e-05, + "loss": 1.4953, + "step": 152500 + }, + { + "epoch": 2.26, + "learning_rate": 3.8720051467108375e-05, + "loss": 1.4898, + "step": 152600 + }, + { + "epoch": 2.26, + "learning_rate": 3.871265676762896e-05, + "loss": 1.4953, + "step": 152700 + }, + { + "epoch": 2.26, + "learning_rate": 3.870526206814955e-05, + "loss": 1.5367, + "step": 152800 + }, + { + "epoch": 2.26, + "learning_rate": 3.869786736867014e-05, + "loss": 1.5486, + "step": 152900 + }, + { + "epoch": 2.26, + "learning_rate": 3.8690472669190725e-05, + "loss": 1.5268, + "step": 153000 + }, + { + "epoch": 2.26, + "learning_rate": 3.868307796971131e-05, + "loss": 1.5197, + "step": 153100 + }, + { + "epoch": 2.27, + "learning_rate": 3.86756832702319e-05, + "loss": 1.5053, + "step": 153200 + }, + { + "epoch": 2.27, + "learning_rate": 3.866828857075249e-05, + "loss": 1.5296, + "step": 153300 + }, + { + "epoch": 2.27, + "learning_rate": 3.8660893871273074e-05, + "loss": 1.518, + "step": 153400 + }, + { + "epoch": 2.27, + "learning_rate": 3.865349917179366e-05, + "loss": 1.5227, + "step": 153500 + }, + { + "epoch": 2.27, + "learning_rate": 3.8646104472314245e-05, + "loss": 1.5223, + "step": 153600 + }, + { + "epoch": 2.27, + "learning_rate": 3.863870977283484e-05, + "loss": 1.5138, + "step": 153700 + }, + { + "epoch": 2.27, + "learning_rate": 3.863131507335542e-05, + "loss": 1.5296, + "step": 153800 + }, + { + "epoch": 2.28, + "learning_rate": 3.862392037387601e-05, + "loss": 1.5117, + "step": 153900 + }, + { + "epoch": 2.28, + "learning_rate": 3.8616525674396594e-05, + "loss": 1.538, + "step": 154000 + }, + { + "epoch": 2.28, + "learning_rate": 3.860913097491718e-05, + "loss": 1.5303, + "step": 154100 + }, + { + "epoch": 2.28, + "learning_rate": 3.8601736275437766e-05, + "loss": 1.5112, + "step": 154200 + }, + { + "epoch": 2.28, + "learning_rate": 3.8594415522953146e-05, + "loss": 1.5185, + "step": 154300 + }, + { + "epoch": 2.28, + "learning_rate": 3.858702082347373e-05, + "loss": 1.5206, + "step": 154400 + }, + { + "epoch": 2.28, + "learning_rate": 3.8579626123994324e-05, + "loss": 1.5452, + "step": 154500 + }, + { + "epoch": 2.29, + "learning_rate": 3.857223142451491e-05, + "loss": 1.5308, + "step": 154600 + }, + { + "epoch": 2.29, + "learning_rate": 3.8564836725035496e-05, + "loss": 1.5355, + "step": 154700 + }, + { + "epoch": 2.29, + "learning_rate": 3.855744202555608e-05, + "loss": 1.5261, + "step": 154800 + }, + { + "epoch": 2.29, + "learning_rate": 3.8550047326076674e-05, + "loss": 1.5119, + "step": 154900 + }, + { + "epoch": 2.29, + "learning_rate": 3.854265262659726e-05, + "loss": 1.542, + "step": 155000 + }, + { + "epoch": 2.29, + "learning_rate": 3.8535257927117845e-05, + "loss": 1.5279, + "step": 155100 + }, + { + "epoch": 2.3, + "learning_rate": 3.852786322763843e-05, + "loss": 1.5346, + "step": 155200 + }, + { + "epoch": 2.3, + "learning_rate": 3.8520468528159016e-05, + "loss": 1.5291, + "step": 155300 + }, + { + "epoch": 2.3, + "learning_rate": 3.85130738286796e-05, + "loss": 1.5604, + "step": 155400 + }, + { + "epoch": 2.3, + "learning_rate": 3.850567912920019e-05, + "loss": 1.4958, + "step": 155500 + }, + { + "epoch": 2.3, + "learning_rate": 3.849828442972078e-05, + "loss": 1.5123, + "step": 155600 + }, + { + "epoch": 2.3, + "learning_rate": 3.8490889730241365e-05, + "loss": 1.5425, + "step": 155700 + }, + { + "epoch": 2.3, + "learning_rate": 3.848349503076195e-05, + "loss": 1.4989, + "step": 155800 + }, + { + "epoch": 2.31, + "learning_rate": 3.847610033128254e-05, + "loss": 1.5123, + "step": 155900 + }, + { + "epoch": 2.31, + "learning_rate": 3.846870563180313e-05, + "loss": 1.5212, + "step": 156000 + }, + { + "epoch": 2.31, + "learning_rate": 3.8461310932323715e-05, + "loss": 1.5018, + "step": 156100 + }, + { + "epoch": 2.31, + "learning_rate": 3.84539162328443e-05, + "loss": 1.5205, + "step": 156200 + }, + { + "epoch": 2.31, + "learning_rate": 3.8446521533364886e-05, + "loss": 1.5217, + "step": 156300 + }, + { + "epoch": 2.31, + "learning_rate": 3.8439200780880267e-05, + "loss": 1.5149, + "step": 156400 + }, + { + "epoch": 2.31, + "learning_rate": 3.843180608140086e-05, + "loss": 1.4915, + "step": 156500 + }, + { + "epoch": 2.32, + "learning_rate": 3.842441138192144e-05, + "loss": 1.5571, + "step": 156600 + }, + { + "epoch": 2.32, + "learning_rate": 3.8417016682442023e-05, + "loss": 1.5349, + "step": 156700 + }, + { + "epoch": 2.32, + "learning_rate": 3.8409621982962616e-05, + "loss": 1.5304, + "step": 156800 + }, + { + "epoch": 2.32, + "learning_rate": 3.84022272834832e-05, + "loss": 1.5405, + "step": 156900 + }, + { + "epoch": 2.32, + "learning_rate": 3.839483258400379e-05, + "loss": 1.5182, + "step": 157000 + }, + { + "epoch": 2.32, + "learning_rate": 3.838743788452437e-05, + "loss": 1.5217, + "step": 157100 + }, + { + "epoch": 2.32, + "learning_rate": 3.838004318504496e-05, + "loss": 1.4984, + "step": 157200 + }, + { + "epoch": 2.33, + "learning_rate": 3.837264848556555e-05, + "loss": 1.5241, + "step": 157300 + }, + { + "epoch": 2.33, + "learning_rate": 3.8365253786086136e-05, + "loss": 1.5191, + "step": 157400 + }, + { + "epoch": 2.33, + "learning_rate": 3.835785908660672e-05, + "loss": 1.519, + "step": 157500 + }, + { + "epoch": 2.33, + "learning_rate": 3.835046438712731e-05, + "loss": 1.5303, + "step": 157600 + }, + { + "epoch": 2.33, + "learning_rate": 3.83430696876479e-05, + "loss": 1.5395, + "step": 157700 + }, + { + "epoch": 2.33, + "learning_rate": 3.8335674988168486e-05, + "loss": 1.5196, + "step": 157800 + }, + { + "epoch": 2.34, + "learning_rate": 3.8328280288689065e-05, + "loss": 1.5375, + "step": 157900 + }, + { + "epoch": 2.34, + "learning_rate": 3.832088558920966e-05, + "loss": 1.5428, + "step": 158000 + }, + { + "epoch": 2.34, + "learning_rate": 3.831349088973024e-05, + "loss": 1.5359, + "step": 158100 + }, + { + "epoch": 2.34, + "learning_rate": 3.830609619025083e-05, + "loss": 1.513, + "step": 158200 + }, + { + "epoch": 2.34, + "learning_rate": 3.8298701490771414e-05, + "loss": 1.5272, + "step": 158300 + }, + { + "epoch": 2.34, + "learning_rate": 3.8291306791292006e-05, + "loss": 1.5364, + "step": 158400 + }, + { + "epoch": 2.34, + "learning_rate": 3.828391209181259e-05, + "loss": 1.5082, + "step": 158500 + }, + { + "epoch": 2.35, + "learning_rate": 3.827651739233318e-05, + "loss": 1.5272, + "step": 158600 + }, + { + "epoch": 2.35, + "learning_rate": 3.826912269285376e-05, + "loss": 1.543, + "step": 158700 + }, + { + "epoch": 2.35, + "learning_rate": 3.826172799337435e-05, + "loss": 1.5267, + "step": 158800 + }, + { + "epoch": 2.35, + "learning_rate": 3.825433329389494e-05, + "loss": 1.5242, + "step": 158900 + }, + { + "epoch": 2.35, + "learning_rate": 3.824693859441553e-05, + "loss": 1.5197, + "step": 159000 + }, + { + "epoch": 2.35, + "learning_rate": 3.823954389493611e-05, + "loss": 1.5113, + "step": 159100 + }, + { + "epoch": 2.35, + "learning_rate": 3.82321491954567e-05, + "loss": 1.5295, + "step": 159200 + }, + { + "epoch": 2.36, + "learning_rate": 3.822482844297208e-05, + "loss": 1.5319, + "step": 159300 + }, + { + "epoch": 2.36, + "learning_rate": 3.8217433743492664e-05, + "loss": 1.5453, + "step": 159400 + }, + { + "epoch": 2.36, + "learning_rate": 3.821003904401325e-05, + "loss": 1.5206, + "step": 159500 + }, + { + "epoch": 2.36, + "learning_rate": 3.8202644344533836e-05, + "loss": 1.5244, + "step": 159600 + }, + { + "epoch": 2.36, + "learning_rate": 3.819524964505443e-05, + "loss": 1.514, + "step": 159700 + }, + { + "epoch": 2.36, + "learning_rate": 3.8187854945575014e-05, + "loss": 1.522, + "step": 159800 + }, + { + "epoch": 2.36, + "learning_rate": 3.81804602460956e-05, + "loss": 1.5148, + "step": 159900 + }, + { + "epoch": 2.37, + "learning_rate": 3.8173065546616185e-05, + "loss": 1.5149, + "step": 160000 + }, + { + "epoch": 2.37, + "learning_rate": 3.816567084713678e-05, + "loss": 1.4791, + "step": 160100 + }, + { + "epoch": 2.37, + "learning_rate": 3.815827614765736e-05, + "loss": 1.5172, + "step": 160200 + }, + { + "epoch": 2.37, + "learning_rate": 3.815088144817795e-05, + "loss": 1.5129, + "step": 160300 + }, + { + "epoch": 2.37, + "learning_rate": 3.8143486748698534e-05, + "loss": 1.5584, + "step": 160400 + }, + { + "epoch": 2.37, + "learning_rate": 3.813609204921913e-05, + "loss": 1.5532, + "step": 160500 + }, + { + "epoch": 2.38, + "learning_rate": 3.8128697349739706e-05, + "loss": 1.5191, + "step": 160600 + }, + { + "epoch": 2.38, + "learning_rate": 3.812130265026029e-05, + "loss": 1.5165, + "step": 160700 + }, + { + "epoch": 2.38, + "learning_rate": 3.8113907950780884e-05, + "loss": 1.5104, + "step": 160800 + }, + { + "epoch": 2.38, + "learning_rate": 3.810651325130147e-05, + "loss": 1.5127, + "step": 160900 + }, + { + "epoch": 2.38, + "learning_rate": 3.8099118551822055e-05, + "loss": 1.5256, + "step": 161000 + }, + { + "epoch": 2.38, + "learning_rate": 3.809172385234264e-05, + "loss": 1.5308, + "step": 161100 + }, + { + "epoch": 2.38, + "learning_rate": 3.808432915286323e-05, + "loss": 1.5015, + "step": 161200 + }, + { + "epoch": 2.39, + "learning_rate": 3.807693445338382e-05, + "loss": 1.5248, + "step": 161300 + }, + { + "epoch": 2.39, + "learning_rate": 3.8069539753904404e-05, + "loss": 1.5092, + "step": 161400 + }, + { + "epoch": 2.39, + "learning_rate": 3.806214505442499e-05, + "loss": 1.5393, + "step": 161500 + }, + { + "epoch": 2.39, + "learning_rate": 3.8054750354945575e-05, + "loss": 1.5294, + "step": 161600 + }, + { + "epoch": 2.39, + "learning_rate": 3.804735565546617e-05, + "loss": 1.5432, + "step": 161700 + }, + { + "epoch": 2.39, + "learning_rate": 3.8039960955986754e-05, + "loss": 1.5007, + "step": 161800 + }, + { + "epoch": 2.39, + "learning_rate": 3.803256625650733e-05, + "loss": 1.5168, + "step": 161900 + }, + { + "epoch": 2.4, + "learning_rate": 3.8025171557027925e-05, + "loss": 1.5084, + "step": 162000 + }, + { + "epoch": 2.4, + "learning_rate": 3.801777685754851e-05, + "loss": 1.5377, + "step": 162100 + }, + { + "epoch": 2.4, + "learning_rate": 3.8010382158069096e-05, + "loss": 1.5564, + "step": 162200 + }, + { + "epoch": 2.4, + "learning_rate": 3.8003061405584477e-05, + "loss": 1.5204, + "step": 162300 + }, + { + "epoch": 2.4, + "learning_rate": 3.799566670610506e-05, + "loss": 1.5378, + "step": 162400 + }, + { + "epoch": 2.4, + "learning_rate": 3.7988272006625655e-05, + "loss": 1.5489, + "step": 162500 + }, + { + "epoch": 2.4, + "learning_rate": 3.798087730714624e-05, + "loss": 1.5433, + "step": 162600 + }, + { + "epoch": 2.41, + "learning_rate": 3.7973482607666826e-05, + "loss": 1.4955, + "step": 162700 + }, + { + "epoch": 2.41, + "learning_rate": 3.796608790818741e-05, + "loss": 1.5404, + "step": 162800 + }, + { + "epoch": 2.41, + "learning_rate": 3.7958693208708004e-05, + "loss": 1.5299, + "step": 162900 + }, + { + "epoch": 2.41, + "learning_rate": 3.795129850922859e-05, + "loss": 1.5257, + "step": 163000 + }, + { + "epoch": 2.41, + "learning_rate": 3.7943903809749175e-05, + "loss": 1.5343, + "step": 163100 + }, + { + "epoch": 2.41, + "learning_rate": 3.793650911026976e-05, + "loss": 1.5286, + "step": 163200 + }, + { + "epoch": 2.42, + "learning_rate": 3.7929114410790346e-05, + "loss": 1.4956, + "step": 163300 + }, + { + "epoch": 2.42, + "learning_rate": 3.792171971131093e-05, + "loss": 1.541, + "step": 163400 + }, + { + "epoch": 2.42, + "learning_rate": 3.791432501183152e-05, + "loss": 1.5287, + "step": 163500 + }, + { + "epoch": 2.42, + "learning_rate": 3.790693031235211e-05, + "loss": 1.5048, + "step": 163600 + }, + { + "epoch": 2.42, + "learning_rate": 3.7899535612872696e-05, + "loss": 1.5468, + "step": 163700 + }, + { + "epoch": 2.42, + "learning_rate": 3.789214091339328e-05, + "loss": 1.5572, + "step": 163800 + }, + { + "epoch": 2.42, + "learning_rate": 3.788474621391387e-05, + "loss": 1.5488, + "step": 163900 + }, + { + "epoch": 2.43, + "learning_rate": 3.787735151443445e-05, + "loss": 1.5341, + "step": 164000 + }, + { + "epoch": 2.43, + "learning_rate": 3.7869956814955045e-05, + "loss": 1.5196, + "step": 164100 + }, + { + "epoch": 2.43, + "learning_rate": 3.786256211547563e-05, + "loss": 1.551, + "step": 164200 + }, + { + "epoch": 2.43, + "learning_rate": 3.7855167415996216e-05, + "loss": 1.5046, + "step": 164300 + }, + { + "epoch": 2.43, + "learning_rate": 3.78477727165168e-05, + "loss": 1.5463, + "step": 164400 + }, + { + "epoch": 2.43, + "learning_rate": 3.784037801703739e-05, + "loss": 1.5447, + "step": 164500 + }, + { + "epoch": 2.43, + "learning_rate": 3.783298331755797e-05, + "loss": 1.5415, + "step": 164600 + }, + { + "epoch": 2.44, + "learning_rate": 3.782558861807856e-05, + "loss": 1.5306, + "step": 164700 + }, + { + "epoch": 2.44, + "learning_rate": 3.781819391859915e-05, + "loss": 1.5344, + "step": 164800 + }, + { + "epoch": 2.44, + "learning_rate": 3.781079921911974e-05, + "loss": 1.5478, + "step": 164900 + }, + { + "epoch": 2.44, + "learning_rate": 3.780347846663512e-05, + "loss": 1.5138, + "step": 165000 + }, + { + "epoch": 2.44, + "learning_rate": 3.77960837671557e-05, + "loss": 1.538, + "step": 165100 + }, + { + "epoch": 2.44, + "learning_rate": 3.778868906767629e-05, + "loss": 1.509, + "step": 165200 + }, + { + "epoch": 2.44, + "learning_rate": 3.778129436819688e-05, + "loss": 1.5328, + "step": 165300 + }, + { + "epoch": 2.45, + "learning_rate": 3.777389966871747e-05, + "loss": 1.5146, + "step": 165400 + }, + { + "epoch": 2.45, + "learning_rate": 3.776650496923805e-05, + "loss": 1.5116, + "step": 165500 + }, + { + "epoch": 2.45, + "learning_rate": 3.775911026975864e-05, + "loss": 1.5542, + "step": 165600 + }, + { + "epoch": 2.45, + "learning_rate": 3.775171557027923e-05, + "loss": 1.5504, + "step": 165700 + }, + { + "epoch": 2.45, + "learning_rate": 3.7744320870799816e-05, + "loss": 1.5533, + "step": 165800 + }, + { + "epoch": 2.45, + "learning_rate": 3.7736926171320395e-05, + "loss": 1.5306, + "step": 165900 + }, + { + "epoch": 2.46, + "learning_rate": 3.772953147184099e-05, + "loss": 1.5183, + "step": 166000 + }, + { + "epoch": 2.46, + "learning_rate": 3.772213677236157e-05, + "loss": 1.5319, + "step": 166100 + }, + { + "epoch": 2.46, + "learning_rate": 3.771474207288216e-05, + "loss": 1.5312, + "step": 166200 + }, + { + "epoch": 2.46, + "learning_rate": 3.7707347373402744e-05, + "loss": 1.498, + "step": 166300 + }, + { + "epoch": 2.46, + "learning_rate": 3.769995267392334e-05, + "loss": 1.5548, + "step": 166400 + }, + { + "epoch": 2.46, + "learning_rate": 3.769255797444392e-05, + "loss": 1.5129, + "step": 166500 + }, + { + "epoch": 2.46, + "learning_rate": 3.768516327496451e-05, + "loss": 1.5138, + "step": 166600 + }, + { + "epoch": 2.47, + "learning_rate": 3.7677768575485094e-05, + "loss": 1.5326, + "step": 166700 + }, + { + "epoch": 2.47, + "learning_rate": 3.767037387600568e-05, + "loss": 1.5412, + "step": 166800 + }, + { + "epoch": 2.47, + "learning_rate": 3.766297917652627e-05, + "loss": 1.5079, + "step": 166900 + }, + { + "epoch": 2.47, + "learning_rate": 3.765558447704686e-05, + "loss": 1.5241, + "step": 167000 + }, + { + "epoch": 2.47, + "learning_rate": 3.764818977756744e-05, + "loss": 1.5191, + "step": 167100 + }, + { + "epoch": 2.47, + "learning_rate": 3.764079507808803e-05, + "loss": 1.5298, + "step": 167200 + }, + { + "epoch": 2.47, + "learning_rate": 3.7633400378608614e-05, + "loss": 1.5229, + "step": 167300 + }, + { + "epoch": 2.48, + "learning_rate": 3.7626079626123995e-05, + "loss": 1.5194, + "step": 167400 + }, + { + "epoch": 2.48, + "learning_rate": 3.761868492664458e-05, + "loss": 1.5241, + "step": 167500 + }, + { + "epoch": 2.48, + "learning_rate": 3.7611290227165166e-05, + "loss": 1.5364, + "step": 167600 + }, + { + "epoch": 2.48, + "learning_rate": 3.760389552768576e-05, + "loss": 1.5252, + "step": 167700 + }, + { + "epoch": 2.48, + "learning_rate": 3.7596500828206344e-05, + "loss": 1.5377, + "step": 167800 + }, + { + "epoch": 2.48, + "learning_rate": 3.758910612872693e-05, + "loss": 1.5146, + "step": 167900 + }, + { + "epoch": 2.48, + "learning_rate": 3.7581711429247515e-05, + "loss": 1.5325, + "step": 168000 + }, + { + "epoch": 2.49, + "learning_rate": 3.757431672976811e-05, + "loss": 1.5062, + "step": 168100 + }, + { + "epoch": 2.49, + "learning_rate": 3.756692203028869e-05, + "loss": 1.5162, + "step": 168200 + }, + { + "epoch": 2.49, + "learning_rate": 3.755952733080928e-05, + "loss": 1.5347, + "step": 168300 + }, + { + "epoch": 2.49, + "learning_rate": 3.7552132631329865e-05, + "loss": 1.5173, + "step": 168400 + }, + { + "epoch": 2.49, + "learning_rate": 3.754473793185045e-05, + "loss": 1.5102, + "step": 168500 + }, + { + "epoch": 2.49, + "learning_rate": 3.7537343232371036e-05, + "loss": 1.5276, + "step": 168600 + }, + { + "epoch": 2.49, + "learning_rate": 3.752994853289162e-05, + "loss": 1.5202, + "step": 168700 + }, + { + "epoch": 2.5, + "learning_rate": 3.7522553833412214e-05, + "loss": 1.5366, + "step": 168800 + }, + { + "epoch": 2.5, + "learning_rate": 3.75151591339328e-05, + "loss": 1.5176, + "step": 168900 + }, + { + "epoch": 2.5, + "learning_rate": 3.7507764434453385e-05, + "loss": 1.5426, + "step": 169000 + }, + { + "epoch": 2.5, + "learning_rate": 3.750036973497397e-05, + "loss": 1.532, + "step": 169100 + }, + { + "epoch": 2.5, + "learning_rate": 3.7492975035494556e-05, + "loss": 1.5048, + "step": 169200 + }, + { + "epoch": 2.5, + "learning_rate": 3.748558033601515e-05, + "loss": 1.5477, + "step": 169300 + }, + { + "epoch": 2.51, + "learning_rate": 3.747825958353053e-05, + "loss": 1.5361, + "step": 169400 + }, + { + "epoch": 2.51, + "learning_rate": 3.7470864884051115e-05, + "loss": 1.5399, + "step": 169500 + }, + { + "epoch": 2.51, + "learning_rate": 3.74634701845717e-05, + "loss": 1.5545, + "step": 169600 + }, + { + "epoch": 2.51, + "learning_rate": 3.745607548509229e-05, + "loss": 1.5011, + "step": 169700 + }, + { + "epoch": 2.51, + "learning_rate": 3.744868078561287e-05, + "loss": 1.5148, + "step": 169800 + }, + { + "epoch": 2.51, + "learning_rate": 3.744128608613346e-05, + "loss": 1.5231, + "step": 169900 + }, + { + "epoch": 2.51, + "learning_rate": 3.743389138665404e-05, + "loss": 1.5292, + "step": 170000 + }, + { + "epoch": 2.52, + "learning_rate": 3.7426496687174636e-05, + "loss": 1.4809, + "step": 170100 + }, + { + "epoch": 2.52, + "learning_rate": 3.741910198769522e-05, + "loss": 1.5251, + "step": 170200 + }, + { + "epoch": 2.52, + "learning_rate": 3.741170728821581e-05, + "loss": 1.5232, + "step": 170300 + }, + { + "epoch": 2.52, + "learning_rate": 3.740431258873639e-05, + "loss": 1.5087, + "step": 170400 + }, + { + "epoch": 2.52, + "learning_rate": 3.7396917889256985e-05, + "loss": 1.5524, + "step": 170500 + }, + { + "epoch": 2.52, + "learning_rate": 3.738952318977757e-05, + "loss": 1.5114, + "step": 170600 + }, + { + "epoch": 2.52, + "learning_rate": 3.7382128490298156e-05, + "loss": 1.5244, + "step": 170700 + }, + { + "epoch": 2.53, + "learning_rate": 3.737473379081874e-05, + "loss": 1.4936, + "step": 170800 + }, + { + "epoch": 2.53, + "learning_rate": 3.7367339091339334e-05, + "loss": 1.5364, + "step": 170900 + }, + { + "epoch": 2.53, + "learning_rate": 3.735994439185992e-05, + "loss": 1.5315, + "step": 171000 + }, + { + "epoch": 2.53, + "learning_rate": 3.73525496923805e-05, + "loss": 1.5272, + "step": 171100 + }, + { + "epoch": 2.53, + "learning_rate": 3.734515499290109e-05, + "loss": 1.5493, + "step": 171200 + }, + { + "epoch": 2.53, + "learning_rate": 3.733776029342168e-05, + "loss": 1.5101, + "step": 171300 + }, + { + "epoch": 2.53, + "learning_rate": 3.733036559394226e-05, + "loss": 1.5398, + "step": 171400 + }, + { + "epoch": 2.54, + "learning_rate": 3.732297089446285e-05, + "loss": 1.5201, + "step": 171500 + }, + { + "epoch": 2.54, + "learning_rate": 3.7315576194983434e-05, + "loss": 1.4922, + "step": 171600 + }, + { + "epoch": 2.54, + "learning_rate": 3.7308181495504026e-05, + "loss": 1.5116, + "step": 171700 + }, + { + "epoch": 2.54, + "learning_rate": 3.730078679602461e-05, + "loss": 1.5347, + "step": 171800 + }, + { + "epoch": 2.54, + "learning_rate": 3.72933920965452e-05, + "loss": 1.5198, + "step": 171900 + }, + { + "epoch": 2.54, + "learning_rate": 3.728599739706578e-05, + "loss": 1.4854, + "step": 172000 + }, + { + "epoch": 2.55, + "learning_rate": 3.7278602697586375e-05, + "loss": 1.5242, + "step": 172100 + }, + { + "epoch": 2.55, + "learning_rate": 3.727120799810696e-05, + "loss": 1.5208, + "step": 172200 + }, + { + "epoch": 2.55, + "learning_rate": 3.726381329862755e-05, + "loss": 1.4922, + "step": 172300 + }, + { + "epoch": 2.55, + "learning_rate": 3.725649254614293e-05, + "loss": 1.5441, + "step": 172400 + }, + { + "epoch": 2.55, + "learning_rate": 3.724909784666351e-05, + "loss": 1.553, + "step": 172500 + }, + { + "epoch": 2.55, + "learning_rate": 3.72417031471841e-05, + "loss": 1.5018, + "step": 172600 + }, + { + "epoch": 2.55, + "learning_rate": 3.7234308447704684e-05, + "loss": 1.5003, + "step": 172700 + }, + { + "epoch": 2.56, + "learning_rate": 3.722691374822527e-05, + "loss": 1.5171, + "step": 172800 + }, + { + "epoch": 2.56, + "learning_rate": 3.721951904874586e-05, + "loss": 1.5237, + "step": 172900 + }, + { + "epoch": 2.56, + "learning_rate": 3.721212434926645e-05, + "loss": 1.5277, + "step": 173000 + }, + { + "epoch": 2.56, + "learning_rate": 3.7204729649787033e-05, + "loss": 1.5279, + "step": 173100 + }, + { + "epoch": 2.56, + "learning_rate": 3.719733495030762e-05, + "loss": 1.5025, + "step": 173200 + }, + { + "epoch": 2.56, + "learning_rate": 3.718994025082821e-05, + "loss": 1.5385, + "step": 173300 + }, + { + "epoch": 2.56, + "learning_rate": 3.71825455513488e-05, + "loss": 1.5167, + "step": 173400 + }, + { + "epoch": 2.57, + "learning_rate": 3.717515085186938e-05, + "loss": 1.5155, + "step": 173500 + }, + { + "epoch": 2.57, + "learning_rate": 3.716775615238997e-05, + "loss": 1.5142, + "step": 173600 + }, + { + "epoch": 2.57, + "learning_rate": 3.716036145291056e-05, + "loss": 1.5074, + "step": 173700 + }, + { + "epoch": 2.57, + "learning_rate": 3.715296675343114e-05, + "loss": 1.5261, + "step": 173800 + }, + { + "epoch": 2.57, + "learning_rate": 3.7145572053951725e-05, + "loss": 1.5034, + "step": 173900 + }, + { + "epoch": 2.57, + "learning_rate": 3.713817735447232e-05, + "loss": 1.5219, + "step": 174000 + }, + { + "epoch": 2.57, + "learning_rate": 3.71307826549929e-05, + "loss": 1.5377, + "step": 174100 + }, + { + "epoch": 2.58, + "learning_rate": 3.712338795551349e-05, + "loss": 1.5329, + "step": 174200 + }, + { + "epoch": 2.58, + "learning_rate": 3.7115993256034075e-05, + "loss": 1.4909, + "step": 174300 + }, + { + "epoch": 2.58, + "learning_rate": 3.7108672503549455e-05, + "loss": 1.5052, + "step": 174400 + }, + { + "epoch": 2.58, + "learning_rate": 3.710127780407005e-05, + "loss": 1.5397, + "step": 174500 + }, + { + "epoch": 2.58, + "learning_rate": 3.709388310459063e-05, + "loss": 1.5459, + "step": 174600 + }, + { + "epoch": 2.58, + "learning_rate": 3.708648840511122e-05, + "loss": 1.5564, + "step": 174700 + }, + { + "epoch": 2.59, + "learning_rate": 3.7079093705631804e-05, + "loss": 1.529, + "step": 174800 + }, + { + "epoch": 2.59, + "learning_rate": 3.70716990061524e-05, + "loss": 1.5226, + "step": 174900 + }, + { + "epoch": 2.59, + "learning_rate": 3.706430430667298e-05, + "loss": 1.5175, + "step": 175000 + }, + { + "epoch": 2.59, + "learning_rate": 3.705690960719356e-05, + "loss": 1.5519, + "step": 175100 + }, + { + "epoch": 2.59, + "learning_rate": 3.704951490771415e-05, + "loss": 1.5236, + "step": 175200 + }, + { + "epoch": 2.59, + "learning_rate": 3.704212020823474e-05, + "loss": 1.5137, + "step": 175300 + }, + { + "epoch": 2.59, + "learning_rate": 3.7034725508755325e-05, + "loss": 1.4916, + "step": 175400 + }, + { + "epoch": 2.6, + "learning_rate": 3.702733080927591e-05, + "loss": 1.5355, + "step": 175500 + }, + { + "epoch": 2.6, + "learning_rate": 3.7019936109796496e-05, + "loss": 1.5257, + "step": 175600 + }, + { + "epoch": 2.6, + "learning_rate": 3.701254141031709e-05, + "loss": 1.5348, + "step": 175700 + }, + { + "epoch": 2.6, + "learning_rate": 3.7005146710837674e-05, + "loss": 1.5064, + "step": 175800 + }, + { + "epoch": 2.6, + "learning_rate": 3.699775201135826e-05, + "loss": 1.5246, + "step": 175900 + }, + { + "epoch": 2.6, + "learning_rate": 3.6990357311878846e-05, + "loss": 1.5271, + "step": 176000 + }, + { + "epoch": 2.6, + "learning_rate": 3.698296261239944e-05, + "loss": 1.4834, + "step": 176100 + }, + { + "epoch": 2.61, + "learning_rate": 3.6975567912920024e-05, + "loss": 1.4941, + "step": 176200 + }, + { + "epoch": 2.61, + "learning_rate": 3.696817321344061e-05, + "loss": 1.5328, + "step": 176300 + }, + { + "epoch": 2.61, + "learning_rate": 3.6960778513961195e-05, + "loss": 1.541, + "step": 176400 + }, + { + "epoch": 2.61, + "learning_rate": 3.695338381448178e-05, + "loss": 1.5115, + "step": 176500 + }, + { + "epoch": 2.61, + "learning_rate": 3.6945989115002366e-05, + "loss": 1.5309, + "step": 176600 + }, + { + "epoch": 2.61, + "learning_rate": 3.693859441552295e-05, + "loss": 1.5556, + "step": 176700 + }, + { + "epoch": 2.61, + "learning_rate": 3.693119971604354e-05, + "loss": 1.5194, + "step": 176800 + }, + { + "epoch": 2.62, + "learning_rate": 3.692380501656413e-05, + "loss": 1.523, + "step": 176900 + }, + { + "epoch": 2.62, + "learning_rate": 3.691648426407951e-05, + "loss": 1.5487, + "step": 177000 + }, + { + "epoch": 2.62, + "learning_rate": 3.6909089564600096e-05, + "loss": 1.5289, + "step": 177100 + }, + { + "epoch": 2.62, + "learning_rate": 3.690169486512068e-05, + "loss": 1.5006, + "step": 177200 + }, + { + "epoch": 2.62, + "learning_rate": 3.6894300165641274e-05, + "loss": 1.5327, + "step": 177300 + }, + { + "epoch": 2.62, + "learning_rate": 3.688690546616186e-05, + "loss": 1.5296, + "step": 177400 + }, + { + "epoch": 2.63, + "learning_rate": 3.6879510766682445e-05, + "loss": 1.5279, + "step": 177500 + }, + { + "epoch": 2.63, + "learning_rate": 3.687211606720303e-05, + "loss": 1.5324, + "step": 177600 + }, + { + "epoch": 2.63, + "learning_rate": 3.6864721367723617e-05, + "loss": 1.5256, + "step": 177700 + }, + { + "epoch": 2.63, + "learning_rate": 3.68573266682442e-05, + "loss": 1.5204, + "step": 177800 + }, + { + "epoch": 2.63, + "learning_rate": 3.684993196876479e-05, + "loss": 1.5193, + "step": 177900 + }, + { + "epoch": 2.63, + "learning_rate": 3.6842537269285373e-05, + "loss": 1.5009, + "step": 178000 + }, + { + "epoch": 2.63, + "learning_rate": 3.6835142569805966e-05, + "loss": 1.5057, + "step": 178100 + }, + { + "epoch": 2.64, + "learning_rate": 3.682774787032655e-05, + "loss": 1.519, + "step": 178200 + }, + { + "epoch": 2.64, + "learning_rate": 3.682035317084714e-05, + "loss": 1.53, + "step": 178300 + }, + { + "epoch": 2.64, + "learning_rate": 3.681295847136772e-05, + "loss": 1.5103, + "step": 178400 + }, + { + "epoch": 2.64, + "learning_rate": 3.6805563771888315e-05, + "loss": 1.4949, + "step": 178500 + }, + { + "epoch": 2.64, + "learning_rate": 3.67981690724089e-05, + "loss": 1.4811, + "step": 178600 + }, + { + "epoch": 2.64, + "learning_rate": 3.6790774372929486e-05, + "loss": 1.517, + "step": 178700 + }, + { + "epoch": 2.64, + "learning_rate": 3.678337967345007e-05, + "loss": 1.5269, + "step": 178800 + }, + { + "epoch": 2.65, + "learning_rate": 3.6775984973970665e-05, + "loss": 1.5268, + "step": 178900 + }, + { + "epoch": 2.65, + "learning_rate": 3.676859027449125e-05, + "loss": 1.499, + "step": 179000 + }, + { + "epoch": 2.65, + "learning_rate": 3.6761269522006624e-05, + "loss": 1.5117, + "step": 179100 + }, + { + "epoch": 2.65, + "learning_rate": 3.675387482252721e-05, + "loss": 1.5198, + "step": 179200 + }, + { + "epoch": 2.65, + "learning_rate": 3.67464801230478e-05, + "loss": 1.5191, + "step": 179300 + }, + { + "epoch": 2.65, + "learning_rate": 3.673908542356839e-05, + "loss": 1.5095, + "step": 179400 + }, + { + "epoch": 2.65, + "learning_rate": 3.673169072408897e-05, + "loss": 1.5299, + "step": 179500 + }, + { + "epoch": 2.66, + "learning_rate": 3.672429602460956e-05, + "loss": 1.5283, + "step": 179600 + }, + { + "epoch": 2.66, + "learning_rate": 3.671690132513015e-05, + "loss": 1.5228, + "step": 179700 + }, + { + "epoch": 2.66, + "learning_rate": 3.670950662565074e-05, + "loss": 1.5254, + "step": 179800 + }, + { + "epoch": 2.66, + "learning_rate": 3.670211192617132e-05, + "loss": 1.5281, + "step": 179900 + }, + { + "epoch": 2.66, + "learning_rate": 3.669471722669191e-05, + "loss": 1.5111, + "step": 180000 + }, + { + "epoch": 2.66, + "learning_rate": 3.66873225272125e-05, + "loss": 1.5066, + "step": 180100 + }, + { + "epoch": 2.67, + "learning_rate": 3.6679927827733086e-05, + "loss": 1.5302, + "step": 180200 + }, + { + "epoch": 2.67, + "learning_rate": 3.667253312825367e-05, + "loss": 1.5206, + "step": 180300 + }, + { + "epoch": 2.67, + "learning_rate": 3.666513842877425e-05, + "loss": 1.5028, + "step": 180400 + }, + { + "epoch": 2.67, + "learning_rate": 3.665774372929484e-05, + "loss": 1.5163, + "step": 180500 + }, + { + "epoch": 2.67, + "learning_rate": 3.665034902981543e-05, + "loss": 1.5158, + "step": 180600 + }, + { + "epoch": 2.67, + "learning_rate": 3.6642954330336014e-05, + "loss": 1.4898, + "step": 180700 + }, + { + "epoch": 2.67, + "learning_rate": 3.66355596308566e-05, + "loss": 1.5197, + "step": 180800 + }, + { + "epoch": 2.68, + "learning_rate": 3.662816493137719e-05, + "loss": 1.5166, + "step": 180900 + }, + { + "epoch": 2.68, + "learning_rate": 3.662077023189778e-05, + "loss": 1.5093, + "step": 181000 + }, + { + "epoch": 2.68, + "learning_rate": 3.6613375532418364e-05, + "loss": 1.5269, + "step": 181100 + }, + { + "epoch": 2.68, + "learning_rate": 3.660598083293895e-05, + "loss": 1.5286, + "step": 181200 + }, + { + "epoch": 2.68, + "learning_rate": 3.659858613345954e-05, + "loss": 1.5275, + "step": 181300 + }, + { + "epoch": 2.68, + "learning_rate": 3.659119143398013e-05, + "loss": 1.5296, + "step": 181400 + }, + { + "epoch": 2.68, + "learning_rate": 3.658379673450071e-05, + "loss": 1.5133, + "step": 181500 + }, + { + "epoch": 2.69, + "learning_rate": 3.65764020350213e-05, + "loss": 1.516, + "step": 181600 + }, + { + "epoch": 2.69, + "learning_rate": 3.6569007335541884e-05, + "loss": 1.4879, + "step": 181700 + }, + { + "epoch": 2.69, + "learning_rate": 3.656161263606247e-05, + "loss": 1.5291, + "step": 181800 + }, + { + "epoch": 2.69, + "learning_rate": 3.6554217936583056e-05, + "loss": 1.5436, + "step": 181900 + }, + { + "epoch": 2.69, + "learning_rate": 3.654682323710364e-05, + "loss": 1.5087, + "step": 182000 + }, + { + "epoch": 2.69, + "learning_rate": 3.653950248461903e-05, + "loss": 1.5057, + "step": 182100 + }, + { + "epoch": 2.69, + "learning_rate": 3.6532107785139614e-05, + "loss": 1.5076, + "step": 182200 + }, + { + "epoch": 2.7, + "learning_rate": 3.65247130856602e-05, + "loss": 1.5163, + "step": 182300 + }, + { + "epoch": 2.7, + "learning_rate": 3.6517318386180785e-05, + "loss": 1.5357, + "step": 182400 + }, + { + "epoch": 2.7, + "learning_rate": 3.650992368670138e-05, + "loss": 1.5286, + "step": 182500 + }, + { + "epoch": 2.7, + "learning_rate": 3.6502528987221963e-05, + "loss": 1.5326, + "step": 182600 + }, + { + "epoch": 2.7, + "learning_rate": 3.649513428774255e-05, + "loss": 1.5369, + "step": 182700 + }, + { + "epoch": 2.7, + "learning_rate": 3.6487739588263135e-05, + "loss": 1.5321, + "step": 182800 + }, + { + "epoch": 2.7, + "learning_rate": 3.648034488878373e-05, + "loss": 1.5332, + "step": 182900 + }, + { + "epoch": 2.71, + "learning_rate": 3.6472950189304306e-05, + "loss": 1.5201, + "step": 183000 + }, + { + "epoch": 2.71, + "learning_rate": 3.646555548982489e-05, + "loss": 1.5396, + "step": 183100 + }, + { + "epoch": 2.71, + "learning_rate": 3.645816079034548e-05, + "loss": 1.5077, + "step": 183200 + }, + { + "epoch": 2.71, + "learning_rate": 3.645076609086607e-05, + "loss": 1.5171, + "step": 183300 + }, + { + "epoch": 2.71, + "learning_rate": 3.6443371391386655e-05, + "loss": 1.4933, + "step": 183400 + }, + { + "epoch": 2.71, + "learning_rate": 3.643597669190724e-05, + "loss": 1.5208, + "step": 183500 + }, + { + "epoch": 2.72, + "learning_rate": 3.6428581992427827e-05, + "loss": 1.5156, + "step": 183600 + }, + { + "epoch": 2.72, + "learning_rate": 3.642118729294842e-05, + "loss": 1.5185, + "step": 183700 + }, + { + "epoch": 2.72, + "learning_rate": 3.6413792593469005e-05, + "loss": 1.5241, + "step": 183800 + }, + { + "epoch": 2.72, + "learning_rate": 3.640639789398959e-05, + "loss": 1.5073, + "step": 183900 + }, + { + "epoch": 2.72, + "learning_rate": 3.6399003194510176e-05, + "loss": 1.5314, + "step": 184000 + }, + { + "epoch": 2.72, + "learning_rate": 3.639160849503077e-05, + "loss": 1.5269, + "step": 184100 + }, + { + "epoch": 2.72, + "learning_rate": 3.6384213795551354e-05, + "loss": 1.5071, + "step": 184200 + }, + { + "epoch": 2.73, + "learning_rate": 3.637681909607193e-05, + "loss": 1.5156, + "step": 184300 + }, + { + "epoch": 2.73, + "learning_rate": 3.636949834358731e-05, + "loss": 1.5187, + "step": 184400 + }, + { + "epoch": 2.73, + "learning_rate": 3.6362103644107906e-05, + "loss": 1.5487, + "step": 184500 + }, + { + "epoch": 2.73, + "learning_rate": 3.635470894462849e-05, + "loss": 1.5219, + "step": 184600 + }, + { + "epoch": 2.73, + "learning_rate": 3.634731424514908e-05, + "loss": 1.5236, + "step": 184700 + }, + { + "epoch": 2.73, + "learning_rate": 3.633991954566966e-05, + "loss": 1.5314, + "step": 184800 + }, + { + "epoch": 2.73, + "learning_rate": 3.6332524846190255e-05, + "loss": 1.5143, + "step": 184900 + }, + { + "epoch": 2.74, + "learning_rate": 3.632513014671084e-05, + "loss": 1.5217, + "step": 185000 + }, + { + "epoch": 2.74, + "learning_rate": 3.6317735447231426e-05, + "loss": 1.495, + "step": 185100 + }, + { + "epoch": 2.74, + "learning_rate": 3.631034074775201e-05, + "loss": 1.5373, + "step": 185200 + }, + { + "epoch": 2.74, + "learning_rate": 3.6302946048272604e-05, + "loss": 1.5249, + "step": 185300 + }, + { + "epoch": 2.74, + "learning_rate": 3.629555134879319e-05, + "loss": 1.5114, + "step": 185400 + }, + { + "epoch": 2.74, + "learning_rate": 3.6288156649313776e-05, + "loss": 1.5211, + "step": 185500 + }, + { + "epoch": 2.74, + "learning_rate": 3.628076194983436e-05, + "loss": 1.5106, + "step": 185600 + }, + { + "epoch": 2.75, + "learning_rate": 3.627336725035495e-05, + "loss": 1.5191, + "step": 185700 + }, + { + "epoch": 2.75, + "learning_rate": 3.626597255087553e-05, + "loss": 1.4975, + "step": 185800 + }, + { + "epoch": 2.75, + "learning_rate": 3.625857785139612e-05, + "loss": 1.5076, + "step": 185900 + }, + { + "epoch": 2.75, + "learning_rate": 3.6251183151916704e-05, + "loss": 1.5015, + "step": 186000 + }, + { + "epoch": 2.75, + "learning_rate": 3.6243788452437296e-05, + "loss": 1.4949, + "step": 186100 + }, + { + "epoch": 2.75, + "learning_rate": 3.623639375295788e-05, + "loss": 1.5212, + "step": 186200 + }, + { + "epoch": 2.76, + "learning_rate": 3.622899905347847e-05, + "loss": 1.5231, + "step": 186300 + }, + { + "epoch": 2.76, + "learning_rate": 3.622167830099385e-05, + "loss": 1.5121, + "step": 186400 + }, + { + "epoch": 2.76, + "learning_rate": 3.621428360151444e-05, + "loss": 1.5301, + "step": 186500 + }, + { + "epoch": 2.76, + "learning_rate": 3.6206888902035026e-05, + "loss": 1.526, + "step": 186600 + }, + { + "epoch": 2.76, + "learning_rate": 3.619949420255561e-05, + "loss": 1.5143, + "step": 186700 + }, + { + "epoch": 2.76, + "learning_rate": 3.61920995030762e-05, + "loss": 1.5298, + "step": 186800 + }, + { + "epoch": 2.76, + "learning_rate": 3.618470480359678e-05, + "loss": 1.5281, + "step": 186900 + }, + { + "epoch": 2.77, + "learning_rate": 3.617731010411737e-05, + "loss": 1.5238, + "step": 187000 + }, + { + "epoch": 2.77, + "learning_rate": 3.6169915404637954e-05, + "loss": 1.5398, + "step": 187100 + }, + { + "epoch": 2.77, + "learning_rate": 3.616252070515854e-05, + "loss": 1.5104, + "step": 187200 + }, + { + "epoch": 2.77, + "learning_rate": 3.615512600567913e-05, + "loss": 1.4956, + "step": 187300 + }, + { + "epoch": 2.77, + "learning_rate": 3.614773130619972e-05, + "loss": 1.5285, + "step": 187400 + }, + { + "epoch": 2.77, + "learning_rate": 3.6140336606720304e-05, + "loss": 1.5209, + "step": 187500 + }, + { + "epoch": 2.77, + "learning_rate": 3.613294190724089e-05, + "loss": 1.5156, + "step": 187600 + }, + { + "epoch": 2.78, + "learning_rate": 3.612554720776148e-05, + "loss": 1.5481, + "step": 187700 + }, + { + "epoch": 2.78, + "learning_rate": 3.611815250828207e-05, + "loss": 1.5176, + "step": 187800 + }, + { + "epoch": 2.78, + "learning_rate": 3.611075780880265e-05, + "loss": 1.5333, + "step": 187900 + }, + { + "epoch": 2.78, + "learning_rate": 3.610336310932324e-05, + "loss": 1.5093, + "step": 188000 + }, + { + "epoch": 2.78, + "learning_rate": 3.609596840984383e-05, + "loss": 1.5141, + "step": 188100 + }, + { + "epoch": 2.78, + "learning_rate": 3.6088573710364417e-05, + "loss": 1.489, + "step": 188200 + }, + { + "epoch": 2.78, + "learning_rate": 3.6081179010884995e-05, + "loss": 1.5148, + "step": 188300 + }, + { + "epoch": 2.79, + "learning_rate": 3.607378431140558e-05, + "loss": 1.5215, + "step": 188400 + }, + { + "epoch": 2.79, + "learning_rate": 3.6066389611926173e-05, + "loss": 1.5124, + "step": 188500 + }, + { + "epoch": 2.79, + "learning_rate": 3.605899491244676e-05, + "loss": 1.5182, + "step": 188600 + }, + { + "epoch": 2.79, + "learning_rate": 3.6051600212967345e-05, + "loss": 1.5096, + "step": 188700 + }, + { + "epoch": 2.79, + "learning_rate": 3.604420551348793e-05, + "loss": 1.4897, + "step": 188800 + }, + { + "epoch": 2.79, + "learning_rate": 3.603681081400852e-05, + "loss": 1.5105, + "step": 188900 + }, + { + "epoch": 2.8, + "learning_rate": 3.602941611452911e-05, + "loss": 1.4967, + "step": 189000 + }, + { + "epoch": 2.8, + "learning_rate": 3.602209536204449e-05, + "loss": 1.518, + "step": 189100 + }, + { + "epoch": 2.8, + "learning_rate": 3.6014700662565074e-05, + "loss": 1.4998, + "step": 189200 + }, + { + "epoch": 2.8, + "learning_rate": 3.600730596308566e-05, + "loss": 1.4873, + "step": 189300 + }, + { + "epoch": 2.8, + "learning_rate": 3.599991126360625e-05, + "loss": 1.5366, + "step": 189400 + }, + { + "epoch": 2.8, + "learning_rate": 3.599251656412684e-05, + "loss": 1.5384, + "step": 189500 + }, + { + "epoch": 2.8, + "learning_rate": 3.598512186464742e-05, + "loss": 1.5302, + "step": 189600 + }, + { + "epoch": 2.81, + "learning_rate": 3.597772716516801e-05, + "loss": 1.5117, + "step": 189700 + }, + { + "epoch": 2.81, + "learning_rate": 3.5970332465688595e-05, + "loss": 1.5345, + "step": 189800 + }, + { + "epoch": 2.81, + "learning_rate": 3.596293776620918e-05, + "loss": 1.4953, + "step": 189900 + }, + { + "epoch": 2.81, + "learning_rate": 3.5955543066729766e-05, + "loss": 1.518, + "step": 190000 + }, + { + "epoch": 2.81, + "learning_rate": 3.594814836725036e-05, + "loss": 1.5336, + "step": 190100 + }, + { + "epoch": 2.81, + "learning_rate": 3.5940753667770944e-05, + "loss": 1.5194, + "step": 190200 + }, + { + "epoch": 2.81, + "learning_rate": 3.593335896829153e-05, + "loss": 1.532, + "step": 190300 + }, + { + "epoch": 2.82, + "learning_rate": 3.5925964268812116e-05, + "loss": 1.5191, + "step": 190400 + }, + { + "epoch": 2.82, + "learning_rate": 3.591856956933271e-05, + "loss": 1.4929, + "step": 190500 + }, + { + "epoch": 2.82, + "learning_rate": 3.5911174869853294e-05, + "loss": 1.5123, + "step": 190600 + }, + { + "epoch": 2.82, + "learning_rate": 3.590378017037388e-05, + "loss": 1.5103, + "step": 190700 + }, + { + "epoch": 2.82, + "learning_rate": 3.5896385470894465e-05, + "loss": 1.5328, + "step": 190800 + }, + { + "epoch": 2.82, + "learning_rate": 3.588899077141506e-05, + "loss": 1.5155, + "step": 190900 + }, + { + "epoch": 2.82, + "learning_rate": 3.5881596071935636e-05, + "loss": 1.5347, + "step": 191000 + }, + { + "epoch": 2.83, + "learning_rate": 3.587420137245622e-05, + "loss": 1.5276, + "step": 191100 + }, + { + "epoch": 2.83, + "learning_rate": 3.586680667297681e-05, + "loss": 1.5183, + "step": 191200 + }, + { + "epoch": 2.83, + "learning_rate": 3.58594119734974e-05, + "loss": 1.484, + "step": 191300 + }, + { + "epoch": 2.83, + "learning_rate": 3.5852017274017986e-05, + "loss": 1.547, + "step": 191400 + }, + { + "epoch": 2.83, + "learning_rate": 3.5844696521533366e-05, + "loss": 1.5224, + "step": 191500 + }, + { + "epoch": 2.83, + "learning_rate": 3.583730182205395e-05, + "loss": 1.5068, + "step": 191600 + }, + { + "epoch": 2.84, + "learning_rate": 3.5829907122574544e-05, + "loss": 1.4842, + "step": 191700 + }, + { + "epoch": 2.84, + "learning_rate": 3.582251242309513e-05, + "loss": 1.505, + "step": 191800 + }, + { + "epoch": 2.84, + "learning_rate": 3.5815117723615715e-05, + "loss": 1.514, + "step": 191900 + }, + { + "epoch": 2.84, + "learning_rate": 3.58077230241363e-05, + "loss": 1.5378, + "step": 192000 + }, + { + "epoch": 2.84, + "learning_rate": 3.580032832465689e-05, + "loss": 1.5025, + "step": 192100 + }, + { + "epoch": 2.84, + "learning_rate": 3.579293362517748e-05, + "loss": 1.5358, + "step": 192200 + }, + { + "epoch": 2.84, + "learning_rate": 3.578553892569806e-05, + "loss": 1.5185, + "step": 192300 + }, + { + "epoch": 2.85, + "learning_rate": 3.5778144226218644e-05, + "loss": 1.4998, + "step": 192400 + }, + { + "epoch": 2.85, + "learning_rate": 3.5770749526739236e-05, + "loss": 1.511, + "step": 192500 + }, + { + "epoch": 2.85, + "learning_rate": 3.576335482725982e-05, + "loss": 1.4953, + "step": 192600 + }, + { + "epoch": 2.85, + "learning_rate": 3.575596012778041e-05, + "loss": 1.4966, + "step": 192700 + }, + { + "epoch": 2.85, + "learning_rate": 3.574856542830099e-05, + "loss": 1.5428, + "step": 192800 + }, + { + "epoch": 2.85, + "learning_rate": 3.5741170728821585e-05, + "loss": 1.4629, + "step": 192900 + }, + { + "epoch": 2.85, + "learning_rate": 3.573377602934217e-05, + "loss": 1.4836, + "step": 193000 + }, + { + "epoch": 2.86, + "learning_rate": 3.5726381329862757e-05, + "loss": 1.5118, + "step": 193100 + }, + { + "epoch": 2.86, + "learning_rate": 3.571898663038334e-05, + "loss": 1.5125, + "step": 193200 + }, + { + "epoch": 2.86, + "learning_rate": 3.5711591930903935e-05, + "loss": 1.5186, + "step": 193300 + }, + { + "epoch": 2.86, + "learning_rate": 3.570419723142452e-05, + "loss": 1.5334, + "step": 193400 + }, + { + "epoch": 2.86, + "learning_rate": 3.5696802531945106e-05, + "loss": 1.5433, + "step": 193500 + }, + { + "epoch": 2.86, + "learning_rate": 3.5689407832465685e-05, + "loss": 1.4849, + "step": 193600 + }, + { + "epoch": 2.86, + "learning_rate": 3.568201313298628e-05, + "loss": 1.5385, + "step": 193700 + }, + { + "epoch": 2.87, + "learning_rate": 3.567461843350686e-05, + "loss": 1.5001, + "step": 193800 + }, + { + "epoch": 2.87, + "learning_rate": 3.566722373402745e-05, + "loss": 1.5286, + "step": 193900 + }, + { + "epoch": 2.87, + "learning_rate": 3.5659829034548034e-05, + "loss": 1.5162, + "step": 194000 + }, + { + "epoch": 2.87, + "learning_rate": 3.5652434335068627e-05, + "loss": 1.4958, + "step": 194100 + }, + { + "epoch": 2.87, + "learning_rate": 3.564503963558921e-05, + "loss": 1.5164, + "step": 194200 + }, + { + "epoch": 2.87, + "learning_rate": 3.563771888310459e-05, + "loss": 1.5182, + "step": 194300 + }, + { + "epoch": 2.88, + "learning_rate": 3.563032418362518e-05, + "loss": 1.5071, + "step": 194400 + }, + { + "epoch": 2.88, + "learning_rate": 3.5622929484145764e-05, + "loss": 1.518, + "step": 194500 + }, + { + "epoch": 2.88, + "learning_rate": 3.5615534784666356e-05, + "loss": 1.5085, + "step": 194600 + }, + { + "epoch": 2.88, + "learning_rate": 3.560814008518694e-05, + "loss": 1.5154, + "step": 194700 + }, + { + "epoch": 2.88, + "learning_rate": 3.560074538570753e-05, + "loss": 1.515, + "step": 194800 + }, + { + "epoch": 2.88, + "learning_rate": 3.559335068622811e-05, + "loss": 1.5017, + "step": 194900 + }, + { + "epoch": 2.88, + "learning_rate": 3.55859559867487e-05, + "loss": 1.4901, + "step": 195000 + }, + { + "epoch": 2.89, + "learning_rate": 3.5578561287269285e-05, + "loss": 1.4999, + "step": 195100 + }, + { + "epoch": 2.89, + "learning_rate": 3.557116658778987e-05, + "loss": 1.5216, + "step": 195200 + }, + { + "epoch": 2.89, + "learning_rate": 3.556377188831046e-05, + "loss": 1.5138, + "step": 195300 + }, + { + "epoch": 2.89, + "learning_rate": 3.555637718883105e-05, + "loss": 1.4859, + "step": 195400 + }, + { + "epoch": 2.89, + "learning_rate": 3.5548982489351634e-05, + "loss": 1.554, + "step": 195500 + }, + { + "epoch": 2.89, + "learning_rate": 3.554158778987222e-05, + "loss": 1.5064, + "step": 195600 + }, + { + "epoch": 2.89, + "learning_rate": 3.553419309039281e-05, + "loss": 1.5136, + "step": 195700 + }, + { + "epoch": 2.9, + "learning_rate": 3.55267983909134e-05, + "loss": 1.5297, + "step": 195800 + }, + { + "epoch": 2.9, + "learning_rate": 3.551940369143398e-05, + "loss": 1.5249, + "step": 195900 + }, + { + "epoch": 2.9, + "learning_rate": 3.551200899195457e-05, + "loss": 1.5049, + "step": 196000 + }, + { + "epoch": 2.9, + "learning_rate": 3.550461429247516e-05, + "loss": 1.4953, + "step": 196100 + }, + { + "epoch": 2.9, + "learning_rate": 3.549721959299574e-05, + "loss": 1.5036, + "step": 196200 + }, + { + "epoch": 2.9, + "learning_rate": 3.5489824893516326e-05, + "loss": 1.5086, + "step": 196300 + }, + { + "epoch": 2.9, + "learning_rate": 3.548243019403691e-05, + "loss": 1.4954, + "step": 196400 + }, + { + "epoch": 2.91, + "learning_rate": 3.5475035494557504e-05, + "loss": 1.5042, + "step": 196500 + }, + { + "epoch": 2.91, + "learning_rate": 3.546764079507809e-05, + "loss": 1.5187, + "step": 196600 + }, + { + "epoch": 2.91, + "learning_rate": 3.5460246095598675e-05, + "loss": 1.5182, + "step": 196700 + }, + { + "epoch": 2.91, + "learning_rate": 3.5452925343114055e-05, + "loss": 1.5204, + "step": 196800 + }, + { + "epoch": 2.91, + "learning_rate": 3.544553064363465e-05, + "loss": 1.5163, + "step": 196900 + }, + { + "epoch": 2.91, + "learning_rate": 3.5438135944155234e-05, + "loss": 1.5131, + "step": 197000 + }, + { + "epoch": 2.91, + "learning_rate": 3.543074124467582e-05, + "loss": 1.4985, + "step": 197100 + }, + { + "epoch": 2.92, + "learning_rate": 3.5423346545196405e-05, + "loss": 1.5153, + "step": 197200 + }, + { + "epoch": 2.92, + "learning_rate": 3.541595184571699e-05, + "loss": 1.5113, + "step": 197300 + }, + { + "epoch": 2.92, + "learning_rate": 3.540855714623758e-05, + "loss": 1.5174, + "step": 197400 + }, + { + "epoch": 2.92, + "learning_rate": 3.540116244675817e-05, + "loss": 1.5142, + "step": 197500 + }, + { + "epoch": 2.92, + "learning_rate": 3.539376774727875e-05, + "loss": 1.5156, + "step": 197600 + }, + { + "epoch": 2.92, + "learning_rate": 3.538637304779934e-05, + "loss": 1.5239, + "step": 197700 + }, + { + "epoch": 2.93, + "learning_rate": 3.5378978348319925e-05, + "loss": 1.5239, + "step": 197800 + }, + { + "epoch": 2.93, + "learning_rate": 3.537158364884051e-05, + "loss": 1.5112, + "step": 197900 + }, + { + "epoch": 2.93, + "learning_rate": 3.53641889493611e-05, + "loss": 1.5009, + "step": 198000 + }, + { + "epoch": 2.93, + "learning_rate": 3.535679424988169e-05, + "loss": 1.5268, + "step": 198100 + }, + { + "epoch": 2.93, + "learning_rate": 3.5349399550402275e-05, + "loss": 1.5139, + "step": 198200 + }, + { + "epoch": 2.93, + "learning_rate": 3.534200485092286e-05, + "loss": 1.5154, + "step": 198300 + }, + { + "epoch": 2.93, + "learning_rate": 3.5334610151443446e-05, + "loss": 1.5108, + "step": 198400 + }, + { + "epoch": 2.94, + "learning_rate": 3.532721545196404e-05, + "loss": 1.5173, + "step": 198500 + }, + { + "epoch": 2.94, + "learning_rate": 3.5319820752484624e-05, + "loss": 1.5252, + "step": 198600 + }, + { + "epoch": 2.94, + "learning_rate": 3.531242605300521e-05, + "loss": 1.4895, + "step": 198700 + }, + { + "epoch": 2.94, + "learning_rate": 3.5305031353525795e-05, + "loss": 1.5209, + "step": 198800 + }, + { + "epoch": 2.94, + "learning_rate": 3.529763665404638e-05, + "loss": 1.5039, + "step": 198900 + }, + { + "epoch": 2.94, + "learning_rate": 3.529024195456697e-05, + "loss": 1.536, + "step": 199000 + }, + { + "epoch": 2.94, + "learning_rate": 3.528284725508755e-05, + "loss": 1.4951, + "step": 199100 + }, + { + "epoch": 2.95, + "learning_rate": 3.527545255560814e-05, + "loss": 1.5265, + "step": 199200 + }, + { + "epoch": 2.95, + "learning_rate": 3.526805785612873e-05, + "loss": 1.4864, + "step": 199300 + }, + { + "epoch": 2.95, + "learning_rate": 3.526073710364411e-05, + "loss": 1.5132, + "step": 199400 + }, + { + "epoch": 2.95, + "learning_rate": 3.5253342404164696e-05, + "loss": 1.5211, + "step": 199500 + }, + { + "epoch": 2.95, + "learning_rate": 3.524594770468528e-05, + "loss": 1.5048, + "step": 199600 + }, + { + "epoch": 2.95, + "learning_rate": 3.523855300520587e-05, + "loss": 1.48, + "step": 199700 + }, + { + "epoch": 2.95, + "learning_rate": 3.523115830572646e-05, + "loss": 1.4973, + "step": 199800 + }, + { + "epoch": 2.96, + "learning_rate": 3.5223763606247046e-05, + "loss": 1.5294, + "step": 199900 + }, + { + "epoch": 2.96, + "learning_rate": 3.521636890676763e-05, + "loss": 1.4917, + "step": 200000 + }, + { + "epoch": 2.96, + "learning_rate": 3.520897420728822e-05, + "loss": 1.5269, + "step": 200100 + }, + { + "epoch": 2.96, + "learning_rate": 3.52015795078088e-05, + "loss": 1.5321, + "step": 200200 + }, + { + "epoch": 2.96, + "learning_rate": 3.519418480832939e-05, + "loss": 1.5146, + "step": 200300 + }, + { + "epoch": 2.96, + "learning_rate": 3.5186790108849974e-05, + "loss": 1.5024, + "step": 200400 + }, + { + "epoch": 2.97, + "learning_rate": 3.5179395409370566e-05, + "loss": 1.4993, + "step": 200500 + }, + { + "epoch": 2.97, + "learning_rate": 3.517200070989115e-05, + "loss": 1.5319, + "step": 200600 + }, + { + "epoch": 2.97, + "learning_rate": 3.516460601041174e-05, + "loss": 1.5143, + "step": 200700 + }, + { + "epoch": 2.97, + "learning_rate": 3.515721131093232e-05, + "loss": 1.5179, + "step": 200800 + }, + { + "epoch": 2.97, + "learning_rate": 3.5149816611452916e-05, + "loss": 1.5357, + "step": 200900 + }, + { + "epoch": 2.97, + "learning_rate": 3.51424219119735e-05, + "loss": 1.5288, + "step": 201000 + }, + { + "epoch": 2.97, + "learning_rate": 3.513502721249409e-05, + "loss": 1.4975, + "step": 201100 + }, + { + "epoch": 2.98, + "learning_rate": 3.512763251301467e-05, + "loss": 1.5186, + "step": 201200 + }, + { + "epoch": 2.98, + "learning_rate": 3.5120237813535265e-05, + "loss": 1.5258, + "step": 201300 + }, + { + "epoch": 2.98, + "learning_rate": 3.511284311405585e-05, + "loss": 1.515, + "step": 201400 + }, + { + "epoch": 2.98, + "learning_rate": 3.510544841457643e-05, + "loss": 1.5121, + "step": 201500 + }, + { + "epoch": 2.98, + "learning_rate": 3.509812766209181e-05, + "loss": 1.4926, + "step": 201600 + }, + { + "epoch": 2.98, + "learning_rate": 3.50907329626124e-05, + "loss": 1.5254, + "step": 201700 + }, + { + "epoch": 2.98, + "learning_rate": 3.508333826313299e-05, + "loss": 1.5122, + "step": 201800 + }, + { + "epoch": 2.99, + "learning_rate": 3.5075943563653574e-05, + "loss": 1.5524, + "step": 201900 + }, + { + "epoch": 2.99, + "learning_rate": 3.506854886417416e-05, + "loss": 1.4987, + "step": 202000 + }, + { + "epoch": 2.99, + "learning_rate": 3.506115416469475e-05, + "loss": 1.5117, + "step": 202100 + }, + { + "epoch": 2.99, + "learning_rate": 3.505375946521534e-05, + "loss": 1.5141, + "step": 202200 + }, + { + "epoch": 2.99, + "learning_rate": 3.504636476573592e-05, + "loss": 1.5227, + "step": 202300 + }, + { + "epoch": 2.99, + "learning_rate": 3.503897006625651e-05, + "loss": 1.5017, + "step": 202400 + }, + { + "epoch": 2.99, + "learning_rate": 3.5031575366777094e-05, + "loss": 1.5095, + "step": 202500 + }, + { + "epoch": 3.0, + "learning_rate": 3.502418066729769e-05, + "loss": 1.4859, + "step": 202600 + }, + { + "epoch": 3.0, + "learning_rate": 3.501678596781827e-05, + "loss": 1.5046, + "step": 202700 + }, + { + "epoch": 3.0, + "learning_rate": 3.500939126833885e-05, + "loss": 1.4923, + "step": 202800 + }, + { + "epoch": 3.0, + "learning_rate": 3.5001996568859444e-05, + "loss": 1.476, + "step": 202900 + }, + { + "epoch": 3.0, + "learning_rate": 3.499460186938003e-05, + "loss": 1.4202, + "step": 203000 + }, + { + "epoch": 3.0, + "learning_rate": 3.4987207169900615e-05, + "loss": 1.4452, + "step": 203100 + }, + { + "epoch": 3.01, + "learning_rate": 3.49798124704212e-05, + "loss": 1.4498, + "step": 203200 + }, + { + "epoch": 3.01, + "learning_rate": 3.497241777094179e-05, + "loss": 1.4632, + "step": 203300 + }, + { + "epoch": 3.01, + "learning_rate": 3.496502307146238e-05, + "loss": 1.4482, + "step": 203400 + }, + { + "epoch": 3.01, + "learning_rate": 3.4957628371982964e-05, + "loss": 1.433, + "step": 203500 + }, + { + "epoch": 3.01, + "learning_rate": 3.495023367250355e-05, + "loss": 1.4313, + "step": 203600 + }, + { + "epoch": 3.01, + "learning_rate": 3.494283897302414e-05, + "loss": 1.4511, + "step": 203700 + }, + { + "epoch": 3.01, + "learning_rate": 3.493551822053952e-05, + "loss": 1.448, + "step": 203800 + }, + { + "epoch": 3.02, + "learning_rate": 3.492812352106011e-05, + "loss": 1.4216, + "step": 203900 + }, + { + "epoch": 3.02, + "learning_rate": 3.4920728821580694e-05, + "loss": 1.4277, + "step": 204000 + }, + { + "epoch": 3.02, + "learning_rate": 3.491333412210128e-05, + "loss": 1.45, + "step": 204100 + }, + { + "epoch": 3.02, + "learning_rate": 3.4905939422621865e-05, + "loss": 1.4198, + "step": 204200 + }, + { + "epoch": 3.02, + "learning_rate": 3.489854472314245e-05, + "loss": 1.4095, + "step": 204300 + }, + { + "epoch": 3.02, + "learning_rate": 3.4891150023663036e-05, + "loss": 1.4597, + "step": 204400 + }, + { + "epoch": 3.02, + "learning_rate": 3.488375532418363e-05, + "loss": 1.444, + "step": 204500 + }, + { + "epoch": 3.03, + "learning_rate": 3.4876360624704215e-05, + "loss": 1.4615, + "step": 204600 + }, + { + "epoch": 3.03, + "learning_rate": 3.48689659252248e-05, + "loss": 1.4499, + "step": 204700 + }, + { + "epoch": 3.03, + "learning_rate": 3.4861571225745386e-05, + "loss": 1.4282, + "step": 204800 + }, + { + "epoch": 3.03, + "learning_rate": 3.485417652626597e-05, + "loss": 1.4389, + "step": 204900 + }, + { + "epoch": 3.03, + "learning_rate": 3.4846781826786564e-05, + "loss": 1.4435, + "step": 205000 + }, + { + "epoch": 3.03, + "learning_rate": 3.483938712730715e-05, + "loss": 1.4389, + "step": 205100 + }, + { + "epoch": 3.03, + "learning_rate": 3.4831992427827735e-05, + "loss": 1.4442, + "step": 205200 + }, + { + "epoch": 3.04, + "learning_rate": 3.482459772834832e-05, + "loss": 1.4303, + "step": 205300 + }, + { + "epoch": 3.04, + "learning_rate": 3.481720302886891e-05, + "loss": 1.4535, + "step": 205400 + }, + { + "epoch": 3.04, + "learning_rate": 3.480980832938949e-05, + "loss": 1.4368, + "step": 205500 + }, + { + "epoch": 3.04, + "learning_rate": 3.480241362991008e-05, + "loss": 1.4427, + "step": 205600 + }, + { + "epoch": 3.04, + "learning_rate": 3.479501893043067e-05, + "loss": 1.4436, + "step": 205700 + }, + { + "epoch": 3.04, + "learning_rate": 3.4787624230951256e-05, + "loss": 1.4624, + "step": 205800 + }, + { + "epoch": 3.05, + "learning_rate": 3.478022953147184e-05, + "loss": 1.4673, + "step": 205900 + }, + { + "epoch": 3.05, + "learning_rate": 3.477283483199243e-05, + "loss": 1.4762, + "step": 206000 + }, + { + "epoch": 3.05, + "learning_rate": 3.476544013251302e-05, + "loss": 1.4508, + "step": 206100 + }, + { + "epoch": 3.05, + "learning_rate": 3.4758045433033605e-05, + "loss": 1.4374, + "step": 206200 + }, + { + "epoch": 3.05, + "learning_rate": 3.475065073355419e-05, + "loss": 1.4383, + "step": 206300 + }, + { + "epoch": 3.05, + "learning_rate": 3.4743256034074776e-05, + "loss": 1.4746, + "step": 206400 + }, + { + "epoch": 3.05, + "learning_rate": 3.473586133459537e-05, + "loss": 1.4509, + "step": 206500 + }, + { + "epoch": 3.06, + "learning_rate": 3.4728466635115954e-05, + "loss": 1.4372, + "step": 206600 + }, + { + "epoch": 3.06, + "learning_rate": 3.472107193563654e-05, + "loss": 1.4318, + "step": 206700 + }, + { + "epoch": 3.06, + "learning_rate": 3.471367723615712e-05, + "loss": 1.4513, + "step": 206800 + }, + { + "epoch": 3.06, + "learning_rate": 3.470628253667771e-05, + "loss": 1.449, + "step": 206900 + }, + { + "epoch": 3.06, + "learning_rate": 3.46988878371983e-05, + "loss": 1.4339, + "step": 207000 + }, + { + "epoch": 3.06, + "learning_rate": 3.469149313771888e-05, + "loss": 1.451, + "step": 207100 + }, + { + "epoch": 3.06, + "learning_rate": 3.468409843823947e-05, + "loss": 1.4445, + "step": 207200 + }, + { + "epoch": 3.07, + "learning_rate": 3.467670373876006e-05, + "loss": 1.4159, + "step": 207300 + }, + { + "epoch": 3.07, + "learning_rate": 3.466938298627544e-05, + "loss": 1.4671, + "step": 207400 + }, + { + "epoch": 3.07, + "learning_rate": 3.466198828679603e-05, + "loss": 1.4663, + "step": 207500 + }, + { + "epoch": 3.07, + "learning_rate": 3.465459358731661e-05, + "loss": 1.4643, + "step": 207600 + }, + { + "epoch": 3.07, + "learning_rate": 3.46471988878372e-05, + "loss": 1.4536, + "step": 207700 + }, + { + "epoch": 3.07, + "learning_rate": 3.463980418835779e-05, + "loss": 1.4667, + "step": 207800 + }, + { + "epoch": 3.07, + "learning_rate": 3.4632409488878376e-05, + "loss": 1.4345, + "step": 207900 + }, + { + "epoch": 3.08, + "learning_rate": 3.462501478939896e-05, + "loss": 1.4282, + "step": 208000 + }, + { + "epoch": 3.08, + "learning_rate": 3.461762008991955e-05, + "loss": 1.4354, + "step": 208100 + }, + { + "epoch": 3.08, + "learning_rate": 3.461022539044013e-05, + "loss": 1.4437, + "step": 208200 + }, + { + "epoch": 3.08, + "learning_rate": 3.460283069096072e-05, + "loss": 1.4407, + "step": 208300 + }, + { + "epoch": 3.08, + "learning_rate": 3.4595435991481304e-05, + "loss": 1.4703, + "step": 208400 + }, + { + "epoch": 3.08, + "learning_rate": 3.45880412920019e-05, + "loss": 1.4376, + "step": 208500 + }, + { + "epoch": 3.09, + "learning_rate": 3.458064659252248e-05, + "loss": 1.4579, + "step": 208600 + }, + { + "epoch": 3.09, + "learning_rate": 3.457325189304307e-05, + "loss": 1.4484, + "step": 208700 + }, + { + "epoch": 3.09, + "learning_rate": 3.4565857193563654e-05, + "loss": 1.4395, + "step": 208800 + }, + { + "epoch": 3.09, + "learning_rate": 3.4558462494084246e-05, + "loss": 1.4397, + "step": 208900 + }, + { + "epoch": 3.09, + "learning_rate": 3.455106779460483e-05, + "loss": 1.4588, + "step": 209000 + }, + { + "epoch": 3.09, + "learning_rate": 3.454367309512542e-05, + "loss": 1.4647, + "step": 209100 + }, + { + "epoch": 3.09, + "learning_rate": 3.4536278395646e-05, + "loss": 1.4295, + "step": 209200 + }, + { + "epoch": 3.1, + "learning_rate": 3.452888369616659e-05, + "loss": 1.4432, + "step": 209300 + }, + { + "epoch": 3.1, + "learning_rate": 3.452148899668718e-05, + "loss": 1.4296, + "step": 209400 + }, + { + "epoch": 3.1, + "learning_rate": 3.451409429720776e-05, + "loss": 1.4767, + "step": 209500 + }, + { + "epoch": 3.1, + "learning_rate": 3.4506699597728345e-05, + "loss": 1.4432, + "step": 209600 + }, + { + "epoch": 3.1, + "learning_rate": 3.449930489824894e-05, + "loss": 1.4422, + "step": 209700 + }, + { + "epoch": 3.1, + "learning_rate": 3.4491910198769523e-05, + "loss": 1.4468, + "step": 209800 + }, + { + "epoch": 3.1, + "learning_rate": 3.448451549929011e-05, + "loss": 1.461, + "step": 209900 + }, + { + "epoch": 3.11, + "learning_rate": 3.4477120799810695e-05, + "loss": 1.445, + "step": 210000 + }, + { + "epoch": 3.11, + "learning_rate": 3.4469800047326075e-05, + "loss": 1.4507, + "step": 210100 + }, + { + "epoch": 3.11, + "learning_rate": 3.446240534784667e-05, + "loss": 1.4299, + "step": 210200 + }, + { + "epoch": 3.11, + "learning_rate": 3.445501064836725e-05, + "loss": 1.4292, + "step": 210300 + }, + { + "epoch": 3.11, + "learning_rate": 3.444761594888784e-05, + "loss": 1.4423, + "step": 210400 + }, + { + "epoch": 3.11, + "learning_rate": 3.4440221249408425e-05, + "loss": 1.4787, + "step": 210500 + }, + { + "epoch": 3.11, + "learning_rate": 3.443282654992902e-05, + "loss": 1.4368, + "step": 210600 + }, + { + "epoch": 3.12, + "learning_rate": 3.44254318504496e-05, + "loss": 1.4669, + "step": 210700 + }, + { + "epoch": 3.12, + "learning_rate": 3.441803715097018e-05, + "loss": 1.4524, + "step": 210800 + }, + { + "epoch": 3.12, + "learning_rate": 3.4410642451490774e-05, + "loss": 1.4455, + "step": 210900 + }, + { + "epoch": 3.12, + "learning_rate": 3.440324775201136e-05, + "loss": 1.4502, + "step": 211000 + }, + { + "epoch": 3.12, + "learning_rate": 3.4395853052531945e-05, + "loss": 1.4656, + "step": 211100 + }, + { + "epoch": 3.12, + "learning_rate": 3.438845835305253e-05, + "loss": 1.4462, + "step": 211200 + }, + { + "epoch": 3.12, + "learning_rate": 3.438106365357312e-05, + "loss": 1.4765, + "step": 211300 + }, + { + "epoch": 3.13, + "learning_rate": 3.437366895409371e-05, + "loss": 1.4761, + "step": 211400 + }, + { + "epoch": 3.13, + "learning_rate": 3.4366274254614294e-05, + "loss": 1.4469, + "step": 211500 + }, + { + "epoch": 3.13, + "learning_rate": 3.435887955513488e-05, + "loss": 1.4518, + "step": 211600 + }, + { + "epoch": 3.13, + "learning_rate": 3.435148485565547e-05, + "loss": 1.4462, + "step": 211700 + }, + { + "epoch": 3.13, + "learning_rate": 3.434409015617606e-05, + "loss": 1.4291, + "step": 211800 + }, + { + "epoch": 3.13, + "learning_rate": 3.4336695456696644e-05, + "loss": 1.4782, + "step": 211900 + }, + { + "epoch": 3.14, + "learning_rate": 3.432930075721723e-05, + "loss": 1.4346, + "step": 212000 + }, + { + "epoch": 3.14, + "learning_rate": 3.4321906057737815e-05, + "loss": 1.4632, + "step": 212100 + }, + { + "epoch": 3.14, + "learning_rate": 3.43145113582584e-05, + "loss": 1.4507, + "step": 212200 + }, + { + "epoch": 3.14, + "learning_rate": 3.4307116658778986e-05, + "loss": 1.4575, + "step": 212300 + }, + { + "epoch": 3.14, + "learning_rate": 3.429972195929957e-05, + "loss": 1.4961, + "step": 212400 + }, + { + "epoch": 3.14, + "learning_rate": 3.4292327259820164e-05, + "loss": 1.4423, + "step": 212500 + }, + { + "epoch": 3.14, + "learning_rate": 3.428493256034075e-05, + "loss": 1.4741, + "step": 212600 + }, + { + "epoch": 3.15, + "learning_rate": 3.4277537860861336e-05, + "loss": 1.4779, + "step": 212700 + }, + { + "epoch": 3.15, + "learning_rate": 3.4270217108376716e-05, + "loss": 1.4514, + "step": 212800 + }, + { + "epoch": 3.15, + "learning_rate": 3.42628224088973e-05, + "loss": 1.4456, + "step": 212900 + }, + { + "epoch": 3.15, + "learning_rate": 3.4255427709417894e-05, + "loss": 1.4587, + "step": 213000 + }, + { + "epoch": 3.15, + "learning_rate": 3.424803300993848e-05, + "loss": 1.4517, + "step": 213100 + }, + { + "epoch": 3.15, + "learning_rate": 3.4240638310459065e-05, + "loss": 1.4655, + "step": 213200 + }, + { + "epoch": 3.15, + "learning_rate": 3.423324361097965e-05, + "loss": 1.4575, + "step": 213300 + }, + { + "epoch": 3.16, + "learning_rate": 3.422584891150024e-05, + "loss": 1.4428, + "step": 213400 + }, + { + "epoch": 3.16, + "learning_rate": 3.421845421202082e-05, + "loss": 1.4334, + "step": 213500 + }, + { + "epoch": 3.16, + "learning_rate": 3.421105951254141e-05, + "loss": 1.436, + "step": 213600 + }, + { + "epoch": 3.16, + "learning_rate": 3.4203664813062e-05, + "loss": 1.4577, + "step": 213700 + }, + { + "epoch": 3.16, + "learning_rate": 3.4196270113582586e-05, + "loss": 1.4476, + "step": 213800 + }, + { + "epoch": 3.16, + "learning_rate": 3.418887541410317e-05, + "loss": 1.4709, + "step": 213900 + }, + { + "epoch": 3.16, + "learning_rate": 3.418148071462376e-05, + "loss": 1.4479, + "step": 214000 + }, + { + "epoch": 3.17, + "learning_rate": 3.417408601514435e-05, + "loss": 1.4473, + "step": 214100 + }, + { + "epoch": 3.17, + "learning_rate": 3.4166691315664935e-05, + "loss": 1.4334, + "step": 214200 + }, + { + "epoch": 3.17, + "learning_rate": 3.415929661618552e-05, + "loss": 1.4306, + "step": 214300 + }, + { + "epoch": 3.17, + "learning_rate": 3.415190191670611e-05, + "loss": 1.4555, + "step": 214400 + }, + { + "epoch": 3.17, + "learning_rate": 3.414450721722669e-05, + "loss": 1.4437, + "step": 214500 + }, + { + "epoch": 3.17, + "learning_rate": 3.4137112517747285e-05, + "loss": 1.4559, + "step": 214600 + }, + { + "epoch": 3.18, + "learning_rate": 3.4129717818267864e-05, + "loss": 1.4304, + "step": 214700 + }, + { + "epoch": 3.18, + "learning_rate": 3.412232311878845e-05, + "loss": 1.4627, + "step": 214800 + }, + { + "epoch": 3.18, + "learning_rate": 3.4115002366303836e-05, + "loss": 1.4788, + "step": 214900 + }, + { + "epoch": 3.18, + "learning_rate": 3.410760766682442e-05, + "loss": 1.4392, + "step": 215000 + }, + { + "epoch": 3.18, + "learning_rate": 3.410021296734501e-05, + "loss": 1.4518, + "step": 215100 + }, + { + "epoch": 3.18, + "learning_rate": 3.409281826786559e-05, + "loss": 1.427, + "step": 215200 + }, + { + "epoch": 3.18, + "learning_rate": 3.408542356838618e-05, + "loss": 1.4791, + "step": 215300 + }, + { + "epoch": 3.19, + "learning_rate": 3.407802886890677e-05, + "loss": 1.4585, + "step": 215400 + }, + { + "epoch": 3.19, + "learning_rate": 3.407063416942736e-05, + "loss": 1.4765, + "step": 215500 + }, + { + "epoch": 3.19, + "learning_rate": 3.406323946994794e-05, + "loss": 1.4367, + "step": 215600 + }, + { + "epoch": 3.19, + "learning_rate": 3.405584477046853e-05, + "loss": 1.4522, + "step": 215700 + }, + { + "epoch": 3.19, + "learning_rate": 3.404845007098912e-05, + "loss": 1.4528, + "step": 215800 + }, + { + "epoch": 3.19, + "learning_rate": 3.4041055371509706e-05, + "loss": 1.4597, + "step": 215900 + }, + { + "epoch": 3.19, + "learning_rate": 3.403366067203029e-05, + "loss": 1.4231, + "step": 216000 + }, + { + "epoch": 3.2, + "learning_rate": 3.402626597255088e-05, + "loss": 1.4376, + "step": 216100 + }, + { + "epoch": 3.2, + "learning_rate": 3.401887127307146e-05, + "loss": 1.4908, + "step": 216200 + }, + { + "epoch": 3.2, + "learning_rate": 3.401147657359205e-05, + "loss": 1.4361, + "step": 216300 + }, + { + "epoch": 3.2, + "learning_rate": 3.4004081874112635e-05, + "loss": 1.4421, + "step": 216400 + }, + { + "epoch": 3.2, + "learning_rate": 3.399668717463323e-05, + "loss": 1.4523, + "step": 216500 + }, + { + "epoch": 3.2, + "learning_rate": 3.398929247515381e-05, + "loss": 1.457, + "step": 216600 + }, + { + "epoch": 3.2, + "learning_rate": 3.39818977756744e-05, + "loss": 1.4546, + "step": 216700 + }, + { + "epoch": 3.21, + "learning_rate": 3.3974503076194984e-05, + "loss": 1.4732, + "step": 216800 + }, + { + "epoch": 3.21, + "learning_rate": 3.396710837671557e-05, + "loss": 1.4533, + "step": 216900 + }, + { + "epoch": 3.21, + "learning_rate": 3.395978762423096e-05, + "loss": 1.429, + "step": 217000 + }, + { + "epoch": 3.21, + "learning_rate": 3.395239292475154e-05, + "loss": 1.4433, + "step": 217100 + }, + { + "epoch": 3.21, + "learning_rate": 3.394499822527213e-05, + "loss": 1.4395, + "step": 217200 + }, + { + "epoch": 3.21, + "learning_rate": 3.3937603525792714e-05, + "loss": 1.4702, + "step": 217300 + }, + { + "epoch": 3.22, + "learning_rate": 3.39302088263133e-05, + "loss": 1.4479, + "step": 217400 + }, + { + "epoch": 3.22, + "learning_rate": 3.3922814126833885e-05, + "loss": 1.4529, + "step": 217500 + }, + { + "epoch": 3.22, + "learning_rate": 3.391541942735447e-05, + "loss": 1.4576, + "step": 217600 + }, + { + "epoch": 3.22, + "learning_rate": 3.3908024727875056e-05, + "loss": 1.4356, + "step": 217700 + }, + { + "epoch": 3.22, + "learning_rate": 3.390063002839565e-05, + "loss": 1.4582, + "step": 217800 + }, + { + "epoch": 3.22, + "learning_rate": 3.3893235328916234e-05, + "loss": 1.4645, + "step": 217900 + }, + { + "epoch": 3.22, + "learning_rate": 3.388584062943682e-05, + "loss": 1.4593, + "step": 218000 + }, + { + "epoch": 3.23, + "learning_rate": 3.3878445929957406e-05, + "loss": 1.4457, + "step": 218100 + }, + { + "epoch": 3.23, + "learning_rate": 3.3871051230478e-05, + "loss": 1.4242, + "step": 218200 + }, + { + "epoch": 3.23, + "learning_rate": 3.3863656530998584e-05, + "loss": 1.4487, + "step": 218300 + }, + { + "epoch": 3.23, + "learning_rate": 3.385626183151917e-05, + "loss": 1.4566, + "step": 218400 + }, + { + "epoch": 3.23, + "learning_rate": 3.3848867132039755e-05, + "loss": 1.436, + "step": 218500 + }, + { + "epoch": 3.23, + "learning_rate": 3.384147243256035e-05, + "loss": 1.4592, + "step": 218600 + }, + { + "epoch": 3.23, + "learning_rate": 3.3834077733080926e-05, + "loss": 1.4626, + "step": 218700 + }, + { + "epoch": 3.24, + "learning_rate": 3.382668303360151e-05, + "loss": 1.4797, + "step": 218800 + }, + { + "epoch": 3.24, + "learning_rate": 3.3819288334122104e-05, + "loss": 1.4678, + "step": 218900 + }, + { + "epoch": 3.24, + "learning_rate": 3.381189363464269e-05, + "loss": 1.4504, + "step": 219000 + }, + { + "epoch": 3.24, + "learning_rate": 3.3804498935163275e-05, + "loss": 1.4582, + "step": 219100 + }, + { + "epoch": 3.24, + "learning_rate": 3.379710423568386e-05, + "loss": 1.4346, + "step": 219200 + }, + { + "epoch": 3.24, + "learning_rate": 3.3789709536204454e-05, + "loss": 1.4601, + "step": 219300 + }, + { + "epoch": 3.24, + "learning_rate": 3.378231483672504e-05, + "loss": 1.4311, + "step": 219400 + }, + { + "epoch": 3.25, + "learning_rate": 3.3774920137245625e-05, + "loss": 1.4747, + "step": 219500 + }, + { + "epoch": 3.25, + "learning_rate": 3.376752543776621e-05, + "loss": 1.4642, + "step": 219600 + }, + { + "epoch": 3.25, + "learning_rate": 3.3760130738286796e-05, + "loss": 1.4645, + "step": 219700 + }, + { + "epoch": 3.25, + "learning_rate": 3.375273603880739e-05, + "loss": 1.4475, + "step": 219800 + }, + { + "epoch": 3.25, + "learning_rate": 3.3745341339327974e-05, + "loss": 1.4827, + "step": 219900 + }, + { + "epoch": 3.25, + "learning_rate": 3.373794663984855e-05, + "loss": 1.4653, + "step": 220000 + }, + { + "epoch": 3.26, + "learning_rate": 3.373062588736394e-05, + "loss": 1.4656, + "step": 220100 + }, + { + "epoch": 3.26, + "learning_rate": 3.3723231187884526e-05, + "loss": 1.4609, + "step": 220200 + }, + { + "epoch": 3.26, + "learning_rate": 3.371583648840511e-05, + "loss": 1.4321, + "step": 220300 + }, + { + "epoch": 3.26, + "learning_rate": 3.37084417889257e-05, + "loss": 1.4321, + "step": 220400 + }, + { + "epoch": 3.26, + "learning_rate": 3.370104708944628e-05, + "loss": 1.4848, + "step": 220500 + }, + { + "epoch": 3.26, + "learning_rate": 3.3693652389966875e-05, + "loss": 1.4539, + "step": 220600 + }, + { + "epoch": 3.26, + "learning_rate": 3.368625769048746e-05, + "loss": 1.4658, + "step": 220700 + }, + { + "epoch": 3.27, + "learning_rate": 3.3678862991008046e-05, + "loss": 1.4603, + "step": 220800 + }, + { + "epoch": 3.27, + "learning_rate": 3.367146829152863e-05, + "loss": 1.451, + "step": 220900 + }, + { + "epoch": 3.27, + "learning_rate": 3.3664073592049225e-05, + "loss": 1.4548, + "step": 221000 + }, + { + "epoch": 3.27, + "learning_rate": 3.365667889256981e-05, + "loss": 1.4472, + "step": 221100 + }, + { + "epoch": 3.27, + "learning_rate": 3.3649284193090396e-05, + "loss": 1.4557, + "step": 221200 + }, + { + "epoch": 3.27, + "learning_rate": 3.364188949361098e-05, + "loss": 1.4471, + "step": 221300 + }, + { + "epoch": 3.27, + "learning_rate": 3.363449479413157e-05, + "loss": 1.452, + "step": 221400 + }, + { + "epoch": 3.28, + "learning_rate": 3.362710009465215e-05, + "loss": 1.4725, + "step": 221500 + }, + { + "epoch": 3.28, + "learning_rate": 3.361970539517274e-05, + "loss": 1.4553, + "step": 221600 + }, + { + "epoch": 3.28, + "learning_rate": 3.361231069569333e-05, + "loss": 1.453, + "step": 221700 + }, + { + "epoch": 3.28, + "learning_rate": 3.3604915996213916e-05, + "loss": 1.4278, + "step": 221800 + }, + { + "epoch": 3.28, + "learning_rate": 3.35975212967345e-05, + "loss": 1.4487, + "step": 221900 + }, + { + "epoch": 3.28, + "learning_rate": 3.359012659725509e-05, + "loss": 1.4678, + "step": 222000 + }, + { + "epoch": 3.28, + "learning_rate": 3.358273189777567e-05, + "loss": 1.458, + "step": 222100 + }, + { + "epoch": 3.29, + "learning_rate": 3.3575337198296266e-05, + "loss": 1.4512, + "step": 222200 + }, + { + "epoch": 3.29, + "learning_rate": 3.356794249881685e-05, + "loss": 1.4444, + "step": 222300 + }, + { + "epoch": 3.29, + "learning_rate": 3.356054779933744e-05, + "loss": 1.5072, + "step": 222400 + }, + { + "epoch": 3.29, + "learning_rate": 3.355315309985802e-05, + "loss": 1.4419, + "step": 222500 + }, + { + "epoch": 3.29, + "learning_rate": 3.3545758400378615e-05, + "loss": 1.4642, + "step": 222600 + }, + { + "epoch": 3.29, + "learning_rate": 3.3538363700899194e-05, + "loss": 1.4721, + "step": 222700 + }, + { + "epoch": 3.3, + "learning_rate": 3.353096900141978e-05, + "loss": 1.4793, + "step": 222800 + }, + { + "epoch": 3.3, + "learning_rate": 3.352357430194037e-05, + "loss": 1.4563, + "step": 222900 + }, + { + "epoch": 3.3, + "learning_rate": 3.351617960246096e-05, + "loss": 1.4351, + "step": 223000 + }, + { + "epoch": 3.3, + "learning_rate": 3.350878490298154e-05, + "loss": 1.4408, + "step": 223100 + }, + { + "epoch": 3.3, + "learning_rate": 3.350139020350213e-05, + "loss": 1.4192, + "step": 223200 + }, + { + "epoch": 3.3, + "learning_rate": 3.349399550402272e-05, + "loss": 1.4325, + "step": 223300 + }, + { + "epoch": 3.3, + "learning_rate": 3.348660080454331e-05, + "loss": 1.4408, + "step": 223400 + }, + { + "epoch": 3.31, + "learning_rate": 3.347928005205869e-05, + "loss": 1.446, + "step": 223500 + }, + { + "epoch": 3.31, + "learning_rate": 3.347188535257927e-05, + "loss": 1.4552, + "step": 223600 + }, + { + "epoch": 3.31, + "learning_rate": 3.346449065309986e-05, + "loss": 1.4486, + "step": 223700 + }, + { + "epoch": 3.31, + "learning_rate": 3.345709595362045e-05, + "loss": 1.4542, + "step": 223800 + }, + { + "epoch": 3.31, + "learning_rate": 3.344970125414104e-05, + "loss": 1.452, + "step": 223900 + }, + { + "epoch": 3.31, + "learning_rate": 3.3442306554661616e-05, + "loss": 1.4718, + "step": 224000 + }, + { + "epoch": 3.31, + "learning_rate": 3.343491185518221e-05, + "loss": 1.48, + "step": 224100 + }, + { + "epoch": 3.32, + "learning_rate": 3.3427517155702794e-05, + "loss": 1.4444, + "step": 224200 + }, + { + "epoch": 3.32, + "learning_rate": 3.342012245622338e-05, + "loss": 1.449, + "step": 224300 + }, + { + "epoch": 3.32, + "learning_rate": 3.3412727756743965e-05, + "loss": 1.4548, + "step": 224400 + }, + { + "epoch": 3.32, + "learning_rate": 3.340533305726456e-05, + "loss": 1.452, + "step": 224500 + }, + { + "epoch": 3.32, + "learning_rate": 3.339793835778514e-05, + "loss": 1.4536, + "step": 224600 + }, + { + "epoch": 3.32, + "learning_rate": 3.339054365830573e-05, + "loss": 1.4558, + "step": 224700 + }, + { + "epoch": 3.32, + "learning_rate": 3.3383148958826314e-05, + "loss": 1.4351, + "step": 224800 + }, + { + "epoch": 3.33, + "learning_rate": 3.33757542593469e-05, + "loss": 1.44, + "step": 224900 + }, + { + "epoch": 3.33, + "learning_rate": 3.336835955986749e-05, + "loss": 1.4473, + "step": 225000 + }, + { + "epoch": 3.33, + "learning_rate": 3.336096486038808e-05, + "loss": 1.449, + "step": 225100 + }, + { + "epoch": 3.33, + "learning_rate": 3.3353570160908664e-05, + "loss": 1.4554, + "step": 225200 + }, + { + "epoch": 3.33, + "learning_rate": 3.334617546142925e-05, + "loss": 1.4322, + "step": 225300 + }, + { + "epoch": 3.33, + "learning_rate": 3.3338780761949835e-05, + "loss": 1.4404, + "step": 225400 + }, + { + "epoch": 3.33, + "learning_rate": 3.333138606247042e-05, + "loss": 1.4728, + "step": 225500 + }, + { + "epoch": 3.34, + "learning_rate": 3.3323991362991006e-05, + "loss": 1.4521, + "step": 225600 + }, + { + "epoch": 3.34, + "learning_rate": 3.33165966635116e-05, + "loss": 1.4484, + "step": 225700 + }, + { + "epoch": 3.34, + "learning_rate": 3.330927591102698e-05, + "loss": 1.4182, + "step": 225800 + }, + { + "epoch": 3.34, + "learning_rate": 3.3301881211547565e-05, + "loss": 1.4491, + "step": 225900 + }, + { + "epoch": 3.34, + "learning_rate": 3.329448651206815e-05, + "loss": 1.4651, + "step": 226000 + }, + { + "epoch": 3.34, + "learning_rate": 3.3287091812588736e-05, + "loss": 1.4812, + "step": 226100 + }, + { + "epoch": 3.35, + "learning_rate": 3.327969711310933e-05, + "loss": 1.449, + "step": 226200 + }, + { + "epoch": 3.35, + "learning_rate": 3.3272302413629914e-05, + "loss": 1.4565, + "step": 226300 + }, + { + "epoch": 3.35, + "learning_rate": 3.32649077141505e-05, + "loss": 1.4206, + "step": 226400 + }, + { + "epoch": 3.35, + "learning_rate": 3.3257513014671085e-05, + "loss": 1.4458, + "step": 226500 + }, + { + "epoch": 3.35, + "learning_rate": 3.325011831519167e-05, + "loss": 1.5017, + "step": 226600 + }, + { + "epoch": 3.35, + "learning_rate": 3.3242723615712256e-05, + "loss": 1.4658, + "step": 226700 + }, + { + "epoch": 3.35, + "learning_rate": 3.323532891623284e-05, + "loss": 1.483, + "step": 226800 + }, + { + "epoch": 3.36, + "learning_rate": 3.3227934216753435e-05, + "loss": 1.4776, + "step": 226900 + }, + { + "epoch": 3.36, + "learning_rate": 3.322053951727402e-05, + "loss": 1.4439, + "step": 227000 + }, + { + "epoch": 3.36, + "learning_rate": 3.3213144817794606e-05, + "loss": 1.4507, + "step": 227100 + }, + { + "epoch": 3.36, + "learning_rate": 3.320575011831519e-05, + "loss": 1.4624, + "step": 227200 + }, + { + "epoch": 3.36, + "learning_rate": 3.319835541883578e-05, + "loss": 1.4533, + "step": 227300 + }, + { + "epoch": 3.36, + "learning_rate": 3.319096071935637e-05, + "loss": 1.4531, + "step": 227400 + }, + { + "epoch": 3.36, + "learning_rate": 3.3183566019876955e-05, + "loss": 1.4582, + "step": 227500 + }, + { + "epoch": 3.37, + "learning_rate": 3.317617132039754e-05, + "loss": 1.4636, + "step": 227600 + }, + { + "epoch": 3.37, + "learning_rate": 3.3168776620918126e-05, + "loss": 1.4405, + "step": 227700 + }, + { + "epoch": 3.37, + "learning_rate": 3.316138192143872e-05, + "loss": 1.4343, + "step": 227800 + }, + { + "epoch": 3.37, + "learning_rate": 3.31539872219593e-05, + "loss": 1.458, + "step": 227900 + }, + { + "epoch": 3.37, + "learning_rate": 3.314659252247988e-05, + "loss": 1.4641, + "step": 228000 + }, + { + "epoch": 3.37, + "learning_rate": 3.3139197823000476e-05, + "loss": 1.4405, + "step": 228100 + }, + { + "epoch": 3.37, + "learning_rate": 3.313180312352106e-05, + "loss": 1.4493, + "step": 228200 + }, + { + "epoch": 3.38, + "learning_rate": 3.312448237103644e-05, + "loss": 1.4373, + "step": 228300 + }, + { + "epoch": 3.38, + "learning_rate": 3.311708767155703e-05, + "loss": 1.4467, + "step": 228400 + }, + { + "epoch": 3.38, + "learning_rate": 3.310969297207761e-05, + "loss": 1.4587, + "step": 228500 + }, + { + "epoch": 3.38, + "learning_rate": 3.3102298272598206e-05, + "loss": 1.4464, + "step": 228600 + }, + { + "epoch": 3.38, + "learning_rate": 3.309490357311879e-05, + "loss": 1.4684, + "step": 228700 + }, + { + "epoch": 3.38, + "learning_rate": 3.308750887363938e-05, + "loss": 1.4847, + "step": 228800 + }, + { + "epoch": 3.39, + "learning_rate": 3.308011417415996e-05, + "loss": 1.4437, + "step": 228900 + }, + { + "epoch": 3.39, + "learning_rate": 3.3072719474680555e-05, + "loss": 1.4265, + "step": 229000 + }, + { + "epoch": 3.39, + "learning_rate": 3.306532477520114e-05, + "loss": 1.4563, + "step": 229100 + }, + { + "epoch": 3.39, + "learning_rate": 3.3057930075721726e-05, + "loss": 1.4276, + "step": 229200 + }, + { + "epoch": 3.39, + "learning_rate": 3.305053537624231e-05, + "loss": 1.4528, + "step": 229300 + }, + { + "epoch": 3.39, + "learning_rate": 3.30431406767629e-05, + "loss": 1.4737, + "step": 229400 + }, + { + "epoch": 3.39, + "learning_rate": 3.303574597728348e-05, + "loss": 1.4328, + "step": 229500 + }, + { + "epoch": 3.4, + "learning_rate": 3.302835127780407e-05, + "loss": 1.4384, + "step": 229600 + }, + { + "epoch": 3.4, + "learning_rate": 3.302095657832466e-05, + "loss": 1.4221, + "step": 229700 + }, + { + "epoch": 3.4, + "learning_rate": 3.301356187884525e-05, + "loss": 1.447, + "step": 229800 + }, + { + "epoch": 3.4, + "learning_rate": 3.300616717936583e-05, + "loss": 1.4384, + "step": 229900 + }, + { + "epoch": 3.4, + "learning_rate": 3.299877247988642e-05, + "loss": 1.4497, + "step": 230000 + }, + { + "epoch": 3.4, + "learning_rate": 3.2991377780407004e-05, + "loss": 1.4625, + "step": 230100 + }, + { + "epoch": 3.4, + "learning_rate": 3.2983983080927596e-05, + "loss": 1.4362, + "step": 230200 + }, + { + "epoch": 3.41, + "learning_rate": 3.297658838144818e-05, + "loss": 1.4556, + "step": 230300 + }, + { + "epoch": 3.41, + "learning_rate": 3.296919368196877e-05, + "loss": 1.4745, + "step": 230400 + }, + { + "epoch": 3.41, + "learning_rate": 3.296179898248935e-05, + "loss": 1.4879, + "step": 230500 + }, + { + "epoch": 3.41, + "learning_rate": 3.295440428300994e-05, + "loss": 1.4561, + "step": 230600 + }, + { + "epoch": 3.41, + "learning_rate": 3.2947009583530524e-05, + "loss": 1.4837, + "step": 230700 + }, + { + "epoch": 3.41, + "learning_rate": 3.293961488405111e-05, + "loss": 1.4721, + "step": 230800 + }, + { + "epoch": 3.41, + "learning_rate": 3.29322201845717e-05, + "loss": 1.4633, + "step": 230900 + }, + { + "epoch": 3.42, + "learning_rate": 3.292482548509229e-05, + "loss": 1.4543, + "step": 231000 + }, + { + "epoch": 3.42, + "learning_rate": 3.291750473260767e-05, + "loss": 1.4759, + "step": 231100 + }, + { + "epoch": 3.42, + "learning_rate": 3.2910110033128254e-05, + "loss": 1.4526, + "step": 231200 + }, + { + "epoch": 3.42, + "learning_rate": 3.290271533364884e-05, + "loss": 1.4815, + "step": 231300 + }, + { + "epoch": 3.42, + "learning_rate": 3.289532063416943e-05, + "loss": 1.4567, + "step": 231400 + }, + { + "epoch": 3.42, + "learning_rate": 3.288792593469002e-05, + "loss": 1.4574, + "step": 231500 + }, + { + "epoch": 3.43, + "learning_rate": 3.28805312352106e-05, + "loss": 1.4739, + "step": 231600 + }, + { + "epoch": 3.43, + "learning_rate": 3.287313653573119e-05, + "loss": 1.4784, + "step": 231700 + }, + { + "epoch": 3.43, + "learning_rate": 3.286574183625178e-05, + "loss": 1.4821, + "step": 231800 + }, + { + "epoch": 3.43, + "learning_rate": 3.285834713677236e-05, + "loss": 1.4552, + "step": 231900 + }, + { + "epoch": 3.43, + "learning_rate": 3.2850952437292946e-05, + "loss": 1.4453, + "step": 232000 + }, + { + "epoch": 3.43, + "learning_rate": 3.284355773781354e-05, + "loss": 1.4604, + "step": 232100 + }, + { + "epoch": 3.43, + "learning_rate": 3.2836163038334124e-05, + "loss": 1.4495, + "step": 232200 + }, + { + "epoch": 3.44, + "learning_rate": 3.282876833885471e-05, + "loss": 1.4648, + "step": 232300 + }, + { + "epoch": 3.44, + "learning_rate": 3.2821373639375295e-05, + "loss": 1.4429, + "step": 232400 + }, + { + "epoch": 3.44, + "learning_rate": 3.281397893989588e-05, + "loss": 1.4455, + "step": 232500 + }, + { + "epoch": 3.44, + "learning_rate": 3.280658424041647e-05, + "loss": 1.4326, + "step": 232600 + }, + { + "epoch": 3.44, + "learning_rate": 3.279918954093706e-05, + "loss": 1.4309, + "step": 232700 + }, + { + "epoch": 3.44, + "learning_rate": 3.2791794841457645e-05, + "loss": 1.4444, + "step": 232800 + }, + { + "epoch": 3.44, + "learning_rate": 3.278440014197823e-05, + "loss": 1.4739, + "step": 232900 + }, + { + "epoch": 3.45, + "learning_rate": 3.277700544249882e-05, + "loss": 1.4576, + "step": 233000 + }, + { + "epoch": 3.45, + "learning_rate": 3.276961074301941e-05, + "loss": 1.4311, + "step": 233100 + }, + { + "epoch": 3.45, + "learning_rate": 3.276228999053478e-05, + "loss": 1.466, + "step": 233200 + }, + { + "epoch": 3.45, + "learning_rate": 3.275489529105537e-05, + "loss": 1.4605, + "step": 233300 + }, + { + "epoch": 3.45, + "learning_rate": 3.274750059157596e-05, + "loss": 1.4525, + "step": 233400 + }, + { + "epoch": 3.45, + "learning_rate": 3.2740105892096546e-05, + "loss": 1.4771, + "step": 233500 + }, + { + "epoch": 3.45, + "learning_rate": 3.273271119261713e-05, + "loss": 1.4862, + "step": 233600 + }, + { + "epoch": 3.46, + "learning_rate": 3.272531649313772e-05, + "loss": 1.4223, + "step": 233700 + }, + { + "epoch": 3.46, + "learning_rate": 3.271792179365831e-05, + "loss": 1.4843, + "step": 233800 + }, + { + "epoch": 3.46, + "learning_rate": 3.2710527094178895e-05, + "loss": 1.4683, + "step": 233900 + }, + { + "epoch": 3.46, + "learning_rate": 3.270313239469948e-05, + "loss": 1.4354, + "step": 234000 + }, + { + "epoch": 3.46, + "learning_rate": 3.2695737695220066e-05, + "loss": 1.4617, + "step": 234100 + }, + { + "epoch": 3.46, + "learning_rate": 3.268834299574066e-05, + "loss": 1.4489, + "step": 234200 + }, + { + "epoch": 3.47, + "learning_rate": 3.2680948296261244e-05, + "loss": 1.4502, + "step": 234300 + }, + { + "epoch": 3.47, + "learning_rate": 3.267355359678183e-05, + "loss": 1.4543, + "step": 234400 + }, + { + "epoch": 3.47, + "learning_rate": 3.2666158897302416e-05, + "loss": 1.4637, + "step": 234500 + }, + { + "epoch": 3.47, + "learning_rate": 3.2658764197823e-05, + "loss": 1.4581, + "step": 234600 + }, + { + "epoch": 3.47, + "learning_rate": 3.265136949834359e-05, + "loss": 1.4234, + "step": 234700 + }, + { + "epoch": 3.47, + "learning_rate": 3.264397479886417e-05, + "loss": 1.4624, + "step": 234800 + }, + { + "epoch": 3.47, + "learning_rate": 3.2636580099384765e-05, + "loss": 1.4669, + "step": 234900 + }, + { + "epoch": 3.48, + "learning_rate": 3.262918539990535e-05, + "loss": 1.4352, + "step": 235000 + }, + { + "epoch": 3.48, + "learning_rate": 3.2621790700425936e-05, + "loss": 1.4327, + "step": 235100 + }, + { + "epoch": 3.48, + "learning_rate": 3.2614469947941317e-05, + "loss": 1.4422, + "step": 235200 + }, + { + "epoch": 3.48, + "learning_rate": 3.26070752484619e-05, + "loss": 1.4746, + "step": 235300 + }, + { + "epoch": 3.48, + "learning_rate": 3.2599680548982495e-05, + "loss": 1.4763, + "step": 235400 + }, + { + "epoch": 3.48, + "learning_rate": 3.259228584950308e-05, + "loss": 1.4368, + "step": 235500 + }, + { + "epoch": 3.48, + "learning_rate": 3.2584891150023666e-05, + "loss": 1.4727, + "step": 235600 + }, + { + "epoch": 3.49, + "learning_rate": 3.257749645054425e-05, + "loss": 1.4363, + "step": 235700 + }, + { + "epoch": 3.49, + "learning_rate": 3.2570101751064844e-05, + "loss": 1.4573, + "step": 235800 + }, + { + "epoch": 3.49, + "learning_rate": 3.256270705158542e-05, + "loss": 1.4631, + "step": 235900 + }, + { + "epoch": 3.49, + "learning_rate": 3.255531235210601e-05, + "loss": 1.4711, + "step": 236000 + }, + { + "epoch": 3.49, + "learning_rate": 3.2547917652626594e-05, + "loss": 1.4499, + "step": 236100 + }, + { + "epoch": 3.49, + "learning_rate": 3.2540522953147187e-05, + "loss": 1.4454, + "step": 236200 + }, + { + "epoch": 3.49, + "learning_rate": 3.253312825366777e-05, + "loss": 1.449, + "step": 236300 + }, + { + "epoch": 3.5, + "learning_rate": 3.252573355418836e-05, + "loss": 1.4243, + "step": 236400 + }, + { + "epoch": 3.5, + "learning_rate": 3.2518338854708943e-05, + "loss": 1.4591, + "step": 236500 + }, + { + "epoch": 3.5, + "learning_rate": 3.2510944155229536e-05, + "loss": 1.4638, + "step": 236600 + }, + { + "epoch": 3.5, + "learning_rate": 3.250354945575012e-05, + "loss": 1.4424, + "step": 236700 + }, + { + "epoch": 3.5, + "learning_rate": 3.249615475627071e-05, + "loss": 1.4517, + "step": 236800 + }, + { + "epoch": 3.5, + "learning_rate": 3.248876005679129e-05, + "loss": 1.4548, + "step": 236900 + }, + { + "epoch": 3.51, + "learning_rate": 3.2481365357311885e-05, + "loss": 1.4472, + "step": 237000 + }, + { + "epoch": 3.51, + "learning_rate": 3.247397065783247e-05, + "loss": 1.4397, + "step": 237100 + }, + { + "epoch": 3.51, + "learning_rate": 3.2466649905347844e-05, + "loss": 1.4491, + "step": 237200 + }, + { + "epoch": 3.51, + "learning_rate": 3.245925520586843e-05, + "loss": 1.4537, + "step": 237300 + }, + { + "epoch": 3.51, + "learning_rate": 3.245186050638902e-05, + "loss": 1.4472, + "step": 237400 + }, + { + "epoch": 3.51, + "learning_rate": 3.244446580690961e-05, + "loss": 1.4218, + "step": 237500 + }, + { + "epoch": 3.51, + "learning_rate": 3.2437071107430194e-05, + "loss": 1.475, + "step": 237600 + }, + { + "epoch": 3.52, + "learning_rate": 3.242967640795078e-05, + "loss": 1.4464, + "step": 237700 + }, + { + "epoch": 3.52, + "learning_rate": 3.242228170847137e-05, + "loss": 1.4725, + "step": 237800 + }, + { + "epoch": 3.52, + "learning_rate": 3.241488700899196e-05, + "loss": 1.4732, + "step": 237900 + }, + { + "epoch": 3.52, + "learning_rate": 3.240749230951254e-05, + "loss": 1.4584, + "step": 238000 + }, + { + "epoch": 3.52, + "learning_rate": 3.240009761003313e-05, + "loss": 1.4746, + "step": 238100 + }, + { + "epoch": 3.52, + "learning_rate": 3.239270291055372e-05, + "loss": 1.4642, + "step": 238200 + }, + { + "epoch": 3.52, + "learning_rate": 3.238530821107431e-05, + "loss": 1.4508, + "step": 238300 + }, + { + "epoch": 3.53, + "learning_rate": 3.237791351159489e-05, + "loss": 1.4541, + "step": 238400 + }, + { + "epoch": 3.53, + "learning_rate": 3.237051881211547e-05, + "loss": 1.4451, + "step": 238500 + }, + { + "epoch": 3.53, + "learning_rate": 3.2363124112636064e-05, + "loss": 1.4644, + "step": 238600 + }, + { + "epoch": 3.53, + "learning_rate": 3.235572941315665e-05, + "loss": 1.4558, + "step": 238700 + }, + { + "epoch": 3.53, + "learning_rate": 3.2348334713677235e-05, + "loss": 1.417, + "step": 238800 + }, + { + "epoch": 3.53, + "learning_rate": 3.234094001419782e-05, + "loss": 1.4587, + "step": 238900 + }, + { + "epoch": 3.53, + "learning_rate": 3.233354531471841e-05, + "loss": 1.4327, + "step": 239000 + }, + { + "epoch": 3.54, + "learning_rate": 3.2326150615239e-05, + "loss": 1.4856, + "step": 239100 + }, + { + "epoch": 3.54, + "learning_rate": 3.2318755915759584e-05, + "loss": 1.4343, + "step": 239200 + }, + { + "epoch": 3.54, + "learning_rate": 3.231136121628017e-05, + "loss": 1.4598, + "step": 239300 + }, + { + "epoch": 3.54, + "learning_rate": 3.230396651680076e-05, + "loss": 1.4681, + "step": 239400 + }, + { + "epoch": 3.54, + "learning_rate": 3.229657181732135e-05, + "loss": 1.4706, + "step": 239500 + }, + { + "epoch": 3.54, + "learning_rate": 3.2289177117841934e-05, + "loss": 1.446, + "step": 239600 + }, + { + "epoch": 3.54, + "learning_rate": 3.228178241836252e-05, + "loss": 1.4683, + "step": 239700 + }, + { + "epoch": 3.55, + "learning_rate": 3.2274387718883105e-05, + "loss": 1.455, + "step": 239800 + }, + { + "epoch": 3.55, + "learning_rate": 3.226699301940369e-05, + "loss": 1.4581, + "step": 239900 + }, + { + "epoch": 3.55, + "learning_rate": 3.225967226691907e-05, + "loss": 1.4718, + "step": 240000 + }, + { + "epoch": 3.55, + "learning_rate": 3.225227756743966e-05, + "loss": 1.4662, + "step": 240100 + }, + { + "epoch": 3.55, + "learning_rate": 3.224488286796025e-05, + "loss": 1.4319, + "step": 240200 + }, + { + "epoch": 3.55, + "learning_rate": 3.2237488168480835e-05, + "loss": 1.4447, + "step": 240300 + }, + { + "epoch": 3.56, + "learning_rate": 3.223009346900142e-05, + "loss": 1.472, + "step": 240400 + }, + { + "epoch": 3.56, + "learning_rate": 3.2222698769522006e-05, + "loss": 1.4484, + "step": 240500 + }, + { + "epoch": 3.56, + "learning_rate": 3.22153040700426e-05, + "loss": 1.4396, + "step": 240600 + }, + { + "epoch": 3.56, + "learning_rate": 3.2207909370563184e-05, + "loss": 1.4661, + "step": 240700 + }, + { + "epoch": 3.56, + "learning_rate": 3.220051467108377e-05, + "loss": 1.4479, + "step": 240800 + }, + { + "epoch": 3.56, + "learning_rate": 3.2193119971604355e-05, + "loss": 1.4598, + "step": 240900 + }, + { + "epoch": 3.56, + "learning_rate": 3.218572527212495e-05, + "loss": 1.4468, + "step": 241000 + }, + { + "epoch": 3.57, + "learning_rate": 3.217833057264553e-05, + "loss": 1.4472, + "step": 241100 + }, + { + "epoch": 3.57, + "learning_rate": 3.217093587316611e-05, + "loss": 1.4582, + "step": 241200 + }, + { + "epoch": 3.57, + "learning_rate": 3.21635411736867e-05, + "loss": 1.4578, + "step": 241300 + }, + { + "epoch": 3.57, + "learning_rate": 3.215614647420729e-05, + "loss": 1.467, + "step": 241400 + }, + { + "epoch": 3.57, + "learning_rate": 3.2148751774727876e-05, + "loss": 1.4569, + "step": 241500 + }, + { + "epoch": 3.57, + "learning_rate": 3.214135707524846e-05, + "loss": 1.4629, + "step": 241600 + }, + { + "epoch": 3.57, + "learning_rate": 3.213396237576905e-05, + "loss": 1.468, + "step": 241700 + }, + { + "epoch": 3.58, + "learning_rate": 3.212656767628964e-05, + "loss": 1.4412, + "step": 241800 + }, + { + "epoch": 3.58, + "learning_rate": 3.2119172976810225e-05, + "loss": 1.4575, + "step": 241900 + }, + { + "epoch": 3.58, + "learning_rate": 3.211177827733081e-05, + "loss": 1.4483, + "step": 242000 + }, + { + "epoch": 3.58, + "learning_rate": 3.2104383577851397e-05, + "loss": 1.4663, + "step": 242100 + }, + { + "epoch": 3.58, + "learning_rate": 3.209698887837199e-05, + "loss": 1.4624, + "step": 242200 + }, + { + "epoch": 3.58, + "learning_rate": 3.2089594178892575e-05, + "loss": 1.4506, + "step": 242300 + }, + { + "epoch": 3.58, + "learning_rate": 3.208219947941316e-05, + "loss": 1.4508, + "step": 242400 + }, + { + "epoch": 3.59, + "learning_rate": 3.2074878726928534e-05, + "loss": 1.4211, + "step": 242500 + }, + { + "epoch": 3.59, + "learning_rate": 3.2067484027449126e-05, + "loss": 1.4594, + "step": 242600 + }, + { + "epoch": 3.59, + "learning_rate": 3.206008932796971e-05, + "loss": 1.4498, + "step": 242700 + }, + { + "epoch": 3.59, + "learning_rate": 3.20526946284903e-05, + "loss": 1.4589, + "step": 242800 + }, + { + "epoch": 3.59, + "learning_rate": 3.204529992901088e-05, + "loss": 1.452, + "step": 242900 + }, + { + "epoch": 3.59, + "learning_rate": 3.2037905229531476e-05, + "loss": 1.4555, + "step": 243000 + }, + { + "epoch": 3.6, + "learning_rate": 3.203051053005206e-05, + "loss": 1.4408, + "step": 243100 + }, + { + "epoch": 3.6, + "learning_rate": 3.202311583057265e-05, + "loss": 1.4347, + "step": 243200 + }, + { + "epoch": 3.6, + "learning_rate": 3.201572113109323e-05, + "loss": 1.425, + "step": 243300 + }, + { + "epoch": 3.6, + "learning_rate": 3.2008326431613825e-05, + "loss": 1.4706, + "step": 243400 + }, + { + "epoch": 3.6, + "learning_rate": 3.200093173213441e-05, + "loss": 1.4657, + "step": 243500 + }, + { + "epoch": 3.6, + "learning_rate": 3.1993537032654996e-05, + "loss": 1.4482, + "step": 243600 + }, + { + "epoch": 3.6, + "learning_rate": 3.198614233317558e-05, + "loss": 1.4613, + "step": 243700 + }, + { + "epoch": 3.61, + "learning_rate": 3.197874763369617e-05, + "loss": 1.4527, + "step": 243800 + }, + { + "epoch": 3.61, + "learning_rate": 3.197135293421675e-05, + "loss": 1.4539, + "step": 243900 + }, + { + "epoch": 3.61, + "learning_rate": 3.196395823473734e-05, + "loss": 1.4557, + "step": 244000 + }, + { + "epoch": 3.61, + "learning_rate": 3.1956563535257924e-05, + "loss": 1.4675, + "step": 244100 + }, + { + "epoch": 3.61, + "learning_rate": 3.194916883577852e-05, + "loss": 1.4284, + "step": 244200 + }, + { + "epoch": 3.61, + "learning_rate": 3.19417741362991e-05, + "loss": 1.4536, + "step": 244300 + }, + { + "epoch": 3.61, + "learning_rate": 3.193437943681969e-05, + "loss": 1.4614, + "step": 244400 + }, + { + "epoch": 3.62, + "learning_rate": 3.1926984737340274e-05, + "loss": 1.4691, + "step": 244500 + }, + { + "epoch": 3.62, + "learning_rate": 3.1919590037860866e-05, + "loss": 1.4686, + "step": 244600 + }, + { + "epoch": 3.62, + "learning_rate": 3.191219533838145e-05, + "loss": 1.4255, + "step": 244700 + }, + { + "epoch": 3.62, + "learning_rate": 3.190480063890204e-05, + "loss": 1.4494, + "step": 244800 + }, + { + "epoch": 3.62, + "learning_rate": 3.189747988641742e-05, + "loss": 1.4466, + "step": 244900 + }, + { + "epoch": 3.62, + "learning_rate": 3.1890085186938004e-05, + "loss": 1.4677, + "step": 245000 + }, + { + "epoch": 3.62, + "learning_rate": 3.188269048745859e-05, + "loss": 1.4468, + "step": 245100 + }, + { + "epoch": 3.63, + "learning_rate": 3.1875295787979175e-05, + "loss": 1.4638, + "step": 245200 + }, + { + "epoch": 3.63, + "learning_rate": 3.186790108849976e-05, + "loss": 1.4574, + "step": 245300 + }, + { + "epoch": 3.63, + "learning_rate": 3.186050638902035e-05, + "loss": 1.4255, + "step": 245400 + }, + { + "epoch": 3.63, + "learning_rate": 3.185311168954094e-05, + "loss": 1.4608, + "step": 245500 + }, + { + "epoch": 3.63, + "learning_rate": 3.1845716990061524e-05, + "loss": 1.4493, + "step": 245600 + }, + { + "epoch": 3.63, + "learning_rate": 3.183832229058211e-05, + "loss": 1.4608, + "step": 245700 + }, + { + "epoch": 3.64, + "learning_rate": 3.18309275911027e-05, + "loss": 1.4477, + "step": 245800 + }, + { + "epoch": 3.64, + "learning_rate": 3.182353289162329e-05, + "loss": 1.4867, + "step": 245900 + }, + { + "epoch": 3.64, + "learning_rate": 3.1816138192143873e-05, + "loss": 1.4555, + "step": 246000 + }, + { + "epoch": 3.64, + "learning_rate": 3.180874349266446e-05, + "loss": 1.4719, + "step": 246100 + }, + { + "epoch": 3.64, + "learning_rate": 3.180134879318505e-05, + "loss": 1.4805, + "step": 246200 + }, + { + "epoch": 3.64, + "learning_rate": 3.179395409370564e-05, + "loss": 1.4482, + "step": 246300 + }, + { + "epoch": 3.64, + "learning_rate": 3.1786559394226216e-05, + "loss": 1.4435, + "step": 246400 + }, + { + "epoch": 3.65, + "learning_rate": 3.17791646947468e-05, + "loss": 1.4361, + "step": 246500 + }, + { + "epoch": 3.65, + "learning_rate": 3.1771769995267394e-05, + "loss": 1.4462, + "step": 246600 + }, + { + "epoch": 3.65, + "learning_rate": 3.176437529578798e-05, + "loss": 1.492, + "step": 246700 + }, + { + "epoch": 3.65, + "learning_rate": 3.1756980596308565e-05, + "loss": 1.4231, + "step": 246800 + }, + { + "epoch": 3.65, + "learning_rate": 3.174958589682915e-05, + "loss": 1.4501, + "step": 246900 + }, + { + "epoch": 3.65, + "learning_rate": 3.174219119734974e-05, + "loss": 1.4567, + "step": 247000 + }, + { + "epoch": 3.65, + "learning_rate": 3.173479649787033e-05, + "loss": 1.4564, + "step": 247100 + }, + { + "epoch": 3.66, + "learning_rate": 3.1727401798390915e-05, + "loss": 1.4374, + "step": 247200 + }, + { + "epoch": 3.66, + "learning_rate": 3.17200070989115e-05, + "loss": 1.4556, + "step": 247300 + }, + { + "epoch": 3.66, + "learning_rate": 3.171268634642689e-05, + "loss": 1.4478, + "step": 247400 + }, + { + "epoch": 3.66, + "learning_rate": 3.170529164694747e-05, + "loss": 1.4674, + "step": 247500 + }, + { + "epoch": 3.66, + "learning_rate": 3.169789694746806e-05, + "loss": 1.4562, + "step": 247600 + }, + { + "epoch": 3.66, + "learning_rate": 3.1690502247988644e-05, + "loss": 1.4535, + "step": 247700 + }, + { + "epoch": 3.66, + "learning_rate": 3.168310754850923e-05, + "loss": 1.4544, + "step": 247800 + }, + { + "epoch": 3.67, + "learning_rate": 3.1675712849029816e-05, + "loss": 1.4627, + "step": 247900 + }, + { + "epoch": 3.67, + "learning_rate": 3.16683181495504e-05, + "loss": 1.4373, + "step": 248000 + }, + { + "epoch": 3.67, + "learning_rate": 3.166092345007099e-05, + "loss": 1.4433, + "step": 248100 + }, + { + "epoch": 3.67, + "learning_rate": 3.165352875059158e-05, + "loss": 1.4748, + "step": 248200 + }, + { + "epoch": 3.67, + "learning_rate": 3.1646134051112165e-05, + "loss": 1.4577, + "step": 248300 + }, + { + "epoch": 3.67, + "learning_rate": 3.163873935163275e-05, + "loss": 1.4591, + "step": 248400 + }, + { + "epoch": 3.68, + "learning_rate": 3.1631344652153336e-05, + "loss": 1.4396, + "step": 248500 + }, + { + "epoch": 3.68, + "learning_rate": 3.162394995267393e-05, + "loss": 1.4501, + "step": 248600 + }, + { + "epoch": 3.68, + "learning_rate": 3.1616555253194514e-05, + "loss": 1.4706, + "step": 248700 + }, + { + "epoch": 3.68, + "learning_rate": 3.16091605537151e-05, + "loss": 1.4643, + "step": 248800 + }, + { + "epoch": 3.68, + "learning_rate": 3.1601765854235686e-05, + "loss": 1.4331, + "step": 248900 + }, + { + "epoch": 3.68, + "learning_rate": 3.159437115475628e-05, + "loss": 1.4583, + "step": 249000 + }, + { + "epoch": 3.68, + "learning_rate": 3.158697645527686e-05, + "loss": 1.4725, + "step": 249100 + }, + { + "epoch": 3.69, + "learning_rate": 3.157958175579744e-05, + "loss": 1.4618, + "step": 249200 + }, + { + "epoch": 3.69, + "learning_rate": 3.157218705631803e-05, + "loss": 1.477, + "step": 249300 + }, + { + "epoch": 3.69, + "learning_rate": 3.156479235683862e-05, + "loss": 1.4867, + "step": 249400 + }, + { + "epoch": 3.69, + "learning_rate": 3.1557471604354e-05, + "loss": 1.4771, + "step": 249500 + }, + { + "epoch": 3.69, + "learning_rate": 3.155007690487459e-05, + "loss": 1.4643, + "step": 249600 + }, + { + "epoch": 3.69, + "learning_rate": 3.154268220539517e-05, + "loss": 1.4616, + "step": 249700 + }, + { + "epoch": 3.69, + "learning_rate": 3.1535287505915765e-05, + "loss": 1.4612, + "step": 249800 + }, + { + "epoch": 3.7, + "learning_rate": 3.152789280643635e-05, + "loss": 1.458, + "step": 249900 + }, + { + "epoch": 3.7, + "learning_rate": 3.1520498106956936e-05, + "loss": 1.4418, + "step": 250000 + }, + { + "epoch": 3.7, + "learning_rate": 3.151310340747752e-05, + "loss": 1.4659, + "step": 250100 + }, + { + "epoch": 3.7, + "learning_rate": 3.150570870799811e-05, + "loss": 1.4273, + "step": 250200 + }, + { + "epoch": 3.7, + "learning_rate": 3.14983140085187e-05, + "loss": 1.4438, + "step": 250300 + }, + { + "epoch": 3.7, + "learning_rate": 3.149091930903928e-05, + "loss": 1.4741, + "step": 250400 + }, + { + "epoch": 3.7, + "learning_rate": 3.1483524609559864e-05, + "loss": 1.468, + "step": 250500 + }, + { + "epoch": 3.71, + "learning_rate": 3.147612991008046e-05, + "loss": 1.4647, + "step": 250600 + }, + { + "epoch": 3.71, + "learning_rate": 3.146873521060104e-05, + "loss": 1.4563, + "step": 250700 + }, + { + "epoch": 3.71, + "learning_rate": 3.146134051112163e-05, + "loss": 1.459, + "step": 250800 + }, + { + "epoch": 3.71, + "learning_rate": 3.1453945811642214e-05, + "loss": 1.4524, + "step": 250900 + }, + { + "epoch": 3.71, + "learning_rate": 3.1446551112162806e-05, + "loss": 1.4834, + "step": 251000 + }, + { + "epoch": 3.71, + "learning_rate": 3.143915641268339e-05, + "loss": 1.4779, + "step": 251100 + }, + { + "epoch": 3.72, + "learning_rate": 3.143176171320398e-05, + "loss": 1.4791, + "step": 251200 + }, + { + "epoch": 3.72, + "learning_rate": 3.142436701372456e-05, + "loss": 1.4597, + "step": 251300 + }, + { + "epoch": 3.72, + "learning_rate": 3.1416972314245155e-05, + "loss": 1.4616, + "step": 251400 + }, + { + "epoch": 3.72, + "learning_rate": 3.140957761476574e-05, + "loss": 1.4634, + "step": 251500 + }, + { + "epoch": 3.72, + "learning_rate": 3.1402182915286327e-05, + "loss": 1.4781, + "step": 251600 + }, + { + "epoch": 3.72, + "learning_rate": 3.13948621628017e-05, + "loss": 1.467, + "step": 251700 + }, + { + "epoch": 3.72, + "learning_rate": 3.138746746332229e-05, + "loss": 1.4414, + "step": 251800 + }, + { + "epoch": 3.73, + "learning_rate": 3.138007276384288e-05, + "loss": 1.4603, + "step": 251900 + }, + { + "epoch": 3.73, + "learning_rate": 3.1372678064363464e-05, + "loss": 1.47, + "step": 252000 + }, + { + "epoch": 3.73, + "learning_rate": 3.136528336488405e-05, + "loss": 1.4566, + "step": 252100 + }, + { + "epoch": 3.73, + "learning_rate": 3.135788866540464e-05, + "loss": 1.455, + "step": 252200 + }, + { + "epoch": 3.73, + "learning_rate": 3.135049396592523e-05, + "loss": 1.4718, + "step": 252300 + }, + { + "epoch": 3.73, + "learning_rate": 3.134309926644581e-05, + "loss": 1.4804, + "step": 252400 + }, + { + "epoch": 3.73, + "learning_rate": 3.13357045669664e-05, + "loss": 1.4586, + "step": 252500 + }, + { + "epoch": 3.74, + "learning_rate": 3.132830986748699e-05, + "loss": 1.4705, + "step": 252600 + }, + { + "epoch": 3.74, + "learning_rate": 3.132091516800758e-05, + "loss": 1.4189, + "step": 252700 + }, + { + "epoch": 3.74, + "learning_rate": 3.131352046852816e-05, + "loss": 1.4423, + "step": 252800 + }, + { + "epoch": 3.74, + "learning_rate": 3.130612576904875e-05, + "loss": 1.4676, + "step": 252900 + }, + { + "epoch": 3.74, + "learning_rate": 3.1298731069569334e-05, + "loss": 1.4623, + "step": 253000 + }, + { + "epoch": 3.74, + "learning_rate": 3.129133637008992e-05, + "loss": 1.4569, + "step": 253100 + }, + { + "epoch": 3.74, + "learning_rate": 3.1283941670610505e-05, + "loss": 1.4338, + "step": 253200 + }, + { + "epoch": 3.75, + "learning_rate": 3.127654697113109e-05, + "loss": 1.4857, + "step": 253300 + }, + { + "epoch": 3.75, + "learning_rate": 3.126915227165168e-05, + "loss": 1.4386, + "step": 253400 + }, + { + "epoch": 3.75, + "learning_rate": 3.126175757217227e-05, + "loss": 1.4579, + "step": 253500 + }, + { + "epoch": 3.75, + "learning_rate": 3.1254362872692854e-05, + "loss": 1.4322, + "step": 253600 + }, + { + "epoch": 3.75, + "learning_rate": 3.124696817321344e-05, + "loss": 1.466, + "step": 253700 + }, + { + "epoch": 3.75, + "learning_rate": 3.123957347373403e-05, + "loss": 1.4423, + "step": 253800 + }, + { + "epoch": 3.76, + "learning_rate": 3.123217877425462e-05, + "loss": 1.4366, + "step": 253900 + }, + { + "epoch": 3.76, + "learning_rate": 3.1224784074775204e-05, + "loss": 1.4432, + "step": 254000 + }, + { + "epoch": 3.76, + "learning_rate": 3.121738937529579e-05, + "loss": 1.47, + "step": 254100 + }, + { + "epoch": 3.76, + "learning_rate": 3.121006862281117e-05, + "loss": 1.4665, + "step": 254200 + }, + { + "epoch": 3.76, + "learning_rate": 3.120267392333176e-05, + "loss": 1.4624, + "step": 254300 + }, + { + "epoch": 3.76, + "learning_rate": 3.119527922385234e-05, + "loss": 1.4807, + "step": 254400 + }, + { + "epoch": 3.76, + "learning_rate": 3.118788452437293e-05, + "loss": 1.4502, + "step": 254500 + }, + { + "epoch": 3.77, + "learning_rate": 3.118048982489352e-05, + "loss": 1.4431, + "step": 254600 + }, + { + "epoch": 3.77, + "learning_rate": 3.1173095125414105e-05, + "loss": 1.424, + "step": 254700 + }, + { + "epoch": 3.77, + "learning_rate": 3.116570042593469e-05, + "loss": 1.4472, + "step": 254800 + }, + { + "epoch": 3.77, + "learning_rate": 3.1158305726455276e-05, + "loss": 1.4356, + "step": 254900 + }, + { + "epoch": 3.77, + "learning_rate": 3.115091102697587e-05, + "loss": 1.4435, + "step": 255000 + }, + { + "epoch": 3.77, + "learning_rate": 3.1143516327496454e-05, + "loss": 1.4551, + "step": 255100 + }, + { + "epoch": 3.77, + "learning_rate": 3.113612162801704e-05, + "loss": 1.4553, + "step": 255200 + }, + { + "epoch": 3.78, + "learning_rate": 3.1128726928537625e-05, + "loss": 1.4468, + "step": 255300 + }, + { + "epoch": 3.78, + "learning_rate": 3.112133222905821e-05, + "loss": 1.4626, + "step": 255400 + }, + { + "epoch": 3.78, + "learning_rate": 3.1113937529578803e-05, + "loss": 1.4547, + "step": 255500 + }, + { + "epoch": 3.78, + "learning_rate": 3.110654283009939e-05, + "loss": 1.433, + "step": 255600 + }, + { + "epoch": 3.78, + "learning_rate": 3.109914813061997e-05, + "loss": 1.4489, + "step": 255700 + }, + { + "epoch": 3.78, + "learning_rate": 3.109175343114056e-05, + "loss": 1.4201, + "step": 255800 + }, + { + "epoch": 3.78, + "learning_rate": 3.1084358731661146e-05, + "loss": 1.4341, + "step": 255900 + }, + { + "epoch": 3.79, + "learning_rate": 3.107696403218173e-05, + "loss": 1.4415, + "step": 256000 + }, + { + "epoch": 3.79, + "learning_rate": 3.106956933270232e-05, + "loss": 1.4422, + "step": 256100 + }, + { + "epoch": 3.79, + "learning_rate": 3.106217463322291e-05, + "loss": 1.4599, + "step": 256200 + }, + { + "epoch": 3.79, + "learning_rate": 3.1054779933743495e-05, + "loss": 1.4481, + "step": 256300 + }, + { + "epoch": 3.79, + "learning_rate": 3.104738523426408e-05, + "loss": 1.4249, + "step": 256400 + }, + { + "epoch": 3.79, + "learning_rate": 3.104006448177946e-05, + "loss": 1.4642, + "step": 256500 + }, + { + "epoch": 3.79, + "learning_rate": 3.103266978230005e-05, + "loss": 1.4423, + "step": 256600 + }, + { + "epoch": 3.8, + "learning_rate": 3.102527508282064e-05, + "loss": 1.4772, + "step": 256700 + }, + { + "epoch": 3.8, + "learning_rate": 3.1017880383341225e-05, + "loss": 1.4786, + "step": 256800 + }, + { + "epoch": 3.8, + "learning_rate": 3.101048568386181e-05, + "loss": 1.4647, + "step": 256900 + }, + { + "epoch": 3.8, + "learning_rate": 3.1003090984382396e-05, + "loss": 1.4688, + "step": 257000 + }, + { + "epoch": 3.8, + "learning_rate": 3.099569628490298e-05, + "loss": 1.4537, + "step": 257100 + }, + { + "epoch": 3.8, + "learning_rate": 3.098830158542357e-05, + "loss": 1.4441, + "step": 257200 + }, + { + "epoch": 3.81, + "learning_rate": 3.098090688594415e-05, + "loss": 1.4517, + "step": 257300 + }, + { + "epoch": 3.81, + "learning_rate": 3.0973512186464746e-05, + "loss": 1.4403, + "step": 257400 + }, + { + "epoch": 3.81, + "learning_rate": 3.096611748698533e-05, + "loss": 1.4369, + "step": 257500 + }, + { + "epoch": 3.81, + "learning_rate": 3.095872278750592e-05, + "loss": 1.4753, + "step": 257600 + }, + { + "epoch": 3.81, + "learning_rate": 3.09513280880265e-05, + "loss": 1.4565, + "step": 257700 + }, + { + "epoch": 3.81, + "learning_rate": 3.094393338854709e-05, + "loss": 1.4734, + "step": 257800 + }, + { + "epoch": 3.81, + "learning_rate": 3.093653868906768e-05, + "loss": 1.4578, + "step": 257900 + }, + { + "epoch": 3.82, + "learning_rate": 3.0929143989588266e-05, + "loss": 1.4291, + "step": 258000 + }, + { + "epoch": 3.82, + "learning_rate": 3.092174929010885e-05, + "loss": 1.4683, + "step": 258100 + }, + { + "epoch": 3.82, + "learning_rate": 3.091435459062944e-05, + "loss": 1.4355, + "step": 258200 + }, + { + "epoch": 3.82, + "learning_rate": 3.090695989115002e-05, + "loss": 1.4812, + "step": 258300 + }, + { + "epoch": 3.82, + "learning_rate": 3.089956519167061e-05, + "loss": 1.4621, + "step": 258400 + }, + { + "epoch": 3.82, + "learning_rate": 3.0892170492191195e-05, + "loss": 1.4205, + "step": 258500 + }, + { + "epoch": 3.82, + "learning_rate": 3.088477579271179e-05, + "loss": 1.4575, + "step": 258600 + }, + { + "epoch": 3.83, + "learning_rate": 3.087738109323237e-05, + "loss": 1.4566, + "step": 258700 + }, + { + "epoch": 3.83, + "learning_rate": 3.086998639375296e-05, + "loss": 1.4783, + "step": 258800 + }, + { + "epoch": 3.83, + "learning_rate": 3.0862591694273544e-05, + "loss": 1.4467, + "step": 258900 + }, + { + "epoch": 3.83, + "learning_rate": 3.0855196994794136e-05, + "loss": 1.4777, + "step": 259000 + }, + { + "epoch": 3.83, + "learning_rate": 3.084780229531472e-05, + "loss": 1.4653, + "step": 259100 + }, + { + "epoch": 3.83, + "learning_rate": 3.08404815428301e-05, + "loss": 1.436, + "step": 259200 + }, + { + "epoch": 3.83, + "learning_rate": 3.083308684335069e-05, + "loss": 1.4384, + "step": 259300 + }, + { + "epoch": 3.84, + "learning_rate": 3.0825692143871274e-05, + "loss": 1.4586, + "step": 259400 + }, + { + "epoch": 3.84, + "learning_rate": 3.0818297444391866e-05, + "loss": 1.4328, + "step": 259500 + }, + { + "epoch": 3.84, + "learning_rate": 3.081090274491245e-05, + "loss": 1.4634, + "step": 259600 + }, + { + "epoch": 3.84, + "learning_rate": 3.080350804543303e-05, + "loss": 1.4164, + "step": 259700 + }, + { + "epoch": 3.84, + "learning_rate": 3.079611334595362e-05, + "loss": 1.4378, + "step": 259800 + }, + { + "epoch": 3.84, + "learning_rate": 3.078871864647421e-05, + "loss": 1.4221, + "step": 259900 + }, + { + "epoch": 3.85, + "learning_rate": 3.0781323946994794e-05, + "loss": 1.4478, + "step": 260000 + }, + { + "epoch": 3.85, + "learning_rate": 3.077392924751538e-05, + "loss": 1.4634, + "step": 260100 + }, + { + "epoch": 3.85, + "learning_rate": 3.076653454803597e-05, + "loss": 1.4531, + "step": 260200 + }, + { + "epoch": 3.85, + "learning_rate": 3.075913984855656e-05, + "loss": 1.4501, + "step": 260300 + }, + { + "epoch": 3.85, + "learning_rate": 3.0751745149077144e-05, + "loss": 1.46, + "step": 260400 + }, + { + "epoch": 3.85, + "learning_rate": 3.074435044959773e-05, + "loss": 1.443, + "step": 260500 + }, + { + "epoch": 3.85, + "learning_rate": 3.0736955750118315e-05, + "loss": 1.4446, + "step": 260600 + }, + { + "epoch": 3.86, + "learning_rate": 3.072956105063891e-05, + "loss": 1.4435, + "step": 260700 + }, + { + "epoch": 3.86, + "learning_rate": 3.072216635115949e-05, + "loss": 1.453, + "step": 260800 + }, + { + "epoch": 3.86, + "learning_rate": 3.071477165168008e-05, + "loss": 1.4593, + "step": 260900 + }, + { + "epoch": 3.86, + "learning_rate": 3.0707376952200664e-05, + "loss": 1.4824, + "step": 261000 + }, + { + "epoch": 3.86, + "learning_rate": 3.069998225272125e-05, + "loss": 1.444, + "step": 261100 + }, + { + "epoch": 3.86, + "learning_rate": 3.0692587553241835e-05, + "loss": 1.4434, + "step": 261200 + }, + { + "epoch": 3.86, + "learning_rate": 3.068519285376242e-05, + "loss": 1.4484, + "step": 261300 + }, + { + "epoch": 3.87, + "learning_rate": 3.06778721012778e-05, + "loss": 1.4546, + "step": 261400 + }, + { + "epoch": 3.87, + "learning_rate": 3.0670477401798394e-05, + "loss": 1.4435, + "step": 261500 + }, + { + "epoch": 3.87, + "learning_rate": 3.066308270231898e-05, + "loss": 1.4472, + "step": 261600 + }, + { + "epoch": 3.87, + "learning_rate": 3.0655688002839565e-05, + "loss": 1.4501, + "step": 261700 + }, + { + "epoch": 3.87, + "learning_rate": 3.064829330336015e-05, + "loss": 1.4376, + "step": 261800 + }, + { + "epoch": 3.87, + "learning_rate": 3.064089860388074e-05, + "loss": 1.4524, + "step": 261900 + }, + { + "epoch": 3.87, + "learning_rate": 3.063350390440133e-05, + "loss": 1.474, + "step": 262000 + }, + { + "epoch": 3.88, + "learning_rate": 3.0626109204921915e-05, + "loss": 1.4342, + "step": 262100 + }, + { + "epoch": 3.88, + "learning_rate": 3.06187145054425e-05, + "loss": 1.4517, + "step": 262200 + }, + { + "epoch": 3.88, + "learning_rate": 3.0611319805963086e-05, + "loss": 1.4701, + "step": 262300 + }, + { + "epoch": 3.88, + "learning_rate": 3.060392510648367e-05, + "loss": 1.4433, + "step": 262400 + }, + { + "epoch": 3.88, + "learning_rate": 3.059653040700426e-05, + "loss": 1.4313, + "step": 262500 + }, + { + "epoch": 3.88, + "learning_rate": 3.058913570752485e-05, + "loss": 1.462, + "step": 262600 + }, + { + "epoch": 3.89, + "learning_rate": 3.0581741008045435e-05, + "loss": 1.4654, + "step": 262700 + }, + { + "epoch": 3.89, + "learning_rate": 3.057434630856602e-05, + "loss": 1.437, + "step": 262800 + }, + { + "epoch": 3.89, + "learning_rate": 3.0566951609086606e-05, + "loss": 1.4649, + "step": 262900 + }, + { + "epoch": 3.89, + "learning_rate": 3.055955690960719e-05, + "loss": 1.4598, + "step": 263000 + }, + { + "epoch": 3.89, + "learning_rate": 3.0552162210127784e-05, + "loss": 1.4531, + "step": 263100 + }, + { + "epoch": 3.89, + "learning_rate": 3.054476751064837e-05, + "loss": 1.4248, + "step": 263200 + }, + { + "epoch": 3.89, + "learning_rate": 3.0537372811168956e-05, + "loss": 1.4671, + "step": 263300 + }, + { + "epoch": 3.9, + "learning_rate": 3.0530052058684336e-05, + "loss": 1.4596, + "step": 263400 + }, + { + "epoch": 3.9, + "learning_rate": 3.052265735920493e-05, + "loss": 1.4571, + "step": 263500 + }, + { + "epoch": 3.9, + "learning_rate": 3.0515262659725507e-05, + "loss": 1.449, + "step": 263600 + }, + { + "epoch": 3.9, + "learning_rate": 3.0507867960246097e-05, + "loss": 1.4454, + "step": 263700 + }, + { + "epoch": 3.9, + "learning_rate": 3.0500473260766682e-05, + "loss": 1.4522, + "step": 263800 + }, + { + "epoch": 3.9, + "learning_rate": 3.0493078561287268e-05, + "loss": 1.4814, + "step": 263900 + }, + { + "epoch": 3.9, + "learning_rate": 3.0485683861807857e-05, + "loss": 1.4452, + "step": 264000 + }, + { + "epoch": 3.91, + "learning_rate": 3.0478289162328442e-05, + "loss": 1.4403, + "step": 264100 + }, + { + "epoch": 3.91, + "learning_rate": 3.047089446284903e-05, + "loss": 1.4635, + "step": 264200 + }, + { + "epoch": 3.91, + "learning_rate": 3.0463499763369617e-05, + "loss": 1.4392, + "step": 264300 + }, + { + "epoch": 3.91, + "learning_rate": 3.0456105063890206e-05, + "loss": 1.4775, + "step": 264400 + }, + { + "epoch": 3.91, + "learning_rate": 3.0448710364410792e-05, + "loss": 1.4486, + "step": 264500 + }, + { + "epoch": 3.91, + "learning_rate": 3.044131566493138e-05, + "loss": 1.4507, + "step": 264600 + }, + { + "epoch": 3.91, + "learning_rate": 3.0433920965451966e-05, + "loss": 1.4551, + "step": 264700 + }, + { + "epoch": 3.92, + "learning_rate": 3.0426526265972555e-05, + "loss": 1.4425, + "step": 264800 + }, + { + "epoch": 3.92, + "learning_rate": 3.0419131566493138e-05, + "loss": 1.4591, + "step": 264900 + }, + { + "epoch": 3.92, + "learning_rate": 3.0411736867013723e-05, + "loss": 1.4626, + "step": 265000 + }, + { + "epoch": 3.92, + "learning_rate": 3.0404342167534312e-05, + "loss": 1.439, + "step": 265100 + }, + { + "epoch": 3.92, + "learning_rate": 3.0396947468054898e-05, + "loss": 1.4599, + "step": 265200 + }, + { + "epoch": 3.92, + "learning_rate": 3.0389552768575487e-05, + "loss": 1.4376, + "step": 265300 + }, + { + "epoch": 3.93, + "learning_rate": 3.0382158069096073e-05, + "loss": 1.4509, + "step": 265400 + }, + { + "epoch": 3.93, + "learning_rate": 3.0374837316611453e-05, + "loss": 1.4408, + "step": 265500 + }, + { + "epoch": 3.93, + "learning_rate": 3.0367442617132042e-05, + "loss": 1.4709, + "step": 265600 + }, + { + "epoch": 3.93, + "learning_rate": 3.0360047917652628e-05, + "loss": 1.4705, + "step": 265700 + }, + { + "epoch": 3.93, + "learning_rate": 3.0352653218173217e-05, + "loss": 1.4483, + "step": 265800 + }, + { + "epoch": 3.93, + "learning_rate": 3.0345258518693802e-05, + "loss": 1.4379, + "step": 265900 + }, + { + "epoch": 3.93, + "learning_rate": 3.033786381921439e-05, + "loss": 1.4769, + "step": 266000 + }, + { + "epoch": 3.94, + "learning_rate": 3.0330469119734977e-05, + "loss": 1.4412, + "step": 266100 + }, + { + "epoch": 3.94, + "learning_rate": 3.0323074420255566e-05, + "loss": 1.4616, + "step": 266200 + }, + { + "epoch": 3.94, + "learning_rate": 3.031567972077615e-05, + "loss": 1.4841, + "step": 266300 + }, + { + "epoch": 3.94, + "learning_rate": 3.0308285021296734e-05, + "loss": 1.439, + "step": 266400 + }, + { + "epoch": 3.94, + "learning_rate": 3.030089032181732e-05, + "loss": 1.424, + "step": 266500 + }, + { + "epoch": 3.94, + "learning_rate": 3.029349562233791e-05, + "loss": 1.4557, + "step": 266600 + }, + { + "epoch": 3.94, + "learning_rate": 3.0286100922858494e-05, + "loss": 1.4481, + "step": 266700 + }, + { + "epoch": 3.95, + "learning_rate": 3.0278706223379083e-05, + "loss": 1.4483, + "step": 266800 + }, + { + "epoch": 3.95, + "learning_rate": 3.027131152389967e-05, + "loss": 1.4332, + "step": 266900 + }, + { + "epoch": 3.95, + "learning_rate": 3.0263916824420258e-05, + "loss": 1.4615, + "step": 267000 + }, + { + "epoch": 3.95, + "learning_rate": 3.0256522124940844e-05, + "loss": 1.4224, + "step": 267100 + }, + { + "epoch": 3.95, + "learning_rate": 3.0249127425461433e-05, + "loss": 1.4723, + "step": 267200 + }, + { + "epoch": 3.95, + "learning_rate": 3.024173272598202e-05, + "loss": 1.467, + "step": 267300 + }, + { + "epoch": 3.95, + "learning_rate": 3.0234338026502607e-05, + "loss": 1.4352, + "step": 267400 + }, + { + "epoch": 3.96, + "learning_rate": 3.0226943327023193e-05, + "loss": 1.4885, + "step": 267500 + }, + { + "epoch": 3.96, + "learning_rate": 3.0219548627543775e-05, + "loss": 1.4652, + "step": 267600 + }, + { + "epoch": 3.96, + "learning_rate": 3.0212153928064364e-05, + "loss": 1.4473, + "step": 267700 + }, + { + "epoch": 3.96, + "learning_rate": 3.020475922858495e-05, + "loss": 1.4456, + "step": 267800 + }, + { + "epoch": 3.96, + "learning_rate": 3.019736452910554e-05, + "loss": 1.4611, + "step": 267900 + }, + { + "epoch": 3.96, + "learning_rate": 3.0189969829626125e-05, + "loss": 1.4293, + "step": 268000 + }, + { + "epoch": 3.97, + "learning_rate": 3.0182575130146714e-05, + "loss": 1.4638, + "step": 268100 + }, + { + "epoch": 3.97, + "learning_rate": 3.01751804306673e-05, + "loss": 1.4595, + "step": 268200 + }, + { + "epoch": 3.97, + "learning_rate": 3.0167785731187885e-05, + "loss": 1.4483, + "step": 268300 + }, + { + "epoch": 3.97, + "learning_rate": 3.0160391031708474e-05, + "loss": 1.4867, + "step": 268400 + }, + { + "epoch": 3.97, + "learning_rate": 3.015299633222906e-05, + "loss": 1.4749, + "step": 268500 + }, + { + "epoch": 3.97, + "learning_rate": 3.014560163274965e-05, + "loss": 1.4623, + "step": 268600 + }, + { + "epoch": 3.97, + "learning_rate": 3.013828088026503e-05, + "loss": 1.441, + "step": 268700 + }, + { + "epoch": 3.98, + "learning_rate": 3.0130886180785618e-05, + "loss": 1.4679, + "step": 268800 + }, + { + "epoch": 3.98, + "learning_rate": 3.0123491481306197e-05, + "loss": 1.453, + "step": 268900 + }, + { + "epoch": 3.98, + "learning_rate": 3.0116096781826786e-05, + "loss": 1.4646, + "step": 269000 + }, + { + "epoch": 3.98, + "learning_rate": 3.010870208234737e-05, + "loss": 1.4265, + "step": 269100 + }, + { + "epoch": 3.98, + "learning_rate": 3.010130738286796e-05, + "loss": 1.4553, + "step": 269200 + }, + { + "epoch": 3.98, + "learning_rate": 3.0093912683388546e-05, + "loss": 1.4309, + "step": 269300 + }, + { + "epoch": 3.98, + "learning_rate": 3.0086517983909135e-05, + "loss": 1.4472, + "step": 269400 + }, + { + "epoch": 3.99, + "learning_rate": 3.007912328442972e-05, + "loss": 1.4453, + "step": 269500 + }, + { + "epoch": 3.99, + "learning_rate": 3.007172858495031e-05, + "loss": 1.4412, + "step": 269600 + }, + { + "epoch": 3.99, + "learning_rate": 3.0064333885470896e-05, + "loss": 1.4683, + "step": 269700 + }, + { + "epoch": 3.99, + "learning_rate": 3.0056939185991485e-05, + "loss": 1.4261, + "step": 269800 + }, + { + "epoch": 3.99, + "learning_rate": 3.004954448651207e-05, + "loss": 1.4568, + "step": 269900 + }, + { + "epoch": 3.99, + "learning_rate": 3.004214978703266e-05, + "loss": 1.4615, + "step": 270000 + }, + { + "epoch": 3.99, + "learning_rate": 3.0034755087553245e-05, + "loss": 1.4484, + "step": 270100 + }, + { + "epoch": 4.0, + "learning_rate": 3.0027360388073827e-05, + "loss": 1.4745, + "step": 270200 + }, + { + "epoch": 4.0, + "learning_rate": 3.0019965688594416e-05, + "loss": 1.4542, + "step": 270300 + }, + { + "epoch": 4.0, + "learning_rate": 3.0012570989115002e-05, + "loss": 1.444, + "step": 270400 + }, + { + "epoch": 4.0, + "learning_rate": 3.000517628963559e-05, + "loss": 1.4219, + "step": 270500 + }, + { + "epoch": 4.0, + "learning_rate": 2.9997781590156176e-05, + "loss": 1.3698, + "step": 270600 + }, + { + "epoch": 4.0, + "learning_rate": 2.9990386890676762e-05, + "loss": 1.3848, + "step": 270700 + }, + { + "epoch": 4.0, + "learning_rate": 2.998299219119735e-05, + "loss": 1.4004, + "step": 270800 + }, + { + "epoch": 4.01, + "learning_rate": 2.9975597491717937e-05, + "loss": 1.3691, + "step": 270900 + }, + { + "epoch": 4.01, + "learning_rate": 2.9968202792238526e-05, + "loss": 1.4072, + "step": 271000 + }, + { + "epoch": 4.01, + "learning_rate": 2.9960882039753906e-05, + "loss": 1.3749, + "step": 271100 + }, + { + "epoch": 4.01, + "learning_rate": 2.9953487340274495e-05, + "loss": 1.387, + "step": 271200 + }, + { + "epoch": 4.01, + "learning_rate": 2.994609264079508e-05, + "loss": 1.3935, + "step": 271300 + }, + { + "epoch": 4.01, + "learning_rate": 2.993869794131567e-05, + "loss": 1.3942, + "step": 271400 + }, + { + "epoch": 4.02, + "learning_rate": 2.993130324183625e-05, + "loss": 1.3825, + "step": 271500 + }, + { + "epoch": 4.02, + "learning_rate": 2.9923908542356838e-05, + "loss": 1.3686, + "step": 271600 + }, + { + "epoch": 4.02, + "learning_rate": 2.9916513842877423e-05, + "loss": 1.3966, + "step": 271700 + }, + { + "epoch": 4.02, + "learning_rate": 2.9909119143398012e-05, + "loss": 1.394, + "step": 271800 + }, + { + "epoch": 4.02, + "learning_rate": 2.9901724443918598e-05, + "loss": 1.3981, + "step": 271900 + }, + { + "epoch": 4.02, + "learning_rate": 2.9894329744439187e-05, + "loss": 1.3897, + "step": 272000 + }, + { + "epoch": 4.02, + "learning_rate": 2.9886935044959773e-05, + "loss": 1.4052, + "step": 272100 + }, + { + "epoch": 4.03, + "learning_rate": 2.9879540345480362e-05, + "loss": 1.4164, + "step": 272200 + }, + { + "epoch": 4.03, + "learning_rate": 2.9872145646000947e-05, + "loss": 1.3779, + "step": 272300 + }, + { + "epoch": 4.03, + "learning_rate": 2.9864750946521536e-05, + "loss": 1.392, + "step": 272400 + }, + { + "epoch": 4.03, + "learning_rate": 2.9857356247042122e-05, + "loss": 1.3841, + "step": 272500 + }, + { + "epoch": 4.03, + "learning_rate": 2.984996154756271e-05, + "loss": 1.4046, + "step": 272600 + }, + { + "epoch": 4.03, + "learning_rate": 2.9842566848083297e-05, + "loss": 1.3781, + "step": 272700 + }, + { + "epoch": 4.03, + "learning_rate": 2.9835172148603886e-05, + "loss": 1.3891, + "step": 272800 + }, + { + "epoch": 4.04, + "learning_rate": 2.9827777449124468e-05, + "loss": 1.3916, + "step": 272900 + }, + { + "epoch": 4.04, + "learning_rate": 2.9820382749645054e-05, + "loss": 1.3962, + "step": 273000 + }, + { + "epoch": 4.04, + "learning_rate": 2.9812988050165643e-05, + "loss": 1.3882, + "step": 273100 + }, + { + "epoch": 4.04, + "learning_rate": 2.980559335068623e-05, + "loss": 1.3682, + "step": 273200 + }, + { + "epoch": 4.04, + "learning_rate": 2.9798198651206814e-05, + "loss": 1.3904, + "step": 273300 + }, + { + "epoch": 4.04, + "learning_rate": 2.9790803951727403e-05, + "loss": 1.3874, + "step": 273400 + }, + { + "epoch": 4.04, + "learning_rate": 2.978340925224799e-05, + "loss": 1.3798, + "step": 273500 + }, + { + "epoch": 4.05, + "learning_rate": 2.9776014552768578e-05, + "loss": 1.3843, + "step": 273600 + }, + { + "epoch": 4.05, + "learning_rate": 2.9768619853289163e-05, + "loss": 1.3884, + "step": 273700 + }, + { + "epoch": 4.05, + "learning_rate": 2.9761225153809752e-05, + "loss": 1.348, + "step": 273800 + }, + { + "epoch": 4.05, + "learning_rate": 2.9753830454330338e-05, + "loss": 1.4056, + "step": 273900 + }, + { + "epoch": 4.05, + "learning_rate": 2.9746435754850927e-05, + "loss": 1.3648, + "step": 274000 + }, + { + "epoch": 4.05, + "learning_rate": 2.9739041055371513e-05, + "loss": 1.43, + "step": 274100 + }, + { + "epoch": 4.06, + "learning_rate": 2.973172030288689e-05, + "loss": 1.3886, + "step": 274200 + }, + { + "epoch": 4.06, + "learning_rate": 2.9724325603407475e-05, + "loss": 1.3801, + "step": 274300 + }, + { + "epoch": 4.06, + "learning_rate": 2.9716930903928064e-05, + "loss": 1.3922, + "step": 274400 + }, + { + "epoch": 4.06, + "learning_rate": 2.970953620444865e-05, + "loss": 1.4062, + "step": 274500 + }, + { + "epoch": 4.06, + "learning_rate": 2.970214150496924e-05, + "loss": 1.402, + "step": 274600 + }, + { + "epoch": 4.06, + "learning_rate": 2.9694746805489825e-05, + "loss": 1.39, + "step": 274700 + }, + { + "epoch": 4.06, + "learning_rate": 2.9687352106010414e-05, + "loss": 1.3792, + "step": 274800 + }, + { + "epoch": 4.07, + "learning_rate": 2.9679957406531e-05, + "loss": 1.3935, + "step": 274900 + }, + { + "epoch": 4.07, + "learning_rate": 2.967256270705159e-05, + "loss": 1.3846, + "step": 275000 + }, + { + "epoch": 4.07, + "learning_rate": 2.9665168007572174e-05, + "loss": 1.3881, + "step": 275100 + }, + { + "epoch": 4.07, + "learning_rate": 2.9657773308092763e-05, + "loss": 1.4005, + "step": 275200 + }, + { + "epoch": 4.07, + "learning_rate": 2.965037860861335e-05, + "loss": 1.3965, + "step": 275300 + }, + { + "epoch": 4.07, + "learning_rate": 2.9642983909133938e-05, + "loss": 1.4019, + "step": 275400 + }, + { + "epoch": 4.07, + "learning_rate": 2.963558920965452e-05, + "loss": 1.4251, + "step": 275500 + }, + { + "epoch": 4.08, + "learning_rate": 2.9628194510175106e-05, + "loss": 1.3955, + "step": 275600 + }, + { + "epoch": 4.08, + "learning_rate": 2.9620799810695695e-05, + "loss": 1.3832, + "step": 275700 + }, + { + "epoch": 4.08, + "learning_rate": 2.961340511121628e-05, + "loss": 1.4405, + "step": 275800 + }, + { + "epoch": 4.08, + "learning_rate": 2.9606010411736866e-05, + "loss": 1.3938, + "step": 275900 + }, + { + "epoch": 4.08, + "learning_rate": 2.9598615712257455e-05, + "loss": 1.386, + "step": 276000 + }, + { + "epoch": 4.08, + "learning_rate": 2.959122101277804e-05, + "loss": 1.4083, + "step": 276100 + }, + { + "epoch": 4.08, + "learning_rate": 2.958382631329863e-05, + "loss": 1.4335, + "step": 276200 + }, + { + "epoch": 4.09, + "learning_rate": 2.9576431613819215e-05, + "loss": 1.3892, + "step": 276300 + }, + { + "epoch": 4.09, + "learning_rate": 2.9569036914339804e-05, + "loss": 1.4128, + "step": 276400 + }, + { + "epoch": 4.09, + "learning_rate": 2.956164221486039e-05, + "loss": 1.4321, + "step": 276500 + }, + { + "epoch": 4.09, + "learning_rate": 2.9554321462375774e-05, + "loss": 1.3953, + "step": 276600 + }, + { + "epoch": 4.09, + "learning_rate": 2.954692676289636e-05, + "loss": 1.3843, + "step": 276700 + }, + { + "epoch": 4.09, + "learning_rate": 2.953953206341694e-05, + "loss": 1.3799, + "step": 276800 + }, + { + "epoch": 4.1, + "learning_rate": 2.9532137363937527e-05, + "loss": 1.3716, + "step": 276900 + }, + { + "epoch": 4.1, + "learning_rate": 2.9524742664458116e-05, + "loss": 1.4048, + "step": 277000 + }, + { + "epoch": 4.1, + "learning_rate": 2.9517347964978702e-05, + "loss": 1.3871, + "step": 277100 + }, + { + "epoch": 4.1, + "learning_rate": 2.950995326549929e-05, + "loss": 1.4006, + "step": 277200 + }, + { + "epoch": 4.1, + "learning_rate": 2.9502558566019877e-05, + "loss": 1.4159, + "step": 277300 + }, + { + "epoch": 4.1, + "learning_rate": 2.9495163866540466e-05, + "loss": 1.4032, + "step": 277400 + }, + { + "epoch": 4.1, + "learning_rate": 2.948776916706105e-05, + "loss": 1.4016, + "step": 277500 + }, + { + "epoch": 4.11, + "learning_rate": 2.948037446758164e-05, + "loss": 1.4114, + "step": 277600 + }, + { + "epoch": 4.11, + "learning_rate": 2.9472979768102226e-05, + "loss": 1.3942, + "step": 277700 + }, + { + "epoch": 4.11, + "learning_rate": 2.9465585068622815e-05, + "loss": 1.3795, + "step": 277800 + }, + { + "epoch": 4.11, + "learning_rate": 2.94581903691434e-05, + "loss": 1.3886, + "step": 277900 + }, + { + "epoch": 4.11, + "learning_rate": 2.945079566966399e-05, + "loss": 1.3886, + "step": 278000 + }, + { + "epoch": 4.11, + "learning_rate": 2.9443400970184575e-05, + "loss": 1.3941, + "step": 278100 + }, + { + "epoch": 4.11, + "learning_rate": 2.9436006270705157e-05, + "loss": 1.4028, + "step": 278200 + }, + { + "epoch": 4.12, + "learning_rate": 2.9428611571225746e-05, + "loss": 1.3757, + "step": 278300 + }, + { + "epoch": 4.12, + "learning_rate": 2.9421216871746332e-05, + "loss": 1.4051, + "step": 278400 + }, + { + "epoch": 4.12, + "learning_rate": 2.9413822172266918e-05, + "loss": 1.4317, + "step": 278500 + }, + { + "epoch": 4.12, + "learning_rate": 2.9406427472787507e-05, + "loss": 1.3936, + "step": 278600 + }, + { + "epoch": 4.12, + "learning_rate": 2.9399106720302887e-05, + "loss": 1.3885, + "step": 278700 + }, + { + "epoch": 4.12, + "learning_rate": 2.9391712020823476e-05, + "loss": 1.4096, + "step": 278800 + }, + { + "epoch": 4.12, + "learning_rate": 2.9384317321344062e-05, + "loss": 1.4038, + "step": 278900 + }, + { + "epoch": 4.13, + "learning_rate": 2.937692262186465e-05, + "loss": 1.3699, + "step": 279000 + }, + { + "epoch": 4.13, + "learning_rate": 2.9369527922385237e-05, + "loss": 1.3965, + "step": 279100 + }, + { + "epoch": 4.13, + "learning_rate": 2.9362133222905826e-05, + "loss": 1.42, + "step": 279200 + }, + { + "epoch": 4.13, + "learning_rate": 2.935473852342641e-05, + "loss": 1.4035, + "step": 279300 + }, + { + "epoch": 4.13, + "learning_rate": 2.9347343823947e-05, + "loss": 1.3799, + "step": 279400 + }, + { + "epoch": 4.13, + "learning_rate": 2.933994912446758e-05, + "loss": 1.4015, + "step": 279500 + }, + { + "epoch": 4.14, + "learning_rate": 2.9332554424988168e-05, + "loss": 1.3781, + "step": 279600 + }, + { + "epoch": 4.14, + "learning_rate": 2.9325159725508754e-05, + "loss": 1.4084, + "step": 279700 + }, + { + "epoch": 4.14, + "learning_rate": 2.9317765026029343e-05, + "loss": 1.4066, + "step": 279800 + }, + { + "epoch": 4.14, + "learning_rate": 2.931037032654993e-05, + "loss": 1.4019, + "step": 279900 + }, + { + "epoch": 4.14, + "learning_rate": 2.9302975627070517e-05, + "loss": 1.3928, + "step": 280000 + }, + { + "epoch": 4.14, + "learning_rate": 2.9295580927591103e-05, + "loss": 1.3932, + "step": 280100 + }, + { + "epoch": 4.14, + "learning_rate": 2.9288186228111692e-05, + "loss": 1.3943, + "step": 280200 + }, + { + "epoch": 4.15, + "learning_rate": 2.9280791528632278e-05, + "loss": 1.3998, + "step": 280300 + }, + { + "epoch": 4.15, + "learning_rate": 2.9273396829152867e-05, + "loss": 1.4403, + "step": 280400 + }, + { + "epoch": 4.15, + "learning_rate": 2.9266002129673452e-05, + "loss": 1.4033, + "step": 280500 + }, + { + "epoch": 4.15, + "learning_rate": 2.925860743019404e-05, + "loss": 1.3885, + "step": 280600 + }, + { + "epoch": 4.15, + "learning_rate": 2.9251212730714627e-05, + "loss": 1.3953, + "step": 280700 + }, + { + "epoch": 4.15, + "learning_rate": 2.924381803123521e-05, + "loss": 1.3653, + "step": 280800 + }, + { + "epoch": 4.15, + "learning_rate": 2.92364233317558e-05, + "loss": 1.3739, + "step": 280900 + }, + { + "epoch": 4.16, + "learning_rate": 2.9229028632276384e-05, + "loss": 1.4124, + "step": 281000 + }, + { + "epoch": 4.16, + "learning_rate": 2.9221707879791764e-05, + "loss": 1.3835, + "step": 281100 + }, + { + "epoch": 4.16, + "learning_rate": 2.9214313180312353e-05, + "loss": 1.4057, + "step": 281200 + }, + { + "epoch": 4.16, + "learning_rate": 2.920691848083294e-05, + "loss": 1.3904, + "step": 281300 + }, + { + "epoch": 4.16, + "learning_rate": 2.9199523781353528e-05, + "loss": 1.3636, + "step": 281400 + }, + { + "epoch": 4.16, + "learning_rate": 2.9192129081874114e-05, + "loss": 1.3902, + "step": 281500 + }, + { + "epoch": 4.16, + "learning_rate": 2.9184734382394703e-05, + "loss": 1.3879, + "step": 281600 + }, + { + "epoch": 4.17, + "learning_rate": 2.917733968291529e-05, + "loss": 1.4237, + "step": 281700 + }, + { + "epoch": 4.17, + "learning_rate": 2.9169944983435877e-05, + "loss": 1.4052, + "step": 281800 + }, + { + "epoch": 4.17, + "learning_rate": 2.9162550283956463e-05, + "loss": 1.4376, + "step": 281900 + }, + { + "epoch": 4.17, + "learning_rate": 2.9155155584477052e-05, + "loss": 1.3966, + "step": 282000 + }, + { + "epoch": 4.17, + "learning_rate": 2.914776088499763e-05, + "loss": 1.3835, + "step": 282100 + }, + { + "epoch": 4.17, + "learning_rate": 2.914036618551822e-05, + "loss": 1.3788, + "step": 282200 + }, + { + "epoch": 4.18, + "learning_rate": 2.9132971486038806e-05, + "loss": 1.4168, + "step": 282300 + }, + { + "epoch": 4.18, + "learning_rate": 2.9125576786559395e-05, + "loss": 1.4064, + "step": 282400 + }, + { + "epoch": 4.18, + "learning_rate": 2.911818208707998e-05, + "loss": 1.3886, + "step": 282500 + }, + { + "epoch": 4.18, + "learning_rate": 2.911078738760057e-05, + "loss": 1.4049, + "step": 282600 + }, + { + "epoch": 4.18, + "learning_rate": 2.9103392688121155e-05, + "loss": 1.4418, + "step": 282700 + }, + { + "epoch": 4.18, + "learning_rate": 2.9095997988641744e-05, + "loss": 1.3843, + "step": 282800 + }, + { + "epoch": 4.18, + "learning_rate": 2.908860328916233e-05, + "loss": 1.3907, + "step": 282900 + }, + { + "epoch": 4.19, + "learning_rate": 2.908120858968292e-05, + "loss": 1.4314, + "step": 283000 + }, + { + "epoch": 4.19, + "learning_rate": 2.9073813890203504e-05, + "loss": 1.4178, + "step": 283100 + }, + { + "epoch": 4.19, + "learning_rate": 2.9066419190724093e-05, + "loss": 1.4006, + "step": 283200 + }, + { + "epoch": 4.19, + "learning_rate": 2.9059098438239474e-05, + "loss": 1.3997, + "step": 283300 + }, + { + "epoch": 4.19, + "learning_rate": 2.9051703738760056e-05, + "loss": 1.4285, + "step": 283400 + }, + { + "epoch": 4.19, + "learning_rate": 2.904430903928064e-05, + "loss": 1.3899, + "step": 283500 + }, + { + "epoch": 4.19, + "learning_rate": 2.903691433980123e-05, + "loss": 1.3918, + "step": 283600 + }, + { + "epoch": 4.2, + "learning_rate": 2.9029519640321816e-05, + "loss": 1.3865, + "step": 283700 + }, + { + "epoch": 4.2, + "learning_rate": 2.9022124940842405e-05, + "loss": 1.3726, + "step": 283800 + }, + { + "epoch": 4.2, + "learning_rate": 2.901473024136299e-05, + "loss": 1.4013, + "step": 283900 + }, + { + "epoch": 4.2, + "learning_rate": 2.900733554188358e-05, + "loss": 1.3871, + "step": 284000 + }, + { + "epoch": 4.2, + "learning_rate": 2.8999940842404166e-05, + "loss": 1.4047, + "step": 284100 + }, + { + "epoch": 4.2, + "learning_rate": 2.8992546142924755e-05, + "loss": 1.3992, + "step": 284200 + }, + { + "epoch": 4.2, + "learning_rate": 2.898515144344534e-05, + "loss": 1.3815, + "step": 284300 + }, + { + "epoch": 4.21, + "learning_rate": 2.897775674396593e-05, + "loss": 1.4037, + "step": 284400 + }, + { + "epoch": 4.21, + "learning_rate": 2.8970362044486515e-05, + "loss": 1.4115, + "step": 284500 + }, + { + "epoch": 4.21, + "learning_rate": 2.8962967345007104e-05, + "loss": 1.3929, + "step": 284600 + }, + { + "epoch": 4.21, + "learning_rate": 2.895557264552769e-05, + "loss": 1.4171, + "step": 284700 + }, + { + "epoch": 4.21, + "learning_rate": 2.8948177946048272e-05, + "loss": 1.3957, + "step": 284800 + }, + { + "epoch": 4.21, + "learning_rate": 2.8940783246568858e-05, + "loss": 1.3951, + "step": 284900 + }, + { + "epoch": 4.21, + "learning_rate": 2.8933388547089447e-05, + "loss": 1.3868, + "step": 285000 + }, + { + "epoch": 4.22, + "learning_rate": 2.8925993847610032e-05, + "loss": 1.401, + "step": 285100 + }, + { + "epoch": 4.22, + "learning_rate": 2.891859914813062e-05, + "loss": 1.39, + "step": 285200 + }, + { + "epoch": 4.22, + "learning_rate": 2.8911204448651207e-05, + "loss": 1.4027, + "step": 285300 + }, + { + "epoch": 4.22, + "learning_rate": 2.8903809749171796e-05, + "loss": 1.3884, + "step": 285400 + }, + { + "epoch": 4.22, + "learning_rate": 2.889641504969238e-05, + "loss": 1.3792, + "step": 285500 + }, + { + "epoch": 4.22, + "learning_rate": 2.888902035021297e-05, + "loss": 1.4087, + "step": 285600 + }, + { + "epoch": 4.23, + "learning_rate": 2.888169959772835e-05, + "loss": 1.4136, + "step": 285700 + }, + { + "epoch": 4.23, + "learning_rate": 2.887430489824894e-05, + "loss": 1.3675, + "step": 285800 + }, + { + "epoch": 4.23, + "learning_rate": 2.8866910198769526e-05, + "loss": 1.4047, + "step": 285900 + }, + { + "epoch": 4.23, + "learning_rate": 2.885951549929011e-05, + "loss": 1.3875, + "step": 286000 + }, + { + "epoch": 4.23, + "learning_rate": 2.8852120799810694e-05, + "loss": 1.3916, + "step": 286100 + }, + { + "epoch": 4.23, + "learning_rate": 2.8844726100331283e-05, + "loss": 1.3959, + "step": 286200 + }, + { + "epoch": 4.23, + "learning_rate": 2.8837331400851868e-05, + "loss": 1.406, + "step": 286300 + }, + { + "epoch": 4.24, + "learning_rate": 2.8829936701372457e-05, + "loss": 1.4104, + "step": 286400 + }, + { + "epoch": 4.24, + "learning_rate": 2.8822542001893043e-05, + "loss": 1.4054, + "step": 286500 + }, + { + "epoch": 4.24, + "learning_rate": 2.8815147302413632e-05, + "loss": 1.3742, + "step": 286600 + }, + { + "epoch": 4.24, + "learning_rate": 2.8807752602934218e-05, + "loss": 1.3788, + "step": 286700 + }, + { + "epoch": 4.24, + "learning_rate": 2.8800357903454807e-05, + "loss": 1.4198, + "step": 286800 + }, + { + "epoch": 4.24, + "learning_rate": 2.8792963203975392e-05, + "loss": 1.4164, + "step": 286900 + }, + { + "epoch": 4.24, + "learning_rate": 2.878556850449598e-05, + "loss": 1.3812, + "step": 287000 + }, + { + "epoch": 4.25, + "learning_rate": 2.8778173805016567e-05, + "loss": 1.4123, + "step": 287100 + }, + { + "epoch": 4.25, + "learning_rate": 2.8770779105537156e-05, + "loss": 1.4089, + "step": 287200 + }, + { + "epoch": 4.25, + "learning_rate": 2.876338440605774e-05, + "loss": 1.4031, + "step": 287300 + }, + { + "epoch": 4.25, + "learning_rate": 2.8755989706578324e-05, + "loss": 1.3679, + "step": 287400 + }, + { + "epoch": 4.25, + "learning_rate": 2.874859500709891e-05, + "loss": 1.395, + "step": 287500 + }, + { + "epoch": 4.25, + "learning_rate": 2.87412003076195e-05, + "loss": 1.3919, + "step": 287600 + }, + { + "epoch": 4.25, + "learning_rate": 2.8733805608140084e-05, + "loss": 1.4031, + "step": 287700 + }, + { + "epoch": 4.26, + "learning_rate": 2.8726410908660673e-05, + "loss": 1.3972, + "step": 287800 + }, + { + "epoch": 4.26, + "learning_rate": 2.871901620918126e-05, + "loss": 1.3781, + "step": 287900 + }, + { + "epoch": 4.26, + "learning_rate": 2.8711695456696643e-05, + "loss": 1.3883, + "step": 288000 + }, + { + "epoch": 4.26, + "learning_rate": 2.8704300757217228e-05, + "loss": 1.3918, + "step": 288100 + }, + { + "epoch": 4.26, + "learning_rate": 2.8696906057737817e-05, + "loss": 1.3804, + "step": 288200 + }, + { + "epoch": 4.26, + "learning_rate": 2.8689511358258403e-05, + "loss": 1.4147, + "step": 288300 + }, + { + "epoch": 4.27, + "learning_rate": 2.8682116658778992e-05, + "loss": 1.4315, + "step": 288400 + }, + { + "epoch": 4.27, + "learning_rate": 2.8674721959299578e-05, + "loss": 1.3891, + "step": 288500 + }, + { + "epoch": 4.27, + "learning_rate": 2.8667327259820163e-05, + "loss": 1.391, + "step": 288600 + }, + { + "epoch": 4.27, + "learning_rate": 2.8659932560340745e-05, + "loss": 1.3827, + "step": 288700 + }, + { + "epoch": 4.27, + "learning_rate": 2.8652537860861334e-05, + "loss": 1.3936, + "step": 288800 + }, + { + "epoch": 4.27, + "learning_rate": 2.864514316138192e-05, + "loss": 1.39, + "step": 288900 + }, + { + "epoch": 4.27, + "learning_rate": 2.863774846190251e-05, + "loss": 1.3901, + "step": 289000 + }, + { + "epoch": 4.28, + "learning_rate": 2.8630353762423095e-05, + "loss": 1.4108, + "step": 289100 + }, + { + "epoch": 4.28, + "learning_rate": 2.8622959062943684e-05, + "loss": 1.4048, + "step": 289200 + }, + { + "epoch": 4.28, + "learning_rate": 2.861556436346427e-05, + "loss": 1.4003, + "step": 289300 + }, + { + "epoch": 4.28, + "learning_rate": 2.860816966398486e-05, + "loss": 1.3991, + "step": 289400 + }, + { + "epoch": 4.28, + "learning_rate": 2.8600774964505444e-05, + "loss": 1.374, + "step": 289500 + }, + { + "epoch": 4.28, + "learning_rate": 2.8593380265026033e-05, + "loss": 1.4193, + "step": 289600 + }, + { + "epoch": 4.28, + "learning_rate": 2.858598556554662e-05, + "loss": 1.4239, + "step": 289700 + }, + { + "epoch": 4.29, + "learning_rate": 2.8578590866067208e-05, + "loss": 1.4035, + "step": 289800 + }, + { + "epoch": 4.29, + "learning_rate": 2.8571196166587793e-05, + "loss": 1.4181, + "step": 289900 + }, + { + "epoch": 4.29, + "learning_rate": 2.8563801467108376e-05, + "loss": 1.4156, + "step": 290000 + }, + { + "epoch": 4.29, + "learning_rate": 2.8556480714623756e-05, + "loss": 1.4212, + "step": 290100 + }, + { + "epoch": 4.29, + "learning_rate": 2.8549086015144345e-05, + "loss": 1.3951, + "step": 290200 + }, + { + "epoch": 4.29, + "learning_rate": 2.854169131566493e-05, + "loss": 1.3955, + "step": 290300 + }, + { + "epoch": 4.29, + "learning_rate": 2.853429661618552e-05, + "loss": 1.3681, + "step": 290400 + }, + { + "epoch": 4.3, + "learning_rate": 2.8526901916706105e-05, + "loss": 1.4056, + "step": 290500 + }, + { + "epoch": 4.3, + "learning_rate": 2.8519507217226694e-05, + "loss": 1.3818, + "step": 290600 + }, + { + "epoch": 4.3, + "learning_rate": 2.851211251774728e-05, + "loss": 1.3988, + "step": 290700 + }, + { + "epoch": 4.3, + "learning_rate": 2.850471781826787e-05, + "loss": 1.3988, + "step": 290800 + }, + { + "epoch": 4.3, + "learning_rate": 2.8497323118788455e-05, + "loss": 1.4182, + "step": 290900 + }, + { + "epoch": 4.3, + "learning_rate": 2.848992841930904e-05, + "loss": 1.4102, + "step": 291000 + }, + { + "epoch": 4.31, + "learning_rate": 2.848253371982963e-05, + "loss": 1.4322, + "step": 291100 + }, + { + "epoch": 4.31, + "learning_rate": 2.8475139020350215e-05, + "loss": 1.4221, + "step": 291200 + }, + { + "epoch": 4.31, + "learning_rate": 2.8467744320870804e-05, + "loss": 1.4201, + "step": 291300 + }, + { + "epoch": 4.31, + "learning_rate": 2.8460349621391386e-05, + "loss": 1.4131, + "step": 291400 + }, + { + "epoch": 4.31, + "learning_rate": 2.8452954921911972e-05, + "loss": 1.4188, + "step": 291500 + }, + { + "epoch": 4.31, + "learning_rate": 2.844556022243256e-05, + "loss": 1.3829, + "step": 291600 + }, + { + "epoch": 4.31, + "learning_rate": 2.8438165522953147e-05, + "loss": 1.4186, + "step": 291700 + }, + { + "epoch": 4.32, + "learning_rate": 2.8430770823473736e-05, + "loss": 1.3747, + "step": 291800 + }, + { + "epoch": 4.32, + "learning_rate": 2.842337612399432e-05, + "loss": 1.3808, + "step": 291900 + }, + { + "epoch": 4.32, + "learning_rate": 2.841598142451491e-05, + "loss": 1.3875, + "step": 292000 + }, + { + "epoch": 4.32, + "learning_rate": 2.840866067203029e-05, + "loss": 1.4022, + "step": 292100 + }, + { + "epoch": 4.32, + "learning_rate": 2.8401265972550876e-05, + "loss": 1.4042, + "step": 292200 + }, + { + "epoch": 4.32, + "learning_rate": 2.8393871273071465e-05, + "loss": 1.4041, + "step": 292300 + }, + { + "epoch": 4.32, + "learning_rate": 2.838647657359205e-05, + "loss": 1.38, + "step": 292400 + }, + { + "epoch": 4.33, + "learning_rate": 2.837908187411264e-05, + "loss": 1.3741, + "step": 292500 + }, + { + "epoch": 4.33, + "learning_rate": 2.8371687174633226e-05, + "loss": 1.4029, + "step": 292600 + }, + { + "epoch": 4.33, + "learning_rate": 2.8364292475153808e-05, + "loss": 1.382, + "step": 292700 + }, + { + "epoch": 4.33, + "learning_rate": 2.8356897775674397e-05, + "loss": 1.4097, + "step": 292800 + }, + { + "epoch": 4.33, + "learning_rate": 2.8349503076194983e-05, + "loss": 1.3897, + "step": 292900 + }, + { + "epoch": 4.33, + "learning_rate": 2.8342108376715572e-05, + "loss": 1.3815, + "step": 293000 + }, + { + "epoch": 4.33, + "learning_rate": 2.8334713677236157e-05, + "loss": 1.4094, + "step": 293100 + }, + { + "epoch": 4.34, + "learning_rate": 2.8327318977756746e-05, + "loss": 1.3908, + "step": 293200 + }, + { + "epoch": 4.34, + "learning_rate": 2.8319924278277332e-05, + "loss": 1.4051, + "step": 293300 + }, + { + "epoch": 4.34, + "learning_rate": 2.831252957879792e-05, + "loss": 1.3974, + "step": 293400 + }, + { + "epoch": 4.34, + "learning_rate": 2.8305134879318507e-05, + "loss": 1.4135, + "step": 293500 + }, + { + "epoch": 4.34, + "learning_rate": 2.8297740179839092e-05, + "loss": 1.3962, + "step": 293600 + }, + { + "epoch": 4.34, + "learning_rate": 2.829034548035968e-05, + "loss": 1.432, + "step": 293700 + }, + { + "epoch": 4.35, + "learning_rate": 2.8282950780880267e-05, + "loss": 1.3993, + "step": 293800 + }, + { + "epoch": 4.35, + "learning_rate": 2.8275556081400856e-05, + "loss": 1.399, + "step": 293900 + }, + { + "epoch": 4.35, + "learning_rate": 2.8268161381921438e-05, + "loss": 1.3919, + "step": 294000 + }, + { + "epoch": 4.35, + "learning_rate": 2.8260766682442024e-05, + "loss": 1.397, + "step": 294100 + }, + { + "epoch": 4.35, + "learning_rate": 2.8253445929957408e-05, + "loss": 1.4321, + "step": 294200 + }, + { + "epoch": 4.35, + "learning_rate": 2.8246051230477993e-05, + "loss": 1.3729, + "step": 294300 + }, + { + "epoch": 4.35, + "learning_rate": 2.823865653099858e-05, + "loss": 1.3853, + "step": 294400 + }, + { + "epoch": 4.36, + "learning_rate": 2.8231261831519168e-05, + "loss": 1.396, + "step": 294500 + }, + { + "epoch": 4.36, + "learning_rate": 2.8223867132039754e-05, + "loss": 1.3994, + "step": 294600 + }, + { + "epoch": 4.36, + "learning_rate": 2.8216472432560343e-05, + "loss": 1.4032, + "step": 294700 + }, + { + "epoch": 4.36, + "learning_rate": 2.820907773308093e-05, + "loss": 1.4005, + "step": 294800 + }, + { + "epoch": 4.36, + "learning_rate": 2.8201683033601517e-05, + "loss": 1.4302, + "step": 294900 + }, + { + "epoch": 4.36, + "learning_rate": 2.8194288334122103e-05, + "loss": 1.3921, + "step": 295000 + }, + { + "epoch": 4.36, + "learning_rate": 2.8186893634642692e-05, + "loss": 1.4094, + "step": 295100 + }, + { + "epoch": 4.37, + "learning_rate": 2.8179498935163278e-05, + "loss": 1.3973, + "step": 295200 + }, + { + "epoch": 4.37, + "learning_rate": 2.817210423568386e-05, + "loss": 1.4016, + "step": 295300 + }, + { + "epoch": 4.37, + "learning_rate": 2.816470953620445e-05, + "loss": 1.4024, + "step": 295400 + }, + { + "epoch": 4.37, + "learning_rate": 2.8157314836725035e-05, + "loss": 1.377, + "step": 295500 + }, + { + "epoch": 4.37, + "learning_rate": 2.8149920137245624e-05, + "loss": 1.404, + "step": 295600 + }, + { + "epoch": 4.37, + "learning_rate": 2.814252543776621e-05, + "loss": 1.3917, + "step": 295700 + }, + { + "epoch": 4.37, + "learning_rate": 2.8135130738286798e-05, + "loss": 1.3965, + "step": 295800 + }, + { + "epoch": 4.38, + "learning_rate": 2.8127736038807384e-05, + "loss": 1.379, + "step": 295900 + }, + { + "epoch": 4.38, + "learning_rate": 2.8120341339327973e-05, + "loss": 1.4034, + "step": 296000 + }, + { + "epoch": 4.38, + "learning_rate": 2.811294663984856e-05, + "loss": 1.4126, + "step": 296100 + }, + { + "epoch": 4.38, + "learning_rate": 2.8105551940369144e-05, + "loss": 1.4348, + "step": 296200 + }, + { + "epoch": 4.38, + "learning_rate": 2.8098231187884528e-05, + "loss": 1.398, + "step": 296300 + }, + { + "epoch": 4.38, + "learning_rate": 2.8090836488405114e-05, + "loss": 1.3766, + "step": 296400 + }, + { + "epoch": 4.39, + "learning_rate": 2.8083441788925703e-05, + "loss": 1.4034, + "step": 296500 + }, + { + "epoch": 4.39, + "learning_rate": 2.8076047089446285e-05, + "loss": 1.4128, + "step": 296600 + }, + { + "epoch": 4.39, + "learning_rate": 2.806865238996687e-05, + "loss": 1.3915, + "step": 296700 + }, + { + "epoch": 4.39, + "learning_rate": 2.806125769048746e-05, + "loss": 1.4104, + "step": 296800 + }, + { + "epoch": 4.39, + "learning_rate": 2.8053862991008045e-05, + "loss": 1.3745, + "step": 296900 + }, + { + "epoch": 4.39, + "learning_rate": 2.804646829152863e-05, + "loss": 1.4287, + "step": 297000 + }, + { + "epoch": 4.39, + "learning_rate": 2.803907359204922e-05, + "loss": 1.4033, + "step": 297100 + }, + { + "epoch": 4.4, + "learning_rate": 2.8031678892569806e-05, + "loss": 1.3933, + "step": 297200 + }, + { + "epoch": 4.4, + "learning_rate": 2.8024284193090395e-05, + "loss": 1.411, + "step": 297300 + }, + { + "epoch": 4.4, + "learning_rate": 2.801688949361098e-05, + "loss": 1.3957, + "step": 297400 + }, + { + "epoch": 4.4, + "learning_rate": 2.800949479413157e-05, + "loss": 1.4075, + "step": 297500 + }, + { + "epoch": 4.4, + "learning_rate": 2.8002100094652155e-05, + "loss": 1.396, + "step": 297600 + }, + { + "epoch": 4.4, + "learning_rate": 2.7994705395172744e-05, + "loss": 1.3808, + "step": 297700 + }, + { + "epoch": 4.4, + "learning_rate": 2.798731069569333e-05, + "loss": 1.3991, + "step": 297800 + }, + { + "epoch": 4.41, + "learning_rate": 2.797991599621392e-05, + "loss": 1.4053, + "step": 297900 + }, + { + "epoch": 4.41, + "learning_rate": 2.79725212967345e-05, + "loss": 1.4183, + "step": 298000 + }, + { + "epoch": 4.41, + "learning_rate": 2.7965126597255086e-05, + "loss": 1.3875, + "step": 298100 + }, + { + "epoch": 4.41, + "learning_rate": 2.7957731897775675e-05, + "loss": 1.3697, + "step": 298200 + }, + { + "epoch": 4.41, + "learning_rate": 2.795033719829626e-05, + "loss": 1.4007, + "step": 298300 + }, + { + "epoch": 4.41, + "learning_rate": 2.794294249881685e-05, + "loss": 1.4025, + "step": 298400 + }, + { + "epoch": 4.41, + "learning_rate": 2.7935547799337436e-05, + "loss": 1.389, + "step": 298500 + }, + { + "epoch": 4.42, + "learning_rate": 2.7928153099858025e-05, + "loss": 1.3968, + "step": 298600 + }, + { + "epoch": 4.42, + "learning_rate": 2.792075840037861e-05, + "loss": 1.4114, + "step": 298700 + }, + { + "epoch": 4.42, + "learning_rate": 2.7913363700899196e-05, + "loss": 1.4218, + "step": 298800 + }, + { + "epoch": 4.42, + "learning_rate": 2.7905969001419785e-05, + "loss": 1.3861, + "step": 298900 + }, + { + "epoch": 4.42, + "learning_rate": 2.7898648248935166e-05, + "loss": 1.4197, + "step": 299000 + }, + { + "epoch": 4.42, + "learning_rate": 2.7891253549455755e-05, + "loss": 1.4049, + "step": 299100 + }, + { + "epoch": 4.42, + "learning_rate": 2.788385884997634e-05, + "loss": 1.4299, + "step": 299200 + }, + { + "epoch": 4.43, + "learning_rate": 2.7876464150496922e-05, + "loss": 1.3937, + "step": 299300 + }, + { + "epoch": 4.43, + "learning_rate": 2.786906945101751e-05, + "loss": 1.4192, + "step": 299400 + }, + { + "epoch": 4.43, + "learning_rate": 2.7861674751538097e-05, + "loss": 1.3999, + "step": 299500 + }, + { + "epoch": 4.43, + "learning_rate": 2.7854280052058683e-05, + "loss": 1.358, + "step": 299600 + }, + { + "epoch": 4.43, + "learning_rate": 2.7846885352579272e-05, + "loss": 1.3942, + "step": 299700 + }, + { + "epoch": 4.43, + "learning_rate": 2.7839490653099857e-05, + "loss": 1.4014, + "step": 299800 + }, + { + "epoch": 4.44, + "learning_rate": 2.7832095953620446e-05, + "loss": 1.392, + "step": 299900 + }, + { + "epoch": 4.44, + "learning_rate": 2.7824701254141032e-05, + "loss": 1.3779, + "step": 300000 + }, + { + "epoch": 4.44, + "learning_rate": 2.781730655466162e-05, + "loss": 1.3978, + "step": 300100 + }, + { + "epoch": 4.44, + "learning_rate": 2.7809911855182207e-05, + "loss": 1.3962, + "step": 300200 + }, + { + "epoch": 4.44, + "learning_rate": 2.7802517155702796e-05, + "loss": 1.414, + "step": 300300 + }, + { + "epoch": 4.44, + "learning_rate": 2.779512245622338e-05, + "loss": 1.3898, + "step": 300400 + }, + { + "epoch": 4.44, + "learning_rate": 2.778772775674397e-05, + "loss": 1.4142, + "step": 300500 + }, + { + "epoch": 4.45, + "learning_rate": 2.7780333057264553e-05, + "loss": 1.4096, + "step": 300600 + }, + { + "epoch": 4.45, + "learning_rate": 2.777293835778514e-05, + "loss": 1.3696, + "step": 300700 + }, + { + "epoch": 4.45, + "learning_rate": 2.7765543658305727e-05, + "loss": 1.4099, + "step": 300800 + }, + { + "epoch": 4.45, + "learning_rate": 2.7758148958826313e-05, + "loss": 1.4091, + "step": 300900 + }, + { + "epoch": 4.45, + "learning_rate": 2.7750754259346902e-05, + "loss": 1.4059, + "step": 301000 + }, + { + "epoch": 4.45, + "learning_rate": 2.7743433506862282e-05, + "loss": 1.3945, + "step": 301100 + }, + { + "epoch": 4.45, + "learning_rate": 2.7736038807382868e-05, + "loss": 1.3814, + "step": 301200 + }, + { + "epoch": 4.46, + "learning_rate": 2.7728644107903457e-05, + "loss": 1.4168, + "step": 301300 + }, + { + "epoch": 4.46, + "learning_rate": 2.7721249408424043e-05, + "loss": 1.3843, + "step": 301400 + }, + { + "epoch": 4.46, + "learning_rate": 2.7713854708944632e-05, + "loss": 1.4102, + "step": 301500 + }, + { + "epoch": 4.46, + "learning_rate": 2.7706460009465217e-05, + "loss": 1.3841, + "step": 301600 + }, + { + "epoch": 4.46, + "learning_rate": 2.7699065309985806e-05, + "loss": 1.4118, + "step": 301700 + }, + { + "epoch": 4.46, + "learning_rate": 2.7691670610506392e-05, + "loss": 1.4146, + "step": 301800 + }, + { + "epoch": 4.46, + "learning_rate": 2.7684275911026974e-05, + "loss": 1.4064, + "step": 301900 + }, + { + "epoch": 4.47, + "learning_rate": 2.7676881211547563e-05, + "loss": 1.4041, + "step": 302000 + }, + { + "epoch": 4.47, + "learning_rate": 2.766948651206815e-05, + "loss": 1.3948, + "step": 302100 + }, + { + "epoch": 4.47, + "learning_rate": 2.7662091812588735e-05, + "loss": 1.4038, + "step": 302200 + }, + { + "epoch": 4.47, + "learning_rate": 2.7654697113109324e-05, + "loss": 1.3799, + "step": 302300 + }, + { + "epoch": 4.47, + "learning_rate": 2.764730241362991e-05, + "loss": 1.3944, + "step": 302400 + }, + { + "epoch": 4.47, + "learning_rate": 2.76399077141505e-05, + "loss": 1.3978, + "step": 302500 + }, + { + "epoch": 4.48, + "learning_rate": 2.7632513014671084e-05, + "loss": 1.4021, + "step": 302600 + }, + { + "epoch": 4.48, + "learning_rate": 2.7625118315191673e-05, + "loss": 1.3842, + "step": 302700 + }, + { + "epoch": 4.48, + "learning_rate": 2.761772361571226e-05, + "loss": 1.4106, + "step": 302800 + }, + { + "epoch": 4.48, + "learning_rate": 2.7610328916232848e-05, + "loss": 1.4069, + "step": 302900 + }, + { + "epoch": 4.48, + "learning_rate": 2.7602934216753433e-05, + "loss": 1.3812, + "step": 303000 + }, + { + "epoch": 4.48, + "learning_rate": 2.7595539517274022e-05, + "loss": 1.399, + "step": 303100 + }, + { + "epoch": 4.48, + "learning_rate": 2.7588218764789403e-05, + "loss": 1.3976, + "step": 303200 + }, + { + "epoch": 4.49, + "learning_rate": 2.7580824065309985e-05, + "loss": 1.3913, + "step": 303300 + }, + { + "epoch": 4.49, + "learning_rate": 2.757342936583057e-05, + "loss": 1.3909, + "step": 303400 + }, + { + "epoch": 4.49, + "learning_rate": 2.756603466635116e-05, + "loss": 1.401, + "step": 303500 + }, + { + "epoch": 4.49, + "learning_rate": 2.7558639966871745e-05, + "loss": 1.3997, + "step": 303600 + }, + { + "epoch": 4.49, + "learning_rate": 2.7551245267392334e-05, + "loss": 1.4052, + "step": 303700 + }, + { + "epoch": 4.49, + "learning_rate": 2.754385056791292e-05, + "loss": 1.4019, + "step": 303800 + }, + { + "epoch": 4.49, + "learning_rate": 2.753645586843351e-05, + "loss": 1.4279, + "step": 303900 + }, + { + "epoch": 4.5, + "learning_rate": 2.7529061168954095e-05, + "loss": 1.4318, + "step": 304000 + }, + { + "epoch": 4.5, + "learning_rate": 2.7521666469474684e-05, + "loss": 1.3936, + "step": 304100 + }, + { + "epoch": 4.5, + "learning_rate": 2.751427176999527e-05, + "loss": 1.4057, + "step": 304200 + }, + { + "epoch": 4.5, + "learning_rate": 2.750687707051586e-05, + "loss": 1.4191, + "step": 304300 + }, + { + "epoch": 4.5, + "learning_rate": 2.7499482371036444e-05, + "loss": 1.3874, + "step": 304400 + }, + { + "epoch": 4.5, + "learning_rate": 2.7492087671557033e-05, + "loss": 1.4066, + "step": 304500 + }, + { + "epoch": 4.5, + "learning_rate": 2.7484692972077615e-05, + "loss": 1.3791, + "step": 304600 + }, + { + "epoch": 4.51, + "learning_rate": 2.74772982725982e-05, + "loss": 1.4139, + "step": 304700 + }, + { + "epoch": 4.51, + "learning_rate": 2.7469903573118787e-05, + "loss": 1.3962, + "step": 304800 + }, + { + "epoch": 4.51, + "learning_rate": 2.7462508873639376e-05, + "loss": 1.4071, + "step": 304900 + }, + { + "epoch": 4.51, + "learning_rate": 2.745511417415996e-05, + "loss": 1.3976, + "step": 305000 + }, + { + "epoch": 4.51, + "learning_rate": 2.744771947468055e-05, + "loss": 1.4062, + "step": 305100 + }, + { + "epoch": 4.51, + "learning_rate": 2.7440324775201136e-05, + "loss": 1.3743, + "step": 305200 + }, + { + "epoch": 4.52, + "learning_rate": 2.7432930075721725e-05, + "loss": 1.3982, + "step": 305300 + }, + { + "epoch": 4.52, + "learning_rate": 2.742553537624231e-05, + "loss": 1.4134, + "step": 305400 + }, + { + "epoch": 4.52, + "learning_rate": 2.7418214623757694e-05, + "loss": 1.4157, + "step": 305500 + }, + { + "epoch": 4.52, + "learning_rate": 2.741081992427828e-05, + "loss": 1.3945, + "step": 305600 + }, + { + "epoch": 4.52, + "learning_rate": 2.740342522479887e-05, + "loss": 1.3865, + "step": 305700 + }, + { + "epoch": 4.52, + "learning_rate": 2.7396030525319455e-05, + "loss": 1.419, + "step": 305800 + }, + { + "epoch": 4.52, + "learning_rate": 2.7388635825840037e-05, + "loss": 1.3965, + "step": 305900 + }, + { + "epoch": 4.53, + "learning_rate": 2.7381241126360623e-05, + "loss": 1.4041, + "step": 306000 + }, + { + "epoch": 4.53, + "learning_rate": 2.737384642688121e-05, + "loss": 1.41, + "step": 306100 + }, + { + "epoch": 4.53, + "learning_rate": 2.7366451727401797e-05, + "loss": 1.4232, + "step": 306200 + }, + { + "epoch": 4.53, + "learning_rate": 2.7359057027922386e-05, + "loss": 1.3874, + "step": 306300 + }, + { + "epoch": 4.53, + "learning_rate": 2.7351662328442972e-05, + "loss": 1.3956, + "step": 306400 + }, + { + "epoch": 4.53, + "learning_rate": 2.734426762896356e-05, + "loss": 1.3779, + "step": 306500 + }, + { + "epoch": 4.53, + "learning_rate": 2.7336872929484147e-05, + "loss": 1.3915, + "step": 306600 + }, + { + "epoch": 4.54, + "learning_rate": 2.7329478230004736e-05, + "loss": 1.4063, + "step": 306700 + }, + { + "epoch": 4.54, + "learning_rate": 2.732208353052532e-05, + "loss": 1.4044, + "step": 306800 + }, + { + "epoch": 4.54, + "learning_rate": 2.731468883104591e-05, + "loss": 1.415, + "step": 306900 + }, + { + "epoch": 4.54, + "learning_rate": 2.7307294131566496e-05, + "loss": 1.4124, + "step": 307000 + }, + { + "epoch": 4.54, + "learning_rate": 2.7299899432087085e-05, + "loss": 1.406, + "step": 307100 + }, + { + "epoch": 4.54, + "learning_rate": 2.7292504732607667e-05, + "loss": 1.4166, + "step": 307200 + }, + { + "epoch": 4.54, + "learning_rate": 2.7285110033128253e-05, + "loss": 1.4211, + "step": 307300 + }, + { + "epoch": 4.55, + "learning_rate": 2.727771533364884e-05, + "loss": 1.3863, + "step": 307400 + }, + { + "epoch": 4.55, + "learning_rate": 2.7270320634169427e-05, + "loss": 1.3903, + "step": 307500 + }, + { + "epoch": 4.55, + "learning_rate": 2.7262925934690013e-05, + "loss": 1.3859, + "step": 307600 + }, + { + "epoch": 4.55, + "learning_rate": 2.7255605182205397e-05, + "loss": 1.4115, + "step": 307700 + }, + { + "epoch": 4.55, + "learning_rate": 2.7248210482725983e-05, + "loss": 1.4098, + "step": 307800 + }, + { + "epoch": 4.55, + "learning_rate": 2.724081578324657e-05, + "loss": 1.3912, + "step": 307900 + }, + { + "epoch": 4.56, + "learning_rate": 2.7233421083767157e-05, + "loss": 1.395, + "step": 308000 + }, + { + "epoch": 4.56, + "learning_rate": 2.7226026384287746e-05, + "loss": 1.3902, + "step": 308100 + }, + { + "epoch": 4.56, + "learning_rate": 2.7218631684808332e-05, + "loss": 1.4241, + "step": 308200 + }, + { + "epoch": 4.56, + "learning_rate": 2.721123698532892e-05, + "loss": 1.4162, + "step": 308300 + }, + { + "epoch": 4.56, + "learning_rate": 2.7203842285849507e-05, + "loss": 1.4169, + "step": 308400 + }, + { + "epoch": 4.56, + "learning_rate": 2.719644758637009e-05, + "loss": 1.4073, + "step": 308500 + }, + { + "epoch": 4.56, + "learning_rate": 2.7189052886890674e-05, + "loss": 1.4012, + "step": 308600 + }, + { + "epoch": 4.57, + "learning_rate": 2.7181658187411263e-05, + "loss": 1.3902, + "step": 308700 + }, + { + "epoch": 4.57, + "learning_rate": 2.717426348793185e-05, + "loss": 1.4071, + "step": 308800 + }, + { + "epoch": 4.57, + "learning_rate": 2.7166868788452438e-05, + "loss": 1.4028, + "step": 308900 + }, + { + "epoch": 4.57, + "learning_rate": 2.7159474088973024e-05, + "loss": 1.4176, + "step": 309000 + }, + { + "epoch": 4.57, + "learning_rate": 2.7152079389493613e-05, + "loss": 1.3869, + "step": 309100 + }, + { + "epoch": 4.57, + "learning_rate": 2.71446846900142e-05, + "loss": 1.4362, + "step": 309200 + }, + { + "epoch": 4.57, + "learning_rate": 2.7137289990534787e-05, + "loss": 1.3982, + "step": 309300 + }, + { + "epoch": 4.58, + "learning_rate": 2.7129895291055373e-05, + "loss": 1.3764, + "step": 309400 + }, + { + "epoch": 4.58, + "learning_rate": 2.7122500591575962e-05, + "loss": 1.4363, + "step": 309500 + }, + { + "epoch": 4.58, + "learning_rate": 2.7115105892096548e-05, + "loss": 1.397, + "step": 309600 + }, + { + "epoch": 4.58, + "learning_rate": 2.7107711192617137e-05, + "loss": 1.3899, + "step": 309700 + }, + { + "epoch": 4.58, + "learning_rate": 2.7100316493137722e-05, + "loss": 1.4021, + "step": 309800 + }, + { + "epoch": 4.58, + "learning_rate": 2.70929957406531e-05, + "loss": 1.4213, + "step": 309900 + }, + { + "epoch": 4.58, + "learning_rate": 2.7085601041173685e-05, + "loss": 1.424, + "step": 310000 + }, + { + "epoch": 4.59, + "learning_rate": 2.7078206341694274e-05, + "loss": 1.3989, + "step": 310100 + }, + { + "epoch": 4.59, + "learning_rate": 2.707081164221486e-05, + "loss": 1.3979, + "step": 310200 + }, + { + "epoch": 4.59, + "learning_rate": 2.706341694273545e-05, + "loss": 1.4068, + "step": 310300 + }, + { + "epoch": 4.59, + "learning_rate": 2.7056022243256034e-05, + "loss": 1.3994, + "step": 310400 + }, + { + "epoch": 4.59, + "learning_rate": 2.7048627543776624e-05, + "loss": 1.4063, + "step": 310500 + }, + { + "epoch": 4.59, + "learning_rate": 2.704123284429721e-05, + "loss": 1.3879, + "step": 310600 + }, + { + "epoch": 4.6, + "learning_rate": 2.7033838144817798e-05, + "loss": 1.3846, + "step": 310700 + }, + { + "epoch": 4.6, + "learning_rate": 2.7026443445338384e-05, + "loss": 1.4188, + "step": 310800 + }, + { + "epoch": 4.6, + "learning_rate": 2.7019048745858973e-05, + "loss": 1.4022, + "step": 310900 + }, + { + "epoch": 4.6, + "learning_rate": 2.701165404637956e-05, + "loss": 1.3922, + "step": 311000 + }, + { + "epoch": 4.6, + "learning_rate": 2.7004259346900147e-05, + "loss": 1.4035, + "step": 311100 + }, + { + "epoch": 4.6, + "learning_rate": 2.6996864647420726e-05, + "loss": 1.3813, + "step": 311200 + }, + { + "epoch": 4.6, + "learning_rate": 2.6989469947941315e-05, + "loss": 1.3837, + "step": 311300 + }, + { + "epoch": 4.61, + "learning_rate": 2.69820752484619e-05, + "loss": 1.3998, + "step": 311400 + }, + { + "epoch": 4.61, + "learning_rate": 2.697468054898249e-05, + "loss": 1.4259, + "step": 311500 + }, + { + "epoch": 4.61, + "learning_rate": 2.6967285849503076e-05, + "loss": 1.4068, + "step": 311600 + }, + { + "epoch": 4.61, + "learning_rate": 2.6959891150023665e-05, + "loss": 1.3909, + "step": 311700 + }, + { + "epoch": 4.61, + "learning_rate": 2.695249645054425e-05, + "loss": 1.4087, + "step": 311800 + }, + { + "epoch": 4.61, + "learning_rate": 2.694510175106484e-05, + "loss": 1.4111, + "step": 311900 + }, + { + "epoch": 4.61, + "learning_rate": 2.6937707051585425e-05, + "loss": 1.3985, + "step": 312000 + }, + { + "epoch": 4.62, + "learning_rate": 2.6930386299100805e-05, + "loss": 1.4061, + "step": 312100 + }, + { + "epoch": 4.62, + "learning_rate": 2.6922991599621394e-05, + "loss": 1.4065, + "step": 312200 + }, + { + "epoch": 4.62, + "learning_rate": 2.691559690014198e-05, + "loss": 1.3924, + "step": 312300 + }, + { + "epoch": 4.62, + "learning_rate": 2.690820220066257e-05, + "loss": 1.3844, + "step": 312400 + }, + { + "epoch": 4.62, + "learning_rate": 2.690080750118315e-05, + "loss": 1.4003, + "step": 312500 + }, + { + "epoch": 4.62, + "learning_rate": 2.6893412801703737e-05, + "loss": 1.4288, + "step": 312600 + }, + { + "epoch": 4.62, + "learning_rate": 2.6886018102224326e-05, + "loss": 1.4118, + "step": 312700 + }, + { + "epoch": 4.63, + "learning_rate": 2.6878623402744912e-05, + "loss": 1.3864, + "step": 312800 + }, + { + "epoch": 4.63, + "learning_rate": 2.68712287032655e-05, + "loss": 1.4224, + "step": 312900 + }, + { + "epoch": 4.63, + "learning_rate": 2.6863834003786086e-05, + "loss": 1.3975, + "step": 313000 + }, + { + "epoch": 4.63, + "learning_rate": 2.6856439304306675e-05, + "loss": 1.3982, + "step": 313100 + }, + { + "epoch": 4.63, + "learning_rate": 2.684904460482726e-05, + "loss": 1.4121, + "step": 313200 + }, + { + "epoch": 4.63, + "learning_rate": 2.684164990534785e-05, + "loss": 1.3935, + "step": 313300 + }, + { + "epoch": 4.63, + "learning_rate": 2.6834255205868436e-05, + "loss": 1.3517, + "step": 313400 + }, + { + "epoch": 4.64, + "learning_rate": 2.6826860506389025e-05, + "loss": 1.3912, + "step": 313500 + }, + { + "epoch": 4.64, + "learning_rate": 2.681946580690961e-05, + "loss": 1.3977, + "step": 313600 + }, + { + "epoch": 4.64, + "learning_rate": 2.68120711074302e-05, + "loss": 1.4388, + "step": 313700 + }, + { + "epoch": 4.64, + "learning_rate": 2.6804676407950778e-05, + "loss": 1.4014, + "step": 313800 + }, + { + "epoch": 4.64, + "learning_rate": 2.6797281708471367e-05, + "loss": 1.3796, + "step": 313900 + }, + { + "epoch": 4.64, + "learning_rate": 2.6789887008991953e-05, + "loss": 1.3977, + "step": 314000 + }, + { + "epoch": 4.65, + "learning_rate": 2.6782492309512542e-05, + "loss": 1.4009, + "step": 314100 + }, + { + "epoch": 4.65, + "learning_rate": 2.6775171557027922e-05, + "loss": 1.3908, + "step": 314200 + }, + { + "epoch": 4.65, + "learning_rate": 2.676777685754851e-05, + "loss": 1.4075, + "step": 314300 + }, + { + "epoch": 4.65, + "learning_rate": 2.6760382158069097e-05, + "loss": 1.4018, + "step": 314400 + }, + { + "epoch": 4.65, + "learning_rate": 2.6752987458589686e-05, + "loss": 1.4072, + "step": 314500 + }, + { + "epoch": 4.65, + "learning_rate": 2.6745592759110272e-05, + "loss": 1.4175, + "step": 314600 + }, + { + "epoch": 4.65, + "learning_rate": 2.6738198059630857e-05, + "loss": 1.4097, + "step": 314700 + }, + { + "epoch": 4.66, + "learning_rate": 2.6730803360151446e-05, + "loss": 1.4168, + "step": 314800 + }, + { + "epoch": 4.66, + "learning_rate": 2.6723408660672032e-05, + "loss": 1.3969, + "step": 314900 + }, + { + "epoch": 4.66, + "learning_rate": 2.671601396119262e-05, + "loss": 1.389, + "step": 315000 + }, + { + "epoch": 4.66, + "learning_rate": 2.6708619261713203e-05, + "loss": 1.4078, + "step": 315100 + }, + { + "epoch": 4.66, + "learning_rate": 2.670122456223379e-05, + "loss": 1.4054, + "step": 315200 + }, + { + "epoch": 4.66, + "learning_rate": 2.6693829862754378e-05, + "loss": 1.4086, + "step": 315300 + }, + { + "epoch": 4.66, + "learning_rate": 2.6686435163274964e-05, + "loss": 1.4127, + "step": 315400 + }, + { + "epoch": 4.67, + "learning_rate": 2.6679040463795553e-05, + "loss": 1.4049, + "step": 315500 + }, + { + "epoch": 4.67, + "learning_rate": 2.6671645764316138e-05, + "loss": 1.4171, + "step": 315600 + }, + { + "epoch": 4.67, + "learning_rate": 2.6664251064836727e-05, + "loss": 1.3779, + "step": 315700 + }, + { + "epoch": 4.67, + "learning_rate": 2.6656856365357313e-05, + "loss": 1.406, + "step": 315800 + }, + { + "epoch": 4.67, + "learning_rate": 2.6649461665877902e-05, + "loss": 1.4004, + "step": 315900 + }, + { + "epoch": 4.67, + "learning_rate": 2.6642066966398488e-05, + "loss": 1.419, + "step": 316000 + }, + { + "epoch": 4.67, + "learning_rate": 2.6634672266919077e-05, + "loss": 1.4127, + "step": 316100 + }, + { + "epoch": 4.68, + "learning_rate": 2.6627277567439662e-05, + "loss": 1.4013, + "step": 316200 + }, + { + "epoch": 4.68, + "learning_rate": 2.661988286796025e-05, + "loss": 1.418, + "step": 316300 + }, + { + "epoch": 4.68, + "learning_rate": 2.6612488168480837e-05, + "loss": 1.4184, + "step": 316400 + }, + { + "epoch": 4.68, + "learning_rate": 2.660509346900142e-05, + "loss": 1.3857, + "step": 316500 + }, + { + "epoch": 4.68, + "learning_rate": 2.6597698769522005e-05, + "loss": 1.3737, + "step": 316600 + }, + { + "epoch": 4.68, + "learning_rate": 2.659037801703739e-05, + "loss": 1.4014, + "step": 316700 + }, + { + "epoch": 4.69, + "learning_rate": 2.6582983317557974e-05, + "loss": 1.4109, + "step": 316800 + }, + { + "epoch": 4.69, + "learning_rate": 2.6575588618078563e-05, + "loss": 1.3923, + "step": 316900 + }, + { + "epoch": 4.69, + "learning_rate": 2.656819391859915e-05, + "loss": 1.3728, + "step": 317000 + }, + { + "epoch": 4.69, + "learning_rate": 2.6560799219119738e-05, + "loss": 1.4171, + "step": 317100 + }, + { + "epoch": 4.69, + "learning_rate": 2.6553404519640324e-05, + "loss": 1.4139, + "step": 317200 + }, + { + "epoch": 4.69, + "learning_rate": 2.654600982016091e-05, + "loss": 1.401, + "step": 317300 + }, + { + "epoch": 4.69, + "learning_rate": 2.6538615120681498e-05, + "loss": 1.3988, + "step": 317400 + }, + { + "epoch": 4.7, + "learning_rate": 2.6531220421202084e-05, + "loss": 1.4097, + "step": 317500 + }, + { + "epoch": 4.7, + "learning_rate": 2.6523825721722673e-05, + "loss": 1.4153, + "step": 317600 + }, + { + "epoch": 4.7, + "learning_rate": 2.651643102224326e-05, + "loss": 1.3922, + "step": 317700 + }, + { + "epoch": 4.7, + "learning_rate": 2.650903632276384e-05, + "loss": 1.3918, + "step": 317800 + }, + { + "epoch": 4.7, + "learning_rate": 2.650164162328443e-05, + "loss": 1.4184, + "step": 317900 + }, + { + "epoch": 4.7, + "learning_rate": 2.6494246923805015e-05, + "loss": 1.4213, + "step": 318000 + }, + { + "epoch": 4.7, + "learning_rate": 2.6486852224325604e-05, + "loss": 1.402, + "step": 318100 + }, + { + "epoch": 4.71, + "learning_rate": 2.647945752484619e-05, + "loss": 1.3948, + "step": 318200 + }, + { + "epoch": 4.71, + "learning_rate": 2.647206282536678e-05, + "loss": 1.388, + "step": 318300 + }, + { + "epoch": 4.71, + "learning_rate": 2.6464668125887365e-05, + "loss": 1.3909, + "step": 318400 + }, + { + "epoch": 4.71, + "learning_rate": 2.6457273426407954e-05, + "loss": 1.418, + "step": 318500 + }, + { + "epoch": 4.71, + "learning_rate": 2.644987872692854e-05, + "loss": 1.4173, + "step": 318600 + }, + { + "epoch": 4.71, + "learning_rate": 2.644248402744913e-05, + "loss": 1.4153, + "step": 318700 + }, + { + "epoch": 4.71, + "learning_rate": 2.6435089327969714e-05, + "loss": 1.3701, + "step": 318800 + }, + { + "epoch": 4.72, + "learning_rate": 2.6427694628490303e-05, + "loss": 1.3928, + "step": 318900 + }, + { + "epoch": 4.72, + "learning_rate": 2.6420373876005684e-05, + "loss": 1.4116, + "step": 319000 + }, + { + "epoch": 4.72, + "learning_rate": 2.6412979176526266e-05, + "loss": 1.4005, + "step": 319100 + }, + { + "epoch": 4.72, + "learning_rate": 2.640558447704685e-05, + "loss": 1.3824, + "step": 319200 + }, + { + "epoch": 4.72, + "learning_rate": 2.639818977756744e-05, + "loss": 1.4029, + "step": 319300 + }, + { + "epoch": 4.72, + "learning_rate": 2.6390795078088026e-05, + "loss": 1.4155, + "step": 319400 + }, + { + "epoch": 4.73, + "learning_rate": 2.6383400378608615e-05, + "loss": 1.4083, + "step": 319500 + }, + { + "epoch": 4.73, + "learning_rate": 2.63760056791292e-05, + "loss": 1.4158, + "step": 319600 + }, + { + "epoch": 4.73, + "learning_rate": 2.636861097964979e-05, + "loss": 1.4122, + "step": 319700 + }, + { + "epoch": 4.73, + "learning_rate": 2.6361216280170375e-05, + "loss": 1.4198, + "step": 319800 + }, + { + "epoch": 4.73, + "learning_rate": 2.635382158069096e-05, + "loss": 1.3805, + "step": 319900 + }, + { + "epoch": 4.73, + "learning_rate": 2.634642688121155e-05, + "loss": 1.4009, + "step": 320000 + }, + { + "epoch": 4.73, + "learning_rate": 2.6339032181732136e-05, + "loss": 1.395, + "step": 320100 + }, + { + "epoch": 4.74, + "learning_rate": 2.6331637482252725e-05, + "loss": 1.431, + "step": 320200 + }, + { + "epoch": 4.74, + "learning_rate": 2.632424278277331e-05, + "loss": 1.4195, + "step": 320300 + }, + { + "epoch": 4.74, + "learning_rate": 2.6316848083293893e-05, + "loss": 1.374, + "step": 320400 + }, + { + "epoch": 4.74, + "learning_rate": 2.6309453383814482e-05, + "loss": 1.3721, + "step": 320500 + }, + { + "epoch": 4.74, + "learning_rate": 2.6302058684335067e-05, + "loss": 1.4014, + "step": 320600 + }, + { + "epoch": 4.74, + "learning_rate": 2.6294663984855656e-05, + "loss": 1.4085, + "step": 320700 + }, + { + "epoch": 4.74, + "learning_rate": 2.6287269285376242e-05, + "loss": 1.4139, + "step": 320800 + }, + { + "epoch": 4.75, + "learning_rate": 2.627987458589683e-05, + "loss": 1.4055, + "step": 320900 + }, + { + "epoch": 4.75, + "learning_rate": 2.6272479886417417e-05, + "loss": 1.4042, + "step": 321000 + }, + { + "epoch": 4.75, + "learning_rate": 2.6265085186938006e-05, + "loss": 1.4195, + "step": 321100 + }, + { + "epoch": 4.75, + "learning_rate": 2.6257764434453386e-05, + "loss": 1.3895, + "step": 321200 + }, + { + "epoch": 4.75, + "learning_rate": 2.6250369734973972e-05, + "loss": 1.4039, + "step": 321300 + }, + { + "epoch": 4.75, + "learning_rate": 2.624297503549456e-05, + "loss": 1.3841, + "step": 321400 + }, + { + "epoch": 4.75, + "learning_rate": 2.6235580336015146e-05, + "loss": 1.3889, + "step": 321500 + }, + { + "epoch": 4.76, + "learning_rate": 2.6228185636535735e-05, + "loss": 1.3979, + "step": 321600 + }, + { + "epoch": 4.76, + "learning_rate": 2.622079093705632e-05, + "loss": 1.4202, + "step": 321700 + }, + { + "epoch": 4.76, + "learning_rate": 2.6213396237576903e-05, + "loss": 1.3931, + "step": 321800 + }, + { + "epoch": 4.76, + "learning_rate": 2.6206001538097492e-05, + "loss": 1.3893, + "step": 321900 + }, + { + "epoch": 4.76, + "learning_rate": 2.6198606838618078e-05, + "loss": 1.3864, + "step": 322000 + }, + { + "epoch": 4.76, + "learning_rate": 2.6191212139138667e-05, + "loss": 1.4198, + "step": 322100 + }, + { + "epoch": 4.77, + "learning_rate": 2.6183817439659253e-05, + "loss": 1.3964, + "step": 322200 + }, + { + "epoch": 4.77, + "learning_rate": 2.6176422740179842e-05, + "loss": 1.4046, + "step": 322300 + }, + { + "epoch": 4.77, + "learning_rate": 2.6169028040700427e-05, + "loss": 1.412, + "step": 322400 + }, + { + "epoch": 4.77, + "learning_rate": 2.6161633341221013e-05, + "loss": 1.4092, + "step": 322500 + }, + { + "epoch": 4.77, + "learning_rate": 2.6154238641741602e-05, + "loss": 1.4168, + "step": 322600 + }, + { + "epoch": 4.77, + "learning_rate": 2.6146843942262188e-05, + "loss": 1.3844, + "step": 322700 + }, + { + "epoch": 4.77, + "learning_rate": 2.6139449242782777e-05, + "loss": 1.38, + "step": 322800 + }, + { + "epoch": 4.78, + "learning_rate": 2.6132054543303362e-05, + "loss": 1.4074, + "step": 322900 + }, + { + "epoch": 4.78, + "learning_rate": 2.612465984382395e-05, + "loss": 1.3954, + "step": 323000 + }, + { + "epoch": 4.78, + "learning_rate": 2.6117265144344534e-05, + "loss": 1.3753, + "step": 323100 + }, + { + "epoch": 4.78, + "learning_rate": 2.610987044486512e-05, + "loss": 1.4286, + "step": 323200 + }, + { + "epoch": 4.78, + "learning_rate": 2.6102475745385708e-05, + "loss": 1.4093, + "step": 323300 + }, + { + "epoch": 4.78, + "learning_rate": 2.609515499290109e-05, + "loss": 1.4161, + "step": 323400 + }, + { + "epoch": 4.78, + "learning_rate": 2.6087760293421674e-05, + "loss": 1.377, + "step": 323500 + }, + { + "epoch": 4.79, + "learning_rate": 2.6080365593942263e-05, + "loss": 1.4051, + "step": 323600 + }, + { + "epoch": 4.79, + "learning_rate": 2.607297089446285e-05, + "loss": 1.3867, + "step": 323700 + }, + { + "epoch": 4.79, + "learning_rate": 2.6065576194983438e-05, + "loss": 1.3852, + "step": 323800 + }, + { + "epoch": 4.79, + "learning_rate": 2.6058181495504024e-05, + "loss": 1.395, + "step": 323900 + }, + { + "epoch": 4.79, + "learning_rate": 2.6050786796024613e-05, + "loss": 1.4133, + "step": 324000 + }, + { + "epoch": 4.79, + "learning_rate": 2.60433920965452e-05, + "loss": 1.3982, + "step": 324100 + }, + { + "epoch": 4.79, + "learning_rate": 2.6035997397065787e-05, + "loss": 1.4248, + "step": 324200 + }, + { + "epoch": 4.8, + "learning_rate": 2.6028602697586373e-05, + "loss": 1.4216, + "step": 324300 + }, + { + "epoch": 4.8, + "learning_rate": 2.6021207998106955e-05, + "loss": 1.4014, + "step": 324400 + }, + { + "epoch": 4.8, + "learning_rate": 2.6013813298627544e-05, + "loss": 1.4228, + "step": 324500 + }, + { + "epoch": 4.8, + "learning_rate": 2.600641859914813e-05, + "loss": 1.3547, + "step": 324600 + }, + { + "epoch": 4.8, + "learning_rate": 2.599902389966872e-05, + "loss": 1.3968, + "step": 324700 + }, + { + "epoch": 4.8, + "learning_rate": 2.5991629200189305e-05, + "loss": 1.394, + "step": 324800 + }, + { + "epoch": 4.81, + "learning_rate": 2.5984234500709894e-05, + "loss": 1.3957, + "step": 324900 + }, + { + "epoch": 4.81, + "learning_rate": 2.597683980123048e-05, + "loss": 1.4235, + "step": 325000 + }, + { + "epoch": 4.81, + "learning_rate": 2.5969445101751065e-05, + "loss": 1.4078, + "step": 325100 + }, + { + "epoch": 4.81, + "learning_rate": 2.5962050402271654e-05, + "loss": 1.3816, + "step": 325200 + }, + { + "epoch": 4.81, + "learning_rate": 2.595465570279224e-05, + "loss": 1.3976, + "step": 325300 + }, + { + "epoch": 4.81, + "learning_rate": 2.594726100331283e-05, + "loss": 1.402, + "step": 325400 + }, + { + "epoch": 4.81, + "learning_rate": 2.5939866303833414e-05, + "loss": 1.3941, + "step": 325500 + }, + { + "epoch": 4.82, + "learning_rate": 2.5932545551348798e-05, + "loss": 1.4363, + "step": 325600 + }, + { + "epoch": 4.82, + "learning_rate": 2.592515085186938e-05, + "loss": 1.4188, + "step": 325700 + }, + { + "epoch": 4.82, + "learning_rate": 2.5917756152389966e-05, + "loss": 1.3932, + "step": 325800 + }, + { + "epoch": 4.82, + "learning_rate": 2.591036145291055e-05, + "loss": 1.4292, + "step": 325900 + }, + { + "epoch": 4.82, + "learning_rate": 2.590296675343114e-05, + "loss": 1.412, + "step": 326000 + }, + { + "epoch": 4.82, + "learning_rate": 2.5895572053951726e-05, + "loss": 1.3943, + "step": 326100 + }, + { + "epoch": 4.82, + "learning_rate": 2.5888177354472315e-05, + "loss": 1.4092, + "step": 326200 + }, + { + "epoch": 4.83, + "learning_rate": 2.58807826549929e-05, + "loss": 1.398, + "step": 326300 + }, + { + "epoch": 4.83, + "learning_rate": 2.587338795551349e-05, + "loss": 1.3717, + "step": 326400 + }, + { + "epoch": 4.83, + "learning_rate": 2.5865993256034076e-05, + "loss": 1.4091, + "step": 326500 + }, + { + "epoch": 4.83, + "learning_rate": 2.5858598556554665e-05, + "loss": 1.3953, + "step": 326600 + }, + { + "epoch": 4.83, + "learning_rate": 2.585120385707525e-05, + "loss": 1.3906, + "step": 326700 + }, + { + "epoch": 4.83, + "learning_rate": 2.584380915759584e-05, + "loss": 1.4101, + "step": 326800 + }, + { + "epoch": 4.83, + "learning_rate": 2.5836414458116425e-05, + "loss": 1.3829, + "step": 326900 + }, + { + "epoch": 4.84, + "learning_rate": 2.5829019758637007e-05, + "loss": 1.39, + "step": 327000 + }, + { + "epoch": 4.84, + "learning_rate": 2.5821625059157596e-05, + "loss": 1.3935, + "step": 327100 + }, + { + "epoch": 4.84, + "learning_rate": 2.5814230359678182e-05, + "loss": 1.3865, + "step": 327200 + }, + { + "epoch": 4.84, + "learning_rate": 2.580683566019877e-05, + "loss": 1.4081, + "step": 327300 + }, + { + "epoch": 4.84, + "learning_rate": 2.5799440960719356e-05, + "loss": 1.4142, + "step": 327400 + }, + { + "epoch": 4.84, + "learning_rate": 2.5792046261239946e-05, + "loss": 1.4058, + "step": 327500 + }, + { + "epoch": 4.84, + "learning_rate": 2.578465156176053e-05, + "loss": 1.4113, + "step": 327600 + }, + { + "epoch": 4.85, + "learning_rate": 2.5777256862281117e-05, + "loss": 1.424, + "step": 327700 + }, + { + "epoch": 4.85, + "learning_rate": 2.5769862162801706e-05, + "loss": 1.4089, + "step": 327800 + }, + { + "epoch": 4.85, + "learning_rate": 2.576246746332229e-05, + "loss": 1.3901, + "step": 327900 + }, + { + "epoch": 4.85, + "learning_rate": 2.575507276384288e-05, + "loss": 1.4001, + "step": 328000 + }, + { + "epoch": 4.85, + "learning_rate": 2.5747678064363466e-05, + "loss": 1.3706, + "step": 328100 + }, + { + "epoch": 4.85, + "learning_rate": 2.5740283364884055e-05, + "loss": 1.4038, + "step": 328200 + }, + { + "epoch": 4.86, + "learning_rate": 2.573288866540464e-05, + "loss": 1.4163, + "step": 328300 + }, + { + "epoch": 4.86, + "learning_rate": 2.5725567912920018e-05, + "loss": 1.433, + "step": 328400 + }, + { + "epoch": 4.86, + "learning_rate": 2.5718173213440603e-05, + "loss": 1.3997, + "step": 328500 + }, + { + "epoch": 4.86, + "learning_rate": 2.5710778513961193e-05, + "loss": 1.3934, + "step": 328600 + }, + { + "epoch": 4.86, + "learning_rate": 2.5703383814481778e-05, + "loss": 1.3886, + "step": 328700 + }, + { + "epoch": 4.86, + "learning_rate": 2.5695989115002367e-05, + "loss": 1.4273, + "step": 328800 + }, + { + "epoch": 4.86, + "learning_rate": 2.5688594415522953e-05, + "loss": 1.3939, + "step": 328900 + }, + { + "epoch": 4.87, + "learning_rate": 2.5681199716043542e-05, + "loss": 1.4052, + "step": 329000 + }, + { + "epoch": 4.87, + "learning_rate": 2.5673805016564127e-05, + "loss": 1.398, + "step": 329100 + }, + { + "epoch": 4.87, + "learning_rate": 2.5666410317084716e-05, + "loss": 1.4253, + "step": 329200 + }, + { + "epoch": 4.87, + "learning_rate": 2.5659015617605302e-05, + "loss": 1.4074, + "step": 329300 + }, + { + "epoch": 4.87, + "learning_rate": 2.565162091812589e-05, + "loss": 1.3836, + "step": 329400 + }, + { + "epoch": 4.87, + "learning_rate": 2.5644226218646477e-05, + "loss": 1.3709, + "step": 329500 + }, + { + "epoch": 4.87, + "learning_rate": 2.5636831519167066e-05, + "loss": 1.3991, + "step": 329600 + }, + { + "epoch": 4.88, + "learning_rate": 2.5629436819687648e-05, + "loss": 1.3991, + "step": 329700 + }, + { + "epoch": 4.88, + "learning_rate": 2.5622042120208234e-05, + "loss": 1.3876, + "step": 329800 + }, + { + "epoch": 4.88, + "learning_rate": 2.5614647420728823e-05, + "loss": 1.4114, + "step": 329900 + }, + { + "epoch": 4.88, + "learning_rate": 2.560725272124941e-05, + "loss": 1.4185, + "step": 330000 + }, + { + "epoch": 4.88, + "learning_rate": 2.5599858021769997e-05, + "loss": 1.4153, + "step": 330100 + }, + { + "epoch": 4.88, + "learning_rate": 2.5592463322290583e-05, + "loss": 1.3831, + "step": 330200 + }, + { + "epoch": 4.88, + "learning_rate": 2.558506862281117e-05, + "loss": 1.4161, + "step": 330300 + }, + { + "epoch": 4.89, + "learning_rate": 2.5577673923331758e-05, + "loss": 1.4176, + "step": 330400 + }, + { + "epoch": 4.89, + "learning_rate": 2.5570279223852343e-05, + "loss": 1.3946, + "step": 330500 + }, + { + "epoch": 4.89, + "learning_rate": 2.5562884524372932e-05, + "loss": 1.404, + "step": 330600 + }, + { + "epoch": 4.89, + "learning_rate": 2.5555489824893518e-05, + "loss": 1.3935, + "step": 330700 + }, + { + "epoch": 4.89, + "learning_rate": 2.5548095125414107e-05, + "loss": 1.4059, + "step": 330800 + }, + { + "epoch": 4.89, + "learning_rate": 2.5540700425934693e-05, + "loss": 1.4215, + "step": 330900 + }, + { + "epoch": 4.9, + "learning_rate": 2.5533305726455275e-05, + "loss": 1.411, + "step": 331000 + }, + { + "epoch": 4.9, + "learning_rate": 2.5525984973970655e-05, + "loss": 1.4078, + "step": 331100 + }, + { + "epoch": 4.9, + "learning_rate": 2.5518590274491244e-05, + "loss": 1.4236, + "step": 331200 + }, + { + "epoch": 4.9, + "learning_rate": 2.551119557501183e-05, + "loss": 1.3888, + "step": 331300 + }, + { + "epoch": 4.9, + "learning_rate": 2.550380087553242e-05, + "loss": 1.3943, + "step": 331400 + }, + { + "epoch": 4.9, + "learning_rate": 2.5496406176053005e-05, + "loss": 1.3837, + "step": 331500 + }, + { + "epoch": 4.9, + "learning_rate": 2.5489011476573594e-05, + "loss": 1.3937, + "step": 331600 + }, + { + "epoch": 4.91, + "learning_rate": 2.548161677709418e-05, + "loss": 1.3897, + "step": 331700 + }, + { + "epoch": 4.91, + "learning_rate": 2.547422207761477e-05, + "loss": 1.4386, + "step": 331800 + }, + { + "epoch": 4.91, + "learning_rate": 2.5466827378135354e-05, + "loss": 1.3989, + "step": 331900 + }, + { + "epoch": 4.91, + "learning_rate": 2.5459432678655943e-05, + "loss": 1.4145, + "step": 332000 + }, + { + "epoch": 4.91, + "learning_rate": 2.545203797917653e-05, + "loss": 1.4008, + "step": 332100 + }, + { + "epoch": 4.91, + "learning_rate": 2.5444643279697118e-05, + "loss": 1.3886, + "step": 332200 + }, + { + "epoch": 4.91, + "learning_rate": 2.54372485802177e-05, + "loss": 1.4076, + "step": 332300 + }, + { + "epoch": 4.92, + "learning_rate": 2.5429853880738286e-05, + "loss": 1.408, + "step": 332400 + }, + { + "epoch": 4.92, + "learning_rate": 2.5422459181258875e-05, + "loss": 1.3714, + "step": 332500 + }, + { + "epoch": 4.92, + "learning_rate": 2.541506448177946e-05, + "loss": 1.3997, + "step": 332600 + }, + { + "epoch": 4.92, + "learning_rate": 2.5407669782300046e-05, + "loss": 1.4232, + "step": 332700 + }, + { + "epoch": 4.92, + "learning_rate": 2.5400275082820635e-05, + "loss": 1.3737, + "step": 332800 + }, + { + "epoch": 4.92, + "learning_rate": 2.539288038334122e-05, + "loss": 1.3903, + "step": 332900 + }, + { + "epoch": 4.92, + "learning_rate": 2.538548568386181e-05, + "loss": 1.4049, + "step": 333000 + }, + { + "epoch": 4.93, + "learning_rate": 2.5378090984382395e-05, + "loss": 1.3803, + "step": 333100 + }, + { + "epoch": 4.93, + "learning_rate": 2.5370696284902984e-05, + "loss": 1.4067, + "step": 333200 + }, + { + "epoch": 4.93, + "learning_rate": 2.536330158542357e-05, + "loss": 1.3726, + "step": 333300 + }, + { + "epoch": 4.93, + "learning_rate": 2.535590688594416e-05, + "loss": 1.4359, + "step": 333400 + }, + { + "epoch": 4.93, + "learning_rate": 2.534858613345954e-05, + "loss": 1.4146, + "step": 333500 + }, + { + "epoch": 4.93, + "learning_rate": 2.534119143398012e-05, + "loss": 1.4106, + "step": 333600 + }, + { + "epoch": 4.94, + "learning_rate": 2.5333796734500707e-05, + "loss": 1.3999, + "step": 333700 + }, + { + "epoch": 4.94, + "learning_rate": 2.5326402035021296e-05, + "loss": 1.4005, + "step": 333800 + }, + { + "epoch": 4.94, + "learning_rate": 2.5319007335541882e-05, + "loss": 1.4153, + "step": 333900 + }, + { + "epoch": 4.94, + "learning_rate": 2.531161263606247e-05, + "loss": 1.4184, + "step": 334000 + }, + { + "epoch": 4.94, + "learning_rate": 2.5304217936583057e-05, + "loss": 1.4164, + "step": 334100 + }, + { + "epoch": 4.94, + "learning_rate": 2.5296823237103646e-05, + "loss": 1.3939, + "step": 334200 + }, + { + "epoch": 4.94, + "learning_rate": 2.528942853762423e-05, + "loss": 1.3871, + "step": 334300 + }, + { + "epoch": 4.95, + "learning_rate": 2.528203383814482e-05, + "loss": 1.3975, + "step": 334400 + }, + { + "epoch": 4.95, + "learning_rate": 2.5274639138665406e-05, + "loss": 1.4158, + "step": 334500 + }, + { + "epoch": 4.95, + "learning_rate": 2.5267244439185995e-05, + "loss": 1.4101, + "step": 334600 + }, + { + "epoch": 4.95, + "learning_rate": 2.525984973970658e-05, + "loss": 1.4026, + "step": 334700 + }, + { + "epoch": 4.95, + "learning_rate": 2.525245504022717e-05, + "loss": 1.3994, + "step": 334800 + }, + { + "epoch": 4.95, + "learning_rate": 2.5245060340747755e-05, + "loss": 1.3891, + "step": 334900 + }, + { + "epoch": 4.95, + "learning_rate": 2.5237665641268337e-05, + "loss": 1.3969, + "step": 335000 + }, + { + "epoch": 4.96, + "learning_rate": 2.5230270941788927e-05, + "loss": 1.4142, + "step": 335100 + }, + { + "epoch": 4.96, + "learning_rate": 2.5222876242309512e-05, + "loss": 1.4041, + "step": 335200 + }, + { + "epoch": 4.96, + "learning_rate": 2.5215481542830098e-05, + "loss": 1.4263, + "step": 335300 + }, + { + "epoch": 4.96, + "learning_rate": 2.5208086843350687e-05, + "loss": 1.4229, + "step": 335400 + }, + { + "epoch": 4.96, + "learning_rate": 2.5200692143871272e-05, + "loss": 1.4045, + "step": 335500 + }, + { + "epoch": 4.96, + "learning_rate": 2.519329744439186e-05, + "loss": 1.3706, + "step": 335600 + }, + { + "epoch": 4.96, + "learning_rate": 2.5185902744912447e-05, + "loss": 1.4175, + "step": 335700 + }, + { + "epoch": 4.97, + "learning_rate": 2.5178508045433036e-05, + "loss": 1.387, + "step": 335800 + }, + { + "epoch": 4.97, + "learning_rate": 2.5171113345953622e-05, + "loss": 1.401, + "step": 335900 + }, + { + "epoch": 4.97, + "learning_rate": 2.516371864647421e-05, + "loss": 1.4065, + "step": 336000 + }, + { + "epoch": 4.97, + "learning_rate": 2.5156323946994796e-05, + "loss": 1.3945, + "step": 336100 + }, + { + "epoch": 4.97, + "learning_rate": 2.5148929247515385e-05, + "loss": 1.4092, + "step": 336200 + }, + { + "epoch": 4.97, + "learning_rate": 2.514160849503076e-05, + "loss": 1.4123, + "step": 336300 + }, + { + "epoch": 4.98, + "learning_rate": 2.5134213795551348e-05, + "loss": 1.3895, + "step": 336400 + }, + { + "epoch": 4.98, + "learning_rate": 2.5126819096071934e-05, + "loss": 1.399, + "step": 336500 + }, + { + "epoch": 4.98, + "learning_rate": 2.5119424396592523e-05, + "loss": 1.3791, + "step": 336600 + }, + { + "epoch": 4.98, + "learning_rate": 2.511202969711311e-05, + "loss": 1.4009, + "step": 336700 + }, + { + "epoch": 4.98, + "learning_rate": 2.5104634997633697e-05, + "loss": 1.4128, + "step": 336800 + }, + { + "epoch": 4.98, + "learning_rate": 2.5097240298154283e-05, + "loss": 1.4035, + "step": 336900 + }, + { + "epoch": 4.98, + "learning_rate": 2.5089845598674872e-05, + "loss": 1.4161, + "step": 337000 + }, + { + "epoch": 4.99, + "learning_rate": 2.5082450899195458e-05, + "loss": 1.4023, + "step": 337100 + }, + { + "epoch": 4.99, + "learning_rate": 2.5075056199716047e-05, + "loss": 1.381, + "step": 337200 + }, + { + "epoch": 4.99, + "learning_rate": 2.5067661500236632e-05, + "loss": 1.4085, + "step": 337300 + }, + { + "epoch": 4.99, + "learning_rate": 2.506026680075722e-05, + "loss": 1.3829, + "step": 337400 + }, + { + "epoch": 4.99, + "learning_rate": 2.5052872101277807e-05, + "loss": 1.4298, + "step": 337500 + }, + { + "epoch": 4.99, + "learning_rate": 2.504547740179839e-05, + "loss": 1.411, + "step": 337600 + }, + { + "epoch": 4.99, + "learning_rate": 2.503808270231898e-05, + "loss": 1.4376, + "step": 337700 + }, + { + "epoch": 5.0, + "learning_rate": 2.5030688002839564e-05, + "loss": 1.4147, + "step": 337800 + }, + { + "epoch": 5.0, + "learning_rate": 2.502329330336015e-05, + "loss": 1.3851, + "step": 337900 + }, + { + "epoch": 5.0, + "learning_rate": 2.501589860388074e-05, + "loss": 1.4166, + "step": 338000 + }, + { + "epoch": 5.0, + "learning_rate": 2.5008503904401324e-05, + "loss": 1.3948, + "step": 338100 + }, + { + "epoch": 5.0, + "learning_rate": 2.5001109204921913e-05, + "loss": 1.3702, + "step": 338200 + }, + { + "epoch": 5.0, + "learning_rate": 2.49937145054425e-05, + "loss": 1.3485, + "step": 338300 + }, + { + "epoch": 5.0, + "learning_rate": 2.4986393752957883e-05, + "loss": 1.3543, + "step": 338400 + }, + { + "epoch": 5.01, + "learning_rate": 2.497899905347847e-05, + "loss": 1.3329, + "step": 338500 + }, + { + "epoch": 5.01, + "learning_rate": 2.4971604353999054e-05, + "loss": 1.3373, + "step": 338600 + }, + { + "epoch": 5.01, + "learning_rate": 2.496420965451964e-05, + "loss": 1.3363, + "step": 338700 + }, + { + "epoch": 5.01, + "learning_rate": 2.495681495504023e-05, + "loss": 1.3605, + "step": 338800 + }, + { + "epoch": 5.01, + "learning_rate": 2.4949420255560814e-05, + "loss": 1.3547, + "step": 338900 + }, + { + "epoch": 5.01, + "learning_rate": 2.4942025556081403e-05, + "loss": 1.3487, + "step": 339000 + }, + { + "epoch": 5.02, + "learning_rate": 2.493463085660199e-05, + "loss": 1.3582, + "step": 339100 + }, + { + "epoch": 5.02, + "learning_rate": 2.4927236157122578e-05, + "loss": 1.3308, + "step": 339200 + }, + { + "epoch": 5.02, + "learning_rate": 2.491984145764316e-05, + "loss": 1.334, + "step": 339300 + }, + { + "epoch": 5.02, + "learning_rate": 2.491244675816375e-05, + "loss": 1.3531, + "step": 339400 + }, + { + "epoch": 5.02, + "learning_rate": 2.4905052058684335e-05, + "loss": 1.3437, + "step": 339500 + }, + { + "epoch": 5.02, + "learning_rate": 2.4897657359204924e-05, + "loss": 1.3161, + "step": 339600 + }, + { + "epoch": 5.02, + "learning_rate": 2.489026265972551e-05, + "loss": 1.3156, + "step": 339700 + }, + { + "epoch": 5.03, + "learning_rate": 2.48828679602461e-05, + "loss": 1.3701, + "step": 339800 + }, + { + "epoch": 5.03, + "learning_rate": 2.487547326076668e-05, + "loss": 1.3449, + "step": 339900 + }, + { + "epoch": 5.03, + "learning_rate": 2.486807856128727e-05, + "loss": 1.3695, + "step": 340000 + }, + { + "epoch": 5.03, + "learning_rate": 2.4860683861807856e-05, + "loss": 1.3214, + "step": 340100 + }, + { + "epoch": 5.03, + "learning_rate": 2.4853289162328445e-05, + "loss": 1.3218, + "step": 340200 + }, + { + "epoch": 5.03, + "learning_rate": 2.484589446284903e-05, + "loss": 1.3362, + "step": 340300 + }, + { + "epoch": 5.03, + "learning_rate": 2.483849976336962e-05, + "loss": 1.3517, + "step": 340400 + }, + { + "epoch": 5.04, + "learning_rate": 2.4831105063890205e-05, + "loss": 1.3528, + "step": 340500 + }, + { + "epoch": 5.04, + "learning_rate": 2.482371036441079e-05, + "loss": 1.3078, + "step": 340600 + }, + { + "epoch": 5.04, + "learning_rate": 2.481638961192617e-05, + "loss": 1.3507, + "step": 340700 + }, + { + "epoch": 5.04, + "learning_rate": 2.480899491244676e-05, + "loss": 1.3499, + "step": 340800 + }, + { + "epoch": 5.04, + "learning_rate": 2.4801600212967346e-05, + "loss": 1.3384, + "step": 340900 + }, + { + "epoch": 5.04, + "learning_rate": 2.4794205513487935e-05, + "loss": 1.35, + "step": 341000 + }, + { + "epoch": 5.04, + "learning_rate": 2.478681081400852e-05, + "loss": 1.3296, + "step": 341100 + }, + { + "epoch": 5.05, + "learning_rate": 2.4779416114529106e-05, + "loss": 1.3198, + "step": 341200 + }, + { + "epoch": 5.05, + "learning_rate": 2.477202141504969e-05, + "loss": 1.332, + "step": 341300 + }, + { + "epoch": 5.05, + "learning_rate": 2.476462671557028e-05, + "loss": 1.3688, + "step": 341400 + }, + { + "epoch": 5.05, + "learning_rate": 2.4757232016090866e-05, + "loss": 1.3425, + "step": 341500 + }, + { + "epoch": 5.05, + "learning_rate": 2.4749837316611455e-05, + "loss": 1.3265, + "step": 341600 + }, + { + "epoch": 5.05, + "learning_rate": 2.474244261713204e-05, + "loss": 1.3433, + "step": 341700 + }, + { + "epoch": 5.05, + "learning_rate": 2.473504791765263e-05, + "loss": 1.3219, + "step": 341800 + }, + { + "epoch": 5.06, + "learning_rate": 2.4727653218173212e-05, + "loss": 1.3654, + "step": 341900 + }, + { + "epoch": 5.06, + "learning_rate": 2.47202585186938e-05, + "loss": 1.3468, + "step": 342000 + }, + { + "epoch": 5.06, + "learning_rate": 2.4712863819214387e-05, + "loss": 1.3484, + "step": 342100 + }, + { + "epoch": 5.06, + "learning_rate": 2.4705469119734976e-05, + "loss": 1.3366, + "step": 342200 + }, + { + "epoch": 5.06, + "learning_rate": 2.469807442025556e-05, + "loss": 1.3378, + "step": 342300 + }, + { + "epoch": 5.06, + "learning_rate": 2.469067972077615e-05, + "loss": 1.33, + "step": 342400 + }, + { + "epoch": 5.07, + "learning_rate": 2.4683285021296736e-05, + "loss": 1.3422, + "step": 342500 + }, + { + "epoch": 5.07, + "learning_rate": 2.4675890321817322e-05, + "loss": 1.3506, + "step": 342600 + }, + { + "epoch": 5.07, + "learning_rate": 2.4668495622337907e-05, + "loss": 1.3379, + "step": 342700 + }, + { + "epoch": 5.07, + "learning_rate": 2.466117486985329e-05, + "loss": 1.3706, + "step": 342800 + }, + { + "epoch": 5.07, + "learning_rate": 2.4653780170373877e-05, + "loss": 1.3361, + "step": 342900 + }, + { + "epoch": 5.07, + "learning_rate": 2.4646385470894466e-05, + "loss": 1.3462, + "step": 343000 + }, + { + "epoch": 5.07, + "learning_rate": 2.463899077141505e-05, + "loss": 1.3442, + "step": 343100 + }, + { + "epoch": 5.08, + "learning_rate": 2.4631596071935637e-05, + "loss": 1.336, + "step": 343200 + }, + { + "epoch": 5.08, + "learning_rate": 2.4624201372456223e-05, + "loss": 1.3334, + "step": 343300 + }, + { + "epoch": 5.08, + "learning_rate": 2.4616806672976812e-05, + "loss": 1.3449, + "step": 343400 + }, + { + "epoch": 5.08, + "learning_rate": 2.4609411973497398e-05, + "loss": 1.336, + "step": 343500 + }, + { + "epoch": 5.08, + "learning_rate": 2.4602017274017987e-05, + "loss": 1.3497, + "step": 343600 + }, + { + "epoch": 5.08, + "learning_rate": 2.4594622574538572e-05, + "loss": 1.3586, + "step": 343700 + }, + { + "epoch": 5.08, + "learning_rate": 2.458722787505916e-05, + "loss": 1.323, + "step": 343800 + }, + { + "epoch": 5.09, + "learning_rate": 2.4579833175579744e-05, + "loss": 1.3518, + "step": 343900 + }, + { + "epoch": 5.09, + "learning_rate": 2.4572438476100333e-05, + "loss": 1.3332, + "step": 344000 + }, + { + "epoch": 5.09, + "learning_rate": 2.4565043776620918e-05, + "loss": 1.3651, + "step": 344100 + }, + { + "epoch": 5.09, + "learning_rate": 2.4557649077141507e-05, + "loss": 1.3599, + "step": 344200 + }, + { + "epoch": 5.09, + "learning_rate": 2.4550254377662093e-05, + "loss": 1.3434, + "step": 344300 + }, + { + "epoch": 5.09, + "learning_rate": 2.4542859678182682e-05, + "loss": 1.3533, + "step": 344400 + }, + { + "epoch": 5.09, + "learning_rate": 2.4535464978703264e-05, + "loss": 1.3188, + "step": 344500 + }, + { + "epoch": 5.1, + "learning_rate": 2.4528070279223853e-05, + "loss": 1.3369, + "step": 344600 + }, + { + "epoch": 5.1, + "learning_rate": 2.452067557974444e-05, + "loss": 1.3393, + "step": 344700 + }, + { + "epoch": 5.1, + "learning_rate": 2.4513280880265028e-05, + "loss": 1.3307, + "step": 344800 + }, + { + "epoch": 5.1, + "learning_rate": 2.4505960127780408e-05, + "loss": 1.3485, + "step": 344900 + }, + { + "epoch": 5.1, + "learning_rate": 2.4498565428300997e-05, + "loss": 1.3228, + "step": 345000 + }, + { + "epoch": 5.1, + "learning_rate": 2.4491170728821583e-05, + "loss": 1.3602, + "step": 345100 + }, + { + "epoch": 5.11, + "learning_rate": 2.448377602934217e-05, + "loss": 1.3233, + "step": 345200 + }, + { + "epoch": 5.11, + "learning_rate": 2.4476381329862754e-05, + "loss": 1.352, + "step": 345300 + }, + { + "epoch": 5.11, + "learning_rate": 2.4468986630383343e-05, + "loss": 1.3616, + "step": 345400 + }, + { + "epoch": 5.11, + "learning_rate": 2.446159193090393e-05, + "loss": 1.333, + "step": 345500 + }, + { + "epoch": 5.11, + "learning_rate": 2.4454197231424518e-05, + "loss": 1.369, + "step": 345600 + }, + { + "epoch": 5.11, + "learning_rate": 2.4446802531945104e-05, + "loss": 1.3199, + "step": 345700 + }, + { + "epoch": 5.11, + "learning_rate": 2.4439407832465693e-05, + "loss": 1.3311, + "step": 345800 + }, + { + "epoch": 5.12, + "learning_rate": 2.4432013132986275e-05, + "loss": 1.3319, + "step": 345900 + }, + { + "epoch": 5.12, + "learning_rate": 2.4424618433506864e-05, + "loss": 1.3702, + "step": 346000 + }, + { + "epoch": 5.12, + "learning_rate": 2.441722373402745e-05, + "loss": 1.3414, + "step": 346100 + }, + { + "epoch": 5.12, + "learning_rate": 2.440982903454804e-05, + "loss": 1.3635, + "step": 346200 + }, + { + "epoch": 5.12, + "learning_rate": 2.4402434335068624e-05, + "loss": 1.3551, + "step": 346300 + }, + { + "epoch": 5.12, + "learning_rate": 2.4395039635589213e-05, + "loss": 1.3301, + "step": 346400 + }, + { + "epoch": 5.12, + "learning_rate": 2.4387644936109795e-05, + "loss": 1.3368, + "step": 346500 + }, + { + "epoch": 5.13, + "learning_rate": 2.4380250236630384e-05, + "loss": 1.3348, + "step": 346600 + }, + { + "epoch": 5.13, + "learning_rate": 2.437285553715097e-05, + "loss": 1.3608, + "step": 346700 + }, + { + "epoch": 5.13, + "learning_rate": 2.436546083767156e-05, + "loss": 1.3438, + "step": 346800 + }, + { + "epoch": 5.13, + "learning_rate": 2.4358066138192145e-05, + "loss": 1.3665, + "step": 346900 + }, + { + "epoch": 5.13, + "learning_rate": 2.4350671438712734e-05, + "loss": 1.3661, + "step": 347000 + }, + { + "epoch": 5.13, + "learning_rate": 2.4343350686228114e-05, + "loss": 1.3751, + "step": 347100 + }, + { + "epoch": 5.13, + "learning_rate": 2.43359559867487e-05, + "loss": 1.3241, + "step": 347200 + }, + { + "epoch": 5.14, + "learning_rate": 2.4328561287269285e-05, + "loss": 1.3514, + "step": 347300 + }, + { + "epoch": 5.14, + "learning_rate": 2.4321166587789875e-05, + "loss": 1.35, + "step": 347400 + }, + { + "epoch": 5.14, + "learning_rate": 2.431377188831046e-05, + "loss": 1.3572, + "step": 347500 + }, + { + "epoch": 5.14, + "learning_rate": 2.430637718883105e-05, + "loss": 1.3777, + "step": 347600 + }, + { + "epoch": 5.14, + "learning_rate": 2.4298982489351635e-05, + "loss": 1.3747, + "step": 347700 + }, + { + "epoch": 5.14, + "learning_rate": 2.429158778987222e-05, + "loss": 1.3402, + "step": 347800 + }, + { + "epoch": 5.15, + "learning_rate": 2.4284193090392806e-05, + "loss": 1.372, + "step": 347900 + }, + { + "epoch": 5.15, + "learning_rate": 2.4276798390913395e-05, + "loss": 1.3475, + "step": 348000 + }, + { + "epoch": 5.15, + "learning_rate": 2.426940369143398e-05, + "loss": 1.3753, + "step": 348100 + }, + { + "epoch": 5.15, + "learning_rate": 2.426200899195457e-05, + "loss": 1.3414, + "step": 348200 + }, + { + "epoch": 5.15, + "learning_rate": 2.4254614292475155e-05, + "loss": 1.3287, + "step": 348300 + }, + { + "epoch": 5.15, + "learning_rate": 2.4247219592995744e-05, + "loss": 1.36, + "step": 348400 + }, + { + "epoch": 5.15, + "learning_rate": 2.4239824893516327e-05, + "loss": 1.3176, + "step": 348500 + }, + { + "epoch": 5.16, + "learning_rate": 2.4232430194036916e-05, + "loss": 1.3575, + "step": 348600 + }, + { + "epoch": 5.16, + "learning_rate": 2.42250354945575e-05, + "loss": 1.3686, + "step": 348700 + }, + { + "epoch": 5.16, + "learning_rate": 2.421764079507809e-05, + "loss": 1.3468, + "step": 348800 + }, + { + "epoch": 5.16, + "learning_rate": 2.4210246095598676e-05, + "loss": 1.3428, + "step": 348900 + }, + { + "epoch": 5.16, + "learning_rate": 2.4202851396119265e-05, + "loss": 1.3493, + "step": 349000 + }, + { + "epoch": 5.16, + "learning_rate": 2.419545669663985e-05, + "loss": 1.3746, + "step": 349100 + }, + { + "epoch": 5.16, + "learning_rate": 2.4188061997160436e-05, + "loss": 1.3618, + "step": 349200 + }, + { + "epoch": 5.17, + "learning_rate": 2.4180667297681022e-05, + "loss": 1.3373, + "step": 349300 + }, + { + "epoch": 5.17, + "learning_rate": 2.4173346545196406e-05, + "loss": 1.3801, + "step": 349400 + }, + { + "epoch": 5.17, + "learning_rate": 2.416595184571699e-05, + "loss": 1.3208, + "step": 349500 + }, + { + "epoch": 5.17, + "learning_rate": 2.4158557146237577e-05, + "loss": 1.3318, + "step": 349600 + }, + { + "epoch": 5.17, + "learning_rate": 2.4151162446758166e-05, + "loss": 1.3546, + "step": 349700 + }, + { + "epoch": 5.17, + "learning_rate": 2.4143767747278752e-05, + "loss": 1.3686, + "step": 349800 + }, + { + "epoch": 5.17, + "learning_rate": 2.4136373047799337e-05, + "loss": 1.366, + "step": 349900 + }, + { + "epoch": 5.18, + "learning_rate": 2.4128978348319926e-05, + "loss": 1.3592, + "step": 350000 + }, + { + "epoch": 5.18, + "learning_rate": 2.4121583648840512e-05, + "loss": 1.3487, + "step": 350100 + }, + { + "epoch": 5.18, + "learning_rate": 2.41141889493611e-05, + "loss": 1.3572, + "step": 350200 + }, + { + "epoch": 5.18, + "learning_rate": 2.4106794249881687e-05, + "loss": 1.3297, + "step": 350300 + }, + { + "epoch": 5.18, + "learning_rate": 2.4099399550402276e-05, + "loss": 1.3562, + "step": 350400 + }, + { + "epoch": 5.18, + "learning_rate": 2.4092004850922858e-05, + "loss": 1.3757, + "step": 350500 + }, + { + "epoch": 5.19, + "learning_rate": 2.4084610151443447e-05, + "loss": 1.3353, + "step": 350600 + }, + { + "epoch": 5.19, + "learning_rate": 2.4077215451964033e-05, + "loss": 1.3293, + "step": 350700 + }, + { + "epoch": 5.19, + "learning_rate": 2.406982075248462e-05, + "loss": 1.3459, + "step": 350800 + }, + { + "epoch": 5.19, + "learning_rate": 2.4062426053005207e-05, + "loss": 1.3724, + "step": 350900 + }, + { + "epoch": 5.19, + "learning_rate": 2.4055031353525796e-05, + "loss": 1.348, + "step": 351000 + }, + { + "epoch": 5.19, + "learning_rate": 2.404763665404638e-05, + "loss": 1.3536, + "step": 351100 + }, + { + "epoch": 5.19, + "learning_rate": 2.4040241954566968e-05, + "loss": 1.3623, + "step": 351200 + }, + { + "epoch": 5.2, + "learning_rate": 2.4032847255087553e-05, + "loss": 1.3587, + "step": 351300 + }, + { + "epoch": 5.2, + "learning_rate": 2.4025452555608142e-05, + "loss": 1.3258, + "step": 351400 + }, + { + "epoch": 5.2, + "learning_rate": 2.4018057856128728e-05, + "loss": 1.3554, + "step": 351500 + }, + { + "epoch": 5.2, + "learning_rate": 2.4010663156649317e-05, + "loss": 1.3583, + "step": 351600 + }, + { + "epoch": 5.2, + "learning_rate": 2.4003268457169903e-05, + "loss": 1.3475, + "step": 351700 + }, + { + "epoch": 5.2, + "learning_rate": 2.3995873757690488e-05, + "loss": 1.3436, + "step": 351800 + }, + { + "epoch": 5.2, + "learning_rate": 2.398855300520587e-05, + "loss": 1.3676, + "step": 351900 + }, + { + "epoch": 5.21, + "learning_rate": 2.3981158305726458e-05, + "loss": 1.3588, + "step": 352000 + }, + { + "epoch": 5.21, + "learning_rate": 2.3973763606247043e-05, + "loss": 1.3522, + "step": 352100 + }, + { + "epoch": 5.21, + "learning_rate": 2.396636890676763e-05, + "loss": 1.3447, + "step": 352200 + }, + { + "epoch": 5.21, + "learning_rate": 2.3958974207288218e-05, + "loss": 1.3548, + "step": 352300 + }, + { + "epoch": 5.21, + "learning_rate": 2.3951579507808804e-05, + "loss": 1.3581, + "step": 352400 + }, + { + "epoch": 5.21, + "learning_rate": 2.394418480832939e-05, + "loss": 1.3626, + "step": 352500 + }, + { + "epoch": 5.21, + "learning_rate": 2.3936790108849978e-05, + "loss": 1.3487, + "step": 352600 + }, + { + "epoch": 5.22, + "learning_rate": 2.3929395409370564e-05, + "loss": 1.3624, + "step": 352700 + }, + { + "epoch": 5.22, + "learning_rate": 2.3922000709891153e-05, + "loss": 1.3553, + "step": 352800 + }, + { + "epoch": 5.22, + "learning_rate": 2.391460601041174e-05, + "loss": 1.3216, + "step": 352900 + }, + { + "epoch": 5.22, + "learning_rate": 2.3907211310932324e-05, + "loss": 1.3566, + "step": 353000 + }, + { + "epoch": 5.22, + "learning_rate": 2.389981661145291e-05, + "loss": 1.3676, + "step": 353100 + }, + { + "epoch": 5.22, + "learning_rate": 2.38924219119735e-05, + "loss": 1.3445, + "step": 353200 + }, + { + "epoch": 5.23, + "learning_rate": 2.3885027212494085e-05, + "loss": 1.3795, + "step": 353300 + }, + { + "epoch": 5.23, + "learning_rate": 2.3877632513014674e-05, + "loss": 1.3506, + "step": 353400 + }, + { + "epoch": 5.23, + "learning_rate": 2.387023781353526e-05, + "loss": 1.3655, + "step": 353500 + }, + { + "epoch": 5.23, + "learning_rate": 2.3862843114055848e-05, + "loss": 1.3697, + "step": 353600 + }, + { + "epoch": 5.23, + "learning_rate": 2.3855448414576434e-05, + "loss": 1.3324, + "step": 353700 + }, + { + "epoch": 5.23, + "learning_rate": 2.384805371509702e-05, + "loss": 1.3343, + "step": 353800 + }, + { + "epoch": 5.23, + "learning_rate": 2.3840659015617605e-05, + "loss": 1.355, + "step": 353900 + }, + { + "epoch": 5.24, + "learning_rate": 2.3833264316138194e-05, + "loss": 1.3639, + "step": 354000 + }, + { + "epoch": 5.24, + "learning_rate": 2.382586961665878e-05, + "loss": 1.3654, + "step": 354100 + }, + { + "epoch": 5.24, + "learning_rate": 2.381854886417416e-05, + "loss": 1.3718, + "step": 354200 + }, + { + "epoch": 5.24, + "learning_rate": 2.381115416469475e-05, + "loss": 1.3633, + "step": 354300 + }, + { + "epoch": 5.24, + "learning_rate": 2.3803759465215335e-05, + "loss": 1.3697, + "step": 354400 + }, + { + "epoch": 5.24, + "learning_rate": 2.379636476573592e-05, + "loss": 1.3683, + "step": 354500 + }, + { + "epoch": 5.24, + "learning_rate": 2.378897006625651e-05, + "loss": 1.3455, + "step": 354600 + }, + { + "epoch": 5.25, + "learning_rate": 2.3781575366777095e-05, + "loss": 1.3487, + "step": 354700 + }, + { + "epoch": 5.25, + "learning_rate": 2.377418066729768e-05, + "loss": 1.3375, + "step": 354800 + }, + { + "epoch": 5.25, + "learning_rate": 2.376678596781827e-05, + "loss": 1.3701, + "step": 354900 + }, + { + "epoch": 5.25, + "learning_rate": 2.3759391268338856e-05, + "loss": 1.3438, + "step": 355000 + }, + { + "epoch": 5.25, + "learning_rate": 2.375199656885944e-05, + "loss": 1.3543, + "step": 355100 + }, + { + "epoch": 5.25, + "learning_rate": 2.374460186938003e-05, + "loss": 1.3552, + "step": 355200 + }, + { + "epoch": 5.25, + "learning_rate": 2.3737207169900616e-05, + "loss": 1.3419, + "step": 355300 + }, + { + "epoch": 5.26, + "learning_rate": 2.3729812470421205e-05, + "loss": 1.3706, + "step": 355400 + }, + { + "epoch": 5.26, + "learning_rate": 2.372241777094179e-05, + "loss": 1.3587, + "step": 355500 + }, + { + "epoch": 5.26, + "learning_rate": 2.3715023071462376e-05, + "loss": 1.348, + "step": 355600 + }, + { + "epoch": 5.26, + "learning_rate": 2.3707628371982965e-05, + "loss": 1.3674, + "step": 355700 + }, + { + "epoch": 5.26, + "learning_rate": 2.370023367250355e-05, + "loss": 1.3469, + "step": 355800 + }, + { + "epoch": 5.26, + "learning_rate": 2.3692838973024136e-05, + "loss": 1.3095, + "step": 355900 + }, + { + "epoch": 5.26, + "learning_rate": 2.3685444273544725e-05, + "loss": 1.3535, + "step": 356000 + }, + { + "epoch": 5.27, + "learning_rate": 2.367804957406531e-05, + "loss": 1.3539, + "step": 356100 + }, + { + "epoch": 5.27, + "learning_rate": 2.367072882158069e-05, + "loss": 1.3797, + "step": 356200 + }, + { + "epoch": 5.27, + "learning_rate": 2.366333412210128e-05, + "loss": 1.3584, + "step": 356300 + }, + { + "epoch": 5.27, + "learning_rate": 2.3655939422621863e-05, + "loss": 1.3396, + "step": 356400 + }, + { + "epoch": 5.27, + "learning_rate": 2.3648544723142452e-05, + "loss": 1.3826, + "step": 356500 + }, + { + "epoch": 5.27, + "learning_rate": 2.3641150023663037e-05, + "loss": 1.3658, + "step": 356600 + }, + { + "epoch": 5.28, + "learning_rate": 2.3633755324183627e-05, + "loss": 1.3391, + "step": 356700 + }, + { + "epoch": 5.28, + "learning_rate": 2.3626360624704212e-05, + "loss": 1.3616, + "step": 356800 + }, + { + "epoch": 5.28, + "learning_rate": 2.36189659252248e-05, + "loss": 1.3618, + "step": 356900 + }, + { + "epoch": 5.28, + "learning_rate": 2.3611571225745387e-05, + "loss": 1.3438, + "step": 357000 + }, + { + "epoch": 5.28, + "learning_rate": 2.3604176526265972e-05, + "loss": 1.336, + "step": 357100 + }, + { + "epoch": 5.28, + "learning_rate": 2.359678182678656e-05, + "loss": 1.3394, + "step": 357200 + }, + { + "epoch": 5.28, + "learning_rate": 2.3589387127307147e-05, + "loss": 1.3923, + "step": 357300 + }, + { + "epoch": 5.29, + "learning_rate": 2.3581992427827733e-05, + "loss": 1.3293, + "step": 357400 + }, + { + "epoch": 5.29, + "learning_rate": 2.3574597728348322e-05, + "loss": 1.3411, + "step": 357500 + }, + { + "epoch": 5.29, + "learning_rate": 2.3567203028868907e-05, + "loss": 1.3703, + "step": 357600 + }, + { + "epoch": 5.29, + "learning_rate": 2.3559808329389496e-05, + "loss": 1.3448, + "step": 357700 + }, + { + "epoch": 5.29, + "learning_rate": 2.3552413629910082e-05, + "loss": 1.3374, + "step": 357800 + }, + { + "epoch": 5.29, + "learning_rate": 2.3545018930430668e-05, + "loss": 1.3796, + "step": 357900 + }, + { + "epoch": 5.29, + "learning_rate": 2.3537624230951257e-05, + "loss": 1.3564, + "step": 358000 + }, + { + "epoch": 5.3, + "learning_rate": 2.3530229531471842e-05, + "loss": 1.3552, + "step": 358100 + }, + { + "epoch": 5.3, + "learning_rate": 2.3522908778987223e-05, + "loss": 1.3746, + "step": 358200 + }, + { + "epoch": 5.3, + "learning_rate": 2.3515514079507812e-05, + "loss": 1.3688, + "step": 358300 + }, + { + "epoch": 5.3, + "learning_rate": 2.3508119380028394e-05, + "loss": 1.3477, + "step": 358400 + }, + { + "epoch": 5.3, + "learning_rate": 2.3500724680548983e-05, + "loss": 1.3635, + "step": 358500 + }, + { + "epoch": 5.3, + "learning_rate": 2.349332998106957e-05, + "loss": 1.3558, + "step": 358600 + }, + { + "epoch": 5.3, + "learning_rate": 2.3485935281590158e-05, + "loss": 1.355, + "step": 358700 + }, + { + "epoch": 5.31, + "learning_rate": 2.3478540582110743e-05, + "loss": 1.3598, + "step": 358800 + }, + { + "epoch": 5.31, + "learning_rate": 2.3471145882631332e-05, + "loss": 1.3429, + "step": 358900 + }, + { + "epoch": 5.31, + "learning_rate": 2.3463751183151918e-05, + "loss": 1.3768, + "step": 359000 + }, + { + "epoch": 5.31, + "learning_rate": 2.3456356483672504e-05, + "loss": 1.3594, + "step": 359100 + }, + { + "epoch": 5.31, + "learning_rate": 2.344896178419309e-05, + "loss": 1.3476, + "step": 359200 + }, + { + "epoch": 5.31, + "learning_rate": 2.344156708471368e-05, + "loss": 1.3589, + "step": 359300 + }, + { + "epoch": 5.32, + "learning_rate": 2.3434172385234264e-05, + "loss": 1.326, + "step": 359400 + }, + { + "epoch": 5.32, + "learning_rate": 2.3426777685754853e-05, + "loss": 1.3501, + "step": 359500 + }, + { + "epoch": 5.32, + "learning_rate": 2.341938298627544e-05, + "loss": 1.3557, + "step": 359600 + }, + { + "epoch": 5.32, + "learning_rate": 2.3411988286796024e-05, + "loss": 1.324, + "step": 359700 + }, + { + "epoch": 5.32, + "learning_rate": 2.3404593587316613e-05, + "loss": 1.3608, + "step": 359800 + }, + { + "epoch": 5.32, + "learning_rate": 2.33971988878372e-05, + "loss": 1.3564, + "step": 359900 + }, + { + "epoch": 5.32, + "learning_rate": 2.3389804188357785e-05, + "loss": 1.3756, + "step": 360000 + }, + { + "epoch": 5.33, + "learning_rate": 2.338248343587317e-05, + "loss": 1.3827, + "step": 360100 + }, + { + "epoch": 5.33, + "learning_rate": 2.3375088736393754e-05, + "loss": 1.3527, + "step": 360200 + }, + { + "epoch": 5.33, + "learning_rate": 2.3367694036914343e-05, + "loss": 1.3564, + "step": 360300 + }, + { + "epoch": 5.33, + "learning_rate": 2.3360299337434925e-05, + "loss": 1.3469, + "step": 360400 + }, + { + "epoch": 5.33, + "learning_rate": 2.3352904637955514e-05, + "loss": 1.3817, + "step": 360500 + }, + { + "epoch": 5.33, + "learning_rate": 2.33455099384761e-05, + "loss": 1.3679, + "step": 360600 + }, + { + "epoch": 5.33, + "learning_rate": 2.333811523899669e-05, + "loss": 1.3551, + "step": 360700 + }, + { + "epoch": 5.34, + "learning_rate": 2.3330720539517275e-05, + "loss": 1.3651, + "step": 360800 + }, + { + "epoch": 5.34, + "learning_rate": 2.3323325840037864e-05, + "loss": 1.3574, + "step": 360900 + }, + { + "epoch": 5.34, + "learning_rate": 2.331593114055845e-05, + "loss": 1.3482, + "step": 361000 + }, + { + "epoch": 5.34, + "learning_rate": 2.3308536441079035e-05, + "loss": 1.3568, + "step": 361100 + }, + { + "epoch": 5.34, + "learning_rate": 2.330114174159962e-05, + "loss": 1.3438, + "step": 361200 + }, + { + "epoch": 5.34, + "learning_rate": 2.329374704212021e-05, + "loss": 1.3549, + "step": 361300 + }, + { + "epoch": 5.34, + "learning_rate": 2.3286352342640795e-05, + "loss": 1.3632, + "step": 361400 + }, + { + "epoch": 5.35, + "learning_rate": 2.3278957643161384e-05, + "loss": 1.3705, + "step": 361500 + }, + { + "epoch": 5.35, + "learning_rate": 2.327156294368197e-05, + "loss": 1.3236, + "step": 361600 + }, + { + "epoch": 5.35, + "learning_rate": 2.3264168244202556e-05, + "loss": 1.3419, + "step": 361700 + }, + { + "epoch": 5.35, + "learning_rate": 2.325677354472314e-05, + "loss": 1.3449, + "step": 361800 + }, + { + "epoch": 5.35, + "learning_rate": 2.324937884524373e-05, + "loss": 1.3646, + "step": 361900 + }, + { + "epoch": 5.35, + "learning_rate": 2.3241984145764316e-05, + "loss": 1.3447, + "step": 362000 + }, + { + "epoch": 5.36, + "learning_rate": 2.3234589446284905e-05, + "loss": 1.375, + "step": 362100 + }, + { + "epoch": 5.36, + "learning_rate": 2.322719474680549e-05, + "loss": 1.3972, + "step": 362200 + }, + { + "epoch": 5.36, + "learning_rate": 2.321980004732608e-05, + "loss": 1.3478, + "step": 362300 + }, + { + "epoch": 5.36, + "learning_rate": 2.3212405347846662e-05, + "loss": 1.3451, + "step": 362400 + }, + { + "epoch": 5.36, + "learning_rate": 2.320501064836725e-05, + "loss": 1.3443, + "step": 362500 + }, + { + "epoch": 5.36, + "learning_rate": 2.3197615948887837e-05, + "loss": 1.3716, + "step": 362600 + }, + { + "epoch": 5.36, + "learning_rate": 2.3190221249408426e-05, + "loss": 1.36, + "step": 362700 + }, + { + "epoch": 5.37, + "learning_rate": 2.318282654992901e-05, + "loss": 1.3498, + "step": 362800 + }, + { + "epoch": 5.37, + "learning_rate": 2.31754318504496e-05, + "loss": 1.3447, + "step": 362900 + }, + { + "epoch": 5.37, + "learning_rate": 2.3168037150970186e-05, + "loss": 1.3486, + "step": 363000 + }, + { + "epoch": 5.37, + "learning_rate": 2.316064245149077e-05, + "loss": 1.3642, + "step": 363100 + }, + { + "epoch": 5.37, + "learning_rate": 2.315324775201136e-05, + "loss": 1.3316, + "step": 363200 + }, + { + "epoch": 5.37, + "learning_rate": 2.3145853052531946e-05, + "loss": 1.3485, + "step": 363300 + }, + { + "epoch": 5.37, + "learning_rate": 2.3138458353052532e-05, + "loss": 1.3408, + "step": 363400 + }, + { + "epoch": 5.38, + "learning_rate": 2.313106365357312e-05, + "loss": 1.3639, + "step": 363500 + }, + { + "epoch": 5.38, + "learning_rate": 2.3123668954093706e-05, + "loss": 1.3714, + "step": 363600 + }, + { + "epoch": 5.38, + "learning_rate": 2.3116274254614292e-05, + "loss": 1.3556, + "step": 363700 + }, + { + "epoch": 5.38, + "learning_rate": 2.310887955513488e-05, + "loss": 1.3674, + "step": 363800 + }, + { + "epoch": 5.38, + "learning_rate": 2.3101484855655467e-05, + "loss": 1.3647, + "step": 363900 + }, + { + "epoch": 5.38, + "learning_rate": 2.3094090156176056e-05, + "loss": 1.3344, + "step": 364000 + }, + { + "epoch": 5.38, + "learning_rate": 2.3086769403691436e-05, + "loss": 1.3416, + "step": 364100 + }, + { + "epoch": 5.39, + "learning_rate": 2.3079374704212022e-05, + "loss": 1.3712, + "step": 364200 + }, + { + "epoch": 5.39, + "learning_rate": 2.307198000473261e-05, + "loss": 1.3582, + "step": 364300 + }, + { + "epoch": 5.39, + "learning_rate": 2.3064585305253193e-05, + "loss": 1.3546, + "step": 364400 + }, + { + "epoch": 5.39, + "learning_rate": 2.3057190605773782e-05, + "loss": 1.3509, + "step": 364500 + }, + { + "epoch": 5.39, + "learning_rate": 2.3049795906294368e-05, + "loss": 1.3188, + "step": 364600 + }, + { + "epoch": 5.39, + "learning_rate": 2.3042401206814957e-05, + "loss": 1.3573, + "step": 364700 + }, + { + "epoch": 5.4, + "learning_rate": 2.3035006507335542e-05, + "loss": 1.3377, + "step": 364800 + }, + { + "epoch": 5.4, + "learning_rate": 2.302761180785613e-05, + "loss": 1.3533, + "step": 364900 + }, + { + "epoch": 5.4, + "learning_rate": 2.3020217108376714e-05, + "loss": 1.3818, + "step": 365000 + }, + { + "epoch": 5.4, + "learning_rate": 2.3012822408897303e-05, + "loss": 1.3548, + "step": 365100 + }, + { + "epoch": 5.4, + "learning_rate": 2.300542770941789e-05, + "loss": 1.3449, + "step": 365200 + }, + { + "epoch": 5.4, + "learning_rate": 2.2998033009938477e-05, + "loss": 1.3479, + "step": 365300 + }, + { + "epoch": 5.4, + "learning_rate": 2.2990638310459063e-05, + "loss": 1.3131, + "step": 365400 + }, + { + "epoch": 5.41, + "learning_rate": 2.2983243610979652e-05, + "loss": 1.3591, + "step": 365500 + }, + { + "epoch": 5.41, + "learning_rate": 2.2975848911500238e-05, + "loss": 1.3443, + "step": 365600 + }, + { + "epoch": 5.41, + "learning_rate": 2.2968454212020823e-05, + "loss": 1.3336, + "step": 365700 + }, + { + "epoch": 5.41, + "learning_rate": 2.2961059512541412e-05, + "loss": 1.3556, + "step": 365800 + }, + { + "epoch": 5.41, + "learning_rate": 2.2953664813061998e-05, + "loss": 1.3977, + "step": 365900 + }, + { + "epoch": 5.41, + "learning_rate": 2.2946270113582584e-05, + "loss": 1.3506, + "step": 366000 + }, + { + "epoch": 5.41, + "learning_rate": 2.2938875414103173e-05, + "loss": 1.3391, + "step": 366100 + }, + { + "epoch": 5.42, + "learning_rate": 2.293148071462376e-05, + "loss": 1.3433, + "step": 366200 + }, + { + "epoch": 5.42, + "learning_rate": 2.2924086015144344e-05, + "loss": 1.3471, + "step": 366300 + }, + { + "epoch": 5.42, + "learning_rate": 2.2916765262659724e-05, + "loss": 1.3525, + "step": 366400 + }, + { + "epoch": 5.42, + "learning_rate": 2.2909370563180313e-05, + "loss": 1.3621, + "step": 366500 + }, + { + "epoch": 5.42, + "learning_rate": 2.29019758637009e-05, + "loss": 1.3505, + "step": 366600 + }, + { + "epoch": 5.42, + "learning_rate": 2.2894581164221488e-05, + "loss": 1.3749, + "step": 366700 + }, + { + "epoch": 5.42, + "learning_rate": 2.2887186464742074e-05, + "loss": 1.3439, + "step": 366800 + }, + { + "epoch": 5.43, + "learning_rate": 2.2879791765262663e-05, + "loss": 1.3735, + "step": 366900 + }, + { + "epoch": 5.43, + "learning_rate": 2.2872397065783245e-05, + "loss": 1.3718, + "step": 367000 + }, + { + "epoch": 5.43, + "learning_rate": 2.2865002366303834e-05, + "loss": 1.3532, + "step": 367100 + }, + { + "epoch": 5.43, + "learning_rate": 2.285760766682442e-05, + "loss": 1.3453, + "step": 367200 + }, + { + "epoch": 5.43, + "learning_rate": 2.285021296734501e-05, + "loss": 1.3509, + "step": 367300 + }, + { + "epoch": 5.43, + "learning_rate": 2.2842818267865594e-05, + "loss": 1.3741, + "step": 367400 + }, + { + "epoch": 5.44, + "learning_rate": 2.2835423568386183e-05, + "loss": 1.3597, + "step": 367500 + }, + { + "epoch": 5.44, + "learning_rate": 2.282802886890677e-05, + "loss": 1.3519, + "step": 367600 + }, + { + "epoch": 5.44, + "learning_rate": 2.2820634169427355e-05, + "loss": 1.3491, + "step": 367700 + }, + { + "epoch": 5.44, + "learning_rate": 2.281323946994794e-05, + "loss": 1.3902, + "step": 367800 + }, + { + "epoch": 5.44, + "learning_rate": 2.280584477046853e-05, + "loss": 1.3428, + "step": 367900 + }, + { + "epoch": 5.44, + "learning_rate": 2.2798450070989115e-05, + "loss": 1.3533, + "step": 368000 + }, + { + "epoch": 5.44, + "learning_rate": 2.2791055371509704e-05, + "loss": 1.3521, + "step": 368100 + }, + { + "epoch": 5.45, + "learning_rate": 2.278366067203029e-05, + "loss": 1.3501, + "step": 368200 + }, + { + "epoch": 5.45, + "learning_rate": 2.2776265972550875e-05, + "loss": 1.3528, + "step": 368300 + }, + { + "epoch": 5.45, + "learning_rate": 2.2768871273071464e-05, + "loss": 1.3558, + "step": 368400 + }, + { + "epoch": 5.45, + "learning_rate": 2.2761550520586845e-05, + "loss": 1.3692, + "step": 368500 + }, + { + "epoch": 5.45, + "learning_rate": 2.275415582110743e-05, + "loss": 1.3566, + "step": 368600 + }, + { + "epoch": 5.45, + "learning_rate": 2.274676112162802e-05, + "loss": 1.3515, + "step": 368700 + }, + { + "epoch": 5.45, + "learning_rate": 2.2739366422148605e-05, + "loss": 1.361, + "step": 368800 + }, + { + "epoch": 5.46, + "learning_rate": 2.2731971722669194e-05, + "loss": 1.3528, + "step": 368900 + }, + { + "epoch": 5.46, + "learning_rate": 2.2724577023189776e-05, + "loss": 1.3766, + "step": 369000 + }, + { + "epoch": 5.46, + "learning_rate": 2.2717182323710365e-05, + "loss": 1.356, + "step": 369100 + }, + { + "epoch": 5.46, + "learning_rate": 2.270978762423095e-05, + "loss": 1.3618, + "step": 369200 + }, + { + "epoch": 5.46, + "learning_rate": 2.270239292475154e-05, + "loss": 1.3446, + "step": 369300 + }, + { + "epoch": 5.46, + "learning_rate": 2.2694998225272126e-05, + "loss": 1.3522, + "step": 369400 + }, + { + "epoch": 5.46, + "learning_rate": 2.2687603525792715e-05, + "loss": 1.3401, + "step": 369500 + }, + { + "epoch": 5.47, + "learning_rate": 2.2680208826313297e-05, + "loss": 1.3564, + "step": 369600 + }, + { + "epoch": 5.47, + "learning_rate": 2.2672814126833886e-05, + "loss": 1.3251, + "step": 369700 + }, + { + "epoch": 5.47, + "learning_rate": 2.266541942735447e-05, + "loss": 1.3456, + "step": 369800 + }, + { + "epoch": 5.47, + "learning_rate": 2.265802472787506e-05, + "loss": 1.3496, + "step": 369900 + }, + { + "epoch": 5.47, + "learning_rate": 2.2650630028395646e-05, + "loss": 1.346, + "step": 370000 + }, + { + "epoch": 5.47, + "learning_rate": 2.2643235328916235e-05, + "loss": 1.3617, + "step": 370100 + }, + { + "epoch": 5.47, + "learning_rate": 2.263584062943682e-05, + "loss": 1.3442, + "step": 370200 + }, + { + "epoch": 5.48, + "learning_rate": 2.2628445929957407e-05, + "loss": 1.3626, + "step": 370300 + }, + { + "epoch": 5.48, + "learning_rate": 2.2621051230477992e-05, + "loss": 1.3643, + "step": 370400 + }, + { + "epoch": 5.48, + "learning_rate": 2.261365653099858e-05, + "loss": 1.3718, + "step": 370500 + }, + { + "epoch": 5.48, + "learning_rate": 2.260633577851396e-05, + "loss": 1.3166, + "step": 370600 + }, + { + "epoch": 5.48, + "learning_rate": 2.259894107903455e-05, + "loss": 1.3684, + "step": 370700 + }, + { + "epoch": 5.48, + "learning_rate": 2.2591546379555136e-05, + "loss": 1.313, + "step": 370800 + }, + { + "epoch": 5.49, + "learning_rate": 2.2584151680075725e-05, + "loss": 1.3632, + "step": 370900 + }, + { + "epoch": 5.49, + "learning_rate": 2.2576756980596308e-05, + "loss": 1.343, + "step": 371000 + }, + { + "epoch": 5.49, + "learning_rate": 2.2569362281116897e-05, + "loss": 1.3527, + "step": 371100 + }, + { + "epoch": 5.49, + "learning_rate": 2.2561967581637482e-05, + "loss": 1.3526, + "step": 371200 + }, + { + "epoch": 5.49, + "learning_rate": 2.255457288215807e-05, + "loss": 1.3481, + "step": 371300 + }, + { + "epoch": 5.49, + "learning_rate": 2.2547178182678657e-05, + "loss": 1.3524, + "step": 371400 + }, + { + "epoch": 5.49, + "learning_rate": 2.2539783483199246e-05, + "loss": 1.3399, + "step": 371500 + }, + { + "epoch": 5.5, + "learning_rate": 2.2532388783719828e-05, + "loss": 1.3331, + "step": 371600 + }, + { + "epoch": 5.5, + "learning_rate": 2.2524994084240417e-05, + "loss": 1.3776, + "step": 371700 + }, + { + "epoch": 5.5, + "learning_rate": 2.2517599384761003e-05, + "loss": 1.3537, + "step": 371800 + }, + { + "epoch": 5.5, + "learning_rate": 2.2510204685281592e-05, + "loss": 1.3599, + "step": 371900 + }, + { + "epoch": 5.5, + "learning_rate": 2.2502809985802178e-05, + "loss": 1.3614, + "step": 372000 + }, + { + "epoch": 5.5, + "learning_rate": 2.2495415286322767e-05, + "loss": 1.3527, + "step": 372100 + }, + { + "epoch": 5.5, + "learning_rate": 2.2488020586843352e-05, + "loss": 1.3766, + "step": 372200 + }, + { + "epoch": 5.51, + "learning_rate": 2.2480625887363938e-05, + "loss": 1.3389, + "step": 372300 + }, + { + "epoch": 5.51, + "learning_rate": 2.2473231187884523e-05, + "loss": 1.3551, + "step": 372400 + }, + { + "epoch": 5.51, + "learning_rate": 2.2465836488405112e-05, + "loss": 1.3791, + "step": 372500 + }, + { + "epoch": 5.51, + "learning_rate": 2.2458515735920493e-05, + "loss": 1.3556, + "step": 372600 + }, + { + "epoch": 5.51, + "learning_rate": 2.2451121036441082e-05, + "loss": 1.3639, + "step": 372700 + }, + { + "epoch": 5.51, + "learning_rate": 2.2443726336961668e-05, + "loss": 1.3547, + "step": 372800 + }, + { + "epoch": 5.51, + "learning_rate": 2.2436331637482253e-05, + "loss": 1.3513, + "step": 372900 + }, + { + "epoch": 5.52, + "learning_rate": 2.242893693800284e-05, + "loss": 1.3674, + "step": 373000 + }, + { + "epoch": 5.52, + "learning_rate": 2.2421542238523428e-05, + "loss": 1.356, + "step": 373100 + }, + { + "epoch": 5.52, + "learning_rate": 2.2414147539044014e-05, + "loss": 1.3506, + "step": 373200 + }, + { + "epoch": 5.52, + "learning_rate": 2.2406752839564603e-05, + "loss": 1.3393, + "step": 373300 + }, + { + "epoch": 5.52, + "learning_rate": 2.2399358140085188e-05, + "loss": 1.3737, + "step": 373400 + }, + { + "epoch": 5.52, + "learning_rate": 2.2391963440605777e-05, + "loss": 1.3702, + "step": 373500 + }, + { + "epoch": 5.53, + "learning_rate": 2.238456874112636e-05, + "loss": 1.3421, + "step": 373600 + }, + { + "epoch": 5.53, + "learning_rate": 2.237717404164695e-05, + "loss": 1.3553, + "step": 373700 + }, + { + "epoch": 5.53, + "learning_rate": 2.2369779342167534e-05, + "loss": 1.3766, + "step": 373800 + }, + { + "epoch": 5.53, + "learning_rate": 2.2362384642688123e-05, + "loss": 1.363, + "step": 373900 + }, + { + "epoch": 5.53, + "learning_rate": 2.235498994320871e-05, + "loss": 1.3612, + "step": 374000 + }, + { + "epoch": 5.53, + "learning_rate": 2.2347595243729298e-05, + "loss": 1.3513, + "step": 374100 + }, + { + "epoch": 5.53, + "learning_rate": 2.2340200544249883e-05, + "loss": 1.3717, + "step": 374200 + }, + { + "epoch": 5.54, + "learning_rate": 2.233280584477047e-05, + "loss": 1.3578, + "step": 374300 + }, + { + "epoch": 5.54, + "learning_rate": 2.2325411145291055e-05, + "loss": 1.3693, + "step": 374400 + }, + { + "epoch": 5.54, + "learning_rate": 2.2318016445811644e-05, + "loss": 1.3801, + "step": 374500 + }, + { + "epoch": 5.54, + "learning_rate": 2.231062174633223e-05, + "loss": 1.3535, + "step": 374600 + }, + { + "epoch": 5.54, + "learning_rate": 2.230322704685282e-05, + "loss": 1.3847, + "step": 374700 + }, + { + "epoch": 5.54, + "learning_rate": 2.2295832347373404e-05, + "loss": 1.3473, + "step": 374800 + }, + { + "epoch": 5.54, + "learning_rate": 2.2288511594888785e-05, + "loss": 1.3502, + "step": 374900 + }, + { + "epoch": 5.55, + "learning_rate": 2.228111689540937e-05, + "loss": 1.377, + "step": 375000 + }, + { + "epoch": 5.55, + "learning_rate": 2.227372219592996e-05, + "loss": 1.3696, + "step": 375100 + }, + { + "epoch": 5.55, + "learning_rate": 2.2266327496450545e-05, + "loss": 1.3537, + "step": 375200 + }, + { + "epoch": 5.55, + "learning_rate": 2.2258932796971134e-05, + "loss": 1.3625, + "step": 375300 + }, + { + "epoch": 5.55, + "learning_rate": 2.225153809749172e-05, + "loss": 1.3548, + "step": 375400 + }, + { + "epoch": 5.55, + "learning_rate": 2.224414339801231e-05, + "loss": 1.3559, + "step": 375500 + }, + { + "epoch": 5.55, + "learning_rate": 2.223674869853289e-05, + "loss": 1.3543, + "step": 375600 + }, + { + "epoch": 5.56, + "learning_rate": 2.222935399905348e-05, + "loss": 1.3594, + "step": 375700 + }, + { + "epoch": 5.56, + "learning_rate": 2.2221959299574065e-05, + "loss": 1.3565, + "step": 375800 + }, + { + "epoch": 5.56, + "learning_rate": 2.2214564600094654e-05, + "loss": 1.3714, + "step": 375900 + }, + { + "epoch": 5.56, + "learning_rate": 2.220716990061524e-05, + "loss": 1.3748, + "step": 376000 + }, + { + "epoch": 5.56, + "learning_rate": 2.219977520113583e-05, + "loss": 1.3814, + "step": 376100 + }, + { + "epoch": 5.56, + "learning_rate": 2.219238050165641e-05, + "loss": 1.375, + "step": 376200 + }, + { + "epoch": 5.57, + "learning_rate": 2.2184985802177e-05, + "loss": 1.3437, + "step": 376300 + }, + { + "epoch": 5.57, + "learning_rate": 2.2177591102697586e-05, + "loss": 1.3762, + "step": 376400 + }, + { + "epoch": 5.57, + "learning_rate": 2.2170196403218175e-05, + "loss": 1.342, + "step": 376500 + }, + { + "epoch": 5.57, + "learning_rate": 2.216280170373876e-05, + "loss": 1.345, + "step": 376600 + }, + { + "epoch": 5.57, + "learning_rate": 2.215540700425935e-05, + "loss": 1.3638, + "step": 376700 + }, + { + "epoch": 5.57, + "learning_rate": 2.2148012304779935e-05, + "loss": 1.3701, + "step": 376800 + }, + { + "epoch": 5.57, + "learning_rate": 2.214061760530052e-05, + "loss": 1.3759, + "step": 376900 + }, + { + "epoch": 5.58, + "learning_rate": 2.21332968528159e-05, + "loss": 1.3683, + "step": 377000 + }, + { + "epoch": 5.58, + "learning_rate": 2.212590215333649e-05, + "loss": 1.3851, + "step": 377100 + }, + { + "epoch": 5.58, + "learning_rate": 2.2118507453857076e-05, + "loss": 1.3368, + "step": 377200 + }, + { + "epoch": 5.58, + "learning_rate": 2.2111112754377665e-05, + "loss": 1.3602, + "step": 377300 + }, + { + "epoch": 5.58, + "learning_rate": 2.210371805489825e-05, + "loss": 1.3416, + "step": 377400 + }, + { + "epoch": 5.58, + "learning_rate": 2.209632335541884e-05, + "loss": 1.3721, + "step": 377500 + }, + { + "epoch": 5.58, + "learning_rate": 2.2088928655939422e-05, + "loss": 1.3514, + "step": 377600 + }, + { + "epoch": 5.59, + "learning_rate": 2.208153395646001e-05, + "loss": 1.3524, + "step": 377700 + }, + { + "epoch": 5.59, + "learning_rate": 2.2074139256980597e-05, + "loss": 1.3703, + "step": 377800 + }, + { + "epoch": 5.59, + "learning_rate": 2.2066744557501186e-05, + "loss": 1.3542, + "step": 377900 + }, + { + "epoch": 5.59, + "learning_rate": 2.205934985802177e-05, + "loss": 1.3647, + "step": 378000 + }, + { + "epoch": 5.59, + "learning_rate": 2.205195515854236e-05, + "loss": 1.3634, + "step": 378100 + }, + { + "epoch": 5.59, + "learning_rate": 2.2044560459062943e-05, + "loss": 1.3594, + "step": 378200 + }, + { + "epoch": 5.59, + "learning_rate": 2.203716575958353e-05, + "loss": 1.3639, + "step": 378300 + }, + { + "epoch": 5.6, + "learning_rate": 2.2029771060104117e-05, + "loss": 1.3542, + "step": 378400 + }, + { + "epoch": 5.6, + "learning_rate": 2.2022376360624706e-05, + "loss": 1.3474, + "step": 378500 + }, + { + "epoch": 5.6, + "learning_rate": 2.2014981661145292e-05, + "loss": 1.3861, + "step": 378600 + }, + { + "epoch": 5.6, + "learning_rate": 2.200758696166588e-05, + "loss": 1.3639, + "step": 378700 + }, + { + "epoch": 5.6, + "learning_rate": 2.2000192262186467e-05, + "loss": 1.3835, + "step": 378800 + }, + { + "epoch": 5.6, + "learning_rate": 2.1992797562707052e-05, + "loss": 1.3601, + "step": 378900 + }, + { + "epoch": 5.61, + "learning_rate": 2.1985402863227638e-05, + "loss": 1.3475, + "step": 379000 + }, + { + "epoch": 5.61, + "learning_rate": 2.1978008163748227e-05, + "loss": 1.3506, + "step": 379100 + }, + { + "epoch": 5.61, + "learning_rate": 2.1970613464268813e-05, + "loss": 1.3434, + "step": 379200 + }, + { + "epoch": 5.61, + "learning_rate": 2.19632187647894e-05, + "loss": 1.3621, + "step": 379300 + }, + { + "epoch": 5.61, + "learning_rate": 2.1955898012304782e-05, + "loss": 1.3617, + "step": 379400 + }, + { + "epoch": 5.61, + "learning_rate": 2.1948503312825368e-05, + "loss": 1.3635, + "step": 379500 + }, + { + "epoch": 5.61, + "learning_rate": 2.1941108613345953e-05, + "loss": 1.3626, + "step": 379600 + }, + { + "epoch": 5.62, + "learning_rate": 2.1933713913866542e-05, + "loss": 1.3663, + "step": 379700 + }, + { + "epoch": 5.62, + "learning_rate": 2.1926319214387128e-05, + "loss": 1.36, + "step": 379800 + }, + { + "epoch": 5.62, + "learning_rate": 2.1918924514907717e-05, + "loss": 1.3745, + "step": 379900 + }, + { + "epoch": 5.62, + "learning_rate": 2.1911529815428303e-05, + "loss": 1.3676, + "step": 380000 + }, + { + "epoch": 5.62, + "learning_rate": 2.190413511594889e-05, + "loss": 1.3575, + "step": 380100 + }, + { + "epoch": 5.62, + "learning_rate": 2.1896740416469474e-05, + "loss": 1.3795, + "step": 380200 + }, + { + "epoch": 5.62, + "learning_rate": 2.1889345716990063e-05, + "loss": 1.3607, + "step": 380300 + }, + { + "epoch": 5.63, + "learning_rate": 2.188195101751065e-05, + "loss": 1.3566, + "step": 380400 + }, + { + "epoch": 5.63, + "learning_rate": 2.1874556318031238e-05, + "loss": 1.3641, + "step": 380500 + }, + { + "epoch": 5.63, + "learning_rate": 2.1867161618551823e-05, + "loss": 1.3397, + "step": 380600 + }, + { + "epoch": 5.63, + "learning_rate": 2.1859766919072412e-05, + "loss": 1.3481, + "step": 380700 + }, + { + "epoch": 5.63, + "learning_rate": 2.1852372219592998e-05, + "loss": 1.3568, + "step": 380800 + }, + { + "epoch": 5.63, + "learning_rate": 2.1844977520113584e-05, + "loss": 1.3878, + "step": 380900 + }, + { + "epoch": 5.63, + "learning_rate": 2.183758282063417e-05, + "loss": 1.3658, + "step": 381000 + }, + { + "epoch": 5.64, + "learning_rate": 2.1830188121154758e-05, + "loss": 1.3635, + "step": 381100 + }, + { + "epoch": 5.64, + "learning_rate": 2.1822793421675344e-05, + "loss": 1.3748, + "step": 381200 + }, + { + "epoch": 5.64, + "learning_rate": 2.1815398722195933e-05, + "loss": 1.3379, + "step": 381300 + }, + { + "epoch": 5.64, + "learning_rate": 2.1808077969711313e-05, + "loss": 1.3838, + "step": 381400 + }, + { + "epoch": 5.64, + "learning_rate": 2.18006832702319e-05, + "loss": 1.363, + "step": 381500 + }, + { + "epoch": 5.64, + "learning_rate": 2.1793288570752485e-05, + "loss": 1.3644, + "step": 381600 + }, + { + "epoch": 5.65, + "learning_rate": 2.1785893871273074e-05, + "loss": 1.4008, + "step": 381700 + }, + { + "epoch": 5.65, + "learning_rate": 2.177849917179366e-05, + "loss": 1.3566, + "step": 381800 + }, + { + "epoch": 5.65, + "learning_rate": 2.1771104472314245e-05, + "loss": 1.3817, + "step": 381900 + }, + { + "epoch": 5.65, + "learning_rate": 2.1763709772834834e-05, + "loss": 1.3296, + "step": 382000 + }, + { + "epoch": 5.65, + "learning_rate": 2.175631507335542e-05, + "loss": 1.3526, + "step": 382100 + }, + { + "epoch": 5.65, + "learning_rate": 2.1748920373876005e-05, + "loss": 1.3616, + "step": 382200 + }, + { + "epoch": 5.65, + "learning_rate": 2.1741525674396594e-05, + "loss": 1.3482, + "step": 382300 + }, + { + "epoch": 5.66, + "learning_rate": 2.173413097491718e-05, + "loss": 1.3729, + "step": 382400 + }, + { + "epoch": 5.66, + "learning_rate": 2.172673627543777e-05, + "loss": 1.3498, + "step": 382500 + }, + { + "epoch": 5.66, + "learning_rate": 2.1719341575958355e-05, + "loss": 1.3323, + "step": 382600 + }, + { + "epoch": 5.66, + "learning_rate": 2.171194687647894e-05, + "loss": 1.3727, + "step": 382700 + }, + { + "epoch": 5.66, + "learning_rate": 2.170455217699953e-05, + "loss": 1.3347, + "step": 382800 + }, + { + "epoch": 5.66, + "learning_rate": 2.1697157477520115e-05, + "loss": 1.3469, + "step": 382900 + }, + { + "epoch": 5.66, + "learning_rate": 2.16897627780407e-05, + "loss": 1.3796, + "step": 383000 + }, + { + "epoch": 5.67, + "learning_rate": 2.168236807856129e-05, + "loss": 1.3719, + "step": 383100 + }, + { + "epoch": 5.67, + "learning_rate": 2.1674973379081875e-05, + "loss": 1.3529, + "step": 383200 + }, + { + "epoch": 5.67, + "learning_rate": 2.1667578679602464e-05, + "loss": 1.3666, + "step": 383300 + }, + { + "epoch": 5.67, + "learning_rate": 2.166018398012305e-05, + "loss": 1.3678, + "step": 383400 + }, + { + "epoch": 5.67, + "learning_rate": 2.1652789280643635e-05, + "loss": 1.361, + "step": 383500 + }, + { + "epoch": 5.67, + "learning_rate": 2.1645468528159016e-05, + "loss": 1.354, + "step": 383600 + }, + { + "epoch": 5.67, + "learning_rate": 2.16380738286796e-05, + "loss": 1.3837, + "step": 383700 + }, + { + "epoch": 5.68, + "learning_rate": 2.163067912920019e-05, + "loss": 1.3643, + "step": 383800 + }, + { + "epoch": 5.68, + "learning_rate": 2.1623284429720776e-05, + "loss": 1.3387, + "step": 383900 + }, + { + "epoch": 5.68, + "learning_rate": 2.1615889730241365e-05, + "loss": 1.3837, + "step": 384000 + }, + { + "epoch": 5.68, + "learning_rate": 2.160849503076195e-05, + "loss": 1.3352, + "step": 384100 + }, + { + "epoch": 5.68, + "learning_rate": 2.1601100331282537e-05, + "loss": 1.3388, + "step": 384200 + }, + { + "epoch": 5.68, + "learning_rate": 2.1593705631803126e-05, + "loss": 1.3733, + "step": 384300 + }, + { + "epoch": 5.69, + "learning_rate": 2.158631093232371e-05, + "loss": 1.3625, + "step": 384400 + }, + { + "epoch": 5.69, + "learning_rate": 2.1578916232844297e-05, + "loss": 1.3501, + "step": 384500 + }, + { + "epoch": 5.69, + "learning_rate": 2.1571521533364886e-05, + "loss": 1.3437, + "step": 384600 + }, + { + "epoch": 5.69, + "learning_rate": 2.156412683388547e-05, + "loss": 1.3637, + "step": 384700 + }, + { + "epoch": 5.69, + "learning_rate": 2.1556732134406057e-05, + "loss": 1.3936, + "step": 384800 + }, + { + "epoch": 5.69, + "learning_rate": 2.1549337434926646e-05, + "loss": 1.3549, + "step": 384900 + }, + { + "epoch": 5.69, + "learning_rate": 2.1541942735447232e-05, + "loss": 1.3613, + "step": 385000 + }, + { + "epoch": 5.7, + "learning_rate": 2.153454803596782e-05, + "loss": 1.362, + "step": 385100 + }, + { + "epoch": 5.7, + "learning_rate": 2.1527153336488406e-05, + "loss": 1.3761, + "step": 385200 + }, + { + "epoch": 5.7, + "learning_rate": 2.1519758637008992e-05, + "loss": 1.3901, + "step": 385300 + }, + { + "epoch": 5.7, + "learning_rate": 2.151236393752958e-05, + "loss": 1.3521, + "step": 385400 + }, + { + "epoch": 5.7, + "learning_rate": 2.1504969238050167e-05, + "loss": 1.3484, + "step": 385500 + }, + { + "epoch": 5.7, + "learning_rate": 2.1497574538570752e-05, + "loss": 1.3832, + "step": 385600 + }, + { + "epoch": 5.7, + "learning_rate": 2.149017983909134e-05, + "loss": 1.3364, + "step": 385700 + }, + { + "epoch": 5.71, + "learning_rate": 2.1482859086606722e-05, + "loss": 1.3713, + "step": 385800 + }, + { + "epoch": 5.71, + "learning_rate": 2.1475464387127307e-05, + "loss": 1.3254, + "step": 385900 + }, + { + "epoch": 5.71, + "learning_rate": 2.1468069687647897e-05, + "loss": 1.3668, + "step": 386000 + }, + { + "epoch": 5.71, + "learning_rate": 2.1460674988168482e-05, + "loss": 1.3247, + "step": 386100 + }, + { + "epoch": 5.71, + "learning_rate": 2.1453280288689068e-05, + "loss": 1.3417, + "step": 386200 + }, + { + "epoch": 5.71, + "learning_rate": 2.1445885589209653e-05, + "loss": 1.3744, + "step": 386300 + }, + { + "epoch": 5.71, + "learning_rate": 2.1438490889730242e-05, + "loss": 1.3603, + "step": 386400 + }, + { + "epoch": 5.72, + "learning_rate": 2.1431096190250828e-05, + "loss": 1.3609, + "step": 386500 + }, + { + "epoch": 5.72, + "learning_rate": 2.1423701490771417e-05, + "loss": 1.3729, + "step": 386600 + }, + { + "epoch": 5.72, + "learning_rate": 2.1416306791292003e-05, + "loss": 1.3592, + "step": 386700 + }, + { + "epoch": 5.72, + "learning_rate": 2.140891209181259e-05, + "loss": 1.3698, + "step": 386800 + }, + { + "epoch": 5.72, + "learning_rate": 2.1401517392333177e-05, + "loss": 1.3506, + "step": 386900 + }, + { + "epoch": 5.72, + "learning_rate": 2.1394122692853763e-05, + "loss": 1.3826, + "step": 387000 + }, + { + "epoch": 5.72, + "learning_rate": 2.138672799337435e-05, + "loss": 1.3469, + "step": 387100 + }, + { + "epoch": 5.73, + "learning_rate": 2.1379333293894938e-05, + "loss": 1.3606, + "step": 387200 + }, + { + "epoch": 5.73, + "learning_rate": 2.1371938594415523e-05, + "loss": 1.3496, + "step": 387300 + }, + { + "epoch": 5.73, + "learning_rate": 2.1364543894936112e-05, + "loss": 1.3739, + "step": 387400 + }, + { + "epoch": 5.73, + "learning_rate": 2.1357149195456698e-05, + "loss": 1.3686, + "step": 387500 + }, + { + "epoch": 5.73, + "learning_rate": 2.1349754495977284e-05, + "loss": 1.3555, + "step": 387600 + }, + { + "epoch": 5.73, + "learning_rate": 2.1342359796497873e-05, + "loss": 1.3511, + "step": 387700 + }, + { + "epoch": 5.74, + "learning_rate": 2.133496509701846e-05, + "loss": 1.3657, + "step": 387800 + }, + { + "epoch": 5.74, + "learning_rate": 2.1327570397539044e-05, + "loss": 1.3599, + "step": 387900 + }, + { + "epoch": 5.74, + "learning_rate": 2.1320175698059633e-05, + "loss": 1.3341, + "step": 388000 + }, + { + "epoch": 5.74, + "learning_rate": 2.131278099858022e-05, + "loss": 1.3282, + "step": 388100 + }, + { + "epoch": 5.74, + "learning_rate": 2.1305386299100804e-05, + "loss": 1.3674, + "step": 388200 + }, + { + "epoch": 5.74, + "learning_rate": 2.1297991599621393e-05, + "loss": 1.3606, + "step": 388300 + }, + { + "epoch": 5.74, + "learning_rate": 2.129059690014198e-05, + "loss": 1.3486, + "step": 388400 + }, + { + "epoch": 5.75, + "learning_rate": 2.1283202200662568e-05, + "loss": 1.3482, + "step": 388500 + }, + { + "epoch": 5.75, + "learning_rate": 2.1275807501183154e-05, + "loss": 1.3463, + "step": 388600 + }, + { + "epoch": 5.75, + "learning_rate": 2.1268412801703743e-05, + "loss": 1.3668, + "step": 388700 + }, + { + "epoch": 5.75, + "learning_rate": 2.1261018102224325e-05, + "loss": 1.3637, + "step": 388800 + }, + { + "epoch": 5.75, + "learning_rate": 2.1253697349739705e-05, + "loss": 1.3715, + "step": 388900 + }, + { + "epoch": 5.75, + "learning_rate": 2.1246302650260294e-05, + "loss": 1.3766, + "step": 389000 + }, + { + "epoch": 5.75, + "learning_rate": 2.123890795078088e-05, + "loss": 1.3801, + "step": 389100 + }, + { + "epoch": 5.76, + "learning_rate": 2.123151325130147e-05, + "loss": 1.372, + "step": 389200 + }, + { + "epoch": 5.76, + "learning_rate": 2.1224118551822055e-05, + "loss": 1.3755, + "step": 389300 + }, + { + "epoch": 5.76, + "learning_rate": 2.1216723852342644e-05, + "loss": 1.3414, + "step": 389400 + }, + { + "epoch": 5.76, + "learning_rate": 2.120932915286323e-05, + "loss": 1.3542, + "step": 389500 + }, + { + "epoch": 5.76, + "learning_rate": 2.1201934453383815e-05, + "loss": 1.3812, + "step": 389600 + }, + { + "epoch": 5.76, + "learning_rate": 2.11945397539044e-05, + "loss": 1.3531, + "step": 389700 + }, + { + "epoch": 5.76, + "learning_rate": 2.118714505442499e-05, + "loss": 1.3911, + "step": 389800 + }, + { + "epoch": 5.77, + "learning_rate": 2.1179750354945575e-05, + "loss": 1.3526, + "step": 389900 + }, + { + "epoch": 5.77, + "learning_rate": 2.1172355655466164e-05, + "loss": 1.3434, + "step": 390000 + }, + { + "epoch": 5.77, + "learning_rate": 2.116496095598675e-05, + "loss": 1.36, + "step": 390100 + }, + { + "epoch": 5.77, + "learning_rate": 2.1157566256507336e-05, + "loss": 1.372, + "step": 390200 + }, + { + "epoch": 5.77, + "learning_rate": 2.1150171557027925e-05, + "loss": 1.3773, + "step": 390300 + }, + { + "epoch": 5.77, + "learning_rate": 2.114277685754851e-05, + "loss": 1.3466, + "step": 390400 + }, + { + "epoch": 5.78, + "learning_rate": 2.1135382158069096e-05, + "loss": 1.3554, + "step": 390500 + }, + { + "epoch": 5.78, + "learning_rate": 2.1127987458589685e-05, + "loss": 1.3829, + "step": 390600 + }, + { + "epoch": 5.78, + "learning_rate": 2.112059275911027e-05, + "loss": 1.3756, + "step": 390700 + }, + { + "epoch": 5.78, + "learning_rate": 2.1113198059630856e-05, + "loss": 1.3478, + "step": 390800 + }, + { + "epoch": 5.78, + "learning_rate": 2.1105803360151445e-05, + "loss": 1.3361, + "step": 390900 + }, + { + "epoch": 5.78, + "learning_rate": 2.1098482607666826e-05, + "loss": 1.3957, + "step": 391000 + }, + { + "epoch": 5.78, + "learning_rate": 2.109108790818741e-05, + "loss": 1.3728, + "step": 391100 + }, + { + "epoch": 5.79, + "learning_rate": 2.1083693208708e-05, + "loss": 1.375, + "step": 391200 + }, + { + "epoch": 5.79, + "learning_rate": 2.1076298509228586e-05, + "loss": 1.3352, + "step": 391300 + }, + { + "epoch": 5.79, + "learning_rate": 2.106890380974917e-05, + "loss": 1.3668, + "step": 391400 + }, + { + "epoch": 5.79, + "learning_rate": 2.1061509110269757e-05, + "loss": 1.3691, + "step": 391500 + }, + { + "epoch": 5.79, + "learning_rate": 2.1054114410790346e-05, + "loss": 1.3457, + "step": 391600 + }, + { + "epoch": 5.79, + "learning_rate": 2.1046719711310932e-05, + "loss": 1.3506, + "step": 391700 + }, + { + "epoch": 5.79, + "learning_rate": 2.103932501183152e-05, + "loss": 1.357, + "step": 391800 + }, + { + "epoch": 5.8, + "learning_rate": 2.1031930312352107e-05, + "loss": 1.3642, + "step": 391900 + }, + { + "epoch": 5.8, + "learning_rate": 2.1024535612872696e-05, + "loss": 1.341, + "step": 392000 + }, + { + "epoch": 5.8, + "learning_rate": 2.101714091339328e-05, + "loss": 1.3864, + "step": 392100 + }, + { + "epoch": 5.8, + "learning_rate": 2.1009746213913867e-05, + "loss": 1.3601, + "step": 392200 + }, + { + "epoch": 5.8, + "learning_rate": 2.1002351514434452e-05, + "loss": 1.3812, + "step": 392300 + }, + { + "epoch": 5.8, + "learning_rate": 2.099495681495504e-05, + "loss": 1.3769, + "step": 392400 + }, + { + "epoch": 5.8, + "learning_rate": 2.0987562115475627e-05, + "loss": 1.3615, + "step": 392500 + }, + { + "epoch": 5.81, + "learning_rate": 2.0980167415996216e-05, + "loss": 1.3577, + "step": 392600 + }, + { + "epoch": 5.81, + "learning_rate": 2.0972772716516802e-05, + "loss": 1.3689, + "step": 392700 + }, + { + "epoch": 5.81, + "learning_rate": 2.0965378017037387e-05, + "loss": 1.3629, + "step": 392800 + }, + { + "epoch": 5.81, + "learning_rate": 2.0957983317557976e-05, + "loss": 1.3564, + "step": 392900 + }, + { + "epoch": 5.81, + "learning_rate": 2.0950588618078562e-05, + "loss": 1.351, + "step": 393000 + }, + { + "epoch": 5.81, + "learning_rate": 2.0943193918599148e-05, + "loss": 1.3678, + "step": 393100 + }, + { + "epoch": 5.82, + "learning_rate": 2.093587316611453e-05, + "loss": 1.3662, + "step": 393200 + }, + { + "epoch": 5.82, + "learning_rate": 2.0928478466635117e-05, + "loss": 1.3438, + "step": 393300 + }, + { + "epoch": 5.82, + "learning_rate": 2.0921083767155703e-05, + "loss": 1.3553, + "step": 393400 + }, + { + "epoch": 5.82, + "learning_rate": 2.091368906767629e-05, + "loss": 1.3524, + "step": 393500 + }, + { + "epoch": 5.82, + "learning_rate": 2.0906294368196878e-05, + "loss": 1.3671, + "step": 393600 + }, + { + "epoch": 5.82, + "learning_rate": 2.0898899668717463e-05, + "loss": 1.3646, + "step": 393700 + }, + { + "epoch": 5.82, + "learning_rate": 2.0891504969238052e-05, + "loss": 1.3618, + "step": 393800 + }, + { + "epoch": 5.83, + "learning_rate": 2.0884110269758638e-05, + "loss": 1.3627, + "step": 393900 + }, + { + "epoch": 5.83, + "learning_rate": 2.0876715570279227e-05, + "loss": 1.3611, + "step": 394000 + }, + { + "epoch": 5.83, + "learning_rate": 2.086932087079981e-05, + "loss": 1.3712, + "step": 394100 + }, + { + "epoch": 5.83, + "learning_rate": 2.0861926171320398e-05, + "loss": 1.3859, + "step": 394200 + }, + { + "epoch": 5.83, + "learning_rate": 2.0854531471840984e-05, + "loss": 1.3838, + "step": 394300 + }, + { + "epoch": 5.83, + "learning_rate": 2.0847136772361573e-05, + "loss": 1.3674, + "step": 394400 + }, + { + "epoch": 5.83, + "learning_rate": 2.083974207288216e-05, + "loss": 1.3708, + "step": 394500 + }, + { + "epoch": 5.84, + "learning_rate": 2.0832347373402747e-05, + "loss": 1.3507, + "step": 394600 + }, + { + "epoch": 5.84, + "learning_rate": 2.082495267392333e-05, + "loss": 1.3661, + "step": 394700 + }, + { + "epoch": 5.84, + "learning_rate": 2.081755797444392e-05, + "loss": 1.3538, + "step": 394800 + }, + { + "epoch": 5.84, + "learning_rate": 2.0810163274964504e-05, + "loss": 1.3354, + "step": 394900 + }, + { + "epoch": 5.84, + "learning_rate": 2.0802768575485093e-05, + "loss": 1.3672, + "step": 395000 + }, + { + "epoch": 5.84, + "learning_rate": 2.079537387600568e-05, + "loss": 1.379, + "step": 395100 + }, + { + "epoch": 5.84, + "learning_rate": 2.0787979176526268e-05, + "loss": 1.3556, + "step": 395200 + }, + { + "epoch": 5.85, + "learning_rate": 2.078065842404165e-05, + "loss": 1.3433, + "step": 395300 + }, + { + "epoch": 5.85, + "learning_rate": 2.0773263724562234e-05, + "loss": 1.3462, + "step": 395400 + }, + { + "epoch": 5.85, + "learning_rate": 2.076586902508282e-05, + "loss": 1.3458, + "step": 395500 + }, + { + "epoch": 5.85, + "learning_rate": 2.075847432560341e-05, + "loss": 1.3521, + "step": 395600 + }, + { + "epoch": 5.85, + "learning_rate": 2.0751079626123994e-05, + "loss": 1.3496, + "step": 395700 + }, + { + "epoch": 5.85, + "learning_rate": 2.0743684926644583e-05, + "loss": 1.3636, + "step": 395800 + }, + { + "epoch": 5.86, + "learning_rate": 2.073629022716517e-05, + "loss": 1.3717, + "step": 395900 + }, + { + "epoch": 5.86, + "learning_rate": 2.0728895527685758e-05, + "loss": 1.3657, + "step": 396000 + }, + { + "epoch": 5.86, + "learning_rate": 2.072150082820634e-05, + "loss": 1.3682, + "step": 396100 + }, + { + "epoch": 5.86, + "learning_rate": 2.071410612872693e-05, + "loss": 1.3621, + "step": 396200 + }, + { + "epoch": 5.86, + "learning_rate": 2.0706711429247515e-05, + "loss": 1.3663, + "step": 396300 + }, + { + "epoch": 5.86, + "learning_rate": 2.0699316729768104e-05, + "loss": 1.3633, + "step": 396400 + }, + { + "epoch": 5.86, + "learning_rate": 2.069192203028869e-05, + "loss": 1.351, + "step": 396500 + }, + { + "epoch": 5.87, + "learning_rate": 2.068452733080928e-05, + "loss": 1.3534, + "step": 396600 + }, + { + "epoch": 5.87, + "learning_rate": 2.067713263132986e-05, + "loss": 1.3563, + "step": 396700 + }, + { + "epoch": 5.87, + "learning_rate": 2.066973793185045e-05, + "loss": 1.3717, + "step": 396800 + }, + { + "epoch": 5.87, + "learning_rate": 2.0662343232371036e-05, + "loss": 1.3717, + "step": 396900 + }, + { + "epoch": 5.87, + "learning_rate": 2.0654948532891625e-05, + "loss": 1.3533, + "step": 397000 + }, + { + "epoch": 5.87, + "learning_rate": 2.064755383341221e-05, + "loss": 1.3486, + "step": 397100 + }, + { + "epoch": 5.87, + "learning_rate": 2.06401591339328e-05, + "loss": 1.3755, + "step": 397200 + }, + { + "epoch": 5.88, + "learning_rate": 2.063283838144818e-05, + "loss": 1.3437, + "step": 397300 + }, + { + "epoch": 5.88, + "learning_rate": 2.0625443681968765e-05, + "loss": 1.3579, + "step": 397400 + }, + { + "epoch": 5.88, + "learning_rate": 2.061804898248935e-05, + "loss": 1.3634, + "step": 397500 + }, + { + "epoch": 5.88, + "learning_rate": 2.061065428300994e-05, + "loss": 1.3371, + "step": 397600 + }, + { + "epoch": 5.88, + "learning_rate": 2.0603259583530526e-05, + "loss": 1.348, + "step": 397700 + }, + { + "epoch": 5.88, + "learning_rate": 2.0595864884051115e-05, + "loss": 1.3728, + "step": 397800 + }, + { + "epoch": 5.88, + "learning_rate": 2.05884701845717e-05, + "loss": 1.3601, + "step": 397900 + }, + { + "epoch": 5.89, + "learning_rate": 2.058107548509229e-05, + "loss": 1.3654, + "step": 398000 + }, + { + "epoch": 5.89, + "learning_rate": 2.057368078561287e-05, + "loss": 1.3873, + "step": 398100 + }, + { + "epoch": 5.89, + "learning_rate": 2.056628608613346e-05, + "loss": 1.3464, + "step": 398200 + }, + { + "epoch": 5.89, + "learning_rate": 2.0558891386654046e-05, + "loss": 1.3842, + "step": 398300 + }, + { + "epoch": 5.89, + "learning_rate": 2.0551496687174635e-05, + "loss": 1.3672, + "step": 398400 + }, + { + "epoch": 5.89, + "learning_rate": 2.054410198769522e-05, + "loss": 1.3623, + "step": 398500 + }, + { + "epoch": 5.9, + "learning_rate": 2.053670728821581e-05, + "loss": 1.3657, + "step": 398600 + }, + { + "epoch": 5.9, + "learning_rate": 2.0529312588736392e-05, + "loss": 1.3536, + "step": 398700 + }, + { + "epoch": 5.9, + "learning_rate": 2.052191788925698e-05, + "loss": 1.342, + "step": 398800 + }, + { + "epoch": 5.9, + "learning_rate": 2.0514523189777567e-05, + "loss": 1.3739, + "step": 398900 + }, + { + "epoch": 5.9, + "learning_rate": 2.0507128490298156e-05, + "loss": 1.354, + "step": 399000 + }, + { + "epoch": 5.9, + "learning_rate": 2.049973379081874e-05, + "loss": 1.3457, + "step": 399100 + }, + { + "epoch": 5.9, + "learning_rate": 2.049233909133933e-05, + "loss": 1.3563, + "step": 399200 + }, + { + "epoch": 5.91, + "learning_rate": 2.0484944391859916e-05, + "loss": 1.3486, + "step": 399300 + }, + { + "epoch": 5.91, + "learning_rate": 2.0477549692380502e-05, + "loss": 1.3496, + "step": 399400 + }, + { + "epoch": 5.91, + "learning_rate": 2.0470228939895882e-05, + "loss": 1.3916, + "step": 399500 + }, + { + "epoch": 5.91, + "learning_rate": 2.046283424041647e-05, + "loss": 1.3605, + "step": 399600 + }, + { + "epoch": 5.91, + "learning_rate": 2.0455439540937057e-05, + "loss": 1.3406, + "step": 399700 + }, + { + "epoch": 5.91, + "learning_rate": 2.0448044841457646e-05, + "loss": 1.3797, + "step": 399800 + }, + { + "epoch": 5.91, + "learning_rate": 2.044065014197823e-05, + "loss": 1.3541, + "step": 399900 + }, + { + "epoch": 5.92, + "learning_rate": 2.0433255442498817e-05, + "loss": 1.3753, + "step": 400000 + }, + { + "epoch": 5.92, + "learning_rate": 2.0425860743019403e-05, + "loss": 1.3537, + "step": 400100 + }, + { + "epoch": 5.92, + "learning_rate": 2.0418466043539992e-05, + "loss": 1.3532, + "step": 400200 + }, + { + "epoch": 5.92, + "learning_rate": 2.0411071344060578e-05, + "loss": 1.3417, + "step": 400300 + }, + { + "epoch": 5.92, + "learning_rate": 2.0403676644581167e-05, + "loss": 1.3837, + "step": 400400 + }, + { + "epoch": 5.92, + "learning_rate": 2.0396281945101752e-05, + "loss": 1.3718, + "step": 400500 + }, + { + "epoch": 5.92, + "learning_rate": 2.038888724562234e-05, + "loss": 1.3726, + "step": 400600 + }, + { + "epoch": 5.93, + "learning_rate": 2.0381492546142924e-05, + "loss": 1.3725, + "step": 400700 + }, + { + "epoch": 5.93, + "learning_rate": 2.0374097846663513e-05, + "loss": 1.3595, + "step": 400800 + }, + { + "epoch": 5.93, + "learning_rate": 2.0366703147184098e-05, + "loss": 1.3646, + "step": 400900 + }, + { + "epoch": 5.93, + "learning_rate": 2.0359308447704687e-05, + "loss": 1.3744, + "step": 401000 + }, + { + "epoch": 5.93, + "learning_rate": 2.0351913748225273e-05, + "loss": 1.355, + "step": 401100 + }, + { + "epoch": 5.93, + "learning_rate": 2.0344519048745862e-05, + "loss": 1.3529, + "step": 401200 + }, + { + "epoch": 5.93, + "learning_rate": 2.0337124349266448e-05, + "loss": 1.346, + "step": 401300 + }, + { + "epoch": 5.94, + "learning_rate": 2.0329729649787033e-05, + "loss": 1.3438, + "step": 401400 + }, + { + "epoch": 5.94, + "learning_rate": 2.032233495030762e-05, + "loss": 1.3379, + "step": 401500 + }, + { + "epoch": 5.94, + "learning_rate": 2.0315014197823003e-05, + "loss": 1.3548, + "step": 401600 + }, + { + "epoch": 5.94, + "learning_rate": 2.0307619498343588e-05, + "loss": 1.3422, + "step": 401700 + }, + { + "epoch": 5.94, + "learning_rate": 2.0300224798864177e-05, + "loss": 1.38, + "step": 401800 + }, + { + "epoch": 5.94, + "learning_rate": 2.0292830099384763e-05, + "loss": 1.343, + "step": 401900 + }, + { + "epoch": 5.95, + "learning_rate": 2.028543539990535e-05, + "loss": 1.3457, + "step": 402000 + }, + { + "epoch": 5.95, + "learning_rate": 2.0278040700425934e-05, + "loss": 1.3529, + "step": 402100 + }, + { + "epoch": 5.95, + "learning_rate": 2.0270646000946523e-05, + "loss": 1.3702, + "step": 402200 + }, + { + "epoch": 5.95, + "learning_rate": 2.026325130146711e-05, + "loss": 1.3999, + "step": 402300 + }, + { + "epoch": 5.95, + "learning_rate": 2.0255856601987698e-05, + "loss": 1.3891, + "step": 402400 + }, + { + "epoch": 5.95, + "learning_rate": 2.0248461902508284e-05, + "loss": 1.3632, + "step": 402500 + }, + { + "epoch": 5.95, + "learning_rate": 2.0241067203028873e-05, + "loss": 1.357, + "step": 402600 + }, + { + "epoch": 5.96, + "learning_rate": 2.0233672503549455e-05, + "loss": 1.3587, + "step": 402700 + }, + { + "epoch": 5.96, + "learning_rate": 2.0226277804070044e-05, + "loss": 1.3786, + "step": 402800 + }, + { + "epoch": 5.96, + "learning_rate": 2.021888310459063e-05, + "loss": 1.3533, + "step": 402900 + }, + { + "epoch": 5.96, + "learning_rate": 2.021148840511122e-05, + "loss": 1.3539, + "step": 403000 + }, + { + "epoch": 5.96, + "learning_rate": 2.0204093705631804e-05, + "loss": 1.3745, + "step": 403100 + }, + { + "epoch": 5.96, + "learning_rate": 2.0196699006152393e-05, + "loss": 1.366, + "step": 403200 + }, + { + "epoch": 5.96, + "learning_rate": 2.0189304306672975e-05, + "loss": 1.355, + "step": 403300 + }, + { + "epoch": 5.97, + "learning_rate": 2.0181909607193564e-05, + "loss": 1.3504, + "step": 403400 + }, + { + "epoch": 5.97, + "learning_rate": 2.017451490771415e-05, + "loss": 1.3413, + "step": 403500 + }, + { + "epoch": 5.97, + "learning_rate": 2.016712020823474e-05, + "loss": 1.3297, + "step": 403600 + }, + { + "epoch": 5.97, + "learning_rate": 2.015979945575012e-05, + "loss": 1.3773, + "step": 403700 + }, + { + "epoch": 5.97, + "learning_rate": 2.0152404756270705e-05, + "loss": 1.3612, + "step": 403800 + }, + { + "epoch": 5.97, + "learning_rate": 2.0145010056791294e-05, + "loss": 1.3555, + "step": 403900 + }, + { + "epoch": 5.97, + "learning_rate": 2.013761535731188e-05, + "loss": 1.3575, + "step": 404000 + }, + { + "epoch": 5.98, + "learning_rate": 2.0130220657832466e-05, + "loss": 1.3592, + "step": 404100 + }, + { + "epoch": 5.98, + "learning_rate": 2.0122825958353055e-05, + "loss": 1.3643, + "step": 404200 + }, + { + "epoch": 5.98, + "learning_rate": 2.011543125887364e-05, + "loss": 1.3626, + "step": 404300 + }, + { + "epoch": 5.98, + "learning_rate": 2.010803655939423e-05, + "loss": 1.379, + "step": 404400 + }, + { + "epoch": 5.98, + "learning_rate": 2.0100641859914815e-05, + "loss": 1.36, + "step": 404500 + }, + { + "epoch": 5.98, + "learning_rate": 2.0093247160435404e-05, + "loss": 1.3551, + "step": 404600 + }, + { + "epoch": 5.99, + "learning_rate": 2.0085852460955986e-05, + "loss": 1.3859, + "step": 404700 + }, + { + "epoch": 5.99, + "learning_rate": 2.0078457761476575e-05, + "loss": 1.3758, + "step": 404800 + }, + { + "epoch": 5.99, + "learning_rate": 2.007106306199716e-05, + "loss": 1.3323, + "step": 404900 + }, + { + "epoch": 5.99, + "learning_rate": 2.006366836251775e-05, + "loss": 1.3705, + "step": 405000 + }, + { + "epoch": 5.99, + "learning_rate": 2.0056273663038335e-05, + "loss": 1.3614, + "step": 405100 + }, + { + "epoch": 5.99, + "learning_rate": 2.0048878963558924e-05, + "loss": 1.3642, + "step": 405200 + }, + { + "epoch": 5.99, + "learning_rate": 2.0041484264079507e-05, + "loss": 1.3397, + "step": 405300 + }, + { + "epoch": 6.0, + "learning_rate": 2.0034089564600096e-05, + "loss": 1.3774, + "step": 405400 + }, + { + "epoch": 6.0, + "learning_rate": 2.002669486512068e-05, + "loss": 1.3685, + "step": 405500 + }, + { + "epoch": 6.0, + "learning_rate": 2.001930016564127e-05, + "loss": 1.3352, + "step": 405600 + }, + { + "epoch": 6.0, + "learning_rate": 2.0011905466161856e-05, + "loss": 1.3751, + "step": 405700 + }, + { + "epoch": 6.0, + "learning_rate": 2.0004584713677237e-05, + "loss": 1.3057, + "step": 405800 + }, + { + "epoch": 6.0, + "learning_rate": 1.9997190014197826e-05, + "loss": 1.3074, + "step": 405900 + }, + { + "epoch": 6.0, + "learning_rate": 1.998979531471841e-05, + "loss": 1.3093, + "step": 406000 + }, + { + "epoch": 6.01, + "learning_rate": 1.9982400615238997e-05, + "loss": 1.297, + "step": 406100 + }, + { + "epoch": 6.01, + "learning_rate": 1.9975005915759586e-05, + "loss": 1.3071, + "step": 406200 + }, + { + "epoch": 6.01, + "learning_rate": 1.996761121628017e-05, + "loss": 1.3385, + "step": 406300 + }, + { + "epoch": 6.01, + "learning_rate": 1.9960216516800757e-05, + "loss": 1.3119, + "step": 406400 + }, + { + "epoch": 6.01, + "learning_rate": 1.9952821817321346e-05, + "loss": 1.2962, + "step": 406500 + }, + { + "epoch": 6.01, + "learning_rate": 1.9945427117841932e-05, + "loss": 1.29, + "step": 406600 + }, + { + "epoch": 6.01, + "learning_rate": 1.9938032418362517e-05, + "loss": 1.2855, + "step": 406700 + }, + { + "epoch": 6.02, + "learning_rate": 1.9930637718883106e-05, + "loss": 1.303, + "step": 406800 + }, + { + "epoch": 6.02, + "learning_rate": 1.9923243019403692e-05, + "loss": 1.2951, + "step": 406900 + }, + { + "epoch": 6.02, + "learning_rate": 1.991584831992428e-05, + "loss": 1.2959, + "step": 407000 + }, + { + "epoch": 6.02, + "learning_rate": 1.9908453620444867e-05, + "loss": 1.3224, + "step": 407100 + }, + { + "epoch": 6.02, + "learning_rate": 1.9901058920965456e-05, + "loss": 1.3235, + "step": 407200 + }, + { + "epoch": 6.02, + "learning_rate": 1.9893664221486038e-05, + "loss": 1.2923, + "step": 407300 + }, + { + "epoch": 6.03, + "learning_rate": 1.9886269522006627e-05, + "loss": 1.3111, + "step": 407400 + }, + { + "epoch": 6.03, + "learning_rate": 1.9878874822527213e-05, + "loss": 1.2695, + "step": 407500 + }, + { + "epoch": 6.03, + "learning_rate": 1.9871480123047802e-05, + "loss": 1.3087, + "step": 407600 + }, + { + "epoch": 6.03, + "learning_rate": 1.9864085423568387e-05, + "loss": 1.3093, + "step": 407700 + }, + { + "epoch": 6.03, + "learning_rate": 1.9856690724088976e-05, + "loss": 1.3171, + "step": 407800 + }, + { + "epoch": 6.03, + "learning_rate": 1.9849369971604357e-05, + "loss": 1.3192, + "step": 407900 + }, + { + "epoch": 6.03, + "learning_rate": 1.9841975272124942e-05, + "loss": 1.3245, + "step": 408000 + }, + { + "epoch": 6.04, + "learning_rate": 1.9834580572645528e-05, + "loss": 1.3174, + "step": 408100 + }, + { + "epoch": 6.04, + "learning_rate": 1.9827185873166114e-05, + "loss": 1.31, + "step": 408200 + }, + { + "epoch": 6.04, + "learning_rate": 1.9819791173686703e-05, + "loss": 1.3185, + "step": 408300 + }, + { + "epoch": 6.04, + "learning_rate": 1.981239647420729e-05, + "loss": 1.3052, + "step": 408400 + }, + { + "epoch": 6.04, + "learning_rate": 1.9805001774727877e-05, + "loss": 1.3141, + "step": 408500 + }, + { + "epoch": 6.04, + "learning_rate": 1.9797607075248463e-05, + "loss": 1.2849, + "step": 408600 + }, + { + "epoch": 6.04, + "learning_rate": 1.979021237576905e-05, + "loss": 1.3123, + "step": 408700 + }, + { + "epoch": 6.05, + "learning_rate": 1.9782817676289638e-05, + "loss": 1.3189, + "step": 408800 + }, + { + "epoch": 6.05, + "learning_rate": 1.9775422976810223e-05, + "loss": 1.2995, + "step": 408900 + }, + { + "epoch": 6.05, + "learning_rate": 1.976802827733081e-05, + "loss": 1.3152, + "step": 409000 + }, + { + "epoch": 6.05, + "learning_rate": 1.9760633577851398e-05, + "loss": 1.307, + "step": 409100 + }, + { + "epoch": 6.05, + "learning_rate": 1.9753238878371984e-05, + "loss": 1.3232, + "step": 409200 + }, + { + "epoch": 6.05, + "learning_rate": 1.974584417889257e-05, + "loss": 1.3295, + "step": 409300 + }, + { + "epoch": 6.05, + "learning_rate": 1.973844947941316e-05, + "loss": 1.2697, + "step": 409400 + }, + { + "epoch": 6.06, + "learning_rate": 1.9731054779933744e-05, + "loss": 1.2815, + "step": 409500 + }, + { + "epoch": 6.06, + "learning_rate": 1.9723660080454333e-05, + "loss": 1.2953, + "step": 409600 + }, + { + "epoch": 6.06, + "learning_rate": 1.971626538097492e-05, + "loss": 1.31, + "step": 409700 + }, + { + "epoch": 6.06, + "learning_rate": 1.9708870681495508e-05, + "loss": 1.3144, + "step": 409800 + }, + { + "epoch": 6.06, + "learning_rate": 1.970147598201609e-05, + "loss": 1.2992, + "step": 409900 + }, + { + "epoch": 6.06, + "learning_rate": 1.969408128253668e-05, + "loss": 1.3126, + "step": 410000 + }, + { + "epoch": 6.07, + "learning_rate": 1.9686686583057265e-05, + "loss": 1.3389, + "step": 410100 + }, + { + "epoch": 6.07, + "learning_rate": 1.9679291883577854e-05, + "loss": 1.3095, + "step": 410200 + }, + { + "epoch": 6.07, + "learning_rate": 1.9671971131093234e-05, + "loss": 1.3192, + "step": 410300 + }, + { + "epoch": 6.07, + "learning_rate": 1.966457643161382e-05, + "loss": 1.3074, + "step": 410400 + }, + { + "epoch": 6.07, + "learning_rate": 1.965718173213441e-05, + "loss": 1.295, + "step": 410500 + }, + { + "epoch": 6.07, + "learning_rate": 1.9649787032654994e-05, + "loss": 1.3407, + "step": 410600 + }, + { + "epoch": 6.07, + "learning_rate": 1.964239233317558e-05, + "loss": 1.3018, + "step": 410700 + }, + { + "epoch": 6.08, + "learning_rate": 1.9634997633696166e-05, + "loss": 1.2903, + "step": 410800 + }, + { + "epoch": 6.08, + "learning_rate": 1.9627602934216755e-05, + "loss": 1.2941, + "step": 410900 + }, + { + "epoch": 6.08, + "learning_rate": 1.962020823473734e-05, + "loss": 1.3169, + "step": 411000 + }, + { + "epoch": 6.08, + "learning_rate": 1.961281353525793e-05, + "loss": 1.3098, + "step": 411100 + }, + { + "epoch": 6.08, + "learning_rate": 1.9605418835778515e-05, + "loss": 1.3229, + "step": 411200 + }, + { + "epoch": 6.08, + "learning_rate": 1.95980241362991e-05, + "loss": 1.3166, + "step": 411300 + }, + { + "epoch": 6.08, + "learning_rate": 1.959062943681969e-05, + "loss": 1.3148, + "step": 411400 + }, + { + "epoch": 6.09, + "learning_rate": 1.9583234737340275e-05, + "loss": 1.3592, + "step": 411500 + }, + { + "epoch": 6.09, + "learning_rate": 1.957584003786086e-05, + "loss": 1.2856, + "step": 411600 + }, + { + "epoch": 6.09, + "learning_rate": 1.956844533838145e-05, + "loss": 1.2963, + "step": 411700 + }, + { + "epoch": 6.09, + "learning_rate": 1.9561050638902036e-05, + "loss": 1.2959, + "step": 411800 + }, + { + "epoch": 6.09, + "learning_rate": 1.955365593942262e-05, + "loss": 1.3177, + "step": 411900 + }, + { + "epoch": 6.09, + "learning_rate": 1.954626123994321e-05, + "loss": 1.2948, + "step": 412000 + }, + { + "epoch": 6.09, + "learning_rate": 1.9538866540463796e-05, + "loss": 1.2978, + "step": 412100 + }, + { + "epoch": 6.1, + "learning_rate": 1.9531471840984385e-05, + "loss": 1.29, + "step": 412200 + }, + { + "epoch": 6.1, + "learning_rate": 1.9524151088499765e-05, + "loss": 1.3074, + "step": 412300 + }, + { + "epoch": 6.1, + "learning_rate": 1.951675638902035e-05, + "loss": 1.3061, + "step": 412400 + }, + { + "epoch": 6.1, + "learning_rate": 1.950936168954094e-05, + "loss": 1.3222, + "step": 412500 + }, + { + "epoch": 6.1, + "learning_rate": 1.9501966990061522e-05, + "loss": 1.3071, + "step": 412600 + }, + { + "epoch": 6.1, + "learning_rate": 1.949457229058211e-05, + "loss": 1.3076, + "step": 412700 + }, + { + "epoch": 6.11, + "learning_rate": 1.9487177591102697e-05, + "loss": 1.3064, + "step": 412800 + }, + { + "epoch": 6.11, + "learning_rate": 1.9479782891623286e-05, + "loss": 1.2984, + "step": 412900 + }, + { + "epoch": 6.11, + "learning_rate": 1.947238819214387e-05, + "loss": 1.301, + "step": 413000 + }, + { + "epoch": 6.11, + "learning_rate": 1.946499349266446e-05, + "loss": 1.3197, + "step": 413100 + }, + { + "epoch": 6.11, + "learning_rate": 1.9457598793185043e-05, + "loss": 1.2934, + "step": 413200 + }, + { + "epoch": 6.11, + "learning_rate": 1.9450204093705632e-05, + "loss": 1.3354, + "step": 413300 + }, + { + "epoch": 6.11, + "learning_rate": 1.9442809394226218e-05, + "loss": 1.3234, + "step": 413400 + }, + { + "epoch": 6.12, + "learning_rate": 1.9435414694746807e-05, + "loss": 1.3227, + "step": 413500 + }, + { + "epoch": 6.12, + "learning_rate": 1.9428019995267392e-05, + "loss": 1.2968, + "step": 413600 + }, + { + "epoch": 6.12, + "learning_rate": 1.942062529578798e-05, + "loss": 1.3243, + "step": 413700 + }, + { + "epoch": 6.12, + "learning_rate": 1.9413230596308567e-05, + "loss": 1.2985, + "step": 413800 + }, + { + "epoch": 6.12, + "learning_rate": 1.9405835896829152e-05, + "loss": 1.3337, + "step": 413900 + }, + { + "epoch": 6.12, + "learning_rate": 1.939844119734974e-05, + "loss": 1.2948, + "step": 414000 + }, + { + "epoch": 6.12, + "learning_rate": 1.9391046497870327e-05, + "loss": 1.3173, + "step": 414100 + }, + { + "epoch": 6.13, + "learning_rate": 1.9383651798390913e-05, + "loss": 1.3159, + "step": 414200 + }, + { + "epoch": 6.13, + "learning_rate": 1.9376331045906297e-05, + "loss": 1.2999, + "step": 414300 + }, + { + "epoch": 6.13, + "learning_rate": 1.9368936346426882e-05, + "loss": 1.305, + "step": 414400 + }, + { + "epoch": 6.13, + "learning_rate": 1.936154164694747e-05, + "loss": 1.3407, + "step": 414500 + }, + { + "epoch": 6.13, + "learning_rate": 1.9354146947468054e-05, + "loss": 1.3466, + "step": 414600 + }, + { + "epoch": 6.13, + "learning_rate": 1.9346752247988643e-05, + "loss": 1.3178, + "step": 414700 + }, + { + "epoch": 6.13, + "learning_rate": 1.9339357548509228e-05, + "loss": 1.3016, + "step": 414800 + }, + { + "epoch": 6.14, + "learning_rate": 1.9331962849029817e-05, + "loss": 1.3056, + "step": 414900 + }, + { + "epoch": 6.14, + "learning_rate": 1.9324568149550403e-05, + "loss": 1.3012, + "step": 415000 + }, + { + "epoch": 6.14, + "learning_rate": 1.9317173450070992e-05, + "loss": 1.3319, + "step": 415100 + }, + { + "epoch": 6.14, + "learning_rate": 1.9309778750591574e-05, + "loss": 1.3177, + "step": 415200 + }, + { + "epoch": 6.14, + "learning_rate": 1.9302384051112163e-05, + "loss": 1.3214, + "step": 415300 + }, + { + "epoch": 6.14, + "learning_rate": 1.929498935163275e-05, + "loss": 1.3224, + "step": 415400 + }, + { + "epoch": 6.14, + "learning_rate": 1.9287594652153338e-05, + "loss": 1.3119, + "step": 415500 + }, + { + "epoch": 6.15, + "learning_rate": 1.9280199952673923e-05, + "loss": 1.3042, + "step": 415600 + }, + { + "epoch": 6.15, + "learning_rate": 1.9272805253194512e-05, + "loss": 1.3209, + "step": 415700 + }, + { + "epoch": 6.15, + "learning_rate": 1.9265410553715098e-05, + "loss": 1.2965, + "step": 415800 + }, + { + "epoch": 6.15, + "learning_rate": 1.9258015854235684e-05, + "loss": 1.3169, + "step": 415900 + }, + { + "epoch": 6.15, + "learning_rate": 1.925062115475627e-05, + "loss": 1.3387, + "step": 416000 + }, + { + "epoch": 6.15, + "learning_rate": 1.924322645527686e-05, + "loss": 1.2908, + "step": 416100 + }, + { + "epoch": 6.16, + "learning_rate": 1.9235831755797444e-05, + "loss": 1.287, + "step": 416200 + }, + { + "epoch": 6.16, + "learning_rate": 1.9228437056318033e-05, + "loss": 1.2972, + "step": 416300 + }, + { + "epoch": 6.16, + "learning_rate": 1.922104235683862e-05, + "loss": 1.31, + "step": 416400 + }, + { + "epoch": 6.16, + "learning_rate": 1.9213647657359204e-05, + "loss": 1.3056, + "step": 416500 + }, + { + "epoch": 6.16, + "learning_rate": 1.9206326904874585e-05, + "loss": 1.2996, + "step": 416600 + }, + { + "epoch": 6.16, + "learning_rate": 1.9198932205395174e-05, + "loss": 1.3038, + "step": 416700 + }, + { + "epoch": 6.16, + "learning_rate": 1.919153750591576e-05, + "loss": 1.3108, + "step": 416800 + }, + { + "epoch": 6.17, + "learning_rate": 1.918414280643635e-05, + "loss": 1.331, + "step": 416900 + }, + { + "epoch": 6.17, + "learning_rate": 1.9176748106956934e-05, + "loss": 1.3094, + "step": 417000 + }, + { + "epoch": 6.17, + "learning_rate": 1.9169353407477523e-05, + "loss": 1.3322, + "step": 417100 + }, + { + "epoch": 6.17, + "learning_rate": 1.9161958707998105e-05, + "loss": 1.2981, + "step": 417200 + }, + { + "epoch": 6.17, + "learning_rate": 1.9154564008518694e-05, + "loss": 1.3286, + "step": 417300 + }, + { + "epoch": 6.17, + "learning_rate": 1.914716930903928e-05, + "loss": 1.3048, + "step": 417400 + }, + { + "epoch": 6.17, + "learning_rate": 1.913977460955987e-05, + "loss": 1.3199, + "step": 417500 + }, + { + "epoch": 6.18, + "learning_rate": 1.9132379910080455e-05, + "loss": 1.3247, + "step": 417600 + }, + { + "epoch": 6.18, + "learning_rate": 1.9124985210601044e-05, + "loss": 1.3317, + "step": 417700 + }, + { + "epoch": 6.18, + "learning_rate": 1.911759051112163e-05, + "loss": 1.3038, + "step": 417800 + }, + { + "epoch": 6.18, + "learning_rate": 1.9110195811642215e-05, + "loss": 1.3216, + "step": 417900 + }, + { + "epoch": 6.18, + "learning_rate": 1.91028011121628e-05, + "loss": 1.3252, + "step": 418000 + }, + { + "epoch": 6.18, + "learning_rate": 1.909540641268339e-05, + "loss": 1.3289, + "step": 418100 + }, + { + "epoch": 6.18, + "learning_rate": 1.9088011713203975e-05, + "loss": 1.316, + "step": 418200 + }, + { + "epoch": 6.19, + "learning_rate": 1.9080617013724564e-05, + "loss": 1.3306, + "step": 418300 + }, + { + "epoch": 6.19, + "learning_rate": 1.907322231424515e-05, + "loss": 1.3096, + "step": 418400 + }, + { + "epoch": 6.19, + "learning_rate": 1.9065827614765736e-05, + "loss": 1.3107, + "step": 418500 + }, + { + "epoch": 6.19, + "learning_rate": 1.905843291528632e-05, + "loss": 1.3004, + "step": 418600 + }, + { + "epoch": 6.19, + "learning_rate": 1.905103821580691e-05, + "loss": 1.3107, + "step": 418700 + }, + { + "epoch": 6.19, + "learning_rate": 1.9043643516327496e-05, + "loss": 1.3253, + "step": 418800 + }, + { + "epoch": 6.2, + "learning_rate": 1.903632276384288e-05, + "loss": 1.3182, + "step": 418900 + }, + { + "epoch": 6.2, + "learning_rate": 1.9028928064363465e-05, + "loss": 1.3144, + "step": 419000 + }, + { + "epoch": 6.2, + "learning_rate": 1.9021533364884054e-05, + "loss": 1.324, + "step": 419100 + }, + { + "epoch": 6.2, + "learning_rate": 1.9014138665404637e-05, + "loss": 1.3132, + "step": 419200 + }, + { + "epoch": 6.2, + "learning_rate": 1.9006743965925226e-05, + "loss": 1.3285, + "step": 419300 + }, + { + "epoch": 6.2, + "learning_rate": 1.899934926644581e-05, + "loss": 1.304, + "step": 419400 + }, + { + "epoch": 6.2, + "learning_rate": 1.89919545669664e-05, + "loss": 1.3236, + "step": 419500 + }, + { + "epoch": 6.21, + "learning_rate": 1.8984559867486986e-05, + "loss": 1.319, + "step": 419600 + }, + { + "epoch": 6.21, + "learning_rate": 1.8977165168007575e-05, + "loss": 1.3406, + "step": 419700 + }, + { + "epoch": 6.21, + "learning_rate": 1.896977046852816e-05, + "loss": 1.31, + "step": 419800 + }, + { + "epoch": 6.21, + "learning_rate": 1.8962375769048746e-05, + "loss": 1.3161, + "step": 419900 + }, + { + "epoch": 6.21, + "learning_rate": 1.8954981069569332e-05, + "loss": 1.3136, + "step": 420000 + }, + { + "epoch": 6.21, + "learning_rate": 1.894758637008992e-05, + "loss": 1.3108, + "step": 420100 + }, + { + "epoch": 6.21, + "learning_rate": 1.8940191670610507e-05, + "loss": 1.3159, + "step": 420200 + }, + { + "epoch": 6.22, + "learning_rate": 1.8932796971131096e-05, + "loss": 1.295, + "step": 420300 + }, + { + "epoch": 6.22, + "learning_rate": 1.892540227165168e-05, + "loss": 1.289, + "step": 420400 + }, + { + "epoch": 6.22, + "learning_rate": 1.8918007572172267e-05, + "loss": 1.3052, + "step": 420500 + }, + { + "epoch": 6.22, + "learning_rate": 1.8910612872692853e-05, + "loss": 1.3103, + "step": 420600 + }, + { + "epoch": 6.22, + "learning_rate": 1.890321817321344e-05, + "loss": 1.3039, + "step": 420700 + }, + { + "epoch": 6.22, + "learning_rate": 1.8895823473734027e-05, + "loss": 1.3267, + "step": 420800 + }, + { + "epoch": 6.22, + "learning_rate": 1.888850272124941e-05, + "loss": 1.3086, + "step": 420900 + }, + { + "epoch": 6.23, + "learning_rate": 1.8881108021769997e-05, + "loss": 1.3426, + "step": 421000 + }, + { + "epoch": 6.23, + "learning_rate": 1.8873713322290586e-05, + "loss": 1.2891, + "step": 421100 + }, + { + "epoch": 6.23, + "learning_rate": 1.8866318622811168e-05, + "loss": 1.3103, + "step": 421200 + }, + { + "epoch": 6.23, + "learning_rate": 1.8858923923331757e-05, + "loss": 1.2971, + "step": 421300 + }, + { + "epoch": 6.23, + "learning_rate": 1.8851529223852343e-05, + "loss": 1.307, + "step": 421400 + }, + { + "epoch": 6.23, + "learning_rate": 1.884413452437293e-05, + "loss": 1.3149, + "step": 421500 + }, + { + "epoch": 6.24, + "learning_rate": 1.8836739824893517e-05, + "loss": 1.3105, + "step": 421600 + }, + { + "epoch": 6.24, + "learning_rate": 1.8829345125414106e-05, + "loss": 1.3224, + "step": 421700 + }, + { + "epoch": 6.24, + "learning_rate": 1.882195042593469e-05, + "loss": 1.3207, + "step": 421800 + }, + { + "epoch": 6.24, + "learning_rate": 1.8814555726455278e-05, + "loss": 1.3008, + "step": 421900 + }, + { + "epoch": 6.24, + "learning_rate": 1.8807161026975863e-05, + "loss": 1.3036, + "step": 422000 + }, + { + "epoch": 6.24, + "learning_rate": 1.8799766327496452e-05, + "loss": 1.3577, + "step": 422100 + }, + { + "epoch": 6.24, + "learning_rate": 1.8792371628017038e-05, + "loss": 1.3085, + "step": 422200 + }, + { + "epoch": 6.25, + "learning_rate": 1.8784976928537627e-05, + "loss": 1.3151, + "step": 422300 + }, + { + "epoch": 6.25, + "learning_rate": 1.8777582229058213e-05, + "loss": 1.3044, + "step": 422400 + }, + { + "epoch": 6.25, + "learning_rate": 1.8770187529578798e-05, + "loss": 1.3218, + "step": 422500 + }, + { + "epoch": 6.25, + "learning_rate": 1.8762792830099384e-05, + "loss": 1.3193, + "step": 422600 + }, + { + "epoch": 6.25, + "learning_rate": 1.8755398130619973e-05, + "loss": 1.3035, + "step": 422700 + }, + { + "epoch": 6.25, + "learning_rate": 1.874800343114056e-05, + "loss": 1.3294, + "step": 422800 + }, + { + "epoch": 6.25, + "learning_rate": 1.8740608731661148e-05, + "loss": 1.3061, + "step": 422900 + }, + { + "epoch": 6.26, + "learning_rate": 1.8733214032181733e-05, + "loss": 1.3061, + "step": 423000 + }, + { + "epoch": 6.26, + "learning_rate": 1.8725893279697117e-05, + "loss": 1.3326, + "step": 423100 + }, + { + "epoch": 6.26, + "learning_rate": 1.87184985802177e-05, + "loss": 1.3122, + "step": 423200 + }, + { + "epoch": 6.26, + "learning_rate": 1.8711103880738288e-05, + "loss": 1.2883, + "step": 423300 + }, + { + "epoch": 6.26, + "learning_rate": 1.8703709181258874e-05, + "loss": 1.3216, + "step": 423400 + }, + { + "epoch": 6.26, + "learning_rate": 1.8696314481779463e-05, + "loss": 1.3202, + "step": 423500 + }, + { + "epoch": 6.26, + "learning_rate": 1.868891978230005e-05, + "loss": 1.3252, + "step": 423600 + }, + { + "epoch": 6.27, + "learning_rate": 1.8681525082820638e-05, + "loss": 1.3244, + "step": 423700 + }, + { + "epoch": 6.27, + "learning_rate": 1.867413038334122e-05, + "loss": 1.3235, + "step": 423800 + }, + { + "epoch": 6.27, + "learning_rate": 1.866673568386181e-05, + "loss": 1.3133, + "step": 423900 + }, + { + "epoch": 6.27, + "learning_rate": 1.8659340984382395e-05, + "loss": 1.3134, + "step": 424000 + }, + { + "epoch": 6.27, + "learning_rate": 1.8651946284902984e-05, + "loss": 1.3105, + "step": 424100 + }, + { + "epoch": 6.27, + "learning_rate": 1.864455158542357e-05, + "loss": 1.3372, + "step": 424200 + }, + { + "epoch": 6.28, + "learning_rate": 1.8637156885944158e-05, + "loss": 1.2953, + "step": 424300 + }, + { + "epoch": 6.28, + "learning_rate": 1.8629762186464744e-05, + "loss": 1.3244, + "step": 424400 + }, + { + "epoch": 6.28, + "learning_rate": 1.862236748698533e-05, + "loss": 1.3118, + "step": 424500 + }, + { + "epoch": 6.28, + "learning_rate": 1.8614972787505915e-05, + "loss": 1.3281, + "step": 424600 + }, + { + "epoch": 6.28, + "learning_rate": 1.8607578088026504e-05, + "loss": 1.3204, + "step": 424700 + }, + { + "epoch": 6.28, + "learning_rate": 1.860018338854709e-05, + "loss": 1.3054, + "step": 424800 + }, + { + "epoch": 6.28, + "learning_rate": 1.859278868906768e-05, + "loss": 1.3257, + "step": 424900 + }, + { + "epoch": 6.29, + "learning_rate": 1.8585393989588264e-05, + "loss": 1.3074, + "step": 425000 + }, + { + "epoch": 6.29, + "learning_rate": 1.857799929010885e-05, + "loss": 1.2786, + "step": 425100 + }, + { + "epoch": 6.29, + "learning_rate": 1.8570604590629436e-05, + "loss": 1.3185, + "step": 425200 + }, + { + "epoch": 6.29, + "learning_rate": 1.8563209891150025e-05, + "loss": 1.3151, + "step": 425300 + }, + { + "epoch": 6.29, + "learning_rate": 1.855581519167061e-05, + "loss": 1.3146, + "step": 425400 + }, + { + "epoch": 6.29, + "learning_rate": 1.85484204921912e-05, + "loss": 1.3346, + "step": 425500 + }, + { + "epoch": 6.29, + "learning_rate": 1.854109973970658e-05, + "loss": 1.3238, + "step": 425600 + }, + { + "epoch": 6.3, + "learning_rate": 1.853370504022717e-05, + "loss": 1.334, + "step": 425700 + }, + { + "epoch": 6.3, + "learning_rate": 1.852631034074775e-05, + "loss": 1.3283, + "step": 425800 + }, + { + "epoch": 6.3, + "learning_rate": 1.851891564126834e-05, + "loss": 1.3043, + "step": 425900 + }, + { + "epoch": 6.3, + "learning_rate": 1.8511520941788926e-05, + "loss": 1.33, + "step": 426000 + }, + { + "epoch": 6.3, + "learning_rate": 1.8504126242309515e-05, + "loss": 1.3048, + "step": 426100 + }, + { + "epoch": 6.3, + "learning_rate": 1.84967315428301e-05, + "loss": 1.3415, + "step": 426200 + }, + { + "epoch": 6.3, + "learning_rate": 1.848941079034548e-05, + "loss": 1.3081, + "step": 426300 + }, + { + "epoch": 6.31, + "learning_rate": 1.848201609086607e-05, + "loss": 1.3149, + "step": 426400 + }, + { + "epoch": 6.31, + "learning_rate": 1.8474621391386656e-05, + "loss": 1.2996, + "step": 426500 + }, + { + "epoch": 6.31, + "learning_rate": 1.846722669190724e-05, + "loss": 1.3145, + "step": 426600 + }, + { + "epoch": 6.31, + "learning_rate": 1.8459831992427827e-05, + "loss": 1.3267, + "step": 426700 + }, + { + "epoch": 6.31, + "learning_rate": 1.8452437292948416e-05, + "loss": 1.3134, + "step": 426800 + }, + { + "epoch": 6.31, + "learning_rate": 1.8445042593469e-05, + "loss": 1.3304, + "step": 426900 + }, + { + "epoch": 6.32, + "learning_rate": 1.843764789398959e-05, + "loss": 1.313, + "step": 427000 + }, + { + "epoch": 6.32, + "learning_rate": 1.8430253194510176e-05, + "loss": 1.3097, + "step": 427100 + }, + { + "epoch": 6.32, + "learning_rate": 1.8422858495030762e-05, + "loss": 1.2905, + "step": 427200 + }, + { + "epoch": 6.32, + "learning_rate": 1.841546379555135e-05, + "loss": 1.335, + "step": 427300 + }, + { + "epoch": 6.32, + "learning_rate": 1.8408069096071937e-05, + "loss": 1.3508, + "step": 427400 + }, + { + "epoch": 6.32, + "learning_rate": 1.8400674396592522e-05, + "loss": 1.2862, + "step": 427500 + }, + { + "epoch": 6.32, + "learning_rate": 1.839327969711311e-05, + "loss": 1.3333, + "step": 427600 + }, + { + "epoch": 6.33, + "learning_rate": 1.8385884997633697e-05, + "loss": 1.3146, + "step": 427700 + }, + { + "epoch": 6.33, + "learning_rate": 1.8378490298154282e-05, + "loss": 1.3535, + "step": 427800 + }, + { + "epoch": 6.33, + "learning_rate": 1.837109559867487e-05, + "loss": 1.3073, + "step": 427900 + }, + { + "epoch": 6.33, + "learning_rate": 1.8363700899195457e-05, + "loss": 1.307, + "step": 428000 + }, + { + "epoch": 6.33, + "learning_rate": 1.8356306199716046e-05, + "loss": 1.3288, + "step": 428100 + }, + { + "epoch": 6.33, + "learning_rate": 1.8348911500236632e-05, + "loss": 1.3138, + "step": 428200 + }, + { + "epoch": 6.33, + "learning_rate": 1.834151680075722e-05, + "loss": 1.3284, + "step": 428300 + }, + { + "epoch": 6.34, + "learning_rate": 1.8334122101277803e-05, + "loss": 1.3484, + "step": 428400 + }, + { + "epoch": 6.34, + "learning_rate": 1.8326727401798392e-05, + "loss": 1.3264, + "step": 428500 + }, + { + "epoch": 6.34, + "learning_rate": 1.8319332702318978e-05, + "loss": 1.3349, + "step": 428600 + }, + { + "epoch": 6.34, + "learning_rate": 1.8311938002839567e-05, + "loss": 1.3181, + "step": 428700 + }, + { + "epoch": 6.34, + "learning_rate": 1.8304543303360152e-05, + "loss": 1.3138, + "step": 428800 + }, + { + "epoch": 6.34, + "learning_rate": 1.829714860388074e-05, + "loss": 1.3066, + "step": 428900 + }, + { + "epoch": 6.34, + "learning_rate": 1.8289753904401327e-05, + "loss": 1.3207, + "step": 429000 + }, + { + "epoch": 6.35, + "learning_rate": 1.8282359204921913e-05, + "loss": 1.3024, + "step": 429100 + }, + { + "epoch": 6.35, + "learning_rate": 1.82749645054425e-05, + "loss": 1.3032, + "step": 429200 + }, + { + "epoch": 6.35, + "learning_rate": 1.8267569805963087e-05, + "loss": 1.3102, + "step": 429300 + }, + { + "epoch": 6.35, + "learning_rate": 1.8260175106483673e-05, + "loss": 1.3163, + "step": 429400 + }, + { + "epoch": 6.35, + "learning_rate": 1.8252780407004262e-05, + "loss": 1.3271, + "step": 429500 + }, + { + "epoch": 6.35, + "learning_rate": 1.8245385707524848e-05, + "loss": 1.3158, + "step": 429600 + }, + { + "epoch": 6.35, + "learning_rate": 1.8237991008045437e-05, + "loss": 1.2956, + "step": 429700 + }, + { + "epoch": 6.36, + "learning_rate": 1.823059630856602e-05, + "loss": 1.2976, + "step": 429800 + }, + { + "epoch": 6.36, + "learning_rate": 1.8223201609086608e-05, + "loss": 1.3431, + "step": 429900 + }, + { + "epoch": 6.36, + "learning_rate": 1.8215806909607194e-05, + "loss": 1.3298, + "step": 430000 + }, + { + "epoch": 6.36, + "learning_rate": 1.8208412210127783e-05, + "loss": 1.3158, + "step": 430100 + }, + { + "epoch": 6.36, + "learning_rate": 1.8201017510648368e-05, + "loss": 1.3344, + "step": 430200 + }, + { + "epoch": 6.36, + "learning_rate": 1.819369675816375e-05, + "loss": 1.3292, + "step": 430300 + }, + { + "epoch": 6.37, + "learning_rate": 1.8186302058684334e-05, + "loss": 1.3028, + "step": 430400 + }, + { + "epoch": 6.37, + "learning_rate": 1.8178907359204923e-05, + "loss": 1.3061, + "step": 430500 + }, + { + "epoch": 6.37, + "learning_rate": 1.817151265972551e-05, + "loss": 1.3017, + "step": 430600 + }, + { + "epoch": 6.37, + "learning_rate": 1.8164117960246098e-05, + "loss": 1.3068, + "step": 430700 + }, + { + "epoch": 6.37, + "learning_rate": 1.8156723260766684e-05, + "loss": 1.3143, + "step": 430800 + }, + { + "epoch": 6.37, + "learning_rate": 1.8149328561287273e-05, + "loss": 1.3069, + "step": 430900 + }, + { + "epoch": 6.37, + "learning_rate": 1.814193386180786e-05, + "loss": 1.3217, + "step": 431000 + }, + { + "epoch": 6.38, + "learning_rate": 1.8134539162328444e-05, + "loss": 1.2817, + "step": 431100 + }, + { + "epoch": 6.38, + "learning_rate": 1.812714446284903e-05, + "loss": 1.3094, + "step": 431200 + }, + { + "epoch": 6.38, + "learning_rate": 1.811974976336962e-05, + "loss": 1.3225, + "step": 431300 + }, + { + "epoch": 6.38, + "learning_rate": 1.8112355063890204e-05, + "loss": 1.3027, + "step": 431400 + }, + { + "epoch": 6.38, + "learning_rate": 1.8104960364410793e-05, + "loss": 1.3292, + "step": 431500 + }, + { + "epoch": 6.38, + "learning_rate": 1.809756566493138e-05, + "loss": 1.3392, + "step": 431600 + }, + { + "epoch": 6.38, + "learning_rate": 1.8090170965451965e-05, + "loss": 1.3255, + "step": 431700 + }, + { + "epoch": 6.39, + "learning_rate": 1.808277626597255e-05, + "loss": 1.3341, + "step": 431800 + }, + { + "epoch": 6.39, + "learning_rate": 1.807538156649314e-05, + "loss": 1.3231, + "step": 431900 + }, + { + "epoch": 6.39, + "learning_rate": 1.8067986867013725e-05, + "loss": 1.3124, + "step": 432000 + }, + { + "epoch": 6.39, + "learning_rate": 1.8060592167534314e-05, + "loss": 1.3196, + "step": 432100 + }, + { + "epoch": 6.39, + "learning_rate": 1.80531974680549e-05, + "loss": 1.3053, + "step": 432200 + }, + { + "epoch": 6.39, + "learning_rate": 1.804587671557028e-05, + "loss": 1.3196, + "step": 432300 + }, + { + "epoch": 6.39, + "learning_rate": 1.8038482016090866e-05, + "loss": 1.3137, + "step": 432400 + }, + { + "epoch": 6.4, + "learning_rate": 1.8031087316611455e-05, + "loss": 1.3438, + "step": 432500 + }, + { + "epoch": 6.4, + "learning_rate": 1.802369261713204e-05, + "loss": 1.3149, + "step": 432600 + }, + { + "epoch": 6.4, + "learning_rate": 1.8016297917652626e-05, + "loss": 1.3215, + "step": 432700 + }, + { + "epoch": 6.4, + "learning_rate": 1.8008903218173215e-05, + "loss": 1.3219, + "step": 432800 + }, + { + "epoch": 6.4, + "learning_rate": 1.80015085186938e-05, + "loss": 1.3019, + "step": 432900 + }, + { + "epoch": 6.4, + "learning_rate": 1.799411381921439e-05, + "loss": 1.3268, + "step": 433000 + }, + { + "epoch": 6.41, + "learning_rate": 1.7986719119734975e-05, + "loss": 1.3214, + "step": 433100 + }, + { + "epoch": 6.41, + "learning_rate": 1.797932442025556e-05, + "loss": 1.3056, + "step": 433200 + }, + { + "epoch": 6.41, + "learning_rate": 1.797192972077615e-05, + "loss": 1.3126, + "step": 433300 + }, + { + "epoch": 6.41, + "learning_rate": 1.7964535021296736e-05, + "loss": 1.3256, + "step": 433400 + }, + { + "epoch": 6.41, + "learning_rate": 1.795714032181732e-05, + "loss": 1.3382, + "step": 433500 + }, + { + "epoch": 6.41, + "learning_rate": 1.794974562233791e-05, + "loss": 1.3231, + "step": 433600 + }, + { + "epoch": 6.41, + "learning_rate": 1.7942350922858496e-05, + "loss": 1.3147, + "step": 433700 + }, + { + "epoch": 6.42, + "learning_rate": 1.793495622337908e-05, + "loss": 1.3406, + "step": 433800 + }, + { + "epoch": 6.42, + "learning_rate": 1.792756152389967e-05, + "loss": 1.326, + "step": 433900 + }, + { + "epoch": 6.42, + "learning_rate": 1.7920166824420256e-05, + "loss": 1.3253, + "step": 434000 + }, + { + "epoch": 6.42, + "learning_rate": 1.7912772124940845e-05, + "loss": 1.3584, + "step": 434100 + }, + { + "epoch": 6.42, + "learning_rate": 1.790537742546143e-05, + "loss": 1.3207, + "step": 434200 + }, + { + "epoch": 6.42, + "learning_rate": 1.789805667297681e-05, + "loss": 1.3198, + "step": 434300 + }, + { + "epoch": 6.42, + "learning_rate": 1.7890661973497397e-05, + "loss": 1.3257, + "step": 434400 + }, + { + "epoch": 6.43, + "learning_rate": 1.7883267274017983e-05, + "loss": 1.2902, + "step": 434500 + }, + { + "epoch": 6.43, + "learning_rate": 1.787587257453857e-05, + "loss": 1.3508, + "step": 434600 + }, + { + "epoch": 6.43, + "learning_rate": 1.7868477875059157e-05, + "loss": 1.3252, + "step": 434700 + }, + { + "epoch": 6.43, + "learning_rate": 1.7861083175579746e-05, + "loss": 1.3117, + "step": 434800 + }, + { + "epoch": 6.43, + "learning_rate": 1.7853688476100332e-05, + "loss": 1.305, + "step": 434900 + }, + { + "epoch": 6.43, + "learning_rate": 1.7846293776620918e-05, + "loss": 1.3227, + "step": 435000 + }, + { + "epoch": 6.43, + "learning_rate": 1.7838899077141507e-05, + "loss": 1.3127, + "step": 435100 + }, + { + "epoch": 6.44, + "learning_rate": 1.7831504377662092e-05, + "loss": 1.3145, + "step": 435200 + }, + { + "epoch": 6.44, + "learning_rate": 1.7824109678182678e-05, + "loss": 1.3278, + "step": 435300 + }, + { + "epoch": 6.44, + "learning_rate": 1.7816714978703267e-05, + "loss": 1.3359, + "step": 435400 + }, + { + "epoch": 6.44, + "learning_rate": 1.7809320279223852e-05, + "loss": 1.3127, + "step": 435500 + }, + { + "epoch": 6.44, + "learning_rate": 1.780192557974444e-05, + "loss": 1.3307, + "step": 435600 + }, + { + "epoch": 6.44, + "learning_rate": 1.7794530880265027e-05, + "loss": 1.331, + "step": 435700 + }, + { + "epoch": 6.45, + "learning_rate": 1.7787136180785613e-05, + "loss": 1.3317, + "step": 435800 + }, + { + "epoch": 6.45, + "learning_rate": 1.7779741481306202e-05, + "loss": 1.3251, + "step": 435900 + }, + { + "epoch": 6.45, + "learning_rate": 1.7772346781826787e-05, + "loss": 1.3208, + "step": 436000 + }, + { + "epoch": 6.45, + "learning_rate": 1.7764952082347373e-05, + "loss": 1.33, + "step": 436100 + }, + { + "epoch": 6.45, + "learning_rate": 1.7757557382867962e-05, + "loss": 1.3235, + "step": 436200 + }, + { + "epoch": 6.45, + "learning_rate": 1.7750162683388548e-05, + "loss": 1.3241, + "step": 436300 + }, + { + "epoch": 6.45, + "learning_rate": 1.7742767983909133e-05, + "loss": 1.3134, + "step": 436400 + }, + { + "epoch": 6.46, + "learning_rate": 1.7735373284429722e-05, + "loss": 1.3205, + "step": 436500 + }, + { + "epoch": 6.46, + "learning_rate": 1.7727978584950308e-05, + "loss": 1.3342, + "step": 436600 + }, + { + "epoch": 6.46, + "learning_rate": 1.772065783246569e-05, + "loss": 1.3081, + "step": 436700 + }, + { + "epoch": 6.46, + "learning_rate": 1.7713263132986278e-05, + "loss": 1.3024, + "step": 436800 + }, + { + "epoch": 6.46, + "learning_rate": 1.7705868433506863e-05, + "loss": 1.312, + "step": 436900 + }, + { + "epoch": 6.46, + "learning_rate": 1.769847373402745e-05, + "loss": 1.3154, + "step": 437000 + }, + { + "epoch": 6.46, + "learning_rate": 1.7691079034548034e-05, + "loss": 1.3309, + "step": 437100 + }, + { + "epoch": 6.47, + "learning_rate": 1.7683684335068623e-05, + "loss": 1.3259, + "step": 437200 + }, + { + "epoch": 6.47, + "learning_rate": 1.767628963558921e-05, + "loss": 1.3199, + "step": 437300 + }, + { + "epoch": 6.47, + "learning_rate": 1.7668894936109798e-05, + "loss": 1.3255, + "step": 437400 + }, + { + "epoch": 6.47, + "learning_rate": 1.7661500236630384e-05, + "loss": 1.3273, + "step": 437500 + }, + { + "epoch": 6.47, + "learning_rate": 1.7654105537150973e-05, + "loss": 1.3096, + "step": 437600 + }, + { + "epoch": 6.47, + "learning_rate": 1.764671083767156e-05, + "loss": 1.3248, + "step": 437700 + }, + { + "epoch": 6.47, + "learning_rate": 1.7639316138192144e-05, + "loss": 1.3265, + "step": 437800 + }, + { + "epoch": 6.48, + "learning_rate": 1.763192143871273e-05, + "loss": 1.3172, + "step": 437900 + }, + { + "epoch": 6.48, + "learning_rate": 1.762452673923332e-05, + "loss": 1.2988, + "step": 438000 + }, + { + "epoch": 6.48, + "learning_rate": 1.7617132039753904e-05, + "loss": 1.3203, + "step": 438100 + }, + { + "epoch": 6.48, + "learning_rate": 1.7609737340274493e-05, + "loss": 1.299, + "step": 438200 + }, + { + "epoch": 6.48, + "learning_rate": 1.760234264079508e-05, + "loss": 1.3233, + "step": 438300 + }, + { + "epoch": 6.48, + "learning_rate": 1.7594947941315665e-05, + "loss": 1.3057, + "step": 438400 + }, + { + "epoch": 6.49, + "learning_rate": 1.7587553241836254e-05, + "loss": 1.3306, + "step": 438500 + }, + { + "epoch": 6.49, + "learning_rate": 1.758015854235684e-05, + "loss": 1.3206, + "step": 438600 + }, + { + "epoch": 6.49, + "learning_rate": 1.7572763842877425e-05, + "loss": 1.3287, + "step": 438700 + }, + { + "epoch": 6.49, + "learning_rate": 1.7565369143398014e-05, + "loss": 1.3222, + "step": 438800 + }, + { + "epoch": 6.49, + "learning_rate": 1.7558048390913394e-05, + "loss": 1.3195, + "step": 438900 + }, + { + "epoch": 6.49, + "learning_rate": 1.755065369143398e-05, + "loss": 1.3377, + "step": 439000 + }, + { + "epoch": 6.49, + "learning_rate": 1.7543258991954566e-05, + "loss": 1.3091, + "step": 439100 + }, + { + "epoch": 6.5, + "learning_rate": 1.7535864292475155e-05, + "loss": 1.322, + "step": 439200 + }, + { + "epoch": 6.5, + "learning_rate": 1.752846959299574e-05, + "loss": 1.3038, + "step": 439300 + }, + { + "epoch": 6.5, + "learning_rate": 1.752107489351633e-05, + "loss": 1.3374, + "step": 439400 + }, + { + "epoch": 6.5, + "learning_rate": 1.7513680194036915e-05, + "loss": 1.3355, + "step": 439500 + }, + { + "epoch": 6.5, + "learning_rate": 1.7506285494557504e-05, + "loss": 1.3014, + "step": 439600 + }, + { + "epoch": 6.5, + "learning_rate": 1.7498890795078086e-05, + "loss": 1.3244, + "step": 439700 + }, + { + "epoch": 6.5, + "learning_rate": 1.7491496095598675e-05, + "loss": 1.3296, + "step": 439800 + }, + { + "epoch": 6.51, + "learning_rate": 1.748410139611926e-05, + "loss": 1.3198, + "step": 439900 + }, + { + "epoch": 6.51, + "learning_rate": 1.747670669663985e-05, + "loss": 1.3444, + "step": 440000 + }, + { + "epoch": 6.51, + "learning_rate": 1.7469311997160436e-05, + "loss": 1.3117, + "step": 440100 + }, + { + "epoch": 6.51, + "learning_rate": 1.7461917297681025e-05, + "loss": 1.3113, + "step": 440200 + }, + { + "epoch": 6.51, + "learning_rate": 1.745452259820161e-05, + "loss": 1.3096, + "step": 440300 + }, + { + "epoch": 6.51, + "learning_rate": 1.7447127898722196e-05, + "loss": 1.3078, + "step": 440400 + }, + { + "epoch": 6.51, + "learning_rate": 1.743973319924278e-05, + "loss": 1.3229, + "step": 440500 + }, + { + "epoch": 6.52, + "learning_rate": 1.743233849976337e-05, + "loss": 1.3469, + "step": 440600 + }, + { + "epoch": 6.52, + "learning_rate": 1.7424943800283956e-05, + "loss": 1.2954, + "step": 440700 + }, + { + "epoch": 6.52, + "learning_rate": 1.7417549100804545e-05, + "loss": 1.3156, + "step": 440800 + }, + { + "epoch": 6.52, + "learning_rate": 1.741015440132513e-05, + "loss": 1.3009, + "step": 440900 + }, + { + "epoch": 6.52, + "learning_rate": 1.7402759701845717e-05, + "loss": 1.3199, + "step": 441000 + }, + { + "epoch": 6.52, + "learning_rate": 1.7395365002366306e-05, + "loss": 1.3091, + "step": 441100 + }, + { + "epoch": 6.53, + "learning_rate": 1.738797030288689e-05, + "loss": 1.3387, + "step": 441200 + }, + { + "epoch": 6.53, + "learning_rate": 1.7380575603407477e-05, + "loss": 1.35, + "step": 441300 + }, + { + "epoch": 6.53, + "learning_rate": 1.737325485092286e-05, + "loss": 1.3129, + "step": 441400 + }, + { + "epoch": 6.53, + "learning_rate": 1.7365860151443446e-05, + "loss": 1.3392, + "step": 441500 + }, + { + "epoch": 6.53, + "learning_rate": 1.7358465451964035e-05, + "loss": 1.3075, + "step": 441600 + }, + { + "epoch": 6.53, + "learning_rate": 1.7351070752484618e-05, + "loss": 1.2911, + "step": 441700 + }, + { + "epoch": 6.53, + "learning_rate": 1.7343676053005207e-05, + "loss": 1.3145, + "step": 441800 + }, + { + "epoch": 6.54, + "learning_rate": 1.7336281353525792e-05, + "loss": 1.3285, + "step": 441900 + }, + { + "epoch": 6.54, + "learning_rate": 1.732888665404638e-05, + "loss": 1.3345, + "step": 442000 + }, + { + "epoch": 6.54, + "learning_rate": 1.7321491954566967e-05, + "loss": 1.3333, + "step": 442100 + }, + { + "epoch": 6.54, + "learning_rate": 1.7314097255087556e-05, + "loss": 1.3261, + "step": 442200 + }, + { + "epoch": 6.54, + "learning_rate": 1.7306702555608138e-05, + "loss": 1.3124, + "step": 442300 + }, + { + "epoch": 6.54, + "learning_rate": 1.7299307856128727e-05, + "loss": 1.3013, + "step": 442400 + }, + { + "epoch": 6.54, + "learning_rate": 1.7291913156649313e-05, + "loss": 1.3096, + "step": 442500 + }, + { + "epoch": 6.55, + "learning_rate": 1.7284518457169902e-05, + "loss": 1.3168, + "step": 442600 + }, + { + "epoch": 6.55, + "learning_rate": 1.7277123757690488e-05, + "loss": 1.3436, + "step": 442700 + }, + { + "epoch": 6.55, + "learning_rate": 1.7269729058211077e-05, + "loss": 1.3138, + "step": 442800 + }, + { + "epoch": 6.55, + "learning_rate": 1.7262334358731662e-05, + "loss": 1.3065, + "step": 442900 + }, + { + "epoch": 6.55, + "learning_rate": 1.7254939659252248e-05, + "loss": 1.3174, + "step": 443000 + }, + { + "epoch": 6.55, + "learning_rate": 1.7247544959772833e-05, + "loss": 1.3175, + "step": 443100 + }, + { + "epoch": 6.55, + "learning_rate": 1.7240150260293422e-05, + "loss": 1.3192, + "step": 443200 + }, + { + "epoch": 6.56, + "learning_rate": 1.7232755560814008e-05, + "loss": 1.3283, + "step": 443300 + }, + { + "epoch": 6.56, + "learning_rate": 1.7225434808329392e-05, + "loss": 1.3126, + "step": 443400 + }, + { + "epoch": 6.56, + "learning_rate": 1.7218040108849978e-05, + "loss": 1.3312, + "step": 443500 + }, + { + "epoch": 6.56, + "learning_rate": 1.7210645409370563e-05, + "loss": 1.311, + "step": 443600 + }, + { + "epoch": 6.56, + "learning_rate": 1.720325070989115e-05, + "loss": 1.3149, + "step": 443700 + }, + { + "epoch": 6.56, + "learning_rate": 1.7195856010411738e-05, + "loss": 1.3187, + "step": 443800 + }, + { + "epoch": 6.56, + "learning_rate": 1.7188461310932324e-05, + "loss": 1.3222, + "step": 443900 + }, + { + "epoch": 6.57, + "learning_rate": 1.7181066611452913e-05, + "loss": 1.3159, + "step": 444000 + }, + { + "epoch": 6.57, + "learning_rate": 1.7173671911973498e-05, + "loss": 1.3199, + "step": 444100 + }, + { + "epoch": 6.57, + "learning_rate": 1.7166277212494087e-05, + "loss": 1.3582, + "step": 444200 + }, + { + "epoch": 6.57, + "learning_rate": 1.715888251301467e-05, + "loss": 1.3554, + "step": 444300 + }, + { + "epoch": 6.57, + "learning_rate": 1.715148781353526e-05, + "loss": 1.2932, + "step": 444400 + }, + { + "epoch": 6.57, + "learning_rate": 1.7144093114055844e-05, + "loss": 1.3013, + "step": 444500 + }, + { + "epoch": 6.58, + "learning_rate": 1.7136698414576433e-05, + "loss": 1.3086, + "step": 444600 + }, + { + "epoch": 6.58, + "learning_rate": 1.712930371509702e-05, + "loss": 1.317, + "step": 444700 + }, + { + "epoch": 6.58, + "learning_rate": 1.7121909015617608e-05, + "loss": 1.3181, + "step": 444800 + }, + { + "epoch": 6.58, + "learning_rate": 1.7114514316138193e-05, + "loss": 1.3348, + "step": 444900 + }, + { + "epoch": 6.58, + "learning_rate": 1.710711961665878e-05, + "loss": 1.3014, + "step": 445000 + }, + { + "epoch": 6.58, + "learning_rate": 1.7099724917179365e-05, + "loss": 1.3325, + "step": 445100 + }, + { + "epoch": 6.58, + "learning_rate": 1.7092330217699954e-05, + "loss": 1.3484, + "step": 445200 + }, + { + "epoch": 6.59, + "learning_rate": 1.708493551822054e-05, + "loss": 1.3093, + "step": 445300 + }, + { + "epoch": 6.59, + "learning_rate": 1.7077614765735923e-05, + "loss": 1.3348, + "step": 445400 + }, + { + "epoch": 6.59, + "learning_rate": 1.707022006625651e-05, + "loss": 1.3234, + "step": 445500 + }, + { + "epoch": 6.59, + "learning_rate": 1.7062825366777095e-05, + "loss": 1.32, + "step": 445600 + }, + { + "epoch": 6.59, + "learning_rate": 1.705543066729768e-05, + "loss": 1.3341, + "step": 445700 + }, + { + "epoch": 6.59, + "learning_rate": 1.704803596781827e-05, + "loss": 1.3174, + "step": 445800 + }, + { + "epoch": 6.59, + "learning_rate": 1.7040641268338855e-05, + "loss": 1.3154, + "step": 445900 + }, + { + "epoch": 6.6, + "learning_rate": 1.7033246568859444e-05, + "loss": 1.3126, + "step": 446000 + }, + { + "epoch": 6.6, + "learning_rate": 1.702585186938003e-05, + "loss": 1.311, + "step": 446100 + }, + { + "epoch": 6.6, + "learning_rate": 1.701845716990062e-05, + "loss": 1.2956, + "step": 446200 + }, + { + "epoch": 6.6, + "learning_rate": 1.70110624704212e-05, + "loss": 1.3377, + "step": 446300 + }, + { + "epoch": 6.6, + "learning_rate": 1.700366777094179e-05, + "loss": 1.3328, + "step": 446400 + }, + { + "epoch": 6.6, + "learning_rate": 1.6996273071462375e-05, + "loss": 1.3112, + "step": 446500 + }, + { + "epoch": 6.6, + "learning_rate": 1.6988878371982964e-05, + "loss": 1.3229, + "step": 446600 + }, + { + "epoch": 6.61, + "learning_rate": 1.698148367250355e-05, + "loss": 1.3158, + "step": 446700 + }, + { + "epoch": 6.61, + "learning_rate": 1.697408897302414e-05, + "loss": 1.32, + "step": 446800 + }, + { + "epoch": 6.61, + "learning_rate": 1.696669427354472e-05, + "loss": 1.3164, + "step": 446900 + }, + { + "epoch": 6.61, + "learning_rate": 1.695929957406531e-05, + "loss": 1.3064, + "step": 447000 + }, + { + "epoch": 6.61, + "learning_rate": 1.6951904874585896e-05, + "loss": 1.3424, + "step": 447100 + }, + { + "epoch": 6.61, + "learning_rate": 1.6944510175106485e-05, + "loss": 1.3339, + "step": 447200 + }, + { + "epoch": 6.62, + "learning_rate": 1.693711547562707e-05, + "loss": 1.3187, + "step": 447300 + }, + { + "epoch": 6.62, + "learning_rate": 1.692972077614766e-05, + "loss": 1.312, + "step": 447400 + }, + { + "epoch": 6.62, + "learning_rate": 1.6922326076668245e-05, + "loss": 1.3285, + "step": 447500 + }, + { + "epoch": 6.62, + "learning_rate": 1.6915005324183626e-05, + "loss": 1.3107, + "step": 447600 + }, + { + "epoch": 6.62, + "learning_rate": 1.690761062470421e-05, + "loss": 1.3171, + "step": 447700 + }, + { + "epoch": 6.62, + "learning_rate": 1.69002159252248e-05, + "loss": 1.3338, + "step": 447800 + }, + { + "epoch": 6.62, + "learning_rate": 1.6892821225745386e-05, + "loss": 1.3284, + "step": 447900 + }, + { + "epoch": 6.63, + "learning_rate": 1.6885426526265975e-05, + "loss": 1.3153, + "step": 448000 + }, + { + "epoch": 6.63, + "learning_rate": 1.687803182678656e-05, + "loss": 1.2977, + "step": 448100 + }, + { + "epoch": 6.63, + "learning_rate": 1.687063712730715e-05, + "loss": 1.326, + "step": 448200 + }, + { + "epoch": 6.63, + "learning_rate": 1.6863242427827732e-05, + "loss": 1.3189, + "step": 448300 + }, + { + "epoch": 6.63, + "learning_rate": 1.685584772834832e-05, + "loss": 1.3132, + "step": 448400 + }, + { + "epoch": 6.63, + "learning_rate": 1.6848453028868907e-05, + "loss": 1.3349, + "step": 448500 + }, + { + "epoch": 6.63, + "learning_rate": 1.6841058329389496e-05, + "loss": 1.3497, + "step": 448600 + }, + { + "epoch": 6.64, + "learning_rate": 1.683366362991008e-05, + "loss": 1.3184, + "step": 448700 + }, + { + "epoch": 6.64, + "learning_rate": 1.682626893043067e-05, + "loss": 1.3172, + "step": 448800 + }, + { + "epoch": 6.64, + "learning_rate": 1.6818874230951253e-05, + "loss": 1.321, + "step": 448900 + }, + { + "epoch": 6.64, + "learning_rate": 1.681147953147184e-05, + "loss": 1.3209, + "step": 449000 + }, + { + "epoch": 6.64, + "learning_rate": 1.6804084831992427e-05, + "loss": 1.3281, + "step": 449100 + }, + { + "epoch": 6.64, + "learning_rate": 1.6796690132513016e-05, + "loss": 1.3115, + "step": 449200 + }, + { + "epoch": 6.64, + "learning_rate": 1.6789295433033602e-05, + "loss": 1.3381, + "step": 449300 + }, + { + "epoch": 6.65, + "learning_rate": 1.678190073355419e-05, + "loss": 1.3086, + "step": 449400 + }, + { + "epoch": 6.65, + "learning_rate": 1.6774506034074777e-05, + "loss": 1.3236, + "step": 449500 + }, + { + "epoch": 6.65, + "learning_rate": 1.6767111334595362e-05, + "loss": 1.3444, + "step": 449600 + }, + { + "epoch": 6.65, + "learning_rate": 1.6759790582110743e-05, + "loss": 1.321, + "step": 449700 + }, + { + "epoch": 6.65, + "learning_rate": 1.6752395882631332e-05, + "loss": 1.3097, + "step": 449800 + }, + { + "epoch": 6.65, + "learning_rate": 1.6745001183151917e-05, + "loss": 1.3299, + "step": 449900 + }, + { + "epoch": 6.66, + "learning_rate": 1.6737606483672506e-05, + "loss": 1.3173, + "step": 450000 + }, + { + "epoch": 6.66, + "learning_rate": 1.6730211784193092e-05, + "loss": 1.3186, + "step": 450100 + }, + { + "epoch": 6.66, + "learning_rate": 1.6722817084713678e-05, + "loss": 1.3287, + "step": 450200 + }, + { + "epoch": 6.66, + "learning_rate": 1.6715422385234263e-05, + "loss": 1.3325, + "step": 450300 + }, + { + "epoch": 6.66, + "learning_rate": 1.6708027685754852e-05, + "loss": 1.3165, + "step": 450400 + }, + { + "epoch": 6.66, + "learning_rate": 1.6700632986275438e-05, + "loss": 1.296, + "step": 450500 + }, + { + "epoch": 6.66, + "learning_rate": 1.6693238286796027e-05, + "loss": 1.293, + "step": 450600 + }, + { + "epoch": 6.67, + "learning_rate": 1.6685843587316613e-05, + "loss": 1.305, + "step": 450700 + }, + { + "epoch": 6.67, + "learning_rate": 1.6678448887837202e-05, + "loss": 1.3075, + "step": 450800 + }, + { + "epoch": 6.67, + "learning_rate": 1.6671054188357784e-05, + "loss": 1.3307, + "step": 450900 + }, + { + "epoch": 6.67, + "learning_rate": 1.6663659488878373e-05, + "loss": 1.3202, + "step": 451000 + }, + { + "epoch": 6.67, + "learning_rate": 1.665626478939896e-05, + "loss": 1.3247, + "step": 451100 + }, + { + "epoch": 6.67, + "learning_rate": 1.6648870089919548e-05, + "loss": 1.3434, + "step": 451200 + }, + { + "epoch": 6.67, + "learning_rate": 1.6641475390440133e-05, + "loss": 1.3104, + "step": 451300 + }, + { + "epoch": 6.68, + "learning_rate": 1.6634080690960722e-05, + "loss": 1.3011, + "step": 451400 + }, + { + "epoch": 6.68, + "learning_rate": 1.6626685991481308e-05, + "loss": 1.2938, + "step": 451500 + }, + { + "epoch": 6.68, + "learning_rate": 1.6619291292001894e-05, + "loss": 1.338, + "step": 451600 + }, + { + "epoch": 6.68, + "learning_rate": 1.661189659252248e-05, + "loss": 1.3106, + "step": 451700 + }, + { + "epoch": 6.68, + "learning_rate": 1.6604575840037863e-05, + "loss": 1.3305, + "step": 451800 + }, + { + "epoch": 6.68, + "learning_rate": 1.659718114055845e-05, + "loss": 1.3023, + "step": 451900 + }, + { + "epoch": 6.68, + "learning_rate": 1.6589786441079034e-05, + "loss": 1.3374, + "step": 452000 + }, + { + "epoch": 6.69, + "learning_rate": 1.6582391741599623e-05, + "loss": 1.3123, + "step": 452100 + }, + { + "epoch": 6.69, + "learning_rate": 1.657499704212021e-05, + "loss": 1.3249, + "step": 452200 + }, + { + "epoch": 6.69, + "learning_rate": 1.6567602342640795e-05, + "loss": 1.3257, + "step": 452300 + }, + { + "epoch": 6.69, + "learning_rate": 1.6560207643161384e-05, + "loss": 1.3005, + "step": 452400 + }, + { + "epoch": 6.69, + "learning_rate": 1.655281294368197e-05, + "loss": 1.3334, + "step": 452500 + }, + { + "epoch": 6.69, + "learning_rate": 1.654541824420256e-05, + "loss": 1.3416, + "step": 452600 + }, + { + "epoch": 6.7, + "learning_rate": 1.6538023544723144e-05, + "loss": 1.3299, + "step": 452700 + }, + { + "epoch": 6.7, + "learning_rate": 1.6530628845243733e-05, + "loss": 1.3295, + "step": 452800 + }, + { + "epoch": 6.7, + "learning_rate": 1.6523234145764315e-05, + "loss": 1.317, + "step": 452900 + }, + { + "epoch": 6.7, + "learning_rate": 1.6515839446284904e-05, + "loss": 1.3268, + "step": 453000 + }, + { + "epoch": 6.7, + "learning_rate": 1.650844474680549e-05, + "loss": 1.2952, + "step": 453100 + }, + { + "epoch": 6.7, + "learning_rate": 1.650105004732608e-05, + "loss": 1.3316, + "step": 453200 + }, + { + "epoch": 6.7, + "learning_rate": 1.6493655347846665e-05, + "loss": 1.3075, + "step": 453300 + }, + { + "epoch": 6.71, + "learning_rate": 1.6486260648367254e-05, + "loss": 1.3111, + "step": 453400 + }, + { + "epoch": 6.71, + "learning_rate": 1.6478865948887836e-05, + "loss": 1.3325, + "step": 453500 + }, + { + "epoch": 6.71, + "learning_rate": 1.6471471249408425e-05, + "loss": 1.3321, + "step": 453600 + }, + { + "epoch": 6.71, + "learning_rate": 1.646407654992901e-05, + "loss": 1.3242, + "step": 453700 + }, + { + "epoch": 6.71, + "learning_rate": 1.645675579744439e-05, + "loss": 1.3271, + "step": 453800 + }, + { + "epoch": 6.71, + "learning_rate": 1.644936109796498e-05, + "loss": 1.3341, + "step": 453900 + }, + { + "epoch": 6.71, + "learning_rate": 1.6441966398485566e-05, + "loss": 1.3171, + "step": 454000 + }, + { + "epoch": 6.72, + "learning_rate": 1.6434571699006155e-05, + "loss": 1.2992, + "step": 454100 + }, + { + "epoch": 6.72, + "learning_rate": 1.642717699952674e-05, + "loss": 1.3427, + "step": 454200 + }, + { + "epoch": 6.72, + "learning_rate": 1.6419782300047326e-05, + "loss": 1.311, + "step": 454300 + }, + { + "epoch": 6.72, + "learning_rate": 1.6412387600567915e-05, + "loss": 1.3405, + "step": 454400 + }, + { + "epoch": 6.72, + "learning_rate": 1.64049929010885e-05, + "loss": 1.3322, + "step": 454500 + }, + { + "epoch": 6.72, + "learning_rate": 1.6397598201609086e-05, + "loss": 1.2961, + "step": 454600 + }, + { + "epoch": 6.72, + "learning_rate": 1.6390203502129675e-05, + "loss": 1.3218, + "step": 454700 + }, + { + "epoch": 6.73, + "learning_rate": 1.638280880265026e-05, + "loss": 1.319, + "step": 454800 + }, + { + "epoch": 6.73, + "learning_rate": 1.6375414103170847e-05, + "loss": 1.2974, + "step": 454900 + }, + { + "epoch": 6.73, + "learning_rate": 1.6368019403691436e-05, + "loss": 1.317, + "step": 455000 + }, + { + "epoch": 6.73, + "learning_rate": 1.636062470421202e-05, + "loss": 1.3258, + "step": 455100 + }, + { + "epoch": 6.73, + "learning_rate": 1.635323000473261e-05, + "loss": 1.3199, + "step": 455200 + }, + { + "epoch": 6.73, + "learning_rate": 1.6345835305253196e-05, + "loss": 1.3278, + "step": 455300 + }, + { + "epoch": 6.74, + "learning_rate": 1.6338440605773785e-05, + "loss": 1.3355, + "step": 455400 + }, + { + "epoch": 6.74, + "learning_rate": 1.6331045906294367e-05, + "loss": 1.3397, + "step": 455500 + }, + { + "epoch": 6.74, + "learning_rate": 1.6323651206814956e-05, + "loss": 1.3179, + "step": 455600 + }, + { + "epoch": 6.74, + "learning_rate": 1.6316256507335542e-05, + "loss": 1.3412, + "step": 455700 + }, + { + "epoch": 6.74, + "learning_rate": 1.630886180785613e-05, + "loss": 1.3348, + "step": 455800 + }, + { + "epoch": 6.74, + "learning_rate": 1.6301467108376716e-05, + "loss": 1.3337, + "step": 455900 + }, + { + "epoch": 6.74, + "learning_rate": 1.6294146355892097e-05, + "loss": 1.3003, + "step": 456000 + }, + { + "epoch": 6.75, + "learning_rate": 1.6286751656412686e-05, + "loss": 1.3339, + "step": 456100 + }, + { + "epoch": 6.75, + "learning_rate": 1.627935695693327e-05, + "loss": 1.3042, + "step": 456200 + }, + { + "epoch": 6.75, + "learning_rate": 1.6271962257453857e-05, + "loss": 1.3123, + "step": 456300 + }, + { + "epoch": 6.75, + "learning_rate": 1.6264567557974443e-05, + "loss": 1.3199, + "step": 456400 + }, + { + "epoch": 6.75, + "learning_rate": 1.6257172858495032e-05, + "loss": 1.3189, + "step": 456500 + }, + { + "epoch": 6.75, + "learning_rate": 1.6249778159015618e-05, + "loss": 1.3221, + "step": 456600 + }, + { + "epoch": 6.75, + "learning_rate": 1.6242383459536207e-05, + "loss": 1.3291, + "step": 456700 + }, + { + "epoch": 6.76, + "learning_rate": 1.6234988760056792e-05, + "loss": 1.3121, + "step": 456800 + }, + { + "epoch": 6.76, + "learning_rate": 1.6227594060577378e-05, + "loss": 1.3266, + "step": 456900 + }, + { + "epoch": 6.76, + "learning_rate": 1.6220199361097967e-05, + "loss": 1.3144, + "step": 457000 + }, + { + "epoch": 6.76, + "learning_rate": 1.6212804661618552e-05, + "loss": 1.3239, + "step": 457100 + }, + { + "epoch": 6.76, + "learning_rate": 1.6205409962139138e-05, + "loss": 1.3232, + "step": 457200 + }, + { + "epoch": 6.76, + "learning_rate": 1.6198015262659727e-05, + "loss": 1.2982, + "step": 457300 + }, + { + "epoch": 6.76, + "learning_rate": 1.6190620563180313e-05, + "loss": 1.3373, + "step": 457400 + }, + { + "epoch": 6.77, + "learning_rate": 1.61832258637009e-05, + "loss": 1.3407, + "step": 457500 + }, + { + "epoch": 6.77, + "learning_rate": 1.6175831164221487e-05, + "loss": 1.3172, + "step": 457600 + }, + { + "epoch": 6.77, + "learning_rate": 1.6168436464742073e-05, + "loss": 1.3153, + "step": 457700 + }, + { + "epoch": 6.77, + "learning_rate": 1.6161041765262662e-05, + "loss": 1.3215, + "step": 457800 + }, + { + "epoch": 6.77, + "learning_rate": 1.6153647065783248e-05, + "loss": 1.3311, + "step": 457900 + }, + { + "epoch": 6.77, + "learning_rate": 1.6146252366303837e-05, + "loss": 1.2825, + "step": 458000 + }, + { + "epoch": 6.77, + "learning_rate": 1.6138857666824422e-05, + "loss": 1.3204, + "step": 458100 + }, + { + "epoch": 6.78, + "learning_rate": 1.61315369143398e-05, + "loss": 1.32, + "step": 458200 + }, + { + "epoch": 6.78, + "learning_rate": 1.612414221486039e-05, + "loss": 1.3211, + "step": 458300 + }, + { + "epoch": 6.78, + "learning_rate": 1.6116747515380974e-05, + "loss": 1.3042, + "step": 458400 + }, + { + "epoch": 6.78, + "learning_rate": 1.6109426762896358e-05, + "loss": 1.3353, + "step": 458500 + }, + { + "epoch": 6.78, + "learning_rate": 1.6102032063416944e-05, + "loss": 1.3321, + "step": 458600 + }, + { + "epoch": 6.78, + "learning_rate": 1.6094637363937533e-05, + "loss": 1.3069, + "step": 458700 + }, + { + "epoch": 6.79, + "learning_rate": 1.6087242664458115e-05, + "loss": 1.3155, + "step": 458800 + }, + { + "epoch": 6.79, + "learning_rate": 1.6079847964978704e-05, + "loss": 1.3116, + "step": 458900 + }, + { + "epoch": 6.79, + "learning_rate": 1.607245326549929e-05, + "loss": 1.3339, + "step": 459000 + }, + { + "epoch": 6.79, + "learning_rate": 1.606505856601988e-05, + "loss": 1.3148, + "step": 459100 + }, + { + "epoch": 6.79, + "learning_rate": 1.6057663866540464e-05, + "loss": 1.3203, + "step": 459200 + }, + { + "epoch": 6.79, + "learning_rate": 1.6050269167061053e-05, + "loss": 1.2841, + "step": 459300 + }, + { + "epoch": 6.79, + "learning_rate": 1.604287446758164e-05, + "loss": 1.3188, + "step": 459400 + }, + { + "epoch": 6.8, + "learning_rate": 1.6035479768102225e-05, + "loss": 1.3186, + "step": 459500 + }, + { + "epoch": 6.8, + "learning_rate": 1.602808506862281e-05, + "loss": 1.3187, + "step": 459600 + }, + { + "epoch": 6.8, + "learning_rate": 1.60206903691434e-05, + "loss": 1.3222, + "step": 459700 + }, + { + "epoch": 6.8, + "learning_rate": 1.6013295669663985e-05, + "loss": 1.3505, + "step": 459800 + }, + { + "epoch": 6.8, + "learning_rate": 1.6005900970184574e-05, + "loss": 1.3107, + "step": 459900 + }, + { + "epoch": 6.8, + "learning_rate": 1.599850627070516e-05, + "loss": 1.3121, + "step": 460000 + }, + { + "epoch": 6.8, + "learning_rate": 1.5991111571225745e-05, + "loss": 1.3158, + "step": 460100 + }, + { + "epoch": 6.81, + "learning_rate": 1.598371687174633e-05, + "loss": 1.3174, + "step": 460200 + }, + { + "epoch": 6.81, + "learning_rate": 1.597632217226692e-05, + "loss": 1.331, + "step": 460300 + }, + { + "epoch": 6.81, + "learning_rate": 1.5968927472787505e-05, + "loss": 1.3001, + "step": 460400 + }, + { + "epoch": 6.81, + "learning_rate": 1.5961532773308094e-05, + "loss": 1.3252, + "step": 460500 + }, + { + "epoch": 6.81, + "learning_rate": 1.595413807382868e-05, + "loss": 1.3169, + "step": 460600 + }, + { + "epoch": 6.81, + "learning_rate": 1.594674337434927e-05, + "loss": 1.3255, + "step": 460700 + }, + { + "epoch": 6.81, + "learning_rate": 1.593934867486985e-05, + "loss": 1.3048, + "step": 460800 + }, + { + "epoch": 6.82, + "learning_rate": 1.593195397539044e-05, + "loss": 1.3218, + "step": 460900 + }, + { + "epoch": 6.82, + "learning_rate": 1.5924559275911026e-05, + "loss": 1.3091, + "step": 461000 + }, + { + "epoch": 6.82, + "learning_rate": 1.5917164576431615e-05, + "loss": 1.3193, + "step": 461100 + }, + { + "epoch": 6.82, + "learning_rate": 1.59097698769522e-05, + "loss": 1.3618, + "step": 461200 + }, + { + "epoch": 6.82, + "learning_rate": 1.590237517747279e-05, + "loss": 1.3294, + "step": 461300 + }, + { + "epoch": 6.82, + "learning_rate": 1.5894980477993375e-05, + "loss": 1.3209, + "step": 461400 + }, + { + "epoch": 6.83, + "learning_rate": 1.588758577851396e-05, + "loss": 1.3081, + "step": 461500 + }, + { + "epoch": 6.83, + "learning_rate": 1.5880191079034547e-05, + "loss": 1.3139, + "step": 461600 + }, + { + "epoch": 6.83, + "learning_rate": 1.5872796379555136e-05, + "loss": 1.3445, + "step": 461700 + }, + { + "epoch": 6.83, + "learning_rate": 1.586540168007572e-05, + "loss": 1.3122, + "step": 461800 + }, + { + "epoch": 6.83, + "learning_rate": 1.585800698059631e-05, + "loss": 1.3144, + "step": 461900 + }, + { + "epoch": 6.83, + "learning_rate": 1.5850612281116896e-05, + "loss": 1.3199, + "step": 462000 + }, + { + "epoch": 6.83, + "learning_rate": 1.584321758163748e-05, + "loss": 1.3159, + "step": 462100 + }, + { + "epoch": 6.84, + "learning_rate": 1.583582288215807e-05, + "loss": 1.3181, + "step": 462200 + }, + { + "epoch": 6.84, + "learning_rate": 1.5828428182678656e-05, + "loss": 1.3075, + "step": 462300 + }, + { + "epoch": 6.84, + "learning_rate": 1.5821033483199242e-05, + "loss": 1.3083, + "step": 462400 + }, + { + "epoch": 6.84, + "learning_rate": 1.581363878371983e-05, + "loss": 1.3271, + "step": 462500 + }, + { + "epoch": 6.84, + "learning_rate": 1.5806244084240417e-05, + "loss": 1.3035, + "step": 462600 + }, + { + "epoch": 6.84, + "learning_rate": 1.5798849384761006e-05, + "loss": 1.337, + "step": 462700 + }, + { + "epoch": 6.84, + "learning_rate": 1.5791528632276383e-05, + "loss": 1.3396, + "step": 462800 + }, + { + "epoch": 6.85, + "learning_rate": 1.578413393279697e-05, + "loss": 1.3239, + "step": 462900 + }, + { + "epoch": 6.85, + "learning_rate": 1.5776739233317557e-05, + "loss": 1.3315, + "step": 463000 + }, + { + "epoch": 6.85, + "learning_rate": 1.5769344533838146e-05, + "loss": 1.3083, + "step": 463100 + }, + { + "epoch": 6.85, + "learning_rate": 1.5761949834358732e-05, + "loss": 1.3186, + "step": 463200 + }, + { + "epoch": 6.85, + "learning_rate": 1.575455513487932e-05, + "loss": 1.3096, + "step": 463300 + }, + { + "epoch": 6.85, + "learning_rate": 1.5747160435399907e-05, + "loss": 1.316, + "step": 463400 + }, + { + "epoch": 6.85, + "learning_rate": 1.5739765735920492e-05, + "loss": 1.3168, + "step": 463500 + }, + { + "epoch": 6.86, + "learning_rate": 1.5732371036441078e-05, + "loss": 1.3204, + "step": 463600 + }, + { + "epoch": 6.86, + "learning_rate": 1.5724976336961667e-05, + "loss": 1.3035, + "step": 463700 + }, + { + "epoch": 6.86, + "learning_rate": 1.5717581637482253e-05, + "loss": 1.3266, + "step": 463800 + }, + { + "epoch": 6.86, + "learning_rate": 1.571018693800284e-05, + "loss": 1.3291, + "step": 463900 + }, + { + "epoch": 6.86, + "learning_rate": 1.5702792238523427e-05, + "loss": 1.3129, + "step": 464000 + }, + { + "epoch": 6.86, + "learning_rate": 1.5695397539044013e-05, + "loss": 1.3114, + "step": 464100 + }, + { + "epoch": 6.87, + "learning_rate": 1.56880028395646e-05, + "loss": 1.3404, + "step": 464200 + }, + { + "epoch": 6.87, + "learning_rate": 1.5680608140085188e-05, + "loss": 1.3237, + "step": 464300 + }, + { + "epoch": 6.87, + "learning_rate": 1.5673213440605773e-05, + "loss": 1.3351, + "step": 464400 + }, + { + "epoch": 6.87, + "learning_rate": 1.5665818741126362e-05, + "loss": 1.3233, + "step": 464500 + }, + { + "epoch": 6.87, + "learning_rate": 1.5658424041646948e-05, + "loss": 1.3406, + "step": 464600 + }, + { + "epoch": 6.87, + "learning_rate": 1.5651029342167537e-05, + "loss": 1.3241, + "step": 464700 + }, + { + "epoch": 6.87, + "learning_rate": 1.5643708589682914e-05, + "loss": 1.2915, + "step": 464800 + }, + { + "epoch": 6.88, + "learning_rate": 1.5636313890203503e-05, + "loss": 1.3257, + "step": 464900 + }, + { + "epoch": 6.88, + "learning_rate": 1.562891919072409e-05, + "loss": 1.3468, + "step": 465000 + }, + { + "epoch": 6.88, + "learning_rate": 1.5621524491244678e-05, + "loss": 1.3514, + "step": 465100 + }, + { + "epoch": 6.88, + "learning_rate": 1.5614129791765263e-05, + "loss": 1.3454, + "step": 465200 + }, + { + "epoch": 6.88, + "learning_rate": 1.5606735092285852e-05, + "loss": 1.3152, + "step": 465300 + }, + { + "epoch": 6.88, + "learning_rate": 1.5599340392806435e-05, + "loss": 1.3122, + "step": 465400 + }, + { + "epoch": 6.88, + "learning_rate": 1.5591945693327024e-05, + "loss": 1.3161, + "step": 465500 + }, + { + "epoch": 6.89, + "learning_rate": 1.558455099384761e-05, + "loss": 1.3005, + "step": 465600 + }, + { + "epoch": 6.89, + "learning_rate": 1.5577156294368198e-05, + "loss": 1.3242, + "step": 465700 + }, + { + "epoch": 6.89, + "learning_rate": 1.5569761594888784e-05, + "loss": 1.3209, + "step": 465800 + }, + { + "epoch": 6.89, + "learning_rate": 1.5562366895409373e-05, + "loss": 1.3109, + "step": 465900 + }, + { + "epoch": 6.89, + "learning_rate": 1.555497219592996e-05, + "loss": 1.3375, + "step": 466000 + }, + { + "epoch": 6.89, + "learning_rate": 1.5547577496450544e-05, + "loss": 1.3073, + "step": 466100 + }, + { + "epoch": 6.89, + "learning_rate": 1.554018279697113e-05, + "loss": 1.2985, + "step": 466200 + }, + { + "epoch": 6.9, + "learning_rate": 1.553278809749172e-05, + "loss": 1.3262, + "step": 466300 + }, + { + "epoch": 6.9, + "learning_rate": 1.5525393398012304e-05, + "loss": 1.3234, + "step": 466400 + }, + { + "epoch": 6.9, + "learning_rate": 1.5517998698532893e-05, + "loss": 1.3505, + "step": 466500 + }, + { + "epoch": 6.9, + "learning_rate": 1.551060399905348e-05, + "loss": 1.3172, + "step": 466600 + }, + { + "epoch": 6.9, + "learning_rate": 1.5503209299574068e-05, + "loss": 1.3248, + "step": 466700 + }, + { + "epoch": 6.9, + "learning_rate": 1.549581460009465e-05, + "loss": 1.3305, + "step": 466800 + }, + { + "epoch": 6.91, + "learning_rate": 1.548841990061524e-05, + "loss": 1.3285, + "step": 466900 + }, + { + "epoch": 6.91, + "learning_rate": 1.548109914813062e-05, + "loss": 1.293, + "step": 467000 + }, + { + "epoch": 6.91, + "learning_rate": 1.547370444865121e-05, + "loss": 1.324, + "step": 467100 + }, + { + "epoch": 6.91, + "learning_rate": 1.5466309749171795e-05, + "loss": 1.3245, + "step": 467200 + }, + { + "epoch": 6.91, + "learning_rate": 1.5458915049692384e-05, + "loss": 1.3246, + "step": 467300 + }, + { + "epoch": 6.91, + "learning_rate": 1.5451520350212966e-05, + "loss": 1.3213, + "step": 467400 + }, + { + "epoch": 6.91, + "learning_rate": 1.5444125650733555e-05, + "loss": 1.3233, + "step": 467500 + }, + { + "epoch": 6.92, + "learning_rate": 1.543673095125414e-05, + "loss": 1.3197, + "step": 467600 + }, + { + "epoch": 6.92, + "learning_rate": 1.542933625177473e-05, + "loss": 1.3403, + "step": 467700 + }, + { + "epoch": 6.92, + "learning_rate": 1.5421941552295315e-05, + "loss": 1.3305, + "step": 467800 + }, + { + "epoch": 6.92, + "learning_rate": 1.5414546852815904e-05, + "loss": 1.3211, + "step": 467900 + }, + { + "epoch": 6.92, + "learning_rate": 1.540715215333649e-05, + "loss": 1.3138, + "step": 468000 + }, + { + "epoch": 6.92, + "learning_rate": 1.5399757453857075e-05, + "loss": 1.3351, + "step": 468100 + }, + { + "epoch": 6.92, + "learning_rate": 1.539236275437766e-05, + "loss": 1.3292, + "step": 468200 + }, + { + "epoch": 6.93, + "learning_rate": 1.538496805489825e-05, + "loss": 1.3093, + "step": 468300 + }, + { + "epoch": 6.93, + "learning_rate": 1.5377573355418836e-05, + "loss": 1.3408, + "step": 468400 + }, + { + "epoch": 6.93, + "learning_rate": 1.5370178655939425e-05, + "loss": 1.3489, + "step": 468500 + }, + { + "epoch": 6.93, + "learning_rate": 1.536278395646001e-05, + "loss": 1.3413, + "step": 468600 + }, + { + "epoch": 6.93, + "learning_rate": 1.5355389256980596e-05, + "loss": 1.3191, + "step": 468700 + }, + { + "epoch": 6.93, + "learning_rate": 1.534799455750118e-05, + "loss": 1.3306, + "step": 468800 + }, + { + "epoch": 6.93, + "learning_rate": 1.534059985802177e-05, + "loss": 1.3207, + "step": 468900 + }, + { + "epoch": 6.94, + "learning_rate": 1.5333205158542356e-05, + "loss": 1.3008, + "step": 469000 + }, + { + "epoch": 6.94, + "learning_rate": 1.5325810459062945e-05, + "loss": 1.3296, + "step": 469100 + }, + { + "epoch": 6.94, + "learning_rate": 1.5318489706578326e-05, + "loss": 1.321, + "step": 469200 + }, + { + "epoch": 6.94, + "learning_rate": 1.5311095007098915e-05, + "loss": 1.3535, + "step": 469300 + }, + { + "epoch": 6.94, + "learning_rate": 1.5303700307619497e-05, + "loss": 1.3262, + "step": 469400 + }, + { + "epoch": 6.94, + "learning_rate": 1.5296305608140086e-05, + "loss": 1.3295, + "step": 469500 + }, + { + "epoch": 6.95, + "learning_rate": 1.5288910908660672e-05, + "loss": 1.3303, + "step": 469600 + }, + { + "epoch": 6.95, + "learning_rate": 1.528151620918126e-05, + "loss": 1.3194, + "step": 469700 + }, + { + "epoch": 6.95, + "learning_rate": 1.5274121509701846e-05, + "loss": 1.3047, + "step": 469800 + }, + { + "epoch": 6.95, + "learning_rate": 1.5266726810222435e-05, + "loss": 1.3347, + "step": 469900 + }, + { + "epoch": 6.95, + "learning_rate": 1.525933211074302e-05, + "loss": 1.3244, + "step": 470000 + }, + { + "epoch": 6.95, + "learning_rate": 1.5251937411263607e-05, + "loss": 1.3141, + "step": 470100 + }, + { + "epoch": 6.95, + "learning_rate": 1.5244542711784194e-05, + "loss": 1.3284, + "step": 470200 + }, + { + "epoch": 6.96, + "learning_rate": 1.523714801230478e-05, + "loss": 1.3122, + "step": 470300 + }, + { + "epoch": 6.96, + "learning_rate": 1.5229753312825367e-05, + "loss": 1.3263, + "step": 470400 + }, + { + "epoch": 6.96, + "learning_rate": 1.5222358613345954e-05, + "loss": 1.3488, + "step": 470500 + }, + { + "epoch": 6.96, + "learning_rate": 1.5214963913866542e-05, + "loss": 1.3126, + "step": 470600 + }, + { + "epoch": 6.96, + "learning_rate": 1.5207569214387127e-05, + "loss": 1.3342, + "step": 470700 + }, + { + "epoch": 6.96, + "learning_rate": 1.5200174514907715e-05, + "loss": 1.3197, + "step": 470800 + }, + { + "epoch": 6.96, + "learning_rate": 1.5192779815428302e-05, + "loss": 1.3212, + "step": 470900 + }, + { + "epoch": 6.97, + "learning_rate": 1.518538511594889e-05, + "loss": 1.3356, + "step": 471000 + }, + { + "epoch": 6.97, + "learning_rate": 1.5177990416469477e-05, + "loss": 1.3145, + "step": 471100 + }, + { + "epoch": 6.97, + "learning_rate": 1.5170669663984857e-05, + "loss": 1.3199, + "step": 471200 + }, + { + "epoch": 6.97, + "learning_rate": 1.5163274964505444e-05, + "loss": 1.3131, + "step": 471300 + }, + { + "epoch": 6.97, + "learning_rate": 1.5155880265026028e-05, + "loss": 1.3222, + "step": 471400 + }, + { + "epoch": 6.97, + "learning_rate": 1.5148485565546616e-05, + "loss": 1.3396, + "step": 471500 + }, + { + "epoch": 6.97, + "learning_rate": 1.5141090866067203e-05, + "loss": 1.3201, + "step": 471600 + }, + { + "epoch": 6.98, + "learning_rate": 1.513369616658779e-05, + "loss": 1.3197, + "step": 471700 + }, + { + "epoch": 6.98, + "learning_rate": 1.5126301467108378e-05, + "loss": 1.3451, + "step": 471800 + }, + { + "epoch": 6.98, + "learning_rate": 1.5118906767628965e-05, + "loss": 1.3368, + "step": 471900 + }, + { + "epoch": 6.98, + "learning_rate": 1.5111512068149549e-05, + "loss": 1.3204, + "step": 472000 + }, + { + "epoch": 6.98, + "learning_rate": 1.5104117368670136e-05, + "loss": 1.3387, + "step": 472100 + }, + { + "epoch": 6.98, + "learning_rate": 1.5096722669190724e-05, + "loss": 1.3113, + "step": 472200 + }, + { + "epoch": 6.98, + "learning_rate": 1.5089327969711311e-05, + "loss": 1.3052, + "step": 472300 + }, + { + "epoch": 6.99, + "learning_rate": 1.5081933270231898e-05, + "loss": 1.3321, + "step": 472400 + }, + { + "epoch": 6.99, + "learning_rate": 1.5074538570752486e-05, + "loss": 1.314, + "step": 472500 + }, + { + "epoch": 6.99, + "learning_rate": 1.5067143871273073e-05, + "loss": 1.3318, + "step": 472600 + }, + { + "epoch": 6.99, + "learning_rate": 1.5059749171793659e-05, + "loss": 1.3022, + "step": 472700 + }, + { + "epoch": 6.99, + "learning_rate": 1.5052354472314246e-05, + "loss": 1.3446, + "step": 472800 + }, + { + "epoch": 6.99, + "learning_rate": 1.5044959772834832e-05, + "loss": 1.3182, + "step": 472900 + }, + { + "epoch": 7.0, + "learning_rate": 1.5037565073355419e-05, + "loss": 1.3318, + "step": 473000 + }, + { + "epoch": 7.0, + "learning_rate": 1.5030170373876006e-05, + "loss": 1.3427, + "step": 473100 + }, + { + "epoch": 7.0, + "learning_rate": 1.5022849621391388e-05, + "loss": 1.2966, + "step": 473200 + }, + { + "epoch": 7.0, + "learning_rate": 1.5015454921911976e-05, + "loss": 1.3331, + "step": 473300 + } + ], + "max_steps": 676160, + "num_train_epochs": 10, + "total_flos": 2.891701043509248e+18, + "trial_name": null, + "trial_params": null +}