diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,52618 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 4.0, + "global_step": 87599, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 1.0000000000000002e-06, + "loss": 10.6468, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 2.0000000000000003e-06, + "loss": 10.6492, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 3e-06, + "loss": 10.6458, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.000000000000001e-06, + "loss": 10.6415, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 5e-06, + "loss": 10.6404, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 6e-06, + "loss": 10.6303, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 7.000000000000001e-06, + "loss": 10.6289, + "step": 70 + }, + { + "epoch": 0.0, + "learning_rate": 8.000000000000001e-06, + "loss": 10.6157, + "step": 80 + }, + { + "epoch": 0.0, + "learning_rate": 9e-06, + "loss": 10.6121, + "step": 90 + }, + { + "epoch": 0.0, + "learning_rate": 1e-05, + "loss": 10.6048, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 1.1000000000000001e-05, + "loss": 10.5845, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 1.2e-05, + "loss": 10.5538, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 1.3000000000000001e-05, + "loss": 10.5247, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 1.4000000000000001e-05, + "loss": 10.4147, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 1.5e-05, + "loss": 10.2692, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 1.6000000000000003e-05, + "loss": 10.0956, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 1.7000000000000003e-05, + "loss": 9.9124, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 1.8e-05, + "loss": 9.7317, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 1.9e-05, + "loss": 9.4866, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 2e-05, + "loss": 9.2719, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 2.1e-05, + "loss": 9.0888, + "step": 210 + }, + { + "epoch": 0.01, + "learning_rate": 2.2000000000000003e-05, + "loss": 8.8357, + "step": 220 + }, + { + "epoch": 0.01, + "learning_rate": 2.3000000000000003e-05, + "loss": 8.6098, + "step": 230 + }, + { + "epoch": 0.01, + "learning_rate": 2.4e-05, + "loss": 8.3784, + "step": 240 + }, + { + "epoch": 0.01, + "learning_rate": 2.5e-05, + "loss": 8.045, + "step": 250 + }, + { + "epoch": 0.01, + "learning_rate": 2.6000000000000002e-05, + "loss": 7.8679, + "step": 260 + }, + { + "epoch": 0.01, + "learning_rate": 2.7000000000000002e-05, + "loss": 7.6852, + "step": 270 + }, + { + "epoch": 0.01, + "learning_rate": 2.8000000000000003e-05, + "loss": 7.4301, + "step": 280 + }, + { + "epoch": 0.01, + "learning_rate": 2.9e-05, + "loss": 7.1625, + "step": 290 + }, + { + "epoch": 0.01, + "learning_rate": 3e-05, + "loss": 7.0188, + "step": 300 + }, + { + "epoch": 0.01, + "learning_rate": 3.1e-05, + "loss": 6.8507, + "step": 310 + }, + { + "epoch": 0.01, + "learning_rate": 3.2000000000000005e-05, + "loss": 6.6495, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 3.3e-05, + "loss": 6.7173, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 3.4000000000000007e-05, + "loss": 6.4475, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 3.5e-05, + "loss": 6.3956, + "step": 350 + }, + { + "epoch": 0.02, + "learning_rate": 3.6e-05, + "loss": 6.2667, + "step": 360 + }, + { + "epoch": 0.02, + "learning_rate": 3.7e-05, + "loss": 6.3509, + "step": 370 + }, + { + "epoch": 0.02, + "learning_rate": 3.8e-05, + "loss": 6.2078, + "step": 380 + }, + { + "epoch": 0.02, + "learning_rate": 3.9000000000000006e-05, + "loss": 6.2955, + "step": 390 + }, + { + "epoch": 0.02, + "learning_rate": 4e-05, + "loss": 6.2176, + "step": 400 + }, + { + "epoch": 0.02, + "learning_rate": 4.1e-05, + "loss": 6.2656, + "step": 410 + }, + { + "epoch": 0.02, + "learning_rate": 4.2e-05, + "loss": 6.0814, + "step": 420 + }, + { + "epoch": 0.02, + "learning_rate": 4.3e-05, + "loss": 6.0821, + "step": 430 + }, + { + "epoch": 0.02, + "learning_rate": 4.4000000000000006e-05, + "loss": 6.1788, + "step": 440 + }, + { + "epoch": 0.02, + "learning_rate": 4.5e-05, + "loss": 6.0505, + "step": 450 + }, + { + "epoch": 0.02, + "learning_rate": 4.600000000000001e-05, + "loss": 6.0381, + "step": 460 + }, + { + "epoch": 0.02, + "learning_rate": 4.7e-05, + "loss": 6.0533, + "step": 470 + }, + { + "epoch": 0.02, + "learning_rate": 4.8e-05, + "loss": 6.0918, + "step": 480 + }, + { + "epoch": 0.02, + "learning_rate": 4.9e-05, + "loss": 6.1718, + "step": 490 + }, + { + "epoch": 0.02, + "learning_rate": 5e-05, + "loss": 6.0889, + "step": 500 + }, + { + "epoch": 0.02, + "learning_rate": 4.9997711565746717e-05, + "loss": 5.9879, + "step": 510 + }, + { + "epoch": 0.02, + "learning_rate": 4.999542313149344e-05, + "loss": 6.0499, + "step": 520 + }, + { + "epoch": 0.02, + "learning_rate": 4.999313469724015e-05, + "loss": 6.0488, + "step": 530 + }, + { + "epoch": 0.02, + "learning_rate": 4.9990846262986866e-05, + "loss": 6.0059, + "step": 540 + }, + { + "epoch": 0.03, + "learning_rate": 4.998855782873359e-05, + "loss": 6.0262, + "step": 550 + }, + { + "epoch": 0.03, + "learning_rate": 4.99862693944803e-05, + "loss": 6.0835, + "step": 560 + }, + { + "epoch": 0.03, + "learning_rate": 4.9983980960227015e-05, + "loss": 6.0749, + "step": 570 + }, + { + "epoch": 0.03, + "learning_rate": 4.9981692525973736e-05, + "loss": 5.991, + "step": 580 + }, + { + "epoch": 0.03, + "learning_rate": 4.997940409172045e-05, + "loss": 5.951, + "step": 590 + }, + { + "epoch": 0.03, + "learning_rate": 4.997711565746716e-05, + "loss": 6.0269, + "step": 600 + }, + { + "epoch": 0.03, + "learning_rate": 4.997482722321388e-05, + "loss": 6.0419, + "step": 610 + }, + { + "epoch": 0.03, + "learning_rate": 4.997253878896059e-05, + "loss": 6.0267, + "step": 620 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970250354707306e-05, + "loss": 6.03, + "step": 630 + }, + { + "epoch": 0.03, + "learning_rate": 4.996796192045403e-05, + "loss": 6.0709, + "step": 640 + }, + { + "epoch": 0.03, + "learning_rate": 4.996567348620074e-05, + "loss": 6.0702, + "step": 650 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963385051947456e-05, + "loss": 5.9611, + "step": 660 + }, + { + "epoch": 0.03, + "learning_rate": 4.9961096617694177e-05, + "loss": 6.0348, + "step": 670 + }, + { + "epoch": 0.03, + "learning_rate": 4.995880818344089e-05, + "loss": 5.9904, + "step": 680 + }, + { + "epoch": 0.03, + "learning_rate": 4.9956519749187605e-05, + "loss": 5.9468, + "step": 690 + }, + { + "epoch": 0.03, + "learning_rate": 4.9954231314934326e-05, + "loss": 6.0501, + "step": 700 + }, + { + "epoch": 0.03, + "learning_rate": 4.995194288068104e-05, + "loss": 6.1255, + "step": 710 + }, + { + "epoch": 0.03, + "learning_rate": 4.9949654446427754e-05, + "loss": 5.9189, + "step": 720 + }, + { + "epoch": 0.03, + "learning_rate": 4.9947366012174475e-05, + "loss": 5.9645, + "step": 730 + }, + { + "epoch": 0.03, + "learning_rate": 4.994507757792119e-05, + "loss": 5.9799, + "step": 740 + }, + { + "epoch": 0.03, + "learning_rate": 4.99427891436679e-05, + "loss": 5.9145, + "step": 750 + }, + { + "epoch": 0.03, + "learning_rate": 4.9940500709414624e-05, + "loss": 5.9911, + "step": 760 + }, + { + "epoch": 0.04, + "learning_rate": 4.993821227516134e-05, + "loss": 5.8975, + "step": 770 + }, + { + "epoch": 0.04, + "learning_rate": 4.993592384090805e-05, + "loss": 5.9823, + "step": 780 + }, + { + "epoch": 0.04, + "learning_rate": 4.993363540665477e-05, + "loss": 5.9283, + "step": 790 + }, + { + "epoch": 0.04, + "learning_rate": 4.993134697240148e-05, + "loss": 5.8387, + "step": 800 + }, + { + "epoch": 0.04, + "learning_rate": 4.99290585381482e-05, + "loss": 5.9164, + "step": 810 + }, + { + "epoch": 0.04, + "learning_rate": 4.9926770103894916e-05, + "loss": 5.8926, + "step": 820 + }, + { + "epoch": 0.04, + "learning_rate": 4.992448166964163e-05, + "loss": 5.8451, + "step": 830 + }, + { + "epoch": 0.04, + "learning_rate": 4.992219323538835e-05, + "loss": 5.8911, + "step": 840 + }, + { + "epoch": 0.04, + "learning_rate": 4.9919904801135065e-05, + "loss": 5.9424, + "step": 850 + }, + { + "epoch": 0.04, + "learning_rate": 4.991761636688178e-05, + "loss": 5.9003, + "step": 860 + }, + { + "epoch": 0.04, + "learning_rate": 4.99153279326285e-05, + "loss": 5.8693, + "step": 870 + }, + { + "epoch": 0.04, + "learning_rate": 4.9913039498375214e-05, + "loss": 5.9383, + "step": 880 + }, + { + "epoch": 0.04, + "learning_rate": 4.991075106412193e-05, + "loss": 5.867, + "step": 890 + }, + { + "epoch": 0.04, + "learning_rate": 4.990846262986865e-05, + "loss": 5.7465, + "step": 900 + }, + { + "epoch": 0.04, + "learning_rate": 4.990617419561536e-05, + "loss": 5.7667, + "step": 910 + }, + { + "epoch": 0.04, + "learning_rate": 4.990388576136208e-05, + "loss": 5.8488, + "step": 920 + }, + { + "epoch": 0.04, + "learning_rate": 4.99015973271088e-05, + "loss": 5.7592, + "step": 930 + }, + { + "epoch": 0.04, + "learning_rate": 4.989930889285551e-05, + "loss": 5.8281, + "step": 940 + }, + { + "epoch": 0.04, + "learning_rate": 4.9897020458602226e-05, + "loss": 5.8211, + "step": 950 + }, + { + "epoch": 0.04, + "learning_rate": 4.989473202434895e-05, + "loss": 5.6165, + "step": 960 + }, + { + "epoch": 0.04, + "learning_rate": 4.989244359009566e-05, + "loss": 5.8037, + "step": 970 + }, + { + "epoch": 0.04, + "learning_rate": 4.9890155155842376e-05, + "loss": 5.7826, + "step": 980 + }, + { + "epoch": 0.05, + "learning_rate": 4.9887866721589097e-05, + "loss": 5.7144, + "step": 990 + }, + { + "epoch": 0.05, + "learning_rate": 4.9885578287335804e-05, + "loss": 5.6998, + "step": 1000 + }, + { + "epoch": 0.05, + "learning_rate": 4.988328985308252e-05, + "loss": 5.764, + "step": 1010 + }, + { + "epoch": 0.05, + "learning_rate": 4.988100141882924e-05, + "loss": 5.6322, + "step": 1020 + }, + { + "epoch": 0.05, + "learning_rate": 4.987871298457595e-05, + "loss": 5.7329, + "step": 1030 + }, + { + "epoch": 0.05, + "learning_rate": 4.987642455032267e-05, + "loss": 5.6375, + "step": 1040 + }, + { + "epoch": 0.05, + "learning_rate": 4.987413611606939e-05, + "loss": 5.5558, + "step": 1050 + }, + { + "epoch": 0.05, + "learning_rate": 4.98718476818161e-05, + "loss": 5.521, + "step": 1060 + }, + { + "epoch": 0.05, + "learning_rate": 4.9869559247562816e-05, + "loss": 5.717, + "step": 1070 + }, + { + "epoch": 0.05, + "learning_rate": 4.986727081330954e-05, + "loss": 5.5194, + "step": 1080 + }, + { + "epoch": 0.05, + "learning_rate": 4.986498237905625e-05, + "loss": 5.5116, + "step": 1090 + }, + { + "epoch": 0.05, + "learning_rate": 4.9862693944802966e-05, + "loss": 5.6348, + "step": 1100 + }, + { + "epoch": 0.05, + "learning_rate": 4.9860405510549687e-05, + "loss": 5.3917, + "step": 1110 + }, + { + "epoch": 0.05, + "learning_rate": 4.98581170762964e-05, + "loss": 5.6154, + "step": 1120 + }, + { + "epoch": 0.05, + "learning_rate": 4.9855828642043115e-05, + "loss": 5.4854, + "step": 1130 + }, + { + "epoch": 0.05, + "learning_rate": 4.9853540207789836e-05, + "loss": 5.564, + "step": 1140 + }, + { + "epoch": 0.05, + "learning_rate": 4.985125177353655e-05, + "loss": 5.4132, + "step": 1150 + }, + { + "epoch": 0.05, + "learning_rate": 4.9848963339283264e-05, + "loss": 5.4725, + "step": 1160 + }, + { + "epoch": 0.05, + "learning_rate": 4.9846674905029985e-05, + "loss": 5.4318, + "step": 1170 + }, + { + "epoch": 0.05, + "learning_rate": 4.98443864707767e-05, + "loss": 5.3892, + "step": 1180 + }, + { + "epoch": 0.05, + "learning_rate": 4.984209803652341e-05, + "loss": 5.3275, + "step": 1190 + }, + { + "epoch": 0.05, + "learning_rate": 4.983980960227013e-05, + "loss": 5.3651, + "step": 1200 + }, + { + "epoch": 0.06, + "learning_rate": 4.983752116801684e-05, + "loss": 5.4367, + "step": 1210 + }, + { + "epoch": 0.06, + "learning_rate": 4.983523273376356e-05, + "loss": 5.3669, + "step": 1220 + }, + { + "epoch": 0.06, + "learning_rate": 4.9832944299510276e-05, + "loss": 5.2643, + "step": 1230 + }, + { + "epoch": 0.06, + "learning_rate": 4.983065586525699e-05, + "loss": 5.3319, + "step": 1240 + }, + { + "epoch": 0.06, + "learning_rate": 4.982836743100371e-05, + "loss": 5.4298, + "step": 1250 + }, + { + "epoch": 0.06, + "learning_rate": 4.9826078996750426e-05, + "loss": 5.1351, + "step": 1260 + }, + { + "epoch": 0.06, + "learning_rate": 4.982379056249714e-05, + "loss": 5.2959, + "step": 1270 + }, + { + "epoch": 0.06, + "learning_rate": 4.982150212824386e-05, + "loss": 5.3613, + "step": 1280 + }, + { + "epoch": 0.06, + "learning_rate": 4.9819213693990575e-05, + "loss": 5.1739, + "step": 1290 + }, + { + "epoch": 0.06, + "learning_rate": 4.981692525973729e-05, + "loss": 5.2899, + "step": 1300 + }, + { + "epoch": 0.06, + "learning_rate": 4.981463682548401e-05, + "loss": 5.1881, + "step": 1310 + }, + { + "epoch": 0.06, + "learning_rate": 4.9812348391230724e-05, + "loss": 5.2533, + "step": 1320 + }, + { + "epoch": 0.06, + "learning_rate": 4.981005995697744e-05, + "loss": 5.1671, + "step": 1330 + }, + { + "epoch": 0.06, + "learning_rate": 4.980777152272416e-05, + "loss": 5.193, + "step": 1340 + }, + { + "epoch": 0.06, + "learning_rate": 4.980548308847087e-05, + "loss": 5.5243, + "step": 1350 + }, + { + "epoch": 0.06, + "learning_rate": 4.980319465421759e-05, + "loss": 4.9803, + "step": 1360 + }, + { + "epoch": 0.06, + "learning_rate": 4.980090621996431e-05, + "loss": 5.0336, + "step": 1370 + }, + { + "epoch": 0.06, + "learning_rate": 4.979861778571102e-05, + "loss": 4.9251, + "step": 1380 + }, + { + "epoch": 0.06, + "learning_rate": 4.979632935145773e-05, + "loss": 5.0601, + "step": 1390 + }, + { + "epoch": 0.06, + "learning_rate": 4.979404091720445e-05, + "loss": 4.8293, + "step": 1400 + }, + { + "epoch": 0.06, + "learning_rate": 4.9791752482951165e-05, + "loss": 5.1071, + "step": 1410 + }, + { + "epoch": 0.06, + "learning_rate": 4.978946404869788e-05, + "loss": 5.0192, + "step": 1420 + }, + { + "epoch": 0.07, + "learning_rate": 4.97871756144446e-05, + "loss": 4.974, + "step": 1430 + }, + { + "epoch": 0.07, + "learning_rate": 4.9784887180191314e-05, + "loss": 4.9292, + "step": 1440 + }, + { + "epoch": 0.07, + "learning_rate": 4.978259874593803e-05, + "loss": 4.9188, + "step": 1450 + }, + { + "epoch": 0.07, + "learning_rate": 4.978031031168475e-05, + "loss": 4.9091, + "step": 1460 + }, + { + "epoch": 0.07, + "learning_rate": 4.977802187743146e-05, + "loss": 4.9007, + "step": 1470 + }, + { + "epoch": 0.07, + "learning_rate": 4.977573344317818e-05, + "loss": 4.7506, + "step": 1480 + }, + { + "epoch": 0.07, + "learning_rate": 4.97734450089249e-05, + "loss": 4.9873, + "step": 1490 + }, + { + "epoch": 0.07, + "learning_rate": 4.977115657467161e-05, + "loss": 4.9096, + "step": 1500 + }, + { + "epoch": 0.07, + "learning_rate": 4.9768868140418326e-05, + "loss": 4.8511, + "step": 1510 + }, + { + "epoch": 0.07, + "learning_rate": 4.976657970616505e-05, + "loss": 4.8182, + "step": 1520 + }, + { + "epoch": 0.07, + "learning_rate": 4.976429127191176e-05, + "loss": 4.6981, + "step": 1530 + }, + { + "epoch": 0.07, + "learning_rate": 4.9762002837658476e-05, + "loss": 4.7869, + "step": 1540 + }, + { + "epoch": 0.07, + "learning_rate": 4.9759714403405196e-05, + "loss": 4.5999, + "step": 1550 + }, + { + "epoch": 0.07, + "learning_rate": 4.975742596915191e-05, + "loss": 4.7654, + "step": 1560 + }, + { + "epoch": 0.07, + "learning_rate": 4.9755137534898625e-05, + "loss": 4.8185, + "step": 1570 + }, + { + "epoch": 0.07, + "learning_rate": 4.9752849100645346e-05, + "loss": 5.0029, + "step": 1580 + }, + { + "epoch": 0.07, + "learning_rate": 4.975056066639205e-05, + "loss": 4.8317, + "step": 1590 + }, + { + "epoch": 0.07, + "learning_rate": 4.9748272232138774e-05, + "loss": 4.7292, + "step": 1600 + }, + { + "epoch": 0.07, + "learning_rate": 4.974598379788549e-05, + "loss": 4.6309, + "step": 1610 + }, + { + "epoch": 0.07, + "learning_rate": 4.97436953636322e-05, + "loss": 4.6402, + "step": 1620 + }, + { + "epoch": 0.07, + "learning_rate": 4.974140692937892e-05, + "loss": 4.6918, + "step": 1630 + }, + { + "epoch": 0.07, + "learning_rate": 4.973911849512564e-05, + "loss": 4.5851, + "step": 1640 + }, + { + "epoch": 0.08, + "learning_rate": 4.973683006087235e-05, + "loss": 4.6675, + "step": 1650 + }, + { + "epoch": 0.08, + "learning_rate": 4.973454162661907e-05, + "loss": 4.5958, + "step": 1660 + }, + { + "epoch": 0.08, + "learning_rate": 4.9732253192365786e-05, + "loss": 4.689, + "step": 1670 + }, + { + "epoch": 0.08, + "learning_rate": 4.97299647581125e-05, + "loss": 4.4157, + "step": 1680 + }, + { + "epoch": 0.08, + "learning_rate": 4.9727676323859215e-05, + "loss": 4.5446, + "step": 1690 + }, + { + "epoch": 0.08, + "learning_rate": 4.9725387889605936e-05, + "loss": 4.4268, + "step": 1700 + }, + { + "epoch": 0.08, + "learning_rate": 4.972309945535265e-05, + "loss": 4.4364, + "step": 1710 + }, + { + "epoch": 0.08, + "learning_rate": 4.9720811021099364e-05, + "loss": 4.5106, + "step": 1720 + }, + { + "epoch": 0.08, + "learning_rate": 4.9718522586846085e-05, + "loss": 4.6897, + "step": 1730 + }, + { + "epoch": 0.08, + "learning_rate": 4.97162341525928e-05, + "loss": 4.4631, + "step": 1740 + }, + { + "epoch": 0.08, + "learning_rate": 4.971394571833951e-05, + "loss": 4.5942, + "step": 1750 + }, + { + "epoch": 0.08, + "learning_rate": 4.9711657284086234e-05, + "loss": 4.4797, + "step": 1760 + }, + { + "epoch": 0.08, + "learning_rate": 4.970936884983295e-05, + "loss": 4.3681, + "step": 1770 + }, + { + "epoch": 0.08, + "learning_rate": 4.970708041557966e-05, + "loss": 4.4252, + "step": 1780 + }, + { + "epoch": 0.08, + "learning_rate": 4.9704791981326376e-05, + "loss": 4.3536, + "step": 1790 + }, + { + "epoch": 0.08, + "learning_rate": 4.970250354707309e-05, + "loss": 4.5191, + "step": 1800 + }, + { + "epoch": 0.08, + "learning_rate": 4.970021511281981e-05, + "loss": 4.2711, + "step": 1810 + }, + { + "epoch": 0.08, + "learning_rate": 4.9697926678566526e-05, + "loss": 4.5769, + "step": 1820 + }, + { + "epoch": 0.08, + "learning_rate": 4.969563824431324e-05, + "loss": 4.2891, + "step": 1830 + }, + { + "epoch": 0.08, + "learning_rate": 4.969334981005996e-05, + "loss": 4.3045, + "step": 1840 + }, + { + "epoch": 0.08, + "learning_rate": 4.9691061375806675e-05, + "loss": 4.4258, + "step": 1850 + }, + { + "epoch": 0.08, + "learning_rate": 4.968877294155339e-05, + "loss": 4.2687, + "step": 1860 + }, + { + "epoch": 0.09, + "learning_rate": 4.968648450730011e-05, + "loss": 4.4235, + "step": 1870 + }, + { + "epoch": 0.09, + "learning_rate": 4.9684196073046824e-05, + "loss": 4.1402, + "step": 1880 + }, + { + "epoch": 0.09, + "learning_rate": 4.968190763879354e-05, + "loss": 4.209, + "step": 1890 + }, + { + "epoch": 0.09, + "learning_rate": 4.967961920454026e-05, + "loss": 4.2591, + "step": 1900 + }, + { + "epoch": 0.09, + "learning_rate": 4.967733077028697e-05, + "loss": 4.144, + "step": 1910 + }, + { + "epoch": 0.09, + "learning_rate": 4.967504233603369e-05, + "loss": 4.283, + "step": 1920 + }, + { + "epoch": 0.09, + "learning_rate": 4.967275390178041e-05, + "loss": 4.3179, + "step": 1930 + }, + { + "epoch": 0.09, + "learning_rate": 4.967046546752712e-05, + "loss": 4.1564, + "step": 1940 + }, + { + "epoch": 0.09, + "learning_rate": 4.9668177033273836e-05, + "loss": 4.1144, + "step": 1950 + }, + { + "epoch": 0.09, + "learning_rate": 4.966588859902056e-05, + "loss": 4.3768, + "step": 1960 + }, + { + "epoch": 0.09, + "learning_rate": 4.966360016476727e-05, + "loss": 4.3718, + "step": 1970 + }, + { + "epoch": 0.09, + "learning_rate": 4.9661311730513986e-05, + "loss": 4.1446, + "step": 1980 + }, + { + "epoch": 0.09, + "learning_rate": 4.96590232962607e-05, + "loss": 4.1684, + "step": 1990 + }, + { + "epoch": 0.09, + "learning_rate": 4.9656734862007414e-05, + "loss": 4.1453, + "step": 2000 + }, + { + "epoch": 0.09, + "learning_rate": 4.965444642775413e-05, + "loss": 4.0921, + "step": 2010 + }, + { + "epoch": 0.09, + "learning_rate": 4.965215799350085e-05, + "loss": 4.115, + "step": 2020 + }, + { + "epoch": 0.09, + "learning_rate": 4.964986955924756e-05, + "loss": 4.094, + "step": 2030 + }, + { + "epoch": 0.09, + "learning_rate": 4.964758112499428e-05, + "loss": 4.0735, + "step": 2040 + }, + { + "epoch": 0.09, + "learning_rate": 4.9645292690741e-05, + "loss": 4.0245, + "step": 2050 + }, + { + "epoch": 0.09, + "learning_rate": 4.964300425648771e-05, + "loss": 3.9754, + "step": 2060 + }, + { + "epoch": 0.09, + "learning_rate": 4.9640715822234426e-05, + "loss": 4.1716, + "step": 2070 + }, + { + "epoch": 0.09, + "learning_rate": 4.963842738798115e-05, + "loss": 3.8179, + "step": 2080 + }, + { + "epoch": 0.1, + "learning_rate": 4.963613895372786e-05, + "loss": 4.036, + "step": 2090 + }, + { + "epoch": 0.1, + "learning_rate": 4.9633850519474575e-05, + "loss": 4.2522, + "step": 2100 + }, + { + "epoch": 0.1, + "learning_rate": 4.9631562085221296e-05, + "loss": 3.9801, + "step": 2110 + }, + { + "epoch": 0.1, + "learning_rate": 4.962927365096801e-05, + "loss": 3.9878, + "step": 2120 + }, + { + "epoch": 0.1, + "learning_rate": 4.9626985216714725e-05, + "loss": 3.9668, + "step": 2130 + }, + { + "epoch": 0.1, + "learning_rate": 4.9624696782461446e-05, + "loss": 4.0614, + "step": 2140 + }, + { + "epoch": 0.1, + "learning_rate": 4.962240834820816e-05, + "loss": 4.0036, + "step": 2150 + }, + { + "epoch": 0.1, + "learning_rate": 4.9620119913954874e-05, + "loss": 3.8624, + "step": 2160 + }, + { + "epoch": 0.1, + "learning_rate": 4.9617831479701595e-05, + "loss": 3.9028, + "step": 2170 + }, + { + "epoch": 0.1, + "learning_rate": 4.96155430454483e-05, + "loss": 3.982, + "step": 2180 + }, + { + "epoch": 0.1, + "learning_rate": 4.961325461119502e-05, + "loss": 3.9641, + "step": 2190 + }, + { + "epoch": 0.1, + "learning_rate": 4.961096617694174e-05, + "loss": 3.7962, + "step": 2200 + }, + { + "epoch": 0.1, + "learning_rate": 4.960867774268845e-05, + "loss": 4.0071, + "step": 2210 + }, + { + "epoch": 0.1, + "learning_rate": 4.960638930843517e-05, + "loss": 3.9433, + "step": 2220 + }, + { + "epoch": 0.1, + "learning_rate": 4.9604100874181886e-05, + "loss": 3.943, + "step": 2230 + }, + { + "epoch": 0.1, + "learning_rate": 4.96018124399286e-05, + "loss": 3.6835, + "step": 2240 + }, + { + "epoch": 0.1, + "learning_rate": 4.959952400567532e-05, + "loss": 3.9436, + "step": 2250 + }, + { + "epoch": 0.1, + "learning_rate": 4.9597235571422035e-05, + "loss": 3.6303, + "step": 2260 + }, + { + "epoch": 0.1, + "learning_rate": 4.959494713716875e-05, + "loss": 3.6082, + "step": 2270 + }, + { + "epoch": 0.1, + "learning_rate": 4.959265870291547e-05, + "loss": 3.9698, + "step": 2280 + }, + { + "epoch": 0.1, + "learning_rate": 4.9590370268662185e-05, + "loss": 3.7346, + "step": 2290 + }, + { + "epoch": 0.11, + "learning_rate": 4.95880818344089e-05, + "loss": 3.785, + "step": 2300 + }, + { + "epoch": 0.11, + "learning_rate": 4.958579340015562e-05, + "loss": 3.634, + "step": 2310 + }, + { + "epoch": 0.11, + "learning_rate": 4.9583504965902334e-05, + "loss": 3.8224, + "step": 2320 + }, + { + "epoch": 0.11, + "learning_rate": 4.958121653164905e-05, + "loss": 3.7121, + "step": 2330 + }, + { + "epoch": 0.11, + "learning_rate": 4.957892809739577e-05, + "loss": 3.6801, + "step": 2340 + }, + { + "epoch": 0.11, + "learning_rate": 4.957663966314248e-05, + "loss": 3.6434, + "step": 2350 + }, + { + "epoch": 0.11, + "learning_rate": 4.95743512288892e-05, + "loss": 3.5632, + "step": 2360 + }, + { + "epoch": 0.11, + "learning_rate": 4.957206279463592e-05, + "loss": 3.7956, + "step": 2370 + }, + { + "epoch": 0.11, + "learning_rate": 4.9569774360382625e-05, + "loss": 3.7147, + "step": 2380 + }, + { + "epoch": 0.11, + "learning_rate": 4.956748592612934e-05, + "loss": 3.9062, + "step": 2390 + }, + { + "epoch": 0.11, + "learning_rate": 4.956519749187606e-05, + "loss": 3.7015, + "step": 2400 + }, + { + "epoch": 0.11, + "learning_rate": 4.9562909057622775e-05, + "loss": 3.7737, + "step": 2410 + }, + { + "epoch": 0.11, + "learning_rate": 4.956062062336949e-05, + "loss": 3.6225, + "step": 2420 + }, + { + "epoch": 0.11, + "learning_rate": 4.955833218911621e-05, + "loss": 3.8234, + "step": 2430 + }, + { + "epoch": 0.11, + "learning_rate": 4.9556043754862924e-05, + "loss": 3.6219, + "step": 2440 + }, + { + "epoch": 0.11, + "learning_rate": 4.955375532060964e-05, + "loss": 3.4964, + "step": 2450 + }, + { + "epoch": 0.11, + "learning_rate": 4.955146688635636e-05, + "loss": 3.5518, + "step": 2460 + }, + { + "epoch": 0.11, + "learning_rate": 4.954917845210307e-05, + "loss": 3.4895, + "step": 2470 + }, + { + "epoch": 0.11, + "learning_rate": 4.954689001784979e-05, + "loss": 3.5502, + "step": 2480 + }, + { + "epoch": 0.11, + "learning_rate": 4.954460158359651e-05, + "loss": 3.629, + "step": 2490 + }, + { + "epoch": 0.11, + "learning_rate": 4.954231314934322e-05, + "loss": 3.6208, + "step": 2500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9540024715089936e-05, + "loss": 3.5531, + "step": 2510 + }, + { + "epoch": 0.12, + "learning_rate": 4.953773628083666e-05, + "loss": 3.4852, + "step": 2520 + }, + { + "epoch": 0.12, + "learning_rate": 4.953544784658337e-05, + "loss": 3.446, + "step": 2530 + }, + { + "epoch": 0.12, + "learning_rate": 4.9533159412330085e-05, + "loss": 3.5893, + "step": 2540 + }, + { + "epoch": 0.12, + "learning_rate": 4.9530870978076806e-05, + "loss": 3.5965, + "step": 2550 + }, + { + "epoch": 0.12, + "learning_rate": 4.952858254382352e-05, + "loss": 3.5799, + "step": 2560 + }, + { + "epoch": 0.12, + "learning_rate": 4.9526294109570235e-05, + "loss": 3.5579, + "step": 2570 + }, + { + "epoch": 0.12, + "learning_rate": 4.952400567531695e-05, + "loss": 3.5597, + "step": 2580 + }, + { + "epoch": 0.12, + "learning_rate": 4.952171724106366e-05, + "loss": 3.64, + "step": 2590 + }, + { + "epoch": 0.12, + "learning_rate": 4.9519428806810384e-05, + "loss": 3.6476, + "step": 2600 + }, + { + "epoch": 0.12, + "learning_rate": 4.95171403725571e-05, + "loss": 3.4483, + "step": 2610 + }, + { + "epoch": 0.12, + "learning_rate": 4.951485193830381e-05, + "loss": 3.6166, + "step": 2620 + }, + { + "epoch": 0.12, + "learning_rate": 4.951256350405053e-05, + "loss": 3.4921, + "step": 2630 + }, + { + "epoch": 0.12, + "learning_rate": 4.951027506979725e-05, + "loss": 3.5374, + "step": 2640 + }, + { + "epoch": 0.12, + "learning_rate": 4.950798663554396e-05, + "loss": 3.6803, + "step": 2650 + }, + { + "epoch": 0.12, + "learning_rate": 4.950569820129068e-05, + "loss": 3.594, + "step": 2660 + }, + { + "epoch": 0.12, + "learning_rate": 4.9503409767037396e-05, + "loss": 3.4511, + "step": 2670 + }, + { + "epoch": 0.12, + "learning_rate": 4.950112133278411e-05, + "loss": 3.5751, + "step": 2680 + }, + { + "epoch": 0.12, + "learning_rate": 4.949883289853083e-05, + "loss": 3.4753, + "step": 2690 + }, + { + "epoch": 0.12, + "learning_rate": 4.9496544464277545e-05, + "loss": 3.4152, + "step": 2700 + }, + { + "epoch": 0.12, + "learning_rate": 4.949425603002426e-05, + "loss": 3.4295, + "step": 2710 + }, + { + "epoch": 0.12, + "learning_rate": 4.949196759577098e-05, + "loss": 3.4824, + "step": 2720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9489679161517695e-05, + "loss": 3.4024, + "step": 2730 + }, + { + "epoch": 0.13, + "learning_rate": 4.948739072726441e-05, + "loss": 3.5077, + "step": 2740 + }, + { + "epoch": 0.13, + "learning_rate": 4.948510229301113e-05, + "loss": 3.5525, + "step": 2750 + }, + { + "epoch": 0.13, + "learning_rate": 4.9482813858757844e-05, + "loss": 3.3653, + "step": 2760 + }, + { + "epoch": 0.13, + "learning_rate": 4.948052542450456e-05, + "loss": 3.4213, + "step": 2770 + }, + { + "epoch": 0.13, + "learning_rate": 4.947823699025127e-05, + "loss": 3.2256, + "step": 2780 + }, + { + "epoch": 0.13, + "learning_rate": 4.9475948555997986e-05, + "loss": 3.3265, + "step": 2790 + }, + { + "epoch": 0.13, + "learning_rate": 4.94736601217447e-05, + "loss": 3.5103, + "step": 2800 + }, + { + "epoch": 0.13, + "learning_rate": 4.947137168749142e-05, + "loss": 3.2936, + "step": 2810 + }, + { + "epoch": 0.13, + "learning_rate": 4.9469083253238135e-05, + "loss": 3.2385, + "step": 2820 + }, + { + "epoch": 0.13, + "learning_rate": 4.946679481898485e-05, + "loss": 3.2628, + "step": 2830 + }, + { + "epoch": 0.13, + "learning_rate": 4.946450638473157e-05, + "loss": 3.2475, + "step": 2840 + }, + { + "epoch": 0.13, + "learning_rate": 4.9462217950478285e-05, + "loss": 3.3101, + "step": 2850 + }, + { + "epoch": 0.13, + "learning_rate": 4.9459929516225e-05, + "loss": 3.275, + "step": 2860 + }, + { + "epoch": 0.13, + "learning_rate": 4.945764108197172e-05, + "loss": 3.2514, + "step": 2870 + }, + { + "epoch": 0.13, + "learning_rate": 4.9455352647718434e-05, + "loss": 3.4532, + "step": 2880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945306421346515e-05, + "loss": 3.349, + "step": 2890 + }, + { + "epoch": 0.13, + "learning_rate": 4.945077577921187e-05, + "loss": 3.1803, + "step": 2900 + }, + { + "epoch": 0.13, + "learning_rate": 4.944848734495858e-05, + "loss": 3.3639, + "step": 2910 + }, + { + "epoch": 0.13, + "learning_rate": 4.94461989107053e-05, + "loss": 3.3155, + "step": 2920 + }, + { + "epoch": 0.13, + "learning_rate": 4.944391047645202e-05, + "loss": 3.3371, + "step": 2930 + }, + { + "epoch": 0.13, + "learning_rate": 4.944162204219873e-05, + "loss": 3.2016, + "step": 2940 + }, + { + "epoch": 0.13, + "learning_rate": 4.9439333607945446e-05, + "loss": 3.1036, + "step": 2950 + }, + { + "epoch": 0.14, + "learning_rate": 4.943704517369217e-05, + "loss": 3.3109, + "step": 2960 + }, + { + "epoch": 0.14, + "learning_rate": 4.9434756739438874e-05, + "loss": 3.3041, + "step": 2970 + }, + { + "epoch": 0.14, + "learning_rate": 4.943246830518559e-05, + "loss": 3.2101, + "step": 2980 + }, + { + "epoch": 0.14, + "learning_rate": 4.943017987093231e-05, + "loss": 3.2372, + "step": 2990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9427891436679024e-05, + "loss": 3.1914, + "step": 3000 + }, + { + "epoch": 0.14, + "learning_rate": 4.942560300242574e-05, + "loss": 3.2797, + "step": 3010 + }, + { + "epoch": 0.14, + "learning_rate": 4.942331456817246e-05, + "loss": 3.3601, + "step": 3020 + }, + { + "epoch": 0.14, + "learning_rate": 4.942102613391917e-05, + "loss": 3.4813, + "step": 3030 + }, + { + "epoch": 0.14, + "learning_rate": 4.941873769966589e-05, + "loss": 3.1892, + "step": 3040 + }, + { + "epoch": 0.14, + "learning_rate": 4.941644926541261e-05, + "loss": 3.3008, + "step": 3050 + }, + { + "epoch": 0.14, + "learning_rate": 4.941416083115932e-05, + "loss": 3.3227, + "step": 3060 + }, + { + "epoch": 0.14, + "learning_rate": 4.9411872396906036e-05, + "loss": 3.2429, + "step": 3070 + }, + { + "epoch": 0.14, + "learning_rate": 4.940958396265276e-05, + "loss": 3.375, + "step": 3080 + }, + { + "epoch": 0.14, + "learning_rate": 4.940729552839947e-05, + "loss": 3.255, + "step": 3090 + }, + { + "epoch": 0.14, + "learning_rate": 4.9405007094146185e-05, + "loss": 3.3024, + "step": 3100 + }, + { + "epoch": 0.14, + "learning_rate": 4.9402718659892906e-05, + "loss": 3.3271, + "step": 3110 + }, + { + "epoch": 0.14, + "learning_rate": 4.940043022563962e-05, + "loss": 3.1107, + "step": 3120 + }, + { + "epoch": 0.14, + "learning_rate": 4.9398141791386335e-05, + "loss": 3.1728, + "step": 3130 + }, + { + "epoch": 0.14, + "learning_rate": 4.9395853357133055e-05, + "loss": 3.4514, + "step": 3140 + }, + { + "epoch": 0.14, + "learning_rate": 4.939356492287977e-05, + "loss": 2.8632, + "step": 3150 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391276488626484e-05, + "loss": 3.4124, + "step": 3160 + }, + { + "epoch": 0.14, + "learning_rate": 4.93889880543732e-05, + "loss": 3.111, + "step": 3170 + }, + { + "epoch": 0.15, + "learning_rate": 4.938669962011991e-05, + "loss": 3.1243, + "step": 3180 + }, + { + "epoch": 0.15, + "learning_rate": 4.938441118586663e-05, + "loss": 3.2455, + "step": 3190 + }, + { + "epoch": 0.15, + "learning_rate": 4.938212275161335e-05, + "loss": 3.1032, + "step": 3200 + }, + { + "epoch": 0.15, + "learning_rate": 4.937983431736006e-05, + "loss": 3.3117, + "step": 3210 + }, + { + "epoch": 0.15, + "learning_rate": 4.937754588310678e-05, + "loss": 3.2521, + "step": 3220 + }, + { + "epoch": 0.15, + "learning_rate": 4.9375257448853496e-05, + "loss": 3.3084, + "step": 3230 + }, + { + "epoch": 0.15, + "learning_rate": 4.937296901460021e-05, + "loss": 3.1779, + "step": 3240 + }, + { + "epoch": 0.15, + "learning_rate": 4.937068058034693e-05, + "loss": 3.0942, + "step": 3250 + }, + { + "epoch": 0.15, + "learning_rate": 4.9368392146093645e-05, + "loss": 2.9396, + "step": 3260 + }, + { + "epoch": 0.15, + "learning_rate": 4.936610371184036e-05, + "loss": 3.1771, + "step": 3270 + }, + { + "epoch": 0.15, + "learning_rate": 4.936381527758708e-05, + "loss": 3.116, + "step": 3280 + }, + { + "epoch": 0.15, + "learning_rate": 4.9361526843333795e-05, + "loss": 3.1216, + "step": 3290 + }, + { + "epoch": 0.15, + "learning_rate": 4.935923840908051e-05, + "loss": 3.0335, + "step": 3300 + }, + { + "epoch": 0.15, + "learning_rate": 4.935694997482723e-05, + "loss": 3.2038, + "step": 3310 + }, + { + "epoch": 0.15, + "learning_rate": 4.9354661540573944e-05, + "loss": 3.1194, + "step": 3320 + }, + { + "epoch": 0.15, + "learning_rate": 4.935237310632066e-05, + "loss": 3.0588, + "step": 3330 + }, + { + "epoch": 0.15, + "learning_rate": 4.935008467206738e-05, + "loss": 3.1875, + "step": 3340 + }, + { + "epoch": 0.15, + "learning_rate": 4.934779623781409e-05, + "loss": 3.0593, + "step": 3350 + }, + { + "epoch": 0.15, + "learning_rate": 4.934550780356081e-05, + "loss": 3.0694, + "step": 3360 + }, + { + "epoch": 0.15, + "learning_rate": 4.934321936930752e-05, + "loss": 3.0894, + "step": 3370 + }, + { + "epoch": 0.15, + "learning_rate": 4.9340930935054235e-05, + "loss": 3.0116, + "step": 3380 + }, + { + "epoch": 0.15, + "learning_rate": 4.933864250080095e-05, + "loss": 3.0874, + "step": 3390 + }, + { + "epoch": 0.16, + "learning_rate": 4.933635406654767e-05, + "loss": 3.2471, + "step": 3400 + }, + { + "epoch": 0.16, + "learning_rate": 4.9334065632294384e-05, + "loss": 3.1044, + "step": 3410 + }, + { + "epoch": 0.16, + "learning_rate": 4.93317771980411e-05, + "loss": 2.9131, + "step": 3420 + }, + { + "epoch": 0.16, + "learning_rate": 4.932948876378782e-05, + "loss": 3.0253, + "step": 3430 + }, + { + "epoch": 0.16, + "learning_rate": 4.9327200329534534e-05, + "loss": 3.0284, + "step": 3440 + }, + { + "epoch": 0.16, + "learning_rate": 4.932491189528125e-05, + "loss": 2.9507, + "step": 3450 + }, + { + "epoch": 0.16, + "learning_rate": 4.932262346102797e-05, + "loss": 3.0571, + "step": 3460 + }, + { + "epoch": 0.16, + "learning_rate": 4.932033502677468e-05, + "loss": 3.0379, + "step": 3470 + }, + { + "epoch": 0.16, + "learning_rate": 4.93180465925214e-05, + "loss": 3.2292, + "step": 3480 + }, + { + "epoch": 0.16, + "learning_rate": 4.931575815826812e-05, + "loss": 3.0898, + "step": 3490 + }, + { + "epoch": 0.16, + "learning_rate": 4.931346972401483e-05, + "loss": 2.9006, + "step": 3500 + }, + { + "epoch": 0.16, + "learning_rate": 4.9311181289761546e-05, + "loss": 2.984, + "step": 3510 + }, + { + "epoch": 0.16, + "learning_rate": 4.930889285550827e-05, + "loss": 3.0123, + "step": 3520 + }, + { + "epoch": 0.16, + "learning_rate": 4.930660442125498e-05, + "loss": 3.2169, + "step": 3530 + }, + { + "epoch": 0.16, + "learning_rate": 4.9304315987001695e-05, + "loss": 3.252, + "step": 3540 + }, + { + "epoch": 0.16, + "learning_rate": 4.9302027552748416e-05, + "loss": 3.0935, + "step": 3550 + }, + { + "epoch": 0.16, + "learning_rate": 4.929973911849513e-05, + "loss": 2.9241, + "step": 3560 + }, + { + "epoch": 0.16, + "learning_rate": 4.9297450684241844e-05, + "loss": 3.0553, + "step": 3570 + }, + { + "epoch": 0.16, + "learning_rate": 4.929516224998856e-05, + "loss": 3.2508, + "step": 3580 + }, + { + "epoch": 0.16, + "learning_rate": 4.929287381573527e-05, + "loss": 2.9135, + "step": 3590 + }, + { + "epoch": 0.16, + "learning_rate": 4.9290585381481994e-05, + "loss": 3.1094, + "step": 3600 + }, + { + "epoch": 0.16, + "learning_rate": 4.928829694722871e-05, + "loss": 3.0007, + "step": 3610 + }, + { + "epoch": 0.17, + "learning_rate": 4.928600851297542e-05, + "loss": 2.9614, + "step": 3620 + }, + { + "epoch": 0.17, + "learning_rate": 4.928372007872214e-05, + "loss": 3.1314, + "step": 3630 + }, + { + "epoch": 0.17, + "learning_rate": 4.928143164446886e-05, + "loss": 2.9977, + "step": 3640 + }, + { + "epoch": 0.17, + "learning_rate": 4.927914321021557e-05, + "loss": 2.9733, + "step": 3650 + }, + { + "epoch": 0.17, + "learning_rate": 4.927685477596229e-05, + "loss": 3.0223, + "step": 3660 + }, + { + "epoch": 0.17, + "learning_rate": 4.9274566341709006e-05, + "loss": 2.9454, + "step": 3670 + }, + { + "epoch": 0.17, + "learning_rate": 4.927227790745572e-05, + "loss": 2.8938, + "step": 3680 + }, + { + "epoch": 0.17, + "learning_rate": 4.926998947320244e-05, + "loss": 2.8467, + "step": 3690 + }, + { + "epoch": 0.17, + "learning_rate": 4.9267701038949155e-05, + "loss": 2.946, + "step": 3700 + }, + { + "epoch": 0.17, + "learning_rate": 4.926541260469587e-05, + "loss": 3.1829, + "step": 3710 + }, + { + "epoch": 0.17, + "learning_rate": 4.926312417044259e-05, + "loss": 3.0131, + "step": 3720 + }, + { + "epoch": 0.17, + "learning_rate": 4.9260835736189305e-05, + "loss": 3.0267, + "step": 3730 + }, + { + "epoch": 0.17, + "learning_rate": 4.925854730193602e-05, + "loss": 3.3027, + "step": 3740 + }, + { + "epoch": 0.17, + "learning_rate": 4.925625886768274e-05, + "loss": 2.9908, + "step": 3750 + }, + { + "epoch": 0.17, + "learning_rate": 4.925397043342945e-05, + "loss": 2.8306, + "step": 3760 + }, + { + "epoch": 0.17, + "learning_rate": 4.925168199917616e-05, + "loss": 3.061, + "step": 3770 + }, + { + "epoch": 0.17, + "learning_rate": 4.924939356492288e-05, + "loss": 2.9045, + "step": 3780 + }, + { + "epoch": 0.17, + "learning_rate": 4.9247105130669596e-05, + "loss": 2.8821, + "step": 3790 + }, + { + "epoch": 0.17, + "learning_rate": 4.924481669641631e-05, + "loss": 2.8715, + "step": 3800 + }, + { + "epoch": 0.17, + "learning_rate": 4.924252826216303e-05, + "loss": 2.9067, + "step": 3810 + }, + { + "epoch": 0.17, + "learning_rate": 4.9240239827909745e-05, + "loss": 3.0192, + "step": 3820 + }, + { + "epoch": 0.17, + "learning_rate": 4.923795139365646e-05, + "loss": 3.0298, + "step": 3830 + }, + { + "epoch": 0.18, + "learning_rate": 4.923566295940318e-05, + "loss": 2.9034, + "step": 3840 + }, + { + "epoch": 0.18, + "learning_rate": 4.9233374525149894e-05, + "loss": 2.9827, + "step": 3850 + }, + { + "epoch": 0.18, + "learning_rate": 4.923108609089661e-05, + "loss": 2.8672, + "step": 3860 + }, + { + "epoch": 0.18, + "learning_rate": 4.922879765664333e-05, + "loss": 2.9291, + "step": 3870 + }, + { + "epoch": 0.18, + "learning_rate": 4.9226509222390044e-05, + "loss": 2.9763, + "step": 3880 + }, + { + "epoch": 0.18, + "learning_rate": 4.922422078813676e-05, + "loss": 2.8443, + "step": 3890 + }, + { + "epoch": 0.18, + "learning_rate": 4.922193235388348e-05, + "loss": 2.8285, + "step": 3900 + }, + { + "epoch": 0.18, + "learning_rate": 4.921964391963019e-05, + "loss": 2.8529, + "step": 3910 + }, + { + "epoch": 0.18, + "learning_rate": 4.921735548537691e-05, + "loss": 2.9645, + "step": 3920 + }, + { + "epoch": 0.18, + "learning_rate": 4.921506705112363e-05, + "loss": 2.9857, + "step": 3930 + }, + { + "epoch": 0.18, + "learning_rate": 4.921277861687034e-05, + "loss": 3.0512, + "step": 3940 + }, + { + "epoch": 0.18, + "learning_rate": 4.9210490182617056e-05, + "loss": 2.8715, + "step": 3950 + }, + { + "epoch": 0.18, + "learning_rate": 4.920820174836377e-05, + "loss": 2.9225, + "step": 3960 + }, + { + "epoch": 0.18, + "learning_rate": 4.9205913314110484e-05, + "loss": 2.7595, + "step": 3970 + }, + { + "epoch": 0.18, + "learning_rate": 4.9203624879857205e-05, + "loss": 2.8244, + "step": 3980 + }, + { + "epoch": 0.18, + "learning_rate": 4.920133644560392e-05, + "loss": 2.8686, + "step": 3990 + }, + { + "epoch": 0.18, + "learning_rate": 4.9199048011350634e-05, + "loss": 2.771, + "step": 4000 + }, + { + "epoch": 0.18, + "learning_rate": 4.919675957709735e-05, + "loss": 2.8594, + "step": 4010 + }, + { + "epoch": 0.18, + "learning_rate": 4.919447114284407e-05, + "loss": 2.9962, + "step": 4020 + }, + { + "epoch": 0.18, + "learning_rate": 4.919218270859078e-05, + "loss": 2.9638, + "step": 4030 + }, + { + "epoch": 0.18, + "learning_rate": 4.91898942743375e-05, + "loss": 3.0201, + "step": 4040 + }, + { + "epoch": 0.18, + "learning_rate": 4.918760584008422e-05, + "loss": 2.9256, + "step": 4050 + }, + { + "epoch": 0.19, + "learning_rate": 4.918531740583093e-05, + "loss": 3.0207, + "step": 4060 + }, + { + "epoch": 0.19, + "learning_rate": 4.9183028971577646e-05, + "loss": 2.7377, + "step": 4070 + }, + { + "epoch": 0.19, + "learning_rate": 4.918074053732437e-05, + "loss": 2.9199, + "step": 4080 + }, + { + "epoch": 0.19, + "learning_rate": 4.917845210307108e-05, + "loss": 2.9998, + "step": 4090 + }, + { + "epoch": 0.19, + "learning_rate": 4.9176163668817795e-05, + "loss": 2.8586, + "step": 4100 + }, + { + "epoch": 0.19, + "learning_rate": 4.9173875234564516e-05, + "loss": 2.993, + "step": 4110 + }, + { + "epoch": 0.19, + "learning_rate": 4.917158680031123e-05, + "loss": 2.8115, + "step": 4120 + }, + { + "epoch": 0.19, + "learning_rate": 4.9169298366057944e-05, + "loss": 2.8757, + "step": 4130 + }, + { + "epoch": 0.19, + "learning_rate": 4.9167009931804665e-05, + "loss": 2.9125, + "step": 4140 + }, + { + "epoch": 0.19, + "learning_rate": 4.916472149755138e-05, + "loss": 2.8955, + "step": 4150 + }, + { + "epoch": 0.19, + "learning_rate": 4.9162433063298094e-05, + "loss": 2.7962, + "step": 4160 + }, + { + "epoch": 0.19, + "learning_rate": 4.916014462904481e-05, + "loss": 2.7999, + "step": 4170 + }, + { + "epoch": 0.19, + "learning_rate": 4.915785619479152e-05, + "loss": 2.9908, + "step": 4180 + }, + { + "epoch": 0.19, + "learning_rate": 4.915556776053824e-05, + "loss": 2.8225, + "step": 4190 + }, + { + "epoch": 0.19, + "learning_rate": 4.915327932628496e-05, + "loss": 2.7451, + "step": 4200 + }, + { + "epoch": 0.19, + "learning_rate": 4.915099089203167e-05, + "loss": 3.0593, + "step": 4210 + }, + { + "epoch": 0.19, + "learning_rate": 4.914870245777839e-05, + "loss": 2.7044, + "step": 4220 + }, + { + "epoch": 0.19, + "learning_rate": 4.9146414023525106e-05, + "loss": 3.036, + "step": 4230 + }, + { + "epoch": 0.19, + "learning_rate": 4.914412558927182e-05, + "loss": 3.0159, + "step": 4240 + }, + { + "epoch": 0.19, + "learning_rate": 4.914183715501854e-05, + "loss": 2.789, + "step": 4250 + }, + { + "epoch": 0.19, + "learning_rate": 4.9139548720765255e-05, + "loss": 2.8524, + "step": 4260 + }, + { + "epoch": 0.19, + "learning_rate": 4.913726028651197e-05, + "loss": 2.9255, + "step": 4270 + }, + { + "epoch": 0.2, + "learning_rate": 4.913497185225869e-05, + "loss": 2.819, + "step": 4280 + }, + { + "epoch": 0.2, + "learning_rate": 4.9132683418005404e-05, + "loss": 2.8614, + "step": 4290 + }, + { + "epoch": 0.2, + "learning_rate": 4.913039498375212e-05, + "loss": 2.8063, + "step": 4300 + }, + { + "epoch": 0.2, + "learning_rate": 4.912810654949884e-05, + "loss": 2.8566, + "step": 4310 + }, + { + "epoch": 0.2, + "learning_rate": 4.9125818115245554e-05, + "loss": 2.9287, + "step": 4320 + }, + { + "epoch": 0.2, + "learning_rate": 4.912352968099227e-05, + "loss": 2.9403, + "step": 4330 + }, + { + "epoch": 0.2, + "learning_rate": 4.912124124673899e-05, + "loss": 2.7084, + "step": 4340 + }, + { + "epoch": 0.2, + "learning_rate": 4.91189528124857e-05, + "loss": 2.7331, + "step": 4350 + }, + { + "epoch": 0.2, + "learning_rate": 4.911666437823241e-05, + "loss": 2.9216, + "step": 4360 + }, + { + "epoch": 0.2, + "learning_rate": 4.911437594397913e-05, + "loss": 3.0126, + "step": 4370 + }, + { + "epoch": 0.2, + "learning_rate": 4.9112087509725845e-05, + "loss": 2.9256, + "step": 4380 + }, + { + "epoch": 0.2, + "learning_rate": 4.910979907547256e-05, + "loss": 2.8328, + "step": 4390 + }, + { + "epoch": 0.2, + "learning_rate": 4.910751064121928e-05, + "loss": 2.7388, + "step": 4400 + }, + { + "epoch": 0.2, + "learning_rate": 4.9105222206965994e-05, + "loss": 2.7339, + "step": 4410 + }, + { + "epoch": 0.2, + "learning_rate": 4.910293377271271e-05, + "loss": 2.8324, + "step": 4420 + }, + { + "epoch": 0.2, + "learning_rate": 4.910064533845943e-05, + "loss": 2.8383, + "step": 4430 + }, + { + "epoch": 0.2, + "learning_rate": 4.9098356904206144e-05, + "loss": 2.7014, + "step": 4440 + }, + { + "epoch": 0.2, + "learning_rate": 4.909606846995286e-05, + "loss": 2.8201, + "step": 4450 + }, + { + "epoch": 0.2, + "learning_rate": 4.909378003569958e-05, + "loss": 3.0264, + "step": 4460 + }, + { + "epoch": 0.2, + "learning_rate": 4.909149160144629e-05, + "loss": 2.8091, + "step": 4470 + }, + { + "epoch": 0.2, + "learning_rate": 4.908920316719301e-05, + "loss": 2.8537, + "step": 4480 + }, + { + "epoch": 0.21, + "learning_rate": 4.908691473293973e-05, + "loss": 2.9466, + "step": 4490 + }, + { + "epoch": 0.21, + "learning_rate": 4.908462629868644e-05, + "loss": 2.9187, + "step": 4500 + }, + { + "epoch": 0.21, + "learning_rate": 4.9082337864433156e-05, + "loss": 2.8632, + "step": 4510 + }, + { + "epoch": 0.21, + "learning_rate": 4.908004943017988e-05, + "loss": 2.781, + "step": 4520 + }, + { + "epoch": 0.21, + "learning_rate": 4.907776099592659e-05, + "loss": 2.851, + "step": 4530 + }, + { + "epoch": 0.21, + "learning_rate": 4.9075472561673305e-05, + "loss": 2.6722, + "step": 4540 + }, + { + "epoch": 0.21, + "learning_rate": 4.907318412742002e-05, + "loss": 2.8292, + "step": 4550 + }, + { + "epoch": 0.21, + "learning_rate": 4.9070895693166733e-05, + "loss": 2.9132, + "step": 4560 + }, + { + "epoch": 0.21, + "learning_rate": 4.9068607258913454e-05, + "loss": 2.5775, + "step": 4570 + }, + { + "epoch": 0.21, + "learning_rate": 4.906631882466017e-05, + "loss": 2.7283, + "step": 4580 + }, + { + "epoch": 0.21, + "learning_rate": 4.906403039040688e-05, + "loss": 2.9837, + "step": 4590 + }, + { + "epoch": 0.21, + "learning_rate": 4.9061741956153604e-05, + "loss": 3.0339, + "step": 4600 + }, + { + "epoch": 0.21, + "learning_rate": 4.905945352190032e-05, + "loss": 2.6225, + "step": 4610 + }, + { + "epoch": 0.21, + "learning_rate": 4.905716508764703e-05, + "loss": 2.7087, + "step": 4620 + }, + { + "epoch": 0.21, + "learning_rate": 4.905487665339375e-05, + "loss": 2.8347, + "step": 4630 + }, + { + "epoch": 0.21, + "learning_rate": 4.905258821914047e-05, + "loss": 2.7919, + "step": 4640 + }, + { + "epoch": 0.21, + "learning_rate": 4.905029978488718e-05, + "loss": 2.6434, + "step": 4650 + }, + { + "epoch": 0.21, + "learning_rate": 4.90480113506339e-05, + "loss": 3.0099, + "step": 4660 + }, + { + "epoch": 0.21, + "learning_rate": 4.9045722916380616e-05, + "loss": 2.8333, + "step": 4670 + }, + { + "epoch": 0.21, + "learning_rate": 4.904343448212733e-05, + "loss": 2.7933, + "step": 4680 + }, + { + "epoch": 0.21, + "learning_rate": 4.904114604787405e-05, + "loss": 2.9621, + "step": 4690 + }, + { + "epoch": 0.21, + "learning_rate": 4.9038857613620765e-05, + "loss": 2.7535, + "step": 4700 + }, + { + "epoch": 0.22, + "learning_rate": 4.903656917936748e-05, + "loss": 2.8269, + "step": 4710 + }, + { + "epoch": 0.22, + "learning_rate": 4.90342807451142e-05, + "loss": 2.7093, + "step": 4720 + }, + { + "epoch": 0.22, + "learning_rate": 4.9031992310860914e-05, + "loss": 2.8733, + "step": 4730 + }, + { + "epoch": 0.22, + "learning_rate": 4.902970387660763e-05, + "loss": 2.9256, + "step": 4740 + }, + { + "epoch": 0.22, + "learning_rate": 4.902741544235434e-05, + "loss": 2.7271, + "step": 4750 + }, + { + "epoch": 0.22, + "learning_rate": 4.902512700810106e-05, + "loss": 2.8289, + "step": 4760 + }, + { + "epoch": 0.22, + "learning_rate": 4.902283857384777e-05, + "loss": 2.7238, + "step": 4770 + }, + { + "epoch": 0.22, + "learning_rate": 4.902055013959449e-05, + "loss": 2.7404, + "step": 4780 + }, + { + "epoch": 0.22, + "learning_rate": 4.9018261705341206e-05, + "loss": 2.6856, + "step": 4790 + }, + { + "epoch": 0.22, + "learning_rate": 4.901597327108792e-05, + "loss": 2.8218, + "step": 4800 + }, + { + "epoch": 0.22, + "learning_rate": 4.901368483683464e-05, + "loss": 2.8808, + "step": 4810 + }, + { + "epoch": 0.22, + "learning_rate": 4.9011396402581355e-05, + "loss": 2.8053, + "step": 4820 + }, + { + "epoch": 0.22, + "learning_rate": 4.900910796832807e-05, + "loss": 2.7797, + "step": 4830 + }, + { + "epoch": 0.22, + "learning_rate": 4.900681953407479e-05, + "loss": 2.6849, + "step": 4840 + }, + { + "epoch": 0.22, + "learning_rate": 4.9004531099821504e-05, + "loss": 2.9224, + "step": 4850 + }, + { + "epoch": 0.22, + "learning_rate": 4.900224266556822e-05, + "loss": 2.9994, + "step": 4860 + }, + { + "epoch": 0.22, + "learning_rate": 4.899995423131494e-05, + "loss": 2.8157, + "step": 4870 + }, + { + "epoch": 0.22, + "learning_rate": 4.8997665797061653e-05, + "loss": 2.7373, + "step": 4880 + }, + { + "epoch": 0.22, + "learning_rate": 4.899537736280837e-05, + "loss": 2.7284, + "step": 4890 + }, + { + "epoch": 0.22, + "learning_rate": 4.899308892855509e-05, + "loss": 2.8367, + "step": 4900 + }, + { + "epoch": 0.22, + "learning_rate": 4.89908004943018e-05, + "loss": 2.7574, + "step": 4910 + }, + { + "epoch": 0.22, + "learning_rate": 4.898851206004852e-05, + "loss": 2.9788, + "step": 4920 + }, + { + "epoch": 0.23, + "learning_rate": 4.898622362579524e-05, + "loss": 2.8612, + "step": 4930 + }, + { + "epoch": 0.23, + "learning_rate": 4.898393519154195e-05, + "loss": 2.8805, + "step": 4940 + }, + { + "epoch": 0.23, + "learning_rate": 4.8981646757288666e-05, + "loss": 2.741, + "step": 4950 + }, + { + "epoch": 0.23, + "learning_rate": 4.897935832303538e-05, + "loss": 2.7458, + "step": 4960 + }, + { + "epoch": 0.23, + "learning_rate": 4.8977069888782094e-05, + "loss": 2.7443, + "step": 4970 + }, + { + "epoch": 0.23, + "learning_rate": 4.8974781454528815e-05, + "loss": 2.7667, + "step": 4980 + }, + { + "epoch": 0.23, + "learning_rate": 4.897249302027553e-05, + "loss": 2.7318, + "step": 4990 + }, + { + "epoch": 0.23, + "learning_rate": 4.8970204586022243e-05, + "loss": 2.7639, + "step": 5000 + }, + { + "epoch": 0.23, + "learning_rate": 4.8967916151768964e-05, + "loss": 2.8134, + "step": 5010 + }, + { + "epoch": 0.23, + "learning_rate": 4.896562771751568e-05, + "loss": 2.6872, + "step": 5020 + }, + { + "epoch": 0.23, + "learning_rate": 4.896333928326239e-05, + "loss": 2.8299, + "step": 5030 + }, + { + "epoch": 0.23, + "learning_rate": 4.8961050849009113e-05, + "loss": 2.8018, + "step": 5040 + }, + { + "epoch": 0.23, + "learning_rate": 4.895876241475583e-05, + "loss": 2.7224, + "step": 5050 + }, + { + "epoch": 0.23, + "learning_rate": 4.895647398050254e-05, + "loss": 2.9706, + "step": 5060 + }, + { + "epoch": 0.23, + "learning_rate": 4.895418554624926e-05, + "loss": 2.8135, + "step": 5070 + }, + { + "epoch": 0.23, + "learning_rate": 4.895189711199598e-05, + "loss": 2.64, + "step": 5080 + }, + { + "epoch": 0.23, + "learning_rate": 4.894960867774269e-05, + "loss": 3.0843, + "step": 5090 + }, + { + "epoch": 0.23, + "learning_rate": 4.894732024348941e-05, + "loss": 2.7187, + "step": 5100 + }, + { + "epoch": 0.23, + "learning_rate": 4.8945031809236126e-05, + "loss": 2.7207, + "step": 5110 + }, + { + "epoch": 0.23, + "learning_rate": 4.894274337498284e-05, + "loss": 2.7636, + "step": 5120 + }, + { + "epoch": 0.23, + "learning_rate": 4.8940454940729554e-05, + "loss": 2.8889, + "step": 5130 + }, + { + "epoch": 0.23, + "learning_rate": 4.8938166506476275e-05, + "loss": 2.5818, + "step": 5140 + }, + { + "epoch": 0.24, + "learning_rate": 4.893587807222298e-05, + "loss": 2.7533, + "step": 5150 + }, + { + "epoch": 0.24, + "learning_rate": 4.8933589637969703e-05, + "loss": 2.767, + "step": 5160 + }, + { + "epoch": 0.24, + "learning_rate": 4.893130120371642e-05, + "loss": 2.6906, + "step": 5170 + }, + { + "epoch": 0.24, + "learning_rate": 4.892901276946313e-05, + "loss": 2.6821, + "step": 5180 + }, + { + "epoch": 0.24, + "learning_rate": 4.892672433520985e-05, + "loss": 2.7352, + "step": 5190 + }, + { + "epoch": 0.24, + "learning_rate": 4.892443590095657e-05, + "loss": 2.7433, + "step": 5200 + }, + { + "epoch": 0.24, + "learning_rate": 4.892214746670328e-05, + "loss": 2.7549, + "step": 5210 + }, + { + "epoch": 0.24, + "learning_rate": 4.891985903245e-05, + "loss": 2.6625, + "step": 5220 + }, + { + "epoch": 0.24, + "learning_rate": 4.8917570598196716e-05, + "loss": 2.6029, + "step": 5230 + }, + { + "epoch": 0.24, + "learning_rate": 4.891528216394343e-05, + "loss": 2.5315, + "step": 5240 + }, + { + "epoch": 0.24, + "learning_rate": 4.891299372969015e-05, + "loss": 2.7698, + "step": 5250 + }, + { + "epoch": 0.24, + "learning_rate": 4.8910705295436865e-05, + "loss": 2.648, + "step": 5260 + }, + { + "epoch": 0.24, + "learning_rate": 4.890841686118358e-05, + "loss": 2.497, + "step": 5270 + }, + { + "epoch": 0.24, + "learning_rate": 4.89061284269303e-05, + "loss": 2.7683, + "step": 5280 + }, + { + "epoch": 0.24, + "learning_rate": 4.8903839992677014e-05, + "loss": 2.7788, + "step": 5290 + }, + { + "epoch": 0.24, + "learning_rate": 4.890155155842373e-05, + "loss": 2.6834, + "step": 5300 + }, + { + "epoch": 0.24, + "learning_rate": 4.889926312417045e-05, + "loss": 2.6312, + "step": 5310 + }, + { + "epoch": 0.24, + "learning_rate": 4.8896974689917163e-05, + "loss": 2.5096, + "step": 5320 + }, + { + "epoch": 0.24, + "learning_rate": 4.889468625566388e-05, + "loss": 2.6317, + "step": 5330 + }, + { + "epoch": 0.24, + "learning_rate": 4.889239782141059e-05, + "loss": 2.7149, + "step": 5340 + }, + { + "epoch": 0.24, + "learning_rate": 4.8890109387157306e-05, + "loss": 2.5823, + "step": 5350 + }, + { + "epoch": 0.24, + "learning_rate": 4.888782095290402e-05, + "loss": 2.6265, + "step": 5360 + }, + { + "epoch": 0.25, + "learning_rate": 4.888553251865074e-05, + "loss": 2.5827, + "step": 5370 + }, + { + "epoch": 0.25, + "learning_rate": 4.8883244084397455e-05, + "loss": 2.5964, + "step": 5380 + }, + { + "epoch": 0.25, + "learning_rate": 4.888095565014417e-05, + "loss": 2.6781, + "step": 5390 + }, + { + "epoch": 0.25, + "learning_rate": 4.887866721589089e-05, + "loss": 2.69, + "step": 5400 + }, + { + "epoch": 0.25, + "learning_rate": 4.8876378781637604e-05, + "loss": 2.7274, + "step": 5410 + }, + { + "epoch": 0.25, + "learning_rate": 4.887409034738432e-05, + "loss": 2.8764, + "step": 5420 + }, + { + "epoch": 0.25, + "learning_rate": 4.887180191313104e-05, + "loss": 2.8443, + "step": 5430 + }, + { + "epoch": 0.25, + "learning_rate": 4.886951347887775e-05, + "loss": 2.6021, + "step": 5440 + }, + { + "epoch": 0.25, + "learning_rate": 4.886722504462447e-05, + "loss": 2.6461, + "step": 5450 + }, + { + "epoch": 0.25, + "learning_rate": 4.886493661037119e-05, + "loss": 2.7554, + "step": 5460 + }, + { + "epoch": 0.25, + "learning_rate": 4.88626481761179e-05, + "loss": 2.6662, + "step": 5470 + }, + { + "epoch": 0.25, + "learning_rate": 4.886035974186462e-05, + "loss": 2.7161, + "step": 5480 + }, + { + "epoch": 0.25, + "learning_rate": 4.885807130761134e-05, + "loss": 2.734, + "step": 5490 + }, + { + "epoch": 0.25, + "learning_rate": 4.885578287335805e-05, + "loss": 2.5871, + "step": 5500 + }, + { + "epoch": 0.25, + "learning_rate": 4.8853494439104766e-05, + "loss": 2.6411, + "step": 5510 + }, + { + "epoch": 0.25, + "learning_rate": 4.885120600485149e-05, + "loss": 2.6922, + "step": 5520 + }, + { + "epoch": 0.25, + "learning_rate": 4.88489175705982e-05, + "loss": 2.8183, + "step": 5530 + }, + { + "epoch": 0.25, + "learning_rate": 4.8846629136344915e-05, + "loss": 2.8494, + "step": 5540 + }, + { + "epoch": 0.25, + "learning_rate": 4.884434070209163e-05, + "loss": 2.7624, + "step": 5550 + }, + { + "epoch": 0.25, + "learning_rate": 4.884205226783834e-05, + "loss": 2.4398, + "step": 5560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8839763833585064e-05, + "loss": 2.6945, + "step": 5570 + }, + { + "epoch": 0.25, + "learning_rate": 4.883747539933178e-05, + "loss": 2.5568, + "step": 5580 + }, + { + "epoch": 0.26, + "learning_rate": 4.883518696507849e-05, + "loss": 2.7731, + "step": 5590 + }, + { + "epoch": 0.26, + "learning_rate": 4.8832898530825213e-05, + "loss": 2.6838, + "step": 5600 + }, + { + "epoch": 0.26, + "learning_rate": 4.883061009657193e-05, + "loss": 2.6429, + "step": 5610 + }, + { + "epoch": 0.26, + "learning_rate": 4.882832166231864e-05, + "loss": 2.5941, + "step": 5620 + }, + { + "epoch": 0.26, + "learning_rate": 4.882603322806536e-05, + "loss": 2.6691, + "step": 5630 + }, + { + "epoch": 0.26, + "learning_rate": 4.882374479381208e-05, + "loss": 2.7819, + "step": 5640 + }, + { + "epoch": 0.26, + "learning_rate": 4.882145635955879e-05, + "loss": 2.7618, + "step": 5650 + }, + { + "epoch": 0.26, + "learning_rate": 4.881916792530551e-05, + "loss": 2.56, + "step": 5660 + }, + { + "epoch": 0.26, + "learning_rate": 4.8816879491052226e-05, + "loss": 2.7314, + "step": 5670 + }, + { + "epoch": 0.26, + "learning_rate": 4.881459105679894e-05, + "loss": 2.7079, + "step": 5680 + }, + { + "epoch": 0.26, + "learning_rate": 4.881230262254566e-05, + "loss": 2.5127, + "step": 5690 + }, + { + "epoch": 0.26, + "learning_rate": 4.8810014188292375e-05, + "loss": 2.5525, + "step": 5700 + }, + { + "epoch": 0.26, + "learning_rate": 4.880772575403909e-05, + "loss": 2.8079, + "step": 5710 + }, + { + "epoch": 0.26, + "learning_rate": 4.880543731978581e-05, + "loss": 2.7778, + "step": 5720 + }, + { + "epoch": 0.26, + "learning_rate": 4.8803148885532524e-05, + "loss": 2.7115, + "step": 5730 + }, + { + "epoch": 0.26, + "learning_rate": 4.880086045127923e-05, + "loss": 2.7208, + "step": 5740 + }, + { + "epoch": 0.26, + "learning_rate": 4.879857201702595e-05, + "loss": 2.5249, + "step": 5750 + }, + { + "epoch": 0.26, + "learning_rate": 4.879628358277267e-05, + "loss": 2.6811, + "step": 5760 + }, + { + "epoch": 0.26, + "learning_rate": 4.879399514851938e-05, + "loss": 2.6039, + "step": 5770 + }, + { + "epoch": 0.26, + "learning_rate": 4.87917067142661e-05, + "loss": 2.5605, + "step": 5780 + }, + { + "epoch": 0.26, + "learning_rate": 4.8789418280012816e-05, + "loss": 2.6976, + "step": 5790 + }, + { + "epoch": 0.26, + "learning_rate": 4.878712984575953e-05, + "loss": 2.6381, + "step": 5800 + }, + { + "epoch": 0.27, + "learning_rate": 4.878484141150625e-05, + "loss": 2.8265, + "step": 5810 + }, + { + "epoch": 0.27, + "learning_rate": 4.8782552977252965e-05, + "loss": 2.4658, + "step": 5820 + }, + { + "epoch": 0.27, + "learning_rate": 4.878026454299968e-05, + "loss": 2.6137, + "step": 5830 + }, + { + "epoch": 0.27, + "learning_rate": 4.87779761087464e-05, + "loss": 2.6636, + "step": 5840 + }, + { + "epoch": 0.27, + "learning_rate": 4.8775687674493114e-05, + "loss": 2.6783, + "step": 5850 + }, + { + "epoch": 0.27, + "learning_rate": 4.877339924023983e-05, + "loss": 2.8019, + "step": 5860 + }, + { + "epoch": 0.27, + "learning_rate": 4.877111080598655e-05, + "loss": 2.732, + "step": 5870 + }, + { + "epoch": 0.27, + "learning_rate": 4.876882237173326e-05, + "loss": 2.7777, + "step": 5880 + }, + { + "epoch": 0.27, + "learning_rate": 4.876653393747998e-05, + "loss": 2.5736, + "step": 5890 + }, + { + "epoch": 0.27, + "learning_rate": 4.87642455032267e-05, + "loss": 2.8001, + "step": 5900 + }, + { + "epoch": 0.27, + "learning_rate": 4.876195706897341e-05, + "loss": 2.7418, + "step": 5910 + }, + { + "epoch": 0.27, + "learning_rate": 4.875966863472013e-05, + "loss": 2.6527, + "step": 5920 + }, + { + "epoch": 0.27, + "learning_rate": 4.875738020046685e-05, + "loss": 2.7263, + "step": 5930 + }, + { + "epoch": 0.27, + "learning_rate": 4.8755091766213555e-05, + "loss": 2.4208, + "step": 5940 + }, + { + "epoch": 0.27, + "learning_rate": 4.8752803331960276e-05, + "loss": 2.5759, + "step": 5950 + }, + { + "epoch": 0.27, + "learning_rate": 4.875051489770699e-05, + "loss": 2.9091, + "step": 5960 + }, + { + "epoch": 0.27, + "learning_rate": 4.8748226463453704e-05, + "loss": 2.5897, + "step": 5970 + }, + { + "epoch": 0.27, + "learning_rate": 4.8745938029200425e-05, + "loss": 2.8677, + "step": 5980 + }, + { + "epoch": 0.27, + "learning_rate": 4.874364959494714e-05, + "loss": 2.6504, + "step": 5990 + }, + { + "epoch": 0.27, + "learning_rate": 4.874136116069385e-05, + "loss": 2.7434, + "step": 6000 + }, + { + "epoch": 0.27, + "learning_rate": 4.8739072726440574e-05, + "loss": 2.7148, + "step": 6010 + }, + { + "epoch": 0.27, + "learning_rate": 4.873678429218729e-05, + "loss": 2.6927, + "step": 6020 + }, + { + "epoch": 0.28, + "learning_rate": 4.8734495857934e-05, + "loss": 2.54, + "step": 6030 + }, + { + "epoch": 0.28, + "learning_rate": 4.873220742368072e-05, + "loss": 2.6091, + "step": 6040 + }, + { + "epoch": 0.28, + "learning_rate": 4.872991898942744e-05, + "loss": 2.74, + "step": 6050 + }, + { + "epoch": 0.28, + "learning_rate": 4.872763055517415e-05, + "loss": 2.6368, + "step": 6060 + }, + { + "epoch": 0.28, + "learning_rate": 4.872534212092087e-05, + "loss": 2.6985, + "step": 6070 + }, + { + "epoch": 0.28, + "learning_rate": 4.872305368666759e-05, + "loss": 2.5432, + "step": 6080 + }, + { + "epoch": 0.28, + "learning_rate": 4.87207652524143e-05, + "loss": 2.5034, + "step": 6090 + }, + { + "epoch": 0.28, + "learning_rate": 4.871847681816102e-05, + "loss": 2.443, + "step": 6100 + }, + { + "epoch": 0.28, + "learning_rate": 4.8716188383907736e-05, + "loss": 2.6483, + "step": 6110 + }, + { + "epoch": 0.28, + "learning_rate": 4.871389994965445e-05, + "loss": 2.6671, + "step": 6120 + }, + { + "epoch": 0.28, + "learning_rate": 4.871161151540117e-05, + "loss": 2.8266, + "step": 6130 + }, + { + "epoch": 0.28, + "learning_rate": 4.870932308114788e-05, + "loss": 2.6691, + "step": 6140 + }, + { + "epoch": 0.28, + "learning_rate": 4.870703464689459e-05, + "loss": 2.6002, + "step": 6150 + }, + { + "epoch": 0.28, + "learning_rate": 4.870474621264131e-05, + "loss": 2.6232, + "step": 6160 + }, + { + "epoch": 0.28, + "learning_rate": 4.870245777838803e-05, + "loss": 2.7423, + "step": 6170 + }, + { + "epoch": 0.28, + "learning_rate": 4.870016934413474e-05, + "loss": 2.6878, + "step": 6180 + }, + { + "epoch": 0.28, + "learning_rate": 4.869788090988146e-05, + "loss": 2.6619, + "step": 6190 + }, + { + "epoch": 0.28, + "learning_rate": 4.8695592475628177e-05, + "loss": 2.7035, + "step": 6200 + }, + { + "epoch": 0.28, + "learning_rate": 4.869330404137489e-05, + "loss": 2.6629, + "step": 6210 + }, + { + "epoch": 0.28, + "learning_rate": 4.869101560712161e-05, + "loss": 2.5376, + "step": 6220 + }, + { + "epoch": 0.28, + "learning_rate": 4.8688727172868326e-05, + "loss": 2.6913, + "step": 6230 + }, + { + "epoch": 0.28, + "learning_rate": 4.868643873861504e-05, + "loss": 2.7134, + "step": 6240 + }, + { + "epoch": 0.29, + "learning_rate": 4.868415030436176e-05, + "loss": 2.5514, + "step": 6250 + }, + { + "epoch": 0.29, + "learning_rate": 4.8681861870108475e-05, + "loss": 2.7627, + "step": 6260 + }, + { + "epoch": 0.29, + "learning_rate": 4.867957343585519e-05, + "loss": 2.5179, + "step": 6270 + }, + { + "epoch": 0.29, + "learning_rate": 4.867728500160191e-05, + "loss": 2.7009, + "step": 6280 + }, + { + "epoch": 0.29, + "learning_rate": 4.8674996567348624e-05, + "loss": 2.6736, + "step": 6290 + }, + { + "epoch": 0.29, + "learning_rate": 4.867270813309534e-05, + "loss": 2.6327, + "step": 6300 + }, + { + "epoch": 0.29, + "learning_rate": 4.867041969884206e-05, + "loss": 2.489, + "step": 6310 + }, + { + "epoch": 0.29, + "learning_rate": 4.866813126458877e-05, + "loss": 2.593, + "step": 6320 + }, + { + "epoch": 0.29, + "learning_rate": 4.866584283033549e-05, + "loss": 2.8399, + "step": 6330 + }, + { + "epoch": 0.29, + "learning_rate": 4.86635543960822e-05, + "loss": 2.6832, + "step": 6340 + }, + { + "epoch": 0.29, + "learning_rate": 4.8661265961828916e-05, + "loss": 2.7637, + "step": 6350 + }, + { + "epoch": 0.29, + "learning_rate": 4.865897752757563e-05, + "loss": 2.4986, + "step": 6360 + }, + { + "epoch": 0.29, + "learning_rate": 4.865668909332235e-05, + "loss": 2.59, + "step": 6370 + }, + { + "epoch": 0.29, + "learning_rate": 4.8654400659069065e-05, + "loss": 2.655, + "step": 6380 + }, + { + "epoch": 0.29, + "learning_rate": 4.865211222481578e-05, + "loss": 2.6297, + "step": 6390 + }, + { + "epoch": 0.29, + "learning_rate": 4.86498237905625e-05, + "loss": 2.6105, + "step": 6400 + }, + { + "epoch": 0.29, + "learning_rate": 4.8647535356309214e-05, + "loss": 2.6415, + "step": 6410 + }, + { + "epoch": 0.29, + "learning_rate": 4.864524692205593e-05, + "loss": 2.6693, + "step": 6420 + }, + { + "epoch": 0.29, + "learning_rate": 4.864295848780265e-05, + "loss": 2.538, + "step": 6430 + }, + { + "epoch": 0.29, + "learning_rate": 4.864067005354936e-05, + "loss": 2.6659, + "step": 6440 + }, + { + "epoch": 0.29, + "learning_rate": 4.863838161929608e-05, + "loss": 2.3872, + "step": 6450 + }, + { + "epoch": 0.29, + "learning_rate": 4.86360931850428e-05, + "loss": 2.7324, + "step": 6460 + }, + { + "epoch": 0.3, + "learning_rate": 4.863380475078951e-05, + "loss": 2.728, + "step": 6470 + }, + { + "epoch": 0.3, + "learning_rate": 4.8631516316536227e-05, + "loss": 2.7703, + "step": 6480 + }, + { + "epoch": 0.3, + "learning_rate": 4.862922788228295e-05, + "loss": 2.7566, + "step": 6490 + }, + { + "epoch": 0.3, + "learning_rate": 4.862693944802966e-05, + "loss": 2.6029, + "step": 6500 + }, + { + "epoch": 0.3, + "learning_rate": 4.8624651013776376e-05, + "loss": 2.5409, + "step": 6510 + }, + { + "epoch": 0.3, + "learning_rate": 4.86223625795231e-05, + "loss": 2.8619, + "step": 6520 + }, + { + "epoch": 0.3, + "learning_rate": 4.8620074145269804e-05, + "loss": 2.3743, + "step": 6530 + }, + { + "epoch": 0.3, + "learning_rate": 4.8617785711016525e-05, + "loss": 2.7786, + "step": 6540 + }, + { + "epoch": 0.3, + "learning_rate": 4.861549727676324e-05, + "loss": 2.7577, + "step": 6550 + }, + { + "epoch": 0.3, + "learning_rate": 4.861320884250995e-05, + "loss": 2.6817, + "step": 6560 + }, + { + "epoch": 0.3, + "learning_rate": 4.8610920408256674e-05, + "loss": 2.5866, + "step": 6570 + }, + { + "epoch": 0.3, + "learning_rate": 4.860863197400339e-05, + "loss": 2.4611, + "step": 6580 + }, + { + "epoch": 0.3, + "learning_rate": 4.86063435397501e-05, + "loss": 2.5759, + "step": 6590 + }, + { + "epoch": 0.3, + "learning_rate": 4.860405510549682e-05, + "loss": 2.4774, + "step": 6600 + }, + { + "epoch": 0.3, + "learning_rate": 4.860176667124354e-05, + "loss": 2.6212, + "step": 6610 + }, + { + "epoch": 0.3, + "learning_rate": 4.859947823699025e-05, + "loss": 2.756, + "step": 6620 + }, + { + "epoch": 0.3, + "learning_rate": 4.859718980273697e-05, + "loss": 2.5809, + "step": 6630 + }, + { + "epoch": 0.3, + "learning_rate": 4.8594901368483687e-05, + "loss": 2.5641, + "step": 6640 + }, + { + "epoch": 0.3, + "learning_rate": 4.85926129342304e-05, + "loss": 2.5527, + "step": 6650 + }, + { + "epoch": 0.3, + "learning_rate": 4.859032449997712e-05, + "loss": 2.8218, + "step": 6660 + }, + { + "epoch": 0.3, + "learning_rate": 4.8588036065723836e-05, + "loss": 2.6231, + "step": 6670 + }, + { + "epoch": 0.31, + "learning_rate": 4.858574763147055e-05, + "loss": 2.7367, + "step": 6680 + }, + { + "epoch": 0.31, + "learning_rate": 4.858345919721727e-05, + "loss": 2.5321, + "step": 6690 + }, + { + "epoch": 0.31, + "learning_rate": 4.8581170762963985e-05, + "loss": 2.6097, + "step": 6700 + }, + { + "epoch": 0.31, + "learning_rate": 4.85788823287107e-05, + "loss": 2.6572, + "step": 6710 + }, + { + "epoch": 0.31, + "learning_rate": 4.857659389445742e-05, + "loss": 2.4732, + "step": 6720 + }, + { + "epoch": 0.31, + "learning_rate": 4.857430546020413e-05, + "loss": 2.7225, + "step": 6730 + }, + { + "epoch": 0.31, + "learning_rate": 4.857201702595084e-05, + "loss": 2.5497, + "step": 6740 + }, + { + "epoch": 0.31, + "learning_rate": 4.856972859169756e-05, + "loss": 2.5199, + "step": 6750 + }, + { + "epoch": 0.31, + "learning_rate": 4.8567440157444276e-05, + "loss": 2.7116, + "step": 6760 + }, + { + "epoch": 0.31, + "learning_rate": 4.856515172319099e-05, + "loss": 2.6121, + "step": 6770 + }, + { + "epoch": 0.31, + "learning_rate": 4.856286328893771e-05, + "loss": 2.5073, + "step": 6780 + }, + { + "epoch": 0.31, + "learning_rate": 4.8560574854684426e-05, + "loss": 2.6674, + "step": 6790 + }, + { + "epoch": 0.31, + "learning_rate": 4.855828642043114e-05, + "loss": 2.5039, + "step": 6800 + }, + { + "epoch": 0.31, + "learning_rate": 4.855599798617786e-05, + "loss": 2.6449, + "step": 6810 + }, + { + "epoch": 0.31, + "learning_rate": 4.8553709551924575e-05, + "loss": 2.7008, + "step": 6820 + }, + { + "epoch": 0.31, + "learning_rate": 4.855142111767129e-05, + "loss": 2.6295, + "step": 6830 + }, + { + "epoch": 0.31, + "learning_rate": 4.854913268341801e-05, + "loss": 2.7345, + "step": 6840 + }, + { + "epoch": 0.31, + "learning_rate": 4.8546844249164724e-05, + "loss": 2.7104, + "step": 6850 + }, + { + "epoch": 0.31, + "learning_rate": 4.854455581491144e-05, + "loss": 2.6052, + "step": 6860 + }, + { + "epoch": 0.31, + "learning_rate": 4.854226738065816e-05, + "loss": 2.654, + "step": 6870 + }, + { + "epoch": 0.31, + "learning_rate": 4.853997894640487e-05, + "loss": 2.5875, + "step": 6880 + }, + { + "epoch": 0.31, + "learning_rate": 4.853769051215159e-05, + "loss": 2.63, + "step": 6890 + }, + { + "epoch": 0.32, + "learning_rate": 4.853540207789831e-05, + "loss": 2.603, + "step": 6900 + }, + { + "epoch": 0.32, + "learning_rate": 4.853311364364502e-05, + "loss": 2.5952, + "step": 6910 + }, + { + "epoch": 0.32, + "learning_rate": 4.8530825209391737e-05, + "loss": 2.5097, + "step": 6920 + }, + { + "epoch": 0.32, + "learning_rate": 4.852853677513845e-05, + "loss": 2.6311, + "step": 6930 + }, + { + "epoch": 0.32, + "learning_rate": 4.8526248340885165e-05, + "loss": 2.5162, + "step": 6940 + }, + { + "epoch": 0.32, + "learning_rate": 4.8523959906631886e-05, + "loss": 2.5743, + "step": 6950 + }, + { + "epoch": 0.32, + "learning_rate": 4.85216714723786e-05, + "loss": 2.5102, + "step": 6960 + }, + { + "epoch": 0.32, + "learning_rate": 4.8519383038125314e-05, + "loss": 2.5564, + "step": 6970 + }, + { + "epoch": 0.32, + "learning_rate": 4.8517094603872035e-05, + "loss": 2.7323, + "step": 6980 + }, + { + "epoch": 0.32, + "learning_rate": 4.851480616961875e-05, + "loss": 2.6003, + "step": 6990 + }, + { + "epoch": 0.32, + "learning_rate": 4.851251773536546e-05, + "loss": 2.6922, + "step": 7000 + }, + { + "epoch": 0.32, + "learning_rate": 4.8510229301112184e-05, + "loss": 2.6887, + "step": 7010 + }, + { + "epoch": 0.32, + "learning_rate": 4.85079408668589e-05, + "loss": 2.7063, + "step": 7020 + }, + { + "epoch": 0.32, + "learning_rate": 4.850565243260561e-05, + "loss": 2.7458, + "step": 7030 + }, + { + "epoch": 0.32, + "learning_rate": 4.850336399835233e-05, + "loss": 2.6305, + "step": 7040 + }, + { + "epoch": 0.32, + "learning_rate": 4.850107556409905e-05, + "loss": 2.6088, + "step": 7050 + }, + { + "epoch": 0.32, + "learning_rate": 4.849878712984576e-05, + "loss": 2.6396, + "step": 7060 + }, + { + "epoch": 0.32, + "learning_rate": 4.849649869559248e-05, + "loss": 2.6442, + "step": 7070 + }, + { + "epoch": 0.32, + "learning_rate": 4.8494210261339197e-05, + "loss": 2.5578, + "step": 7080 + }, + { + "epoch": 0.32, + "learning_rate": 4.849192182708591e-05, + "loss": 2.6304, + "step": 7090 + }, + { + "epoch": 0.32, + "learning_rate": 4.848963339283263e-05, + "loss": 2.7613, + "step": 7100 + }, + { + "epoch": 0.32, + "learning_rate": 4.8487344958579346e-05, + "loss": 2.5997, + "step": 7110 + }, + { + "epoch": 0.33, + "learning_rate": 4.848505652432605e-05, + "loss": 2.5993, + "step": 7120 + }, + { + "epoch": 0.33, + "learning_rate": 4.8482768090072774e-05, + "loss": 2.4996, + "step": 7130 + }, + { + "epoch": 0.33, + "learning_rate": 4.848047965581949e-05, + "loss": 2.604, + "step": 7140 + }, + { + "epoch": 0.33, + "learning_rate": 4.84781912215662e-05, + "loss": 2.6322, + "step": 7150 + }, + { + "epoch": 0.33, + "learning_rate": 4.847590278731292e-05, + "loss": 2.623, + "step": 7160 + }, + { + "epoch": 0.33, + "learning_rate": 4.847361435305964e-05, + "loss": 2.6787, + "step": 7170 + }, + { + "epoch": 0.33, + "learning_rate": 4.847132591880635e-05, + "loss": 2.5988, + "step": 7180 + }, + { + "epoch": 0.33, + "learning_rate": 4.846903748455307e-05, + "loss": 2.5075, + "step": 7190 + }, + { + "epoch": 0.33, + "learning_rate": 4.8466749050299786e-05, + "loss": 2.5275, + "step": 7200 + }, + { + "epoch": 0.33, + "learning_rate": 4.84644606160465e-05, + "loss": 2.5091, + "step": 7210 + }, + { + "epoch": 0.33, + "learning_rate": 4.846217218179322e-05, + "loss": 2.7113, + "step": 7220 + }, + { + "epoch": 0.33, + "learning_rate": 4.8459883747539936e-05, + "loss": 2.5282, + "step": 7230 + }, + { + "epoch": 0.33, + "learning_rate": 4.845759531328665e-05, + "loss": 2.7646, + "step": 7240 + }, + { + "epoch": 0.33, + "learning_rate": 4.845530687903337e-05, + "loss": 2.4846, + "step": 7250 + }, + { + "epoch": 0.33, + "learning_rate": 4.8453018444780085e-05, + "loss": 2.5807, + "step": 7260 + }, + { + "epoch": 0.33, + "learning_rate": 4.84507300105268e-05, + "loss": 2.5587, + "step": 7270 + }, + { + "epoch": 0.33, + "learning_rate": 4.844844157627352e-05, + "loss": 2.5217, + "step": 7280 + }, + { + "epoch": 0.33, + "learning_rate": 4.8446153142020234e-05, + "loss": 2.4994, + "step": 7290 + }, + { + "epoch": 0.33, + "learning_rate": 4.844386470776695e-05, + "loss": 2.6186, + "step": 7300 + }, + { + "epoch": 0.33, + "learning_rate": 4.844157627351367e-05, + "loss": 2.6483, + "step": 7310 + }, + { + "epoch": 0.33, + "learning_rate": 4.8439287839260376e-05, + "loss": 2.5136, + "step": 7320 + }, + { + "epoch": 0.33, + "learning_rate": 4.84369994050071e-05, + "loss": 2.5561, + "step": 7330 + }, + { + "epoch": 0.34, + "learning_rate": 4.843471097075381e-05, + "loss": 2.6446, + "step": 7340 + }, + { + "epoch": 0.34, + "learning_rate": 4.8432422536500526e-05, + "loss": 2.5345, + "step": 7350 + }, + { + "epoch": 0.34, + "learning_rate": 4.8430134102247246e-05, + "loss": 2.4426, + "step": 7360 + }, + { + "epoch": 0.34, + "learning_rate": 4.842784566799396e-05, + "loss": 2.4672, + "step": 7370 + }, + { + "epoch": 0.34, + "learning_rate": 4.8425557233740675e-05, + "loss": 2.4849, + "step": 7380 + }, + { + "epoch": 0.34, + "learning_rate": 4.8423268799487396e-05, + "loss": 2.6723, + "step": 7390 + }, + { + "epoch": 0.34, + "learning_rate": 4.842098036523411e-05, + "loss": 2.4709, + "step": 7400 + }, + { + "epoch": 0.34, + "learning_rate": 4.8418691930980824e-05, + "loss": 2.3804, + "step": 7410 + }, + { + "epoch": 0.34, + "learning_rate": 4.8416403496727545e-05, + "loss": 2.3274, + "step": 7420 + }, + { + "epoch": 0.34, + "learning_rate": 4.841411506247426e-05, + "loss": 2.491, + "step": 7430 + }, + { + "epoch": 0.34, + "learning_rate": 4.841182662822097e-05, + "loss": 2.5782, + "step": 7440 + }, + { + "epoch": 0.34, + "learning_rate": 4.8409538193967694e-05, + "loss": 2.7523, + "step": 7450 + }, + { + "epoch": 0.34, + "learning_rate": 4.840724975971441e-05, + "loss": 2.4707, + "step": 7460 + }, + { + "epoch": 0.34, + "learning_rate": 4.840496132546112e-05, + "loss": 2.479, + "step": 7470 + }, + { + "epoch": 0.34, + "learning_rate": 4.8402672891207836e-05, + "loss": 2.5186, + "step": 7480 + }, + { + "epoch": 0.34, + "learning_rate": 4.840038445695456e-05, + "loss": 2.4696, + "step": 7490 + }, + { + "epoch": 0.34, + "learning_rate": 4.839809602270127e-05, + "loss": 2.5089, + "step": 7500 + }, + { + "epoch": 0.34, + "learning_rate": 4.8395807588447986e-05, + "loss": 2.5794, + "step": 7510 + }, + { + "epoch": 0.34, + "learning_rate": 4.83935191541947e-05, + "loss": 2.4805, + "step": 7520 + }, + { + "epoch": 0.34, + "learning_rate": 4.8391230719941414e-05, + "loss": 2.5781, + "step": 7530 + }, + { + "epoch": 0.34, + "learning_rate": 4.8388942285688135e-05, + "loss": 2.5655, + "step": 7540 + }, + { + "epoch": 0.34, + "learning_rate": 4.838665385143485e-05, + "loss": 2.531, + "step": 7550 + }, + { + "epoch": 0.35, + "learning_rate": 4.838436541718156e-05, + "loss": 2.7172, + "step": 7560 + }, + { + "epoch": 0.35, + "learning_rate": 4.8382076982928284e-05, + "loss": 2.6663, + "step": 7570 + }, + { + "epoch": 0.35, + "learning_rate": 4.8379788548675e-05, + "loss": 2.5828, + "step": 7580 + }, + { + "epoch": 0.35, + "learning_rate": 4.837750011442171e-05, + "loss": 2.588, + "step": 7590 + }, + { + "epoch": 0.35, + "learning_rate": 4.837521168016843e-05, + "loss": 2.4599, + "step": 7600 + }, + { + "epoch": 0.35, + "learning_rate": 4.837292324591515e-05, + "loss": 2.6646, + "step": 7610 + }, + { + "epoch": 0.35, + "learning_rate": 4.837063481166186e-05, + "loss": 2.6683, + "step": 7620 + }, + { + "epoch": 0.35, + "learning_rate": 4.836834637740858e-05, + "loss": 2.6381, + "step": 7630 + }, + { + "epoch": 0.35, + "learning_rate": 4.8366057943155296e-05, + "loss": 2.6133, + "step": 7640 + }, + { + "epoch": 0.35, + "learning_rate": 4.836376950890201e-05, + "loss": 2.5615, + "step": 7650 + }, + { + "epoch": 0.35, + "learning_rate": 4.836148107464873e-05, + "loss": 2.6379, + "step": 7660 + }, + { + "epoch": 0.35, + "learning_rate": 4.8359192640395446e-05, + "loss": 2.5443, + "step": 7670 + }, + { + "epoch": 0.35, + "learning_rate": 4.835690420614216e-05, + "loss": 2.5427, + "step": 7680 + }, + { + "epoch": 0.35, + "learning_rate": 4.835461577188888e-05, + "loss": 2.574, + "step": 7690 + }, + { + "epoch": 0.35, + "learning_rate": 4.8352327337635595e-05, + "loss": 2.6668, + "step": 7700 + }, + { + "epoch": 0.35, + "learning_rate": 4.835003890338231e-05, + "loss": 2.6152, + "step": 7710 + }, + { + "epoch": 0.35, + "learning_rate": 4.834775046912902e-05, + "loss": 2.5713, + "step": 7720 + }, + { + "epoch": 0.35, + "learning_rate": 4.834546203487574e-05, + "loss": 2.5762, + "step": 7730 + }, + { + "epoch": 0.35, + "learning_rate": 4.834317360062245e-05, + "loss": 2.5321, + "step": 7740 + }, + { + "epoch": 0.35, + "learning_rate": 4.834088516636917e-05, + "loss": 2.5608, + "step": 7750 + }, + { + "epoch": 0.35, + "learning_rate": 4.8338596732115886e-05, + "loss": 2.6055, + "step": 7760 + }, + { + "epoch": 0.35, + "learning_rate": 4.83363082978626e-05, + "loss": 2.5336, + "step": 7770 + }, + { + "epoch": 0.36, + "learning_rate": 4.833401986360932e-05, + "loss": 2.527, + "step": 7780 + }, + { + "epoch": 0.36, + "learning_rate": 4.8331731429356036e-05, + "loss": 2.6777, + "step": 7790 + }, + { + "epoch": 0.36, + "learning_rate": 4.832944299510275e-05, + "loss": 2.5895, + "step": 7800 + }, + { + "epoch": 0.36, + "learning_rate": 4.832715456084947e-05, + "loss": 2.4565, + "step": 7810 + }, + { + "epoch": 0.36, + "learning_rate": 4.8324866126596185e-05, + "loss": 2.6261, + "step": 7820 + }, + { + "epoch": 0.36, + "learning_rate": 4.83225776923429e-05, + "loss": 2.6804, + "step": 7830 + }, + { + "epoch": 0.36, + "learning_rate": 4.832028925808962e-05, + "loss": 2.5074, + "step": 7840 + }, + { + "epoch": 0.36, + "learning_rate": 4.8318000823836334e-05, + "loss": 2.4658, + "step": 7850 + }, + { + "epoch": 0.36, + "learning_rate": 4.831571238958305e-05, + "loss": 2.4826, + "step": 7860 + }, + { + "epoch": 0.36, + "learning_rate": 4.831342395532977e-05, + "loss": 2.5284, + "step": 7870 + }, + { + "epoch": 0.36, + "learning_rate": 4.831113552107648e-05, + "loss": 2.5864, + "step": 7880 + }, + { + "epoch": 0.36, + "learning_rate": 4.83088470868232e-05, + "loss": 2.4288, + "step": 7890 + }, + { + "epoch": 0.36, + "learning_rate": 4.830655865256992e-05, + "loss": 2.5494, + "step": 7900 + }, + { + "epoch": 0.36, + "learning_rate": 4.8304270218316625e-05, + "loss": 2.5612, + "step": 7910 + }, + { + "epoch": 0.36, + "learning_rate": 4.8301981784063346e-05, + "loss": 2.5314, + "step": 7920 + }, + { + "epoch": 0.36, + "learning_rate": 4.829969334981006e-05, + "loss": 2.6902, + "step": 7930 + }, + { + "epoch": 0.36, + "learning_rate": 4.8297404915556775e-05, + "loss": 2.6755, + "step": 7940 + }, + { + "epoch": 0.36, + "learning_rate": 4.8295116481303496e-05, + "loss": 2.6582, + "step": 7950 + }, + { + "epoch": 0.36, + "learning_rate": 4.829282804705021e-05, + "loss": 2.6417, + "step": 7960 + }, + { + "epoch": 0.36, + "learning_rate": 4.8290539612796924e-05, + "loss": 2.5043, + "step": 7970 + }, + { + "epoch": 0.36, + "learning_rate": 4.8288251178543645e-05, + "loss": 2.5601, + "step": 7980 + }, + { + "epoch": 0.36, + "learning_rate": 4.828596274429036e-05, + "loss": 2.5943, + "step": 7990 + }, + { + "epoch": 0.37, + "learning_rate": 4.828367431003707e-05, + "loss": 2.7184, + "step": 8000 + }, + { + "epoch": 0.37, + "learning_rate": 4.8281385875783794e-05, + "loss": 2.4364, + "step": 8010 + }, + { + "epoch": 0.37, + "learning_rate": 4.827909744153051e-05, + "loss": 2.4442, + "step": 8020 + }, + { + "epoch": 0.37, + "learning_rate": 4.827680900727722e-05, + "loss": 2.5539, + "step": 8030 + }, + { + "epoch": 0.37, + "learning_rate": 4.827452057302394e-05, + "loss": 2.5392, + "step": 8040 + }, + { + "epoch": 0.37, + "learning_rate": 4.827223213877066e-05, + "loss": 2.5492, + "step": 8050 + }, + { + "epoch": 0.37, + "learning_rate": 4.826994370451737e-05, + "loss": 2.6908, + "step": 8060 + }, + { + "epoch": 0.37, + "learning_rate": 4.826765527026409e-05, + "loss": 2.6334, + "step": 8070 + }, + { + "epoch": 0.37, + "learning_rate": 4.8265366836010806e-05, + "loss": 2.5766, + "step": 8080 + }, + { + "epoch": 0.37, + "learning_rate": 4.826307840175752e-05, + "loss": 2.5178, + "step": 8090 + }, + { + "epoch": 0.37, + "learning_rate": 4.826078996750424e-05, + "loss": 2.5681, + "step": 8100 + }, + { + "epoch": 0.37, + "learning_rate": 4.825850153325095e-05, + "loss": 2.586, + "step": 8110 + }, + { + "epoch": 0.37, + "learning_rate": 4.825621309899766e-05, + "loss": 2.5505, + "step": 8120 + }, + { + "epoch": 0.37, + "learning_rate": 4.8253924664744384e-05, + "loss": 2.6139, + "step": 8130 + }, + { + "epoch": 0.37, + "learning_rate": 4.82516362304911e-05, + "loss": 2.6023, + "step": 8140 + }, + { + "epoch": 0.37, + "learning_rate": 4.824934779623781e-05, + "loss": 2.5252, + "step": 8150 + }, + { + "epoch": 0.37, + "learning_rate": 4.824705936198453e-05, + "loss": 2.7363, + "step": 8160 + }, + { + "epoch": 0.37, + "learning_rate": 4.824477092773125e-05, + "loss": 2.5805, + "step": 8170 + }, + { + "epoch": 0.37, + "learning_rate": 4.824248249347796e-05, + "loss": 2.46, + "step": 8180 + }, + { + "epoch": 0.37, + "learning_rate": 4.824019405922468e-05, + "loss": 2.6099, + "step": 8190 + }, + { + "epoch": 0.37, + "learning_rate": 4.8237905624971396e-05, + "loss": 2.5576, + "step": 8200 + }, + { + "epoch": 0.37, + "learning_rate": 4.823561719071811e-05, + "loss": 2.6261, + "step": 8210 + }, + { + "epoch": 0.38, + "learning_rate": 4.823332875646483e-05, + "loss": 2.4407, + "step": 8220 + }, + { + "epoch": 0.38, + "learning_rate": 4.8231040322211546e-05, + "loss": 2.6228, + "step": 8230 + }, + { + "epoch": 0.38, + "learning_rate": 4.822875188795826e-05, + "loss": 2.5148, + "step": 8240 + }, + { + "epoch": 0.38, + "learning_rate": 4.822646345370498e-05, + "loss": 2.6236, + "step": 8250 + }, + { + "epoch": 0.38, + "learning_rate": 4.8224175019451695e-05, + "loss": 2.5435, + "step": 8260 + }, + { + "epoch": 0.38, + "learning_rate": 4.822188658519841e-05, + "loss": 2.4203, + "step": 8270 + }, + { + "epoch": 0.38, + "learning_rate": 4.821959815094513e-05, + "loss": 2.5873, + "step": 8280 + }, + { + "epoch": 0.38, + "learning_rate": 4.8217309716691844e-05, + "loss": 2.7855, + "step": 8290 + }, + { + "epoch": 0.38, + "learning_rate": 4.821502128243856e-05, + "loss": 2.4898, + "step": 8300 + }, + { + "epoch": 0.38, + "learning_rate": 4.821273284818527e-05, + "loss": 2.5695, + "step": 8310 + }, + { + "epoch": 0.38, + "learning_rate": 4.8210444413931986e-05, + "loss": 2.4473, + "step": 8320 + }, + { + "epoch": 0.38, + "learning_rate": 4.820815597967871e-05, + "loss": 2.4651, + "step": 8330 + }, + { + "epoch": 0.38, + "learning_rate": 4.820586754542542e-05, + "loss": 2.4806, + "step": 8340 + }, + { + "epoch": 0.38, + "learning_rate": 4.8203579111172135e-05, + "loss": 2.6206, + "step": 8350 + }, + { + "epoch": 0.38, + "learning_rate": 4.8201290676918856e-05, + "loss": 2.4872, + "step": 8360 + }, + { + "epoch": 0.38, + "learning_rate": 4.819900224266557e-05, + "loss": 2.6685, + "step": 8370 + }, + { + "epoch": 0.38, + "learning_rate": 4.8196713808412285e-05, + "loss": 2.5091, + "step": 8380 + }, + { + "epoch": 0.38, + "learning_rate": 4.8194425374159006e-05, + "loss": 2.5531, + "step": 8390 + }, + { + "epoch": 0.38, + "learning_rate": 4.819213693990572e-05, + "loss": 2.6317, + "step": 8400 + }, + { + "epoch": 0.38, + "learning_rate": 4.8189848505652434e-05, + "loss": 2.4965, + "step": 8410 + }, + { + "epoch": 0.38, + "learning_rate": 4.8187560071399155e-05, + "loss": 2.6153, + "step": 8420 + }, + { + "epoch": 0.38, + "learning_rate": 4.818527163714587e-05, + "loss": 2.5401, + "step": 8430 + }, + { + "epoch": 0.39, + "learning_rate": 4.818298320289258e-05, + "loss": 2.4877, + "step": 8440 + }, + { + "epoch": 0.39, + "learning_rate": 4.8180694768639304e-05, + "loss": 2.5317, + "step": 8450 + }, + { + "epoch": 0.39, + "learning_rate": 4.817840633438602e-05, + "loss": 2.508, + "step": 8460 + }, + { + "epoch": 0.39, + "learning_rate": 4.817611790013273e-05, + "loss": 2.5201, + "step": 8470 + }, + { + "epoch": 0.39, + "learning_rate": 4.817382946587945e-05, + "loss": 2.5711, + "step": 8480 + }, + { + "epoch": 0.39, + "learning_rate": 4.817154103162617e-05, + "loss": 2.5767, + "step": 8490 + }, + { + "epoch": 0.39, + "learning_rate": 4.816925259737288e-05, + "loss": 2.4583, + "step": 8500 + }, + { + "epoch": 0.39, + "learning_rate": 4.8166964163119595e-05, + "loss": 2.5907, + "step": 8510 + }, + { + "epoch": 0.39, + "learning_rate": 4.816467572886631e-05, + "loss": 2.7128, + "step": 8520 + }, + { + "epoch": 0.39, + "learning_rate": 4.8162387294613024e-05, + "loss": 2.6016, + "step": 8530 + }, + { + "epoch": 0.39, + "learning_rate": 4.8160098860359745e-05, + "loss": 2.5184, + "step": 8540 + }, + { + "epoch": 0.39, + "learning_rate": 4.815781042610646e-05, + "loss": 2.5818, + "step": 8550 + }, + { + "epoch": 0.39, + "learning_rate": 4.815552199185317e-05, + "loss": 2.5131, + "step": 8560 + }, + { + "epoch": 0.39, + "learning_rate": 4.8153233557599894e-05, + "loss": 2.4645, + "step": 8570 + }, + { + "epoch": 0.39, + "learning_rate": 4.815094512334661e-05, + "loss": 2.5413, + "step": 8580 + }, + { + "epoch": 0.39, + "learning_rate": 4.814865668909332e-05, + "loss": 2.4652, + "step": 8590 + }, + { + "epoch": 0.39, + "learning_rate": 4.814636825484004e-05, + "loss": 2.6726, + "step": 8600 + }, + { + "epoch": 0.39, + "learning_rate": 4.814407982058676e-05, + "loss": 2.5531, + "step": 8610 + }, + { + "epoch": 0.39, + "learning_rate": 4.814179138633347e-05, + "loss": 2.4753, + "step": 8620 + }, + { + "epoch": 0.39, + "learning_rate": 4.813950295208019e-05, + "loss": 2.4839, + "step": 8630 + }, + { + "epoch": 0.39, + "learning_rate": 4.8137214517826906e-05, + "loss": 2.5354, + "step": 8640 + }, + { + "epoch": 0.39, + "learning_rate": 4.813492608357362e-05, + "loss": 2.4137, + "step": 8650 + }, + { + "epoch": 0.4, + "learning_rate": 4.813263764932034e-05, + "loss": 2.4872, + "step": 8660 + }, + { + "epoch": 0.4, + "learning_rate": 4.8130349215067055e-05, + "loss": 2.7044, + "step": 8670 + }, + { + "epoch": 0.4, + "learning_rate": 4.812806078081377e-05, + "loss": 2.5203, + "step": 8680 + }, + { + "epoch": 0.4, + "learning_rate": 4.812577234656049e-05, + "loss": 2.5716, + "step": 8690 + }, + { + "epoch": 0.4, + "learning_rate": 4.81234839123072e-05, + "loss": 2.4747, + "step": 8700 + }, + { + "epoch": 0.4, + "learning_rate": 4.812119547805391e-05, + "loss": 2.4187, + "step": 8710 + }, + { + "epoch": 0.4, + "learning_rate": 4.811890704380063e-05, + "loss": 2.5006, + "step": 8720 + }, + { + "epoch": 0.4, + "learning_rate": 4.811661860954735e-05, + "loss": 2.3529, + "step": 8730 + }, + { + "epoch": 0.4, + "learning_rate": 4.811433017529406e-05, + "loss": 2.4003, + "step": 8740 + }, + { + "epoch": 0.4, + "learning_rate": 4.811204174104078e-05, + "loss": 2.5774, + "step": 8750 + }, + { + "epoch": 0.4, + "learning_rate": 4.8109753306787496e-05, + "loss": 2.5144, + "step": 8760 + }, + { + "epoch": 0.4, + "learning_rate": 4.810746487253421e-05, + "loss": 2.4883, + "step": 8770 + }, + { + "epoch": 0.4, + "learning_rate": 4.810517643828093e-05, + "loss": 2.5076, + "step": 8780 + }, + { + "epoch": 0.4, + "learning_rate": 4.8102888004027645e-05, + "loss": 2.4752, + "step": 8790 + }, + { + "epoch": 0.4, + "learning_rate": 4.810059956977436e-05, + "loss": 2.5049, + "step": 8800 + }, + { + "epoch": 0.4, + "learning_rate": 4.809831113552108e-05, + "loss": 2.5886, + "step": 8810 + }, + { + "epoch": 0.4, + "learning_rate": 4.8096022701267795e-05, + "loss": 2.478, + "step": 8820 + }, + { + "epoch": 0.4, + "learning_rate": 4.809373426701451e-05, + "loss": 2.4351, + "step": 8830 + }, + { + "epoch": 0.4, + "learning_rate": 4.809144583276123e-05, + "loss": 2.3795, + "step": 8840 + }, + { + "epoch": 0.4, + "learning_rate": 4.8089157398507944e-05, + "loss": 2.5045, + "step": 8850 + }, + { + "epoch": 0.4, + "learning_rate": 4.808686896425466e-05, + "loss": 2.5118, + "step": 8860 + }, + { + "epoch": 0.41, + "learning_rate": 4.808458053000138e-05, + "loss": 2.4221, + "step": 8870 + }, + { + "epoch": 0.41, + "learning_rate": 4.808229209574809e-05, + "loss": 2.4182, + "step": 8880 + }, + { + "epoch": 0.41, + "learning_rate": 4.808000366149481e-05, + "loss": 2.5603, + "step": 8890 + }, + { + "epoch": 0.41, + "learning_rate": 4.807771522724152e-05, + "loss": 2.642, + "step": 8900 + }, + { + "epoch": 0.41, + "learning_rate": 4.8075426792988235e-05, + "loss": 2.5941, + "step": 8910 + }, + { + "epoch": 0.41, + "learning_rate": 4.8073138358734956e-05, + "loss": 2.6514, + "step": 8920 + }, + { + "epoch": 0.41, + "learning_rate": 4.807084992448167e-05, + "loss": 2.6656, + "step": 8930 + }, + { + "epoch": 0.41, + "learning_rate": 4.8068561490228385e-05, + "loss": 2.6024, + "step": 8940 + }, + { + "epoch": 0.41, + "learning_rate": 4.8066273055975105e-05, + "loss": 2.6309, + "step": 8950 + }, + { + "epoch": 0.41, + "learning_rate": 4.806398462172182e-05, + "loss": 2.4661, + "step": 8960 + }, + { + "epoch": 0.41, + "learning_rate": 4.8061696187468534e-05, + "loss": 2.4152, + "step": 8970 + }, + { + "epoch": 0.41, + "learning_rate": 4.8059407753215255e-05, + "loss": 2.4096, + "step": 8980 + }, + { + "epoch": 0.41, + "learning_rate": 4.805711931896197e-05, + "loss": 2.4659, + "step": 8990 + }, + { + "epoch": 0.41, + "learning_rate": 4.805483088470868e-05, + "loss": 2.3591, + "step": 9000 + }, + { + "epoch": 0.41, + "learning_rate": 4.8052542450455404e-05, + "loss": 2.6971, + "step": 9010 + }, + { + "epoch": 0.41, + "learning_rate": 4.805025401620212e-05, + "loss": 2.5978, + "step": 9020 + }, + { + "epoch": 0.41, + "learning_rate": 4.804796558194883e-05, + "loss": 2.5958, + "step": 9030 + }, + { + "epoch": 0.41, + "learning_rate": 4.804567714769555e-05, + "loss": 2.5052, + "step": 9040 + }, + { + "epoch": 0.41, + "learning_rate": 4.804338871344227e-05, + "loss": 2.5074, + "step": 9050 + }, + { + "epoch": 0.41, + "learning_rate": 4.804110027918898e-05, + "loss": 2.442, + "step": 9060 + }, + { + "epoch": 0.41, + "learning_rate": 4.80388118449357e-05, + "loss": 2.4899, + "step": 9070 + }, + { + "epoch": 0.41, + "learning_rate": 4.8036523410682416e-05, + "loss": 2.5669, + "step": 9080 + }, + { + "epoch": 0.42, + "learning_rate": 4.803423497642913e-05, + "loss": 2.5385, + "step": 9090 + }, + { + "epoch": 0.42, + "learning_rate": 4.8031946542175845e-05, + "loss": 2.5921, + "step": 9100 + }, + { + "epoch": 0.42, + "learning_rate": 4.802965810792256e-05, + "loss": 2.5148, + "step": 9110 + }, + { + "epoch": 0.42, + "learning_rate": 4.802736967366927e-05, + "loss": 2.6347, + "step": 9120 + }, + { + "epoch": 0.42, + "learning_rate": 4.8025081239415994e-05, + "loss": 2.4504, + "step": 9130 + }, + { + "epoch": 0.42, + "learning_rate": 4.802279280516271e-05, + "loss": 2.4635, + "step": 9140 + }, + { + "epoch": 0.42, + "learning_rate": 4.802050437090942e-05, + "loss": 2.3694, + "step": 9150 + }, + { + "epoch": 0.42, + "learning_rate": 4.801821593665614e-05, + "loss": 2.4794, + "step": 9160 + }, + { + "epoch": 0.42, + "learning_rate": 4.801592750240286e-05, + "loss": 2.4176, + "step": 9170 + }, + { + "epoch": 0.42, + "learning_rate": 4.801363906814957e-05, + "loss": 2.4976, + "step": 9180 + }, + { + "epoch": 0.42, + "learning_rate": 4.801135063389629e-05, + "loss": 2.6711, + "step": 9190 + }, + { + "epoch": 0.42, + "learning_rate": 4.8009062199643006e-05, + "loss": 2.4859, + "step": 9200 + }, + { + "epoch": 0.42, + "learning_rate": 4.800677376538972e-05, + "loss": 2.5434, + "step": 9210 + }, + { + "epoch": 0.42, + "learning_rate": 4.800448533113644e-05, + "loss": 2.4095, + "step": 9220 + }, + { + "epoch": 0.42, + "learning_rate": 4.8002196896883155e-05, + "loss": 2.4597, + "step": 9230 + }, + { + "epoch": 0.42, + "learning_rate": 4.799990846262987e-05, + "loss": 2.3098, + "step": 9240 + }, + { + "epoch": 0.42, + "learning_rate": 4.799762002837659e-05, + "loss": 2.5524, + "step": 9250 + }, + { + "epoch": 0.42, + "learning_rate": 4.7995331594123305e-05, + "loss": 2.3824, + "step": 9260 + }, + { + "epoch": 0.42, + "learning_rate": 4.799304315987002e-05, + "loss": 2.6007, + "step": 9270 + }, + { + "epoch": 0.42, + "learning_rate": 4.799075472561674e-05, + "loss": 2.4054, + "step": 9280 + }, + { + "epoch": 0.42, + "learning_rate": 4.7988466291363454e-05, + "loss": 2.5336, + "step": 9290 + }, + { + "epoch": 0.42, + "learning_rate": 4.798617785711017e-05, + "loss": 2.4023, + "step": 9300 + }, + { + "epoch": 0.43, + "learning_rate": 4.798388942285688e-05, + "loss": 2.3675, + "step": 9310 + }, + { + "epoch": 0.43, + "learning_rate": 4.7981600988603596e-05, + "loss": 2.3319, + "step": 9320 + }, + { + "epoch": 0.43, + "learning_rate": 4.797931255435032e-05, + "loss": 2.5489, + "step": 9330 + }, + { + "epoch": 0.43, + "learning_rate": 4.797702412009703e-05, + "loss": 2.5419, + "step": 9340 + }, + { + "epoch": 0.43, + "learning_rate": 4.7974735685843745e-05, + "loss": 2.3948, + "step": 9350 + }, + { + "epoch": 0.43, + "learning_rate": 4.7972447251590466e-05, + "loss": 2.3667, + "step": 9360 + }, + { + "epoch": 0.43, + "learning_rate": 4.797015881733718e-05, + "loss": 2.6186, + "step": 9370 + }, + { + "epoch": 0.43, + "learning_rate": 4.7967870383083894e-05, + "loss": 2.4423, + "step": 9380 + }, + { + "epoch": 0.43, + "learning_rate": 4.7965581948830615e-05, + "loss": 2.331, + "step": 9390 + }, + { + "epoch": 0.43, + "learning_rate": 4.796329351457733e-05, + "loss": 2.4957, + "step": 9400 + }, + { + "epoch": 0.43, + "learning_rate": 4.7961005080324044e-05, + "loss": 2.6605, + "step": 9410 + }, + { + "epoch": 0.43, + "learning_rate": 4.7958716646070765e-05, + "loss": 2.6334, + "step": 9420 + }, + { + "epoch": 0.43, + "learning_rate": 4.795642821181748e-05, + "loss": 2.5243, + "step": 9430 + }, + { + "epoch": 0.43, + "learning_rate": 4.795413977756419e-05, + "loss": 2.503, + "step": 9440 + }, + { + "epoch": 0.43, + "learning_rate": 4.7951851343310914e-05, + "loss": 2.5767, + "step": 9450 + }, + { + "epoch": 0.43, + "learning_rate": 4.794956290905763e-05, + "loss": 2.5174, + "step": 9460 + }, + { + "epoch": 0.43, + "learning_rate": 4.794727447480434e-05, + "loss": 2.4903, + "step": 9470 + }, + { + "epoch": 0.43, + "learning_rate": 4.794498604055106e-05, + "loss": 2.592, + "step": 9480 + }, + { + "epoch": 0.43, + "learning_rate": 4.794269760629777e-05, + "loss": 2.5382, + "step": 9490 + }, + { + "epoch": 0.43, + "learning_rate": 4.7940409172044484e-05, + "loss": 2.5052, + "step": 9500 + }, + { + "epoch": 0.43, + "learning_rate": 4.7938120737791205e-05, + "loss": 2.3965, + "step": 9510 + }, + { + "epoch": 0.43, + "learning_rate": 4.793583230353792e-05, + "loss": 2.6111, + "step": 9520 + }, + { + "epoch": 0.44, + "learning_rate": 4.7933543869284634e-05, + "loss": 2.5139, + "step": 9530 + }, + { + "epoch": 0.44, + "learning_rate": 4.7931255435031355e-05, + "loss": 2.3385, + "step": 9540 + }, + { + "epoch": 0.44, + "learning_rate": 4.792896700077807e-05, + "loss": 2.406, + "step": 9550 + }, + { + "epoch": 0.44, + "learning_rate": 4.792667856652478e-05, + "loss": 2.3884, + "step": 9560 + }, + { + "epoch": 0.44, + "learning_rate": 4.7924390132271504e-05, + "loss": 2.4414, + "step": 9570 + }, + { + "epoch": 0.44, + "learning_rate": 4.792210169801822e-05, + "loss": 2.4814, + "step": 9580 + }, + { + "epoch": 0.44, + "learning_rate": 4.791981326376493e-05, + "loss": 2.5667, + "step": 9590 + }, + { + "epoch": 0.44, + "learning_rate": 4.791752482951165e-05, + "loss": 2.5959, + "step": 9600 + }, + { + "epoch": 0.44, + "learning_rate": 4.791523639525837e-05, + "loss": 2.239, + "step": 9610 + }, + { + "epoch": 0.44, + "learning_rate": 4.791294796100508e-05, + "loss": 2.5778, + "step": 9620 + }, + { + "epoch": 0.44, + "learning_rate": 4.79106595267518e-05, + "loss": 2.3503, + "step": 9630 + }, + { + "epoch": 0.44, + "learning_rate": 4.7908371092498516e-05, + "loss": 2.5121, + "step": 9640 + }, + { + "epoch": 0.44, + "learning_rate": 4.790608265824523e-05, + "loss": 2.3898, + "step": 9650 + }, + { + "epoch": 0.44, + "learning_rate": 4.790379422399195e-05, + "loss": 2.5714, + "step": 9660 + }, + { + "epoch": 0.44, + "learning_rate": 4.7901505789738665e-05, + "loss": 2.4222, + "step": 9670 + }, + { + "epoch": 0.44, + "learning_rate": 4.789921735548538e-05, + "loss": 2.5705, + "step": 9680 + }, + { + "epoch": 0.44, + "learning_rate": 4.7896928921232094e-05, + "loss": 2.4389, + "step": 9690 + }, + { + "epoch": 0.44, + "learning_rate": 4.789464048697881e-05, + "loss": 2.4607, + "step": 9700 + }, + { + "epoch": 0.44, + "learning_rate": 4.789235205272553e-05, + "loss": 2.4833, + "step": 9710 + }, + { + "epoch": 0.44, + "learning_rate": 4.789006361847224e-05, + "loss": 2.6475, + "step": 9720 + }, + { + "epoch": 0.44, + "learning_rate": 4.788777518421896e-05, + "loss": 2.5329, + "step": 9730 + }, + { + "epoch": 0.44, + "learning_rate": 4.788548674996568e-05, + "loss": 2.464, + "step": 9740 + }, + { + "epoch": 0.45, + "learning_rate": 4.788319831571239e-05, + "loss": 2.4013, + "step": 9750 + }, + { + "epoch": 0.45, + "learning_rate": 4.7880909881459106e-05, + "loss": 2.4784, + "step": 9760 + }, + { + "epoch": 0.45, + "learning_rate": 4.787862144720583e-05, + "loss": 2.4445, + "step": 9770 + }, + { + "epoch": 0.45, + "learning_rate": 4.787633301295254e-05, + "loss": 2.5219, + "step": 9780 + }, + { + "epoch": 0.45, + "learning_rate": 4.7874044578699255e-05, + "loss": 2.6752, + "step": 9790 + }, + { + "epoch": 0.45, + "learning_rate": 4.787175614444597e-05, + "loss": 2.4733, + "step": 9800 + }, + { + "epoch": 0.45, + "learning_rate": 4.786946771019269e-05, + "loss": 2.3437, + "step": 9810 + }, + { + "epoch": 0.45, + "learning_rate": 4.7867179275939404e-05, + "loss": 2.652, + "step": 9820 + }, + { + "epoch": 0.45, + "learning_rate": 4.786489084168612e-05, + "loss": 2.47, + "step": 9830 + }, + { + "epoch": 0.45, + "learning_rate": 4.786260240743284e-05, + "loss": 2.5827, + "step": 9840 + }, + { + "epoch": 0.45, + "learning_rate": 4.7860313973179554e-05, + "loss": 2.7081, + "step": 9850 + }, + { + "epoch": 0.45, + "learning_rate": 4.785802553892627e-05, + "loss": 2.5675, + "step": 9860 + }, + { + "epoch": 0.45, + "learning_rate": 4.785573710467299e-05, + "loss": 2.5603, + "step": 9870 + }, + { + "epoch": 0.45, + "learning_rate": 4.78534486704197e-05, + "loss": 2.4665, + "step": 9880 + }, + { + "epoch": 0.45, + "learning_rate": 4.785116023616642e-05, + "loss": 2.5384, + "step": 9890 + }, + { + "epoch": 0.45, + "learning_rate": 4.784887180191313e-05, + "loss": 2.4818, + "step": 9900 + }, + { + "epoch": 0.45, + "learning_rate": 4.7846583367659845e-05, + "loss": 2.4326, + "step": 9910 + }, + { + "epoch": 0.45, + "learning_rate": 4.7844294933406566e-05, + "loss": 2.4038, + "step": 9920 + }, + { + "epoch": 0.45, + "learning_rate": 4.784200649915328e-05, + "loss": 2.3771, + "step": 9930 + }, + { + "epoch": 0.45, + "learning_rate": 4.7839718064899994e-05, + "loss": 2.5238, + "step": 9940 + }, + { + "epoch": 0.45, + "learning_rate": 4.7837429630646715e-05, + "loss": 2.6072, + "step": 9950 + }, + { + "epoch": 0.45, + "learning_rate": 4.783514119639343e-05, + "loss": 2.5663, + "step": 9960 + }, + { + "epoch": 0.46, + "learning_rate": 4.7832852762140144e-05, + "loss": 2.5498, + "step": 9970 + }, + { + "epoch": 0.46, + "learning_rate": 4.7830564327886864e-05, + "loss": 2.5463, + "step": 9980 + }, + { + "epoch": 0.46, + "learning_rate": 4.782827589363358e-05, + "loss": 2.6455, + "step": 9990 + }, + { + "epoch": 0.46, + "learning_rate": 4.782598745938029e-05, + "loss": 2.6951, + "step": 10000 + }, + { + "epoch": 0.46, + "learning_rate": 4.7823699025127014e-05, + "loss": 2.5799, + "step": 10010 + }, + { + "epoch": 0.46, + "learning_rate": 4.782141059087373e-05, + "loss": 2.4259, + "step": 10020 + }, + { + "epoch": 0.46, + "learning_rate": 4.781912215662044e-05, + "loss": 2.2816, + "step": 10030 + }, + { + "epoch": 0.46, + "learning_rate": 4.781683372236716e-05, + "loss": 2.5339, + "step": 10040 + }, + { + "epoch": 0.46, + "learning_rate": 4.781454528811388e-05, + "loss": 2.6723, + "step": 10050 + }, + { + "epoch": 0.46, + "learning_rate": 4.781225685386059e-05, + "loss": 2.6393, + "step": 10060 + }, + { + "epoch": 0.46, + "learning_rate": 4.780996841960731e-05, + "loss": 2.4065, + "step": 10070 + }, + { + "epoch": 0.46, + "learning_rate": 4.7807679985354026e-05, + "loss": 2.4601, + "step": 10080 + }, + { + "epoch": 0.46, + "learning_rate": 4.7805391551100733e-05, + "loss": 2.7115, + "step": 10090 + }, + { + "epoch": 0.46, + "learning_rate": 4.7803103116847454e-05, + "loss": 2.4182, + "step": 10100 + }, + { + "epoch": 0.46, + "learning_rate": 4.780081468259417e-05, + "loss": 2.4533, + "step": 10110 + }, + { + "epoch": 0.46, + "learning_rate": 4.779852624834088e-05, + "loss": 2.4312, + "step": 10120 + }, + { + "epoch": 0.46, + "learning_rate": 4.7796237814087604e-05, + "loss": 2.4738, + "step": 10130 + }, + { + "epoch": 0.46, + "learning_rate": 4.779394937983432e-05, + "loss": 2.5474, + "step": 10140 + }, + { + "epoch": 0.46, + "learning_rate": 4.779166094558103e-05, + "loss": 2.328, + "step": 10150 + }, + { + "epoch": 0.46, + "learning_rate": 4.778937251132775e-05, + "loss": 2.4257, + "step": 10160 + }, + { + "epoch": 0.46, + "learning_rate": 4.778708407707447e-05, + "loss": 2.4171, + "step": 10170 + }, + { + "epoch": 0.46, + "learning_rate": 4.778479564282118e-05, + "loss": 2.4215, + "step": 10180 + }, + { + "epoch": 0.47, + "learning_rate": 4.77825072085679e-05, + "loss": 2.4224, + "step": 10190 + }, + { + "epoch": 0.47, + "learning_rate": 4.7780218774314616e-05, + "loss": 2.5248, + "step": 10200 + }, + { + "epoch": 0.47, + "learning_rate": 4.777793034006133e-05, + "loss": 2.4769, + "step": 10210 + }, + { + "epoch": 0.47, + "learning_rate": 4.777564190580805e-05, + "loss": 2.5698, + "step": 10220 + }, + { + "epoch": 0.47, + "learning_rate": 4.7773353471554765e-05, + "loss": 2.5372, + "step": 10230 + }, + { + "epoch": 0.47, + "learning_rate": 4.777106503730148e-05, + "loss": 2.486, + "step": 10240 + }, + { + "epoch": 0.47, + "learning_rate": 4.77687766030482e-05, + "loss": 2.4284, + "step": 10250 + }, + { + "epoch": 0.47, + "learning_rate": 4.7766488168794914e-05, + "loss": 2.5408, + "step": 10260 + }, + { + "epoch": 0.47, + "learning_rate": 4.776419973454163e-05, + "loss": 2.5585, + "step": 10270 + }, + { + "epoch": 0.47, + "learning_rate": 4.776191130028834e-05, + "loss": 2.5371, + "step": 10280 + }, + { + "epoch": 0.47, + "learning_rate": 4.775962286603506e-05, + "loss": 2.5955, + "step": 10290 + }, + { + "epoch": 0.47, + "learning_rate": 4.775733443178178e-05, + "loss": 2.4826, + "step": 10300 + }, + { + "epoch": 0.47, + "learning_rate": 4.775504599752849e-05, + "loss": 2.4955, + "step": 10310 + }, + { + "epoch": 0.47, + "learning_rate": 4.7752757563275206e-05, + "loss": 2.5477, + "step": 10320 + }, + { + "epoch": 0.47, + "learning_rate": 4.775046912902193e-05, + "loss": 2.3415, + "step": 10330 + }, + { + "epoch": 0.47, + "learning_rate": 4.774818069476864e-05, + "loss": 2.4569, + "step": 10340 + }, + { + "epoch": 0.47, + "learning_rate": 4.7745892260515355e-05, + "loss": 2.4084, + "step": 10350 + }, + { + "epoch": 0.47, + "learning_rate": 4.7743603826262076e-05, + "loss": 2.2895, + "step": 10360 + }, + { + "epoch": 0.47, + "learning_rate": 4.774131539200879e-05, + "loss": 2.4724, + "step": 10370 + }, + { + "epoch": 0.47, + "learning_rate": 4.7739026957755504e-05, + "loss": 2.5041, + "step": 10380 + }, + { + "epoch": 0.47, + "learning_rate": 4.7736738523502225e-05, + "loss": 2.3171, + "step": 10390 + }, + { + "epoch": 0.47, + "learning_rate": 4.773445008924894e-05, + "loss": 2.5456, + "step": 10400 + }, + { + "epoch": 0.48, + "learning_rate": 4.7732161654995654e-05, + "loss": 2.5014, + "step": 10410 + }, + { + "epoch": 0.48, + "learning_rate": 4.7729873220742374e-05, + "loss": 2.4247, + "step": 10420 + }, + { + "epoch": 0.48, + "learning_rate": 4.772758478648909e-05, + "loss": 2.5736, + "step": 10430 + }, + { + "epoch": 0.48, + "learning_rate": 4.77252963522358e-05, + "loss": 2.532, + "step": 10440 + }, + { + "epoch": 0.48, + "learning_rate": 4.7723007917982524e-05, + "loss": 2.4522, + "step": 10450 + }, + { + "epoch": 0.48, + "learning_rate": 4.772071948372924e-05, + "loss": 2.4223, + "step": 10460 + }, + { + "epoch": 0.48, + "learning_rate": 4.771843104947595e-05, + "loss": 2.3001, + "step": 10470 + }, + { + "epoch": 0.48, + "learning_rate": 4.7716142615222666e-05, + "loss": 2.4276, + "step": 10480 + }, + { + "epoch": 0.48, + "learning_rate": 4.771385418096938e-05, + "loss": 2.5513, + "step": 10490 + }, + { + "epoch": 0.48, + "learning_rate": 4.7711565746716094e-05, + "loss": 2.3925, + "step": 10500 + }, + { + "epoch": 0.48, + "learning_rate": 4.7709277312462815e-05, + "loss": 2.441, + "step": 10510 + }, + { + "epoch": 0.48, + "learning_rate": 4.770698887820953e-05, + "loss": 2.409, + "step": 10520 + }, + { + "epoch": 0.48, + "learning_rate": 4.7704700443956243e-05, + "loss": 2.4389, + "step": 10530 + }, + { + "epoch": 0.48, + "learning_rate": 4.7702412009702964e-05, + "loss": 2.4297, + "step": 10540 + }, + { + "epoch": 0.48, + "learning_rate": 4.770012357544968e-05, + "loss": 2.2659, + "step": 10550 + }, + { + "epoch": 0.48, + "learning_rate": 4.769783514119639e-05, + "loss": 2.3624, + "step": 10560 + }, + { + "epoch": 0.48, + "learning_rate": 4.7695546706943114e-05, + "loss": 2.4643, + "step": 10570 + }, + { + "epoch": 0.48, + "learning_rate": 4.769325827268983e-05, + "loss": 2.363, + "step": 10580 + }, + { + "epoch": 0.48, + "learning_rate": 4.769096983843654e-05, + "loss": 2.5992, + "step": 10590 + }, + { + "epoch": 0.48, + "learning_rate": 4.768868140418326e-05, + "loss": 2.4871, + "step": 10600 + }, + { + "epoch": 0.48, + "learning_rate": 4.768639296992998e-05, + "loss": 2.4541, + "step": 10610 + }, + { + "epoch": 0.48, + "learning_rate": 4.768410453567669e-05, + "loss": 2.4925, + "step": 10620 + }, + { + "epoch": 0.49, + "learning_rate": 4.768181610142341e-05, + "loss": 2.5078, + "step": 10630 + }, + { + "epoch": 0.49, + "learning_rate": 4.7679527667170126e-05, + "loss": 2.52, + "step": 10640 + }, + { + "epoch": 0.49, + "learning_rate": 4.767723923291684e-05, + "loss": 2.4908, + "step": 10650 + }, + { + "epoch": 0.49, + "learning_rate": 4.767495079866356e-05, + "loss": 2.4405, + "step": 10660 + }, + { + "epoch": 0.49, + "learning_rate": 4.7672662364410275e-05, + "loss": 2.521, + "step": 10670 + }, + { + "epoch": 0.49, + "learning_rate": 4.767037393015699e-05, + "loss": 2.4349, + "step": 10680 + }, + { + "epoch": 0.49, + "learning_rate": 4.7668085495903703e-05, + "loss": 2.3396, + "step": 10690 + }, + { + "epoch": 0.49, + "learning_rate": 4.766579706165042e-05, + "loss": 2.2763, + "step": 10700 + }, + { + "epoch": 0.49, + "learning_rate": 4.766350862739714e-05, + "loss": 2.421, + "step": 10710 + }, + { + "epoch": 0.49, + "learning_rate": 4.766122019314385e-05, + "loss": 2.4553, + "step": 10720 + }, + { + "epoch": 0.49, + "learning_rate": 4.765893175889057e-05, + "loss": 2.3664, + "step": 10730 + }, + { + "epoch": 0.49, + "learning_rate": 4.765664332463729e-05, + "loss": 2.5308, + "step": 10740 + }, + { + "epoch": 0.49, + "learning_rate": 4.7654354890384e-05, + "loss": 2.3722, + "step": 10750 + }, + { + "epoch": 0.49, + "learning_rate": 4.7652066456130716e-05, + "loss": 2.4598, + "step": 10760 + }, + { + "epoch": 0.49, + "learning_rate": 4.764977802187744e-05, + "loss": 2.4391, + "step": 10770 + }, + { + "epoch": 0.49, + "learning_rate": 4.764748958762415e-05, + "loss": 2.5109, + "step": 10780 + }, + { + "epoch": 0.49, + "learning_rate": 4.7645201153370865e-05, + "loss": 2.6127, + "step": 10790 + }, + { + "epoch": 0.49, + "learning_rate": 4.7642912719117586e-05, + "loss": 2.2723, + "step": 10800 + }, + { + "epoch": 0.49, + "learning_rate": 4.76406242848643e-05, + "loss": 2.4474, + "step": 10810 + }, + { + "epoch": 0.49, + "learning_rate": 4.7638335850611014e-05, + "loss": 2.4624, + "step": 10820 + }, + { + "epoch": 0.49, + "learning_rate": 4.7636047416357735e-05, + "loss": 2.3981, + "step": 10830 + }, + { + "epoch": 0.49, + "learning_rate": 4.763375898210445e-05, + "loss": 2.5574, + "step": 10840 + }, + { + "epoch": 0.5, + "learning_rate": 4.7631470547851163e-05, + "loss": 2.4176, + "step": 10850 + }, + { + "epoch": 0.5, + "learning_rate": 4.7629182113597884e-05, + "loss": 2.3263, + "step": 10860 + }, + { + "epoch": 0.5, + "learning_rate": 4.76268936793446e-05, + "loss": 2.2531, + "step": 10870 + }, + { + "epoch": 0.5, + "learning_rate": 4.7624605245091306e-05, + "loss": 2.3908, + "step": 10880 + }, + { + "epoch": 0.5, + "learning_rate": 4.762231681083803e-05, + "loss": 2.6001, + "step": 10890 + }, + { + "epoch": 0.5, + "learning_rate": 4.762002837658474e-05, + "loss": 2.4099, + "step": 10900 + }, + { + "epoch": 0.5, + "learning_rate": 4.7617739942331455e-05, + "loss": 2.4761, + "step": 10910 + }, + { + "epoch": 0.5, + "learning_rate": 4.7615451508078176e-05, + "loss": 2.4877, + "step": 10920 + }, + { + "epoch": 0.5, + "learning_rate": 4.761316307382489e-05, + "loss": 2.3782, + "step": 10930 + }, + { + "epoch": 0.5, + "learning_rate": 4.7610874639571604e-05, + "loss": 2.4546, + "step": 10940 + }, + { + "epoch": 0.5, + "learning_rate": 4.7608586205318325e-05, + "loss": 2.3586, + "step": 10950 + }, + { + "epoch": 0.5, + "learning_rate": 4.760629777106504e-05, + "loss": 2.488, + "step": 10960 + }, + { + "epoch": 0.5, + "learning_rate": 4.7604009336811753e-05, + "loss": 2.6488, + "step": 10970 + }, + { + "epoch": 0.5, + "learning_rate": 4.7601720902558474e-05, + "loss": 2.3287, + "step": 10980 + }, + { + "epoch": 0.5, + "learning_rate": 4.759943246830519e-05, + "loss": 2.426, + "step": 10990 + }, + { + "epoch": 0.5, + "learning_rate": 4.75971440340519e-05, + "loss": 2.5453, + "step": 11000 + }, + { + "epoch": 0.5, + "learning_rate": 4.7594855599798624e-05, + "loss": 2.5256, + "step": 11010 + }, + { + "epoch": 0.5, + "learning_rate": 4.759256716554534e-05, + "loss": 2.6035, + "step": 11020 + }, + { + "epoch": 0.5, + "learning_rate": 4.759027873129205e-05, + "loss": 2.3711, + "step": 11030 + }, + { + "epoch": 0.5, + "learning_rate": 4.758799029703877e-05, + "loss": 2.279, + "step": 11040 + }, + { + "epoch": 0.5, + "learning_rate": 4.758570186278549e-05, + "loss": 2.3476, + "step": 11050 + }, + { + "epoch": 0.51, + "learning_rate": 4.75834134285322e-05, + "loss": 2.3864, + "step": 11060 + }, + { + "epoch": 0.51, + "learning_rate": 4.7581124994278915e-05, + "loss": 2.4579, + "step": 11070 + }, + { + "epoch": 0.51, + "learning_rate": 4.757883656002563e-05, + "loss": 2.251, + "step": 11080 + }, + { + "epoch": 0.51, + "learning_rate": 4.757654812577234e-05, + "loss": 2.3625, + "step": 11090 + }, + { + "epoch": 0.51, + "learning_rate": 4.7574259691519064e-05, + "loss": 2.4586, + "step": 11100 + }, + { + "epoch": 0.51, + "learning_rate": 4.757197125726578e-05, + "loss": 2.5331, + "step": 11110 + }, + { + "epoch": 0.51, + "learning_rate": 4.756968282301249e-05, + "loss": 2.4914, + "step": 11120 + }, + { + "epoch": 0.51, + "learning_rate": 4.7567394388759213e-05, + "loss": 2.4793, + "step": 11130 + }, + { + "epoch": 0.51, + "learning_rate": 4.756510595450593e-05, + "loss": 2.3124, + "step": 11140 + }, + { + "epoch": 0.51, + "learning_rate": 4.756281752025264e-05, + "loss": 2.3458, + "step": 11150 + }, + { + "epoch": 0.51, + "learning_rate": 4.756052908599936e-05, + "loss": 2.4674, + "step": 11160 + }, + { + "epoch": 0.51, + "learning_rate": 4.755824065174608e-05, + "loss": 2.428, + "step": 11170 + }, + { + "epoch": 0.51, + "learning_rate": 4.755595221749279e-05, + "loss": 2.5912, + "step": 11180 + }, + { + "epoch": 0.51, + "learning_rate": 4.755366378323951e-05, + "loss": 2.5087, + "step": 11190 + }, + { + "epoch": 0.51, + "learning_rate": 4.7551375348986226e-05, + "loss": 2.4712, + "step": 11200 + }, + { + "epoch": 0.51, + "learning_rate": 4.754908691473294e-05, + "loss": 2.5694, + "step": 11210 + }, + { + "epoch": 0.51, + "learning_rate": 4.754679848047966e-05, + "loss": 2.6408, + "step": 11220 + }, + { + "epoch": 0.51, + "learning_rate": 4.7544510046226375e-05, + "loss": 2.4675, + "step": 11230 + }, + { + "epoch": 0.51, + "learning_rate": 4.754222161197309e-05, + "loss": 2.3694, + "step": 11240 + }, + { + "epoch": 0.51, + "learning_rate": 4.753993317771981e-05, + "loss": 2.4658, + "step": 11250 + }, + { + "epoch": 0.51, + "learning_rate": 4.7537644743466524e-05, + "loss": 2.4074, + "step": 11260 + }, + { + "epoch": 0.51, + "learning_rate": 4.753535630921324e-05, + "loss": 2.4344, + "step": 11270 + }, + { + "epoch": 0.52, + "learning_rate": 4.753306787495995e-05, + "loss": 2.387, + "step": 11280 + }, + { + "epoch": 0.52, + "learning_rate": 4.753077944070667e-05, + "loss": 2.269, + "step": 11290 + }, + { + "epoch": 0.52, + "learning_rate": 4.752849100645339e-05, + "loss": 2.4307, + "step": 11300 + }, + { + "epoch": 0.52, + "learning_rate": 4.75262025722001e-05, + "loss": 2.5995, + "step": 11310 + }, + { + "epoch": 0.52, + "learning_rate": 4.7523914137946816e-05, + "loss": 2.4835, + "step": 11320 + }, + { + "epoch": 0.52, + "learning_rate": 4.752162570369354e-05, + "loss": 2.5428, + "step": 11330 + }, + { + "epoch": 0.52, + "learning_rate": 4.751933726944025e-05, + "loss": 2.4802, + "step": 11340 + }, + { + "epoch": 0.52, + "learning_rate": 4.7517048835186965e-05, + "loss": 2.452, + "step": 11350 + }, + { + "epoch": 0.52, + "learning_rate": 4.7514760400933686e-05, + "loss": 2.2569, + "step": 11360 + }, + { + "epoch": 0.52, + "learning_rate": 4.75124719666804e-05, + "loss": 2.5582, + "step": 11370 + }, + { + "epoch": 0.52, + "learning_rate": 4.7510183532427114e-05, + "loss": 2.4104, + "step": 11380 + }, + { + "epoch": 0.52, + "learning_rate": 4.7507895098173835e-05, + "loss": 2.4496, + "step": 11390 + }, + { + "epoch": 0.52, + "learning_rate": 4.750560666392055e-05, + "loss": 2.4586, + "step": 11400 + }, + { + "epoch": 0.52, + "learning_rate": 4.7503318229667263e-05, + "loss": 2.4268, + "step": 11410 + }, + { + "epoch": 0.52, + "learning_rate": 4.7501029795413984e-05, + "loss": 2.3161, + "step": 11420 + }, + { + "epoch": 0.52, + "learning_rate": 4.74987413611607e-05, + "loss": 2.4652, + "step": 11430 + }, + { + "epoch": 0.52, + "learning_rate": 4.749645292690741e-05, + "loss": 2.3014, + "step": 11440 + }, + { + "epoch": 0.52, + "learning_rate": 4.7494164492654133e-05, + "loss": 2.4633, + "step": 11450 + }, + { + "epoch": 0.52, + "learning_rate": 4.749187605840085e-05, + "loss": 2.3784, + "step": 11460 + }, + { + "epoch": 0.52, + "learning_rate": 4.7489587624147555e-05, + "loss": 2.5262, + "step": 11470 + }, + { + "epoch": 0.52, + "learning_rate": 4.7487299189894276e-05, + "loss": 2.4124, + "step": 11480 + }, + { + "epoch": 0.52, + "learning_rate": 4.748501075564099e-05, + "loss": 2.4753, + "step": 11490 + }, + { + "epoch": 0.53, + "learning_rate": 4.7482722321387704e-05, + "loss": 2.4224, + "step": 11500 + }, + { + "epoch": 0.53, + "learning_rate": 4.7480433887134425e-05, + "loss": 2.4929, + "step": 11510 + }, + { + "epoch": 0.53, + "learning_rate": 4.747814545288114e-05, + "loss": 2.3438, + "step": 11520 + }, + { + "epoch": 0.53, + "learning_rate": 4.747585701862785e-05, + "loss": 2.4286, + "step": 11530 + }, + { + "epoch": 0.53, + "learning_rate": 4.7473568584374574e-05, + "loss": 2.3516, + "step": 11540 + }, + { + "epoch": 0.53, + "learning_rate": 4.747128015012129e-05, + "loss": 2.4137, + "step": 11550 + }, + { + "epoch": 0.53, + "learning_rate": 4.7468991715868e-05, + "loss": 2.3481, + "step": 11560 + }, + { + "epoch": 0.53, + "learning_rate": 4.7466703281614723e-05, + "loss": 2.2856, + "step": 11570 + }, + { + "epoch": 0.53, + "learning_rate": 4.746441484736144e-05, + "loss": 2.4066, + "step": 11580 + }, + { + "epoch": 0.53, + "learning_rate": 4.746212641310815e-05, + "loss": 2.2657, + "step": 11590 + }, + { + "epoch": 0.53, + "learning_rate": 4.745983797885487e-05, + "loss": 2.4656, + "step": 11600 + }, + { + "epoch": 0.53, + "learning_rate": 4.745754954460159e-05, + "loss": 2.5749, + "step": 11610 + }, + { + "epoch": 0.53, + "learning_rate": 4.74552611103483e-05, + "loss": 2.4728, + "step": 11620 + }, + { + "epoch": 0.53, + "learning_rate": 4.745297267609502e-05, + "loss": 2.4439, + "step": 11630 + }, + { + "epoch": 0.53, + "learning_rate": 4.7450684241841736e-05, + "loss": 2.3964, + "step": 11640 + }, + { + "epoch": 0.53, + "learning_rate": 4.744839580758845e-05, + "loss": 2.3424, + "step": 11650 + }, + { + "epoch": 0.53, + "learning_rate": 4.744610737333517e-05, + "loss": 2.5954, + "step": 11660 + }, + { + "epoch": 0.53, + "learning_rate": 4.744381893908188e-05, + "loss": 2.3345, + "step": 11670 + }, + { + "epoch": 0.53, + "learning_rate": 4.74415305048286e-05, + "loss": 2.3142, + "step": 11680 + }, + { + "epoch": 0.53, + "learning_rate": 4.743924207057531e-05, + "loss": 2.5515, + "step": 11690 + }, + { + "epoch": 0.53, + "learning_rate": 4.743695363632203e-05, + "loss": 2.4432, + "step": 11700 + }, + { + "epoch": 0.53, + "learning_rate": 4.743466520206875e-05, + "loss": 2.459, + "step": 11710 + }, + { + "epoch": 0.54, + "learning_rate": 4.743237676781546e-05, + "loss": 2.5662, + "step": 11720 + }, + { + "epoch": 0.54, + "learning_rate": 4.743008833356218e-05, + "loss": 2.4513, + "step": 11730 + }, + { + "epoch": 0.54, + "learning_rate": 4.74277998993089e-05, + "loss": 2.3803, + "step": 11740 + }, + { + "epoch": 0.54, + "learning_rate": 4.742551146505561e-05, + "loss": 2.4526, + "step": 11750 + }, + { + "epoch": 0.54, + "learning_rate": 4.7423223030802326e-05, + "loss": 2.4028, + "step": 11760 + }, + { + "epoch": 0.54, + "learning_rate": 4.742093459654905e-05, + "loss": 2.4073, + "step": 11770 + }, + { + "epoch": 0.54, + "learning_rate": 4.741864616229576e-05, + "loss": 2.4882, + "step": 11780 + }, + { + "epoch": 0.54, + "learning_rate": 4.7416357728042475e-05, + "loss": 2.2168, + "step": 11790 + }, + { + "epoch": 0.54, + "learning_rate": 4.7414069293789196e-05, + "loss": 2.3842, + "step": 11800 + }, + { + "epoch": 0.54, + "learning_rate": 4.741178085953591e-05, + "loss": 2.4738, + "step": 11810 + }, + { + "epoch": 0.54, + "learning_rate": 4.7409492425282624e-05, + "loss": 2.4862, + "step": 11820 + }, + { + "epoch": 0.54, + "learning_rate": 4.7407203991029345e-05, + "loss": 2.2994, + "step": 11830 + }, + { + "epoch": 0.54, + "learning_rate": 4.740491555677606e-05, + "loss": 2.2938, + "step": 11840 + }, + { + "epoch": 0.54, + "learning_rate": 4.740262712252277e-05, + "loss": 2.6097, + "step": 11850 + }, + { + "epoch": 0.54, + "learning_rate": 4.740033868826949e-05, + "loss": 2.5511, + "step": 11860 + }, + { + "epoch": 0.54, + "learning_rate": 4.73980502540162e-05, + "loss": 2.5772, + "step": 11870 + }, + { + "epoch": 0.54, + "learning_rate": 4.7395761819762916e-05, + "loss": 2.4539, + "step": 11880 + }, + { + "epoch": 0.54, + "learning_rate": 4.739347338550964e-05, + "loss": 2.3915, + "step": 11890 + }, + { + "epoch": 0.54, + "learning_rate": 4.739118495125635e-05, + "loss": 2.3369, + "step": 11900 + }, + { + "epoch": 0.54, + "learning_rate": 4.7388896517003065e-05, + "loss": 2.5115, + "step": 11910 + }, + { + "epoch": 0.54, + "learning_rate": 4.7386608082749786e-05, + "loss": 2.4355, + "step": 11920 + }, + { + "epoch": 0.54, + "learning_rate": 4.73843196484965e-05, + "loss": 2.2688, + "step": 11930 + }, + { + "epoch": 0.55, + "learning_rate": 4.7382031214243214e-05, + "loss": 2.5251, + "step": 11940 + }, + { + "epoch": 0.55, + "learning_rate": 4.7379742779989935e-05, + "loss": 2.4591, + "step": 11950 + }, + { + "epoch": 0.55, + "learning_rate": 4.737745434573665e-05, + "loss": 2.3582, + "step": 11960 + }, + { + "epoch": 0.55, + "learning_rate": 4.737516591148336e-05, + "loss": 2.4197, + "step": 11970 + }, + { + "epoch": 0.55, + "learning_rate": 4.7372877477230084e-05, + "loss": 2.3609, + "step": 11980 + }, + { + "epoch": 0.55, + "learning_rate": 4.73705890429768e-05, + "loss": 2.3847, + "step": 11990 + }, + { + "epoch": 0.55, + "learning_rate": 4.736830060872351e-05, + "loss": 2.4748, + "step": 12000 + }, + { + "epoch": 0.55, + "learning_rate": 4.736601217447023e-05, + "loss": 2.6183, + "step": 12010 + }, + { + "epoch": 0.55, + "learning_rate": 4.736372374021695e-05, + "loss": 2.4806, + "step": 12020 + }, + { + "epoch": 0.55, + "learning_rate": 4.736143530596366e-05, + "loss": 2.3203, + "step": 12030 + }, + { + "epoch": 0.55, + "learning_rate": 4.735914687171038e-05, + "loss": 2.5022, + "step": 12040 + }, + { + "epoch": 0.55, + "learning_rate": 4.73568584374571e-05, + "loss": 2.4721, + "step": 12050 + }, + { + "epoch": 0.55, + "learning_rate": 4.735457000320381e-05, + "loss": 2.5584, + "step": 12060 + }, + { + "epoch": 0.55, + "learning_rate": 4.7352281568950525e-05, + "loss": 2.5219, + "step": 12070 + }, + { + "epoch": 0.55, + "learning_rate": 4.734999313469724e-05, + "loss": 2.4887, + "step": 12080 + }, + { + "epoch": 0.55, + "learning_rate": 4.734770470044396e-05, + "loss": 2.4061, + "step": 12090 + }, + { + "epoch": 0.55, + "learning_rate": 4.7345416266190674e-05, + "loss": 2.4451, + "step": 12100 + }, + { + "epoch": 0.55, + "learning_rate": 4.734312783193739e-05, + "loss": 2.5277, + "step": 12110 + }, + { + "epoch": 0.55, + "learning_rate": 4.734083939768411e-05, + "loss": 2.3891, + "step": 12120 + }, + { + "epoch": 0.55, + "learning_rate": 4.733855096343082e-05, + "loss": 2.3473, + "step": 12130 + }, + { + "epoch": 0.55, + "learning_rate": 4.733626252917754e-05, + "loss": 2.3845, + "step": 12140 + }, + { + "epoch": 0.55, + "learning_rate": 4.733397409492425e-05, + "loss": 2.2788, + "step": 12150 + }, + { + "epoch": 0.56, + "learning_rate": 4.733168566067097e-05, + "loss": 2.2998, + "step": 12160 + }, + { + "epoch": 0.56, + "learning_rate": 4.732939722641769e-05, + "loss": 2.4849, + "step": 12170 + }, + { + "epoch": 0.56, + "learning_rate": 4.73271087921644e-05, + "loss": 2.3921, + "step": 12180 + }, + { + "epoch": 0.56, + "learning_rate": 4.732482035791112e-05, + "loss": 2.4635, + "step": 12190 + }, + { + "epoch": 0.56, + "learning_rate": 4.7322531923657836e-05, + "loss": 2.3999, + "step": 12200 + }, + { + "epoch": 0.56, + "learning_rate": 4.732024348940455e-05, + "loss": 2.3772, + "step": 12210 + }, + { + "epoch": 0.56, + "learning_rate": 4.731795505515127e-05, + "loss": 2.2757, + "step": 12220 + }, + { + "epoch": 0.56, + "learning_rate": 4.7315666620897985e-05, + "loss": 2.4624, + "step": 12230 + }, + { + "epoch": 0.56, + "learning_rate": 4.73133781866447e-05, + "loss": 2.3496, + "step": 12240 + }, + { + "epoch": 0.56, + "learning_rate": 4.731108975239142e-05, + "loss": 2.5335, + "step": 12250 + }, + { + "epoch": 0.56, + "learning_rate": 4.730880131813813e-05, + "loss": 2.4936, + "step": 12260 + }, + { + "epoch": 0.56, + "learning_rate": 4.730651288388485e-05, + "loss": 2.3932, + "step": 12270 + }, + { + "epoch": 0.56, + "learning_rate": 4.730422444963156e-05, + "loss": 2.4017, + "step": 12280 + }, + { + "epoch": 0.56, + "learning_rate": 4.7301936015378277e-05, + "loss": 2.4033, + "step": 12290 + }, + { + "epoch": 0.56, + "learning_rate": 4.7299647581125e-05, + "loss": 2.6011, + "step": 12300 + }, + { + "epoch": 0.56, + "learning_rate": 4.729735914687171e-05, + "loss": 2.4405, + "step": 12310 + }, + { + "epoch": 0.56, + "learning_rate": 4.7295070712618426e-05, + "loss": 2.5269, + "step": 12320 + }, + { + "epoch": 0.56, + "learning_rate": 4.729278227836515e-05, + "loss": 2.5057, + "step": 12330 + }, + { + "epoch": 0.56, + "learning_rate": 4.729049384411186e-05, + "loss": 2.5175, + "step": 12340 + }, + { + "epoch": 0.56, + "learning_rate": 4.7288205409858575e-05, + "loss": 2.266, + "step": 12350 + }, + { + "epoch": 0.56, + "learning_rate": 4.7285916975605296e-05, + "loss": 2.3182, + "step": 12360 + }, + { + "epoch": 0.56, + "learning_rate": 4.728362854135201e-05, + "loss": 2.4067, + "step": 12370 + }, + { + "epoch": 0.57, + "learning_rate": 4.7281340107098724e-05, + "loss": 2.5653, + "step": 12380 + }, + { + "epoch": 0.57, + "learning_rate": 4.7279051672845445e-05, + "loss": 2.5013, + "step": 12390 + }, + { + "epoch": 0.57, + "learning_rate": 4.727676323859216e-05, + "loss": 2.6026, + "step": 12400 + }, + { + "epoch": 0.57, + "learning_rate": 4.727447480433887e-05, + "loss": 2.4167, + "step": 12410 + }, + { + "epoch": 0.57, + "learning_rate": 4.7272186370085594e-05, + "loss": 2.5231, + "step": 12420 + }, + { + "epoch": 0.57, + "learning_rate": 4.726989793583231e-05, + "loss": 2.3266, + "step": 12430 + }, + { + "epoch": 0.57, + "learning_rate": 4.726760950157902e-05, + "loss": 2.5778, + "step": 12440 + }, + { + "epoch": 0.57, + "learning_rate": 4.726532106732574e-05, + "loss": 2.6265, + "step": 12450 + }, + { + "epoch": 0.57, + "learning_rate": 4.726303263307245e-05, + "loss": 2.3835, + "step": 12460 + }, + { + "epoch": 0.57, + "learning_rate": 4.7260744198819165e-05, + "loss": 2.4495, + "step": 12470 + }, + { + "epoch": 0.57, + "learning_rate": 4.7258455764565886e-05, + "loss": 2.5442, + "step": 12480 + }, + { + "epoch": 0.57, + "learning_rate": 4.72561673303126e-05, + "loss": 2.5227, + "step": 12490 + }, + { + "epoch": 0.57, + "learning_rate": 4.7253878896059314e-05, + "loss": 2.5473, + "step": 12500 + }, + { + "epoch": 0.57, + "learning_rate": 4.7251590461806035e-05, + "loss": 2.4424, + "step": 12510 + }, + { + "epoch": 0.57, + "learning_rate": 4.724930202755275e-05, + "loss": 2.4274, + "step": 12520 + }, + { + "epoch": 0.57, + "learning_rate": 4.724701359329946e-05, + "loss": 2.4511, + "step": 12530 + }, + { + "epoch": 0.57, + "learning_rate": 4.7244725159046184e-05, + "loss": 2.2158, + "step": 12540 + }, + { + "epoch": 0.57, + "learning_rate": 4.72424367247929e-05, + "loss": 2.5181, + "step": 12550 + }, + { + "epoch": 0.57, + "learning_rate": 4.724014829053961e-05, + "loss": 2.3674, + "step": 12560 + }, + { + "epoch": 0.57, + "learning_rate": 4.723785985628633e-05, + "loss": 2.6321, + "step": 12570 + }, + { + "epoch": 0.57, + "learning_rate": 4.723557142203305e-05, + "loss": 2.3881, + "step": 12580 + }, + { + "epoch": 0.57, + "learning_rate": 4.723328298777976e-05, + "loss": 2.5139, + "step": 12590 + }, + { + "epoch": 0.58, + "learning_rate": 4.723099455352648e-05, + "loss": 2.4597, + "step": 12600 + }, + { + "epoch": 0.58, + "learning_rate": 4.7228706119273197e-05, + "loss": 2.4001, + "step": 12610 + }, + { + "epoch": 0.58, + "learning_rate": 4.722641768501991e-05, + "loss": 2.4194, + "step": 12620 + }, + { + "epoch": 0.58, + "learning_rate": 4.722412925076663e-05, + "loss": 2.3315, + "step": 12630 + }, + { + "epoch": 0.58, + "learning_rate": 4.7221840816513346e-05, + "loss": 2.5334, + "step": 12640 + }, + { + "epoch": 0.58, + "learning_rate": 4.721955238226006e-05, + "loss": 2.3351, + "step": 12650 + }, + { + "epoch": 0.58, + "learning_rate": 4.7217263948006774e-05, + "loss": 2.4023, + "step": 12660 + }, + { + "epoch": 0.58, + "learning_rate": 4.721497551375349e-05, + "loss": 2.4533, + "step": 12670 + }, + { + "epoch": 0.58, + "learning_rate": 4.721268707950021e-05, + "loss": 2.4789, + "step": 12680 + }, + { + "epoch": 0.58, + "learning_rate": 4.721039864524692e-05, + "loss": 2.3654, + "step": 12690 + }, + { + "epoch": 0.58, + "learning_rate": 4.720811021099364e-05, + "loss": 2.4534, + "step": 12700 + }, + { + "epoch": 0.58, + "learning_rate": 4.720582177674036e-05, + "loss": 2.4059, + "step": 12710 + }, + { + "epoch": 0.58, + "learning_rate": 4.720353334248707e-05, + "loss": 2.4726, + "step": 12720 + }, + { + "epoch": 0.58, + "learning_rate": 4.7201244908233787e-05, + "loss": 2.5129, + "step": 12730 + }, + { + "epoch": 0.58, + "learning_rate": 4.719895647398051e-05, + "loss": 2.4124, + "step": 12740 + }, + { + "epoch": 0.58, + "learning_rate": 4.719666803972722e-05, + "loss": 2.6052, + "step": 12750 + }, + { + "epoch": 0.58, + "learning_rate": 4.7194379605473936e-05, + "loss": 2.4838, + "step": 12760 + }, + { + "epoch": 0.58, + "learning_rate": 4.7192091171220657e-05, + "loss": 2.4921, + "step": 12770 + }, + { + "epoch": 0.58, + "learning_rate": 4.718980273696737e-05, + "loss": 2.4695, + "step": 12780 + }, + { + "epoch": 0.58, + "learning_rate": 4.7187514302714085e-05, + "loss": 2.3834, + "step": 12790 + }, + { + "epoch": 0.58, + "learning_rate": 4.7185225868460806e-05, + "loss": 2.4955, + "step": 12800 + }, + { + "epoch": 0.58, + "learning_rate": 4.718293743420752e-05, + "loss": 2.5316, + "step": 12810 + }, + { + "epoch": 0.59, + "learning_rate": 4.7180648999954234e-05, + "loss": 2.5042, + "step": 12820 + }, + { + "epoch": 0.59, + "learning_rate": 4.7178360565700955e-05, + "loss": 2.4018, + "step": 12830 + }, + { + "epoch": 0.59, + "learning_rate": 4.717607213144767e-05, + "loss": 2.4403, + "step": 12840 + }, + { + "epoch": 0.59, + "learning_rate": 4.7173783697194376e-05, + "loss": 2.4678, + "step": 12850 + }, + { + "epoch": 0.59, + "learning_rate": 4.71714952629411e-05, + "loss": 2.4937, + "step": 12860 + }, + { + "epoch": 0.59, + "learning_rate": 4.716920682868781e-05, + "loss": 2.4366, + "step": 12870 + }, + { + "epoch": 0.59, + "learning_rate": 4.7166918394434526e-05, + "loss": 2.4178, + "step": 12880 + }, + { + "epoch": 0.59, + "learning_rate": 4.7164629960181247e-05, + "loss": 2.3645, + "step": 12890 + }, + { + "epoch": 0.59, + "learning_rate": 4.716234152592796e-05, + "loss": 2.5206, + "step": 12900 + }, + { + "epoch": 0.59, + "learning_rate": 4.7160053091674675e-05, + "loss": 2.3971, + "step": 12910 + }, + { + "epoch": 0.59, + "learning_rate": 4.7157764657421396e-05, + "loss": 2.4581, + "step": 12920 + }, + { + "epoch": 0.59, + "learning_rate": 4.715547622316811e-05, + "loss": 2.3665, + "step": 12930 + }, + { + "epoch": 0.59, + "learning_rate": 4.7153187788914824e-05, + "loss": 2.3638, + "step": 12940 + }, + { + "epoch": 0.59, + "learning_rate": 4.7150899354661545e-05, + "loss": 2.4613, + "step": 12950 + }, + { + "epoch": 0.59, + "learning_rate": 4.714861092040826e-05, + "loss": 2.3943, + "step": 12960 + }, + { + "epoch": 0.59, + "learning_rate": 4.714632248615497e-05, + "loss": 2.511, + "step": 12970 + }, + { + "epoch": 0.59, + "learning_rate": 4.7144034051901694e-05, + "loss": 2.3228, + "step": 12980 + }, + { + "epoch": 0.59, + "learning_rate": 4.714174561764841e-05, + "loss": 2.327, + "step": 12990 + }, + { + "epoch": 0.59, + "learning_rate": 4.713945718339512e-05, + "loss": 2.3646, + "step": 13000 + }, + { + "epoch": 0.59, + "learning_rate": 4.713716874914184e-05, + "loss": 2.3014, + "step": 13010 + }, + { + "epoch": 0.59, + "learning_rate": 4.713488031488856e-05, + "loss": 2.3736, + "step": 13020 + }, + { + "epoch": 0.59, + "learning_rate": 4.713259188063527e-05, + "loss": 2.2308, + "step": 13030 + }, + { + "epoch": 0.6, + "learning_rate": 4.713030344638199e-05, + "loss": 2.5022, + "step": 13040 + }, + { + "epoch": 0.6, + "learning_rate": 4.71280150121287e-05, + "loss": 2.3569, + "step": 13050 + }, + { + "epoch": 0.6, + "learning_rate": 4.712572657787542e-05, + "loss": 2.4698, + "step": 13060 + }, + { + "epoch": 0.6, + "learning_rate": 4.7123438143622135e-05, + "loss": 2.4983, + "step": 13070 + }, + { + "epoch": 0.6, + "learning_rate": 4.712114970936885e-05, + "loss": 2.4369, + "step": 13080 + }, + { + "epoch": 0.6, + "learning_rate": 4.711886127511557e-05, + "loss": 2.4027, + "step": 13090 + }, + { + "epoch": 0.6, + "learning_rate": 4.7116572840862284e-05, + "loss": 2.3425, + "step": 13100 + }, + { + "epoch": 0.6, + "learning_rate": 4.7114284406609e-05, + "loss": 2.6171, + "step": 13110 + }, + { + "epoch": 0.6, + "learning_rate": 4.711199597235572e-05, + "loss": 2.3407, + "step": 13120 + }, + { + "epoch": 0.6, + "learning_rate": 4.710970753810243e-05, + "loss": 2.2714, + "step": 13130 + }, + { + "epoch": 0.6, + "learning_rate": 4.710741910384915e-05, + "loss": 2.4728, + "step": 13140 + }, + { + "epoch": 0.6, + "learning_rate": 4.710513066959587e-05, + "loss": 2.4691, + "step": 13150 + }, + { + "epoch": 0.6, + "learning_rate": 4.710284223534258e-05, + "loss": 2.3251, + "step": 13160 + }, + { + "epoch": 0.6, + "learning_rate": 4.7100553801089296e-05, + "loss": 2.2302, + "step": 13170 + }, + { + "epoch": 0.6, + "learning_rate": 4.709826536683602e-05, + "loss": 2.4588, + "step": 13180 + }, + { + "epoch": 0.6, + "learning_rate": 4.709597693258273e-05, + "loss": 2.2778, + "step": 13190 + }, + { + "epoch": 0.6, + "learning_rate": 4.7093688498329446e-05, + "loss": 2.3084, + "step": 13200 + }, + { + "epoch": 0.6, + "learning_rate": 4.7091400064076167e-05, + "loss": 2.3952, + "step": 13210 + }, + { + "epoch": 0.6, + "learning_rate": 4.708911162982288e-05, + "loss": 2.48, + "step": 13220 + }, + { + "epoch": 0.6, + "learning_rate": 4.7086823195569595e-05, + "loss": 2.3173, + "step": 13230 + }, + { + "epoch": 0.6, + "learning_rate": 4.708453476131631e-05, + "loss": 2.2694, + "step": 13240 + }, + { + "epoch": 0.61, + "learning_rate": 4.708224632706302e-05, + "loss": 2.4534, + "step": 13250 + }, + { + "epoch": 0.61, + "learning_rate": 4.707995789280974e-05, + "loss": 2.3544, + "step": 13260 + }, + { + "epoch": 0.61, + "learning_rate": 4.707766945855646e-05, + "loss": 2.5172, + "step": 13270 + }, + { + "epoch": 0.61, + "learning_rate": 4.707538102430317e-05, + "loss": 2.6517, + "step": 13280 + }, + { + "epoch": 0.61, + "learning_rate": 4.7073092590049886e-05, + "loss": 2.4843, + "step": 13290 + }, + { + "epoch": 0.61, + "learning_rate": 4.707080415579661e-05, + "loss": 2.3516, + "step": 13300 + }, + { + "epoch": 0.61, + "learning_rate": 4.706851572154332e-05, + "loss": 2.5803, + "step": 13310 + }, + { + "epoch": 0.61, + "learning_rate": 4.7066227287290036e-05, + "loss": 2.4354, + "step": 13320 + }, + { + "epoch": 0.61, + "learning_rate": 4.7063938853036757e-05, + "loss": 2.4567, + "step": 13330 + }, + { + "epoch": 0.61, + "learning_rate": 4.706165041878347e-05, + "loss": 2.2692, + "step": 13340 + }, + { + "epoch": 0.61, + "learning_rate": 4.7059361984530185e-05, + "loss": 2.4237, + "step": 13350 + }, + { + "epoch": 0.61, + "learning_rate": 4.7057073550276906e-05, + "loss": 2.4367, + "step": 13360 + }, + { + "epoch": 0.61, + "learning_rate": 4.705478511602362e-05, + "loss": 2.3526, + "step": 13370 + }, + { + "epoch": 0.61, + "learning_rate": 4.7052496681770334e-05, + "loss": 2.3193, + "step": 13380 + }, + { + "epoch": 0.61, + "learning_rate": 4.7050208247517055e-05, + "loss": 2.4629, + "step": 13390 + }, + { + "epoch": 0.61, + "learning_rate": 4.704791981326377e-05, + "loss": 2.3296, + "step": 13400 + }, + { + "epoch": 0.61, + "learning_rate": 4.704563137901048e-05, + "loss": 2.4398, + "step": 13410 + }, + { + "epoch": 0.61, + "learning_rate": 4.7043342944757204e-05, + "loss": 2.3305, + "step": 13420 + }, + { + "epoch": 0.61, + "learning_rate": 4.704105451050392e-05, + "loss": 2.2806, + "step": 13430 + }, + { + "epoch": 0.61, + "learning_rate": 4.7038766076250626e-05, + "loss": 2.3806, + "step": 13440 + }, + { + "epoch": 0.61, + "learning_rate": 4.7036477641997346e-05, + "loss": 2.4166, + "step": 13450 + }, + { + "epoch": 0.61, + "learning_rate": 4.703418920774406e-05, + "loss": 2.1978, + "step": 13460 + }, + { + "epoch": 0.62, + "learning_rate": 4.7031900773490775e-05, + "loss": 2.2902, + "step": 13470 + }, + { + "epoch": 0.62, + "learning_rate": 4.7029612339237496e-05, + "loss": 2.3657, + "step": 13480 + }, + { + "epoch": 0.62, + "learning_rate": 4.702732390498421e-05, + "loss": 2.3334, + "step": 13490 + }, + { + "epoch": 0.62, + "learning_rate": 4.7025035470730924e-05, + "loss": 2.4528, + "step": 13500 + }, + { + "epoch": 0.62, + "learning_rate": 4.7022747036477645e-05, + "loss": 2.3754, + "step": 13510 + }, + { + "epoch": 0.62, + "learning_rate": 4.702045860222436e-05, + "loss": 2.3537, + "step": 13520 + }, + { + "epoch": 0.62, + "learning_rate": 4.701817016797107e-05, + "loss": 2.4485, + "step": 13530 + }, + { + "epoch": 0.62, + "learning_rate": 4.7015881733717794e-05, + "loss": 2.5266, + "step": 13540 + }, + { + "epoch": 0.62, + "learning_rate": 4.701359329946451e-05, + "loss": 2.5417, + "step": 13550 + }, + { + "epoch": 0.62, + "learning_rate": 4.701130486521122e-05, + "loss": 2.4745, + "step": 13560 + }, + { + "epoch": 0.62, + "learning_rate": 4.700901643095794e-05, + "loss": 2.3031, + "step": 13570 + }, + { + "epoch": 0.62, + "learning_rate": 4.700672799670466e-05, + "loss": 2.5789, + "step": 13580 + }, + { + "epoch": 0.62, + "learning_rate": 4.700443956245137e-05, + "loss": 2.5256, + "step": 13590 + }, + { + "epoch": 0.62, + "learning_rate": 4.700215112819809e-05, + "loss": 2.3836, + "step": 13600 + }, + { + "epoch": 0.62, + "learning_rate": 4.6999862693944806e-05, + "loss": 2.4568, + "step": 13610 + }, + { + "epoch": 0.62, + "learning_rate": 4.699757425969152e-05, + "loss": 2.4344, + "step": 13620 + }, + { + "epoch": 0.62, + "learning_rate": 4.699528582543824e-05, + "loss": 2.464, + "step": 13630 + }, + { + "epoch": 0.62, + "learning_rate": 4.699299739118495e-05, + "loss": 2.3177, + "step": 13640 + }, + { + "epoch": 0.62, + "learning_rate": 4.699070895693167e-05, + "loss": 2.391, + "step": 13650 + }, + { + "epoch": 0.62, + "learning_rate": 4.6988420522678384e-05, + "loss": 2.3471, + "step": 13660 + }, + { + "epoch": 0.62, + "learning_rate": 4.69861320884251e-05, + "loss": 2.4073, + "step": 13670 + }, + { + "epoch": 0.62, + "learning_rate": 4.698384365417182e-05, + "loss": 2.4141, + "step": 13680 + }, + { + "epoch": 0.63, + "learning_rate": 4.698155521991853e-05, + "loss": 2.2538, + "step": 13690 + }, + { + "epoch": 0.63, + "learning_rate": 4.697926678566525e-05, + "loss": 2.2921, + "step": 13700 + }, + { + "epoch": 0.63, + "learning_rate": 4.697697835141197e-05, + "loss": 2.4317, + "step": 13710 + }, + { + "epoch": 0.63, + "learning_rate": 4.697468991715868e-05, + "loss": 2.3687, + "step": 13720 + }, + { + "epoch": 0.63, + "learning_rate": 4.6972401482905396e-05, + "loss": 2.5864, + "step": 13730 + }, + { + "epoch": 0.63, + "learning_rate": 4.697011304865212e-05, + "loss": 2.3882, + "step": 13740 + }, + { + "epoch": 0.63, + "learning_rate": 4.696782461439883e-05, + "loss": 2.3981, + "step": 13750 + }, + { + "epoch": 0.63, + "learning_rate": 4.6965536180145546e-05, + "loss": 2.3473, + "step": 13760 + }, + { + "epoch": 0.63, + "learning_rate": 4.6963247745892266e-05, + "loss": 2.4379, + "step": 13770 + }, + { + "epoch": 0.63, + "learning_rate": 4.696095931163898e-05, + "loss": 2.2774, + "step": 13780 + }, + { + "epoch": 0.63, + "learning_rate": 4.6958670877385695e-05, + "loss": 2.2976, + "step": 13790 + }, + { + "epoch": 0.63, + "learning_rate": 4.6956382443132416e-05, + "loss": 2.2165, + "step": 13800 + }, + { + "epoch": 0.63, + "learning_rate": 4.695409400887913e-05, + "loss": 2.406, + "step": 13810 + }, + { + "epoch": 0.63, + "learning_rate": 4.6951805574625844e-05, + "loss": 2.393, + "step": 13820 + }, + { + "epoch": 0.63, + "learning_rate": 4.6949517140372565e-05, + "loss": 2.3793, + "step": 13830 + }, + { + "epoch": 0.63, + "learning_rate": 4.694722870611927e-05, + "loss": 2.3294, + "step": 13840 + }, + { + "epoch": 0.63, + "learning_rate": 4.6944940271865986e-05, + "loss": 2.3039, + "step": 13850 + }, + { + "epoch": 0.63, + "learning_rate": 4.694265183761271e-05, + "loss": 2.4783, + "step": 13860 + }, + { + "epoch": 0.63, + "learning_rate": 4.694036340335942e-05, + "loss": 2.2788, + "step": 13870 + }, + { + "epoch": 0.63, + "learning_rate": 4.6938074969106135e-05, + "loss": 2.3901, + "step": 13880 + }, + { + "epoch": 0.63, + "learning_rate": 4.6935786534852856e-05, + "loss": 2.3516, + "step": 13890 + }, + { + "epoch": 0.63, + "learning_rate": 4.693349810059957e-05, + "loss": 2.364, + "step": 13900 + }, + { + "epoch": 0.64, + "learning_rate": 4.6931209666346285e-05, + "loss": 2.4494, + "step": 13910 + }, + { + "epoch": 0.64, + "learning_rate": 4.6928921232093006e-05, + "loss": 2.2642, + "step": 13920 + }, + { + "epoch": 0.64, + "learning_rate": 4.692663279783972e-05, + "loss": 2.3744, + "step": 13930 + }, + { + "epoch": 0.64, + "learning_rate": 4.6924344363586434e-05, + "loss": 2.3571, + "step": 13940 + }, + { + "epoch": 0.64, + "learning_rate": 4.6922055929333155e-05, + "loss": 2.2558, + "step": 13950 + }, + { + "epoch": 0.64, + "learning_rate": 4.691976749507987e-05, + "loss": 2.3763, + "step": 13960 + }, + { + "epoch": 0.64, + "learning_rate": 4.691747906082658e-05, + "loss": 2.3461, + "step": 13970 + }, + { + "epoch": 0.64, + "learning_rate": 4.6915190626573304e-05, + "loss": 2.4663, + "step": 13980 + }, + { + "epoch": 0.64, + "learning_rate": 4.691290219232002e-05, + "loss": 2.3942, + "step": 13990 + }, + { + "epoch": 0.64, + "learning_rate": 4.691061375806673e-05, + "loss": 2.3715, + "step": 14000 + }, + { + "epoch": 0.64, + "learning_rate": 4.690832532381345e-05, + "loss": 2.4367, + "step": 14010 + }, + { + "epoch": 0.64, + "learning_rate": 4.690603688956017e-05, + "loss": 2.523, + "step": 14020 + }, + { + "epoch": 0.64, + "learning_rate": 4.690374845530688e-05, + "loss": 2.3815, + "step": 14030 + }, + { + "epoch": 0.64, + "learning_rate": 4.6901460021053596e-05, + "loss": 2.4112, + "step": 14040 + }, + { + "epoch": 0.64, + "learning_rate": 4.689917158680031e-05, + "loss": 2.2423, + "step": 14050 + }, + { + "epoch": 0.64, + "learning_rate": 4.689688315254703e-05, + "loss": 2.4133, + "step": 14060 + }, + { + "epoch": 0.64, + "learning_rate": 4.6894594718293745e-05, + "loss": 2.3051, + "step": 14070 + }, + { + "epoch": 0.64, + "learning_rate": 4.689230628404046e-05, + "loss": 2.5434, + "step": 14080 + }, + { + "epoch": 0.64, + "learning_rate": 4.689001784978718e-05, + "loss": 2.3681, + "step": 14090 + }, + { + "epoch": 0.64, + "learning_rate": 4.6887729415533894e-05, + "loss": 2.3268, + "step": 14100 + }, + { + "epoch": 0.64, + "learning_rate": 4.688544098128061e-05, + "loss": 2.3015, + "step": 14110 + }, + { + "epoch": 0.64, + "learning_rate": 4.688315254702733e-05, + "loss": 2.3093, + "step": 14120 + }, + { + "epoch": 0.65, + "learning_rate": 4.688086411277404e-05, + "loss": 2.3067, + "step": 14130 + }, + { + "epoch": 0.65, + "learning_rate": 4.687857567852076e-05, + "loss": 2.3219, + "step": 14140 + }, + { + "epoch": 0.65, + "learning_rate": 4.687628724426748e-05, + "loss": 2.4299, + "step": 14150 + }, + { + "epoch": 0.65, + "learning_rate": 4.687399881001419e-05, + "loss": 2.5319, + "step": 14160 + }, + { + "epoch": 0.65, + "learning_rate": 4.6871710375760906e-05, + "loss": 2.2746, + "step": 14170 + }, + { + "epoch": 0.65, + "learning_rate": 4.686942194150763e-05, + "loss": 2.3274, + "step": 14180 + }, + { + "epoch": 0.65, + "learning_rate": 4.686713350725434e-05, + "loss": 2.4078, + "step": 14190 + }, + { + "epoch": 0.65, + "learning_rate": 4.6864845073001056e-05, + "loss": 2.3112, + "step": 14200 + }, + { + "epoch": 0.65, + "learning_rate": 4.6862556638747776e-05, + "loss": 2.3443, + "step": 14210 + }, + { + "epoch": 0.65, + "learning_rate": 4.686026820449449e-05, + "loss": 2.4104, + "step": 14220 + }, + { + "epoch": 0.65, + "learning_rate": 4.68579797702412e-05, + "loss": 2.3984, + "step": 14230 + }, + { + "epoch": 0.65, + "learning_rate": 4.685569133598792e-05, + "loss": 2.3395, + "step": 14240 + }, + { + "epoch": 0.65, + "learning_rate": 4.685340290173463e-05, + "loss": 2.4568, + "step": 14250 + }, + { + "epoch": 0.65, + "learning_rate": 4.685111446748135e-05, + "loss": 2.5401, + "step": 14260 + }, + { + "epoch": 0.65, + "learning_rate": 4.684882603322807e-05, + "loss": 2.387, + "step": 14270 + }, + { + "epoch": 0.65, + "learning_rate": 4.684653759897478e-05, + "loss": 2.3915, + "step": 14280 + }, + { + "epoch": 0.65, + "learning_rate": 4.6844249164721496e-05, + "loss": 2.462, + "step": 14290 + }, + { + "epoch": 0.65, + "learning_rate": 4.684196073046822e-05, + "loss": 2.4498, + "step": 14300 + }, + { + "epoch": 0.65, + "learning_rate": 4.683967229621493e-05, + "loss": 2.3025, + "step": 14310 + }, + { + "epoch": 0.65, + "learning_rate": 4.6837383861961645e-05, + "loss": 2.3295, + "step": 14320 + }, + { + "epoch": 0.65, + "learning_rate": 4.6835095427708366e-05, + "loss": 2.463, + "step": 14330 + }, + { + "epoch": 0.65, + "learning_rate": 4.683280699345508e-05, + "loss": 2.3643, + "step": 14340 + }, + { + "epoch": 0.66, + "learning_rate": 4.6830518559201795e-05, + "loss": 2.5365, + "step": 14350 + }, + { + "epoch": 0.66, + "learning_rate": 4.6828230124948516e-05, + "loss": 2.3952, + "step": 14360 + }, + { + "epoch": 0.66, + "learning_rate": 4.682594169069523e-05, + "loss": 2.4698, + "step": 14370 + }, + { + "epoch": 0.66, + "learning_rate": 4.6823653256441944e-05, + "loss": 2.3816, + "step": 14380 + }, + { + "epoch": 0.66, + "learning_rate": 4.6821364822188665e-05, + "loss": 2.3845, + "step": 14390 + }, + { + "epoch": 0.66, + "learning_rate": 4.681907638793538e-05, + "loss": 2.3689, + "step": 14400 + }, + { + "epoch": 0.66, + "learning_rate": 4.681678795368209e-05, + "loss": 2.3599, + "step": 14410 + }, + { + "epoch": 0.66, + "learning_rate": 4.6814499519428814e-05, + "loss": 2.3926, + "step": 14420 + }, + { + "epoch": 0.66, + "learning_rate": 4.681221108517552e-05, + "loss": 2.4718, + "step": 14430 + }, + { + "epoch": 0.66, + "learning_rate": 4.680992265092224e-05, + "loss": 2.4573, + "step": 14440 + }, + { + "epoch": 0.66, + "learning_rate": 4.6807634216668956e-05, + "loss": 2.5606, + "step": 14450 + }, + { + "epoch": 0.66, + "learning_rate": 4.680534578241567e-05, + "loss": 2.4595, + "step": 14460 + }, + { + "epoch": 0.66, + "learning_rate": 4.680305734816239e-05, + "loss": 2.2884, + "step": 14470 + }, + { + "epoch": 0.66, + "learning_rate": 4.6800768913909105e-05, + "loss": 2.3518, + "step": 14480 + }, + { + "epoch": 0.66, + "learning_rate": 4.679848047965582e-05, + "loss": 2.3527, + "step": 14490 + }, + { + "epoch": 0.66, + "learning_rate": 4.6796192045402534e-05, + "loss": 2.5002, + "step": 14500 + }, + { + "epoch": 0.66, + "learning_rate": 4.6793903611149255e-05, + "loss": 2.2668, + "step": 14510 + }, + { + "epoch": 0.66, + "learning_rate": 4.679161517689597e-05, + "loss": 2.2006, + "step": 14520 + }, + { + "epoch": 0.66, + "learning_rate": 4.678932674264268e-05, + "loss": 2.3714, + "step": 14530 + }, + { + "epoch": 0.66, + "learning_rate": 4.6787038308389404e-05, + "loss": 2.539, + "step": 14540 + }, + { + "epoch": 0.66, + "learning_rate": 4.678474987413612e-05, + "loss": 2.3998, + "step": 14550 + }, + { + "epoch": 0.66, + "learning_rate": 4.678246143988283e-05, + "loss": 2.3236, + "step": 14560 + }, + { + "epoch": 0.67, + "learning_rate": 4.678017300562955e-05, + "loss": 2.2425, + "step": 14570 + }, + { + "epoch": 0.67, + "learning_rate": 4.677788457137627e-05, + "loss": 2.5189, + "step": 14580 + }, + { + "epoch": 0.67, + "learning_rate": 4.677559613712298e-05, + "loss": 2.3014, + "step": 14590 + }, + { + "epoch": 0.67, + "learning_rate": 4.67733077028697e-05, + "loss": 2.4558, + "step": 14600 + }, + { + "epoch": 0.67, + "learning_rate": 4.6771019268616416e-05, + "loss": 2.3848, + "step": 14610 + }, + { + "epoch": 0.67, + "learning_rate": 4.676873083436313e-05, + "loss": 2.421, + "step": 14620 + }, + { + "epoch": 0.67, + "learning_rate": 4.6766442400109845e-05, + "loss": 2.411, + "step": 14630 + }, + { + "epoch": 0.67, + "learning_rate": 4.676415396585656e-05, + "loss": 2.3328, + "step": 14640 + }, + { + "epoch": 0.67, + "learning_rate": 4.676186553160328e-05, + "loss": 2.4546, + "step": 14650 + }, + { + "epoch": 0.67, + "learning_rate": 4.6759577097349994e-05, + "loss": 2.3235, + "step": 14660 + }, + { + "epoch": 0.67, + "learning_rate": 4.675728866309671e-05, + "loss": 2.3447, + "step": 14670 + }, + { + "epoch": 0.67, + "learning_rate": 4.675500022884343e-05, + "loss": 2.2483, + "step": 14680 + }, + { + "epoch": 0.67, + "learning_rate": 4.675271179459014e-05, + "loss": 2.3659, + "step": 14690 + }, + { + "epoch": 0.67, + "learning_rate": 4.675042336033686e-05, + "loss": 2.2684, + "step": 14700 + }, + { + "epoch": 0.67, + "learning_rate": 4.674813492608358e-05, + "loss": 2.4088, + "step": 14710 + }, + { + "epoch": 0.67, + "learning_rate": 4.674584649183029e-05, + "loss": 2.2825, + "step": 14720 + }, + { + "epoch": 0.67, + "learning_rate": 4.6743558057577006e-05, + "loss": 2.4442, + "step": 14730 + }, + { + "epoch": 0.67, + "learning_rate": 4.674126962332373e-05, + "loss": 2.6232, + "step": 14740 + }, + { + "epoch": 0.67, + "learning_rate": 4.673898118907044e-05, + "loss": 2.4191, + "step": 14750 + }, + { + "epoch": 0.67, + "learning_rate": 4.6736692754817155e-05, + "loss": 2.4357, + "step": 14760 + }, + { + "epoch": 0.67, + "learning_rate": 4.6734404320563876e-05, + "loss": 2.2647, + "step": 14770 + }, + { + "epoch": 0.67, + "learning_rate": 4.673211588631059e-05, + "loss": 2.4307, + "step": 14780 + }, + { + "epoch": 0.68, + "learning_rate": 4.6729827452057305e-05, + "loss": 2.4112, + "step": 14790 + }, + { + "epoch": 0.68, + "learning_rate": 4.6727539017804026e-05, + "loss": 2.4224, + "step": 14800 + }, + { + "epoch": 0.68, + "learning_rate": 4.672525058355074e-05, + "loss": 2.3461, + "step": 14810 + }, + { + "epoch": 0.68, + "learning_rate": 4.6722962149297454e-05, + "loss": 2.2378, + "step": 14820 + }, + { + "epoch": 0.68, + "learning_rate": 4.672067371504417e-05, + "loss": 2.3231, + "step": 14830 + }, + { + "epoch": 0.68, + "learning_rate": 4.671838528079088e-05, + "loss": 2.3036, + "step": 14840 + }, + { + "epoch": 0.68, + "learning_rate": 4.6716096846537596e-05, + "loss": 2.2811, + "step": 14850 + }, + { + "epoch": 0.68, + "learning_rate": 4.671380841228432e-05, + "loss": 2.425, + "step": 14860 + }, + { + "epoch": 0.68, + "learning_rate": 4.671151997803103e-05, + "loss": 2.3556, + "step": 14870 + }, + { + "epoch": 0.68, + "learning_rate": 4.6709231543777745e-05, + "loss": 2.3853, + "step": 14880 + }, + { + "epoch": 0.68, + "learning_rate": 4.6706943109524466e-05, + "loss": 2.4541, + "step": 14890 + }, + { + "epoch": 0.68, + "learning_rate": 4.670465467527118e-05, + "loss": 2.4022, + "step": 14900 + }, + { + "epoch": 0.68, + "learning_rate": 4.6702366241017895e-05, + "loss": 2.4588, + "step": 14910 + }, + { + "epoch": 0.68, + "learning_rate": 4.6700077806764615e-05, + "loss": 2.4838, + "step": 14920 + }, + { + "epoch": 0.68, + "learning_rate": 4.669778937251133e-05, + "loss": 2.3258, + "step": 14930 + }, + { + "epoch": 0.68, + "learning_rate": 4.6695500938258044e-05, + "loss": 2.3718, + "step": 14940 + }, + { + "epoch": 0.68, + "learning_rate": 4.6693212504004765e-05, + "loss": 2.323, + "step": 14950 + }, + { + "epoch": 0.68, + "learning_rate": 4.669092406975148e-05, + "loss": 2.4033, + "step": 14960 + }, + { + "epoch": 0.68, + "learning_rate": 4.668863563549819e-05, + "loss": 2.5025, + "step": 14970 + }, + { + "epoch": 0.68, + "learning_rate": 4.6686347201244914e-05, + "loss": 2.3335, + "step": 14980 + }, + { + "epoch": 0.68, + "learning_rate": 4.668405876699163e-05, + "loss": 2.425, + "step": 14990 + }, + { + "epoch": 0.68, + "learning_rate": 4.668177033273834e-05, + "loss": 2.4599, + "step": 15000 + }, + { + "epoch": 0.69, + "learning_rate": 4.667948189848506e-05, + "loss": 2.3241, + "step": 15010 + }, + { + "epoch": 0.69, + "learning_rate": 4.667719346423177e-05, + "loss": 2.4939, + "step": 15020 + }, + { + "epoch": 0.69, + "learning_rate": 4.667490502997849e-05, + "loss": 2.3753, + "step": 15030 + }, + { + "epoch": 0.69, + "learning_rate": 4.6672616595725205e-05, + "loss": 2.3891, + "step": 15040 + }, + { + "epoch": 0.69, + "learning_rate": 4.667032816147192e-05, + "loss": 2.2793, + "step": 15050 + }, + { + "epoch": 0.69, + "learning_rate": 4.666803972721864e-05, + "loss": 2.2299, + "step": 15060 + }, + { + "epoch": 0.69, + "learning_rate": 4.6665751292965355e-05, + "loss": 2.3184, + "step": 15070 + }, + { + "epoch": 0.69, + "learning_rate": 4.666346285871207e-05, + "loss": 2.1556, + "step": 15080 + }, + { + "epoch": 0.69, + "learning_rate": 4.666117442445879e-05, + "loss": 2.5305, + "step": 15090 + }, + { + "epoch": 0.69, + "learning_rate": 4.6658885990205504e-05, + "loss": 2.3223, + "step": 15100 + }, + { + "epoch": 0.69, + "learning_rate": 4.665659755595222e-05, + "loss": 2.3775, + "step": 15110 + }, + { + "epoch": 0.69, + "learning_rate": 4.665430912169894e-05, + "loss": 2.4301, + "step": 15120 + }, + { + "epoch": 0.69, + "learning_rate": 4.665202068744565e-05, + "loss": 2.3689, + "step": 15130 + }, + { + "epoch": 0.69, + "learning_rate": 4.664973225319237e-05, + "loss": 2.2417, + "step": 15140 + }, + { + "epoch": 0.69, + "learning_rate": 4.664744381893909e-05, + "loss": 2.3305, + "step": 15150 + }, + { + "epoch": 0.69, + "learning_rate": 4.66451553846858e-05, + "loss": 2.3792, + "step": 15160 + }, + { + "epoch": 0.69, + "learning_rate": 4.6642866950432516e-05, + "loss": 2.4314, + "step": 15170 + }, + { + "epoch": 0.69, + "learning_rate": 4.664057851617924e-05, + "loss": 2.328, + "step": 15180 + }, + { + "epoch": 0.69, + "learning_rate": 4.663829008192595e-05, + "loss": 2.4546, + "step": 15190 + }, + { + "epoch": 0.69, + "learning_rate": 4.6636001647672665e-05, + "loss": 2.1596, + "step": 15200 + }, + { + "epoch": 0.69, + "learning_rate": 4.6633713213419386e-05, + "loss": 2.4049, + "step": 15210 + }, + { + "epoch": 0.69, + "learning_rate": 4.6631424779166094e-05, + "loss": 2.309, + "step": 15220 + }, + { + "epoch": 0.7, + "learning_rate": 4.662913634491281e-05, + "loss": 2.3605, + "step": 15230 + }, + { + "epoch": 0.7, + "learning_rate": 4.662684791065953e-05, + "loss": 2.3429, + "step": 15240 + }, + { + "epoch": 0.7, + "learning_rate": 4.662455947640624e-05, + "loss": 2.3773, + "step": 15250 + }, + { + "epoch": 0.7, + "learning_rate": 4.662227104215296e-05, + "loss": 2.3893, + "step": 15260 + }, + { + "epoch": 0.7, + "learning_rate": 4.661998260789968e-05, + "loss": 2.4296, + "step": 15270 + }, + { + "epoch": 0.7, + "learning_rate": 4.661769417364639e-05, + "loss": 2.4665, + "step": 15280 + }, + { + "epoch": 0.7, + "learning_rate": 4.6615405739393106e-05, + "loss": 2.3994, + "step": 15290 + }, + { + "epoch": 0.7, + "learning_rate": 4.661311730513983e-05, + "loss": 2.3696, + "step": 15300 + }, + { + "epoch": 0.7, + "learning_rate": 4.661082887088654e-05, + "loss": 2.4547, + "step": 15310 + }, + { + "epoch": 0.7, + "learning_rate": 4.6608540436633255e-05, + "loss": 2.3975, + "step": 15320 + }, + { + "epoch": 0.7, + "learning_rate": 4.6606252002379976e-05, + "loss": 2.3168, + "step": 15330 + }, + { + "epoch": 0.7, + "learning_rate": 4.660396356812669e-05, + "loss": 2.3848, + "step": 15340 + }, + { + "epoch": 0.7, + "learning_rate": 4.6601675133873405e-05, + "loss": 2.4167, + "step": 15350 + }, + { + "epoch": 0.7, + "learning_rate": 4.6599386699620125e-05, + "loss": 2.3531, + "step": 15360 + }, + { + "epoch": 0.7, + "learning_rate": 4.659709826536684e-05, + "loss": 2.3287, + "step": 15370 + }, + { + "epoch": 0.7, + "learning_rate": 4.6594809831113554e-05, + "loss": 2.561, + "step": 15380 + }, + { + "epoch": 0.7, + "learning_rate": 4.6592521396860275e-05, + "loss": 2.3686, + "step": 15390 + }, + { + "epoch": 0.7, + "learning_rate": 4.659023296260699e-05, + "loss": 2.5802, + "step": 15400 + }, + { + "epoch": 0.7, + "learning_rate": 4.65879445283537e-05, + "loss": 2.2783, + "step": 15410 + }, + { + "epoch": 0.7, + "learning_rate": 4.658565609410042e-05, + "loss": 2.3303, + "step": 15420 + }, + { + "epoch": 0.7, + "learning_rate": 4.658336765984713e-05, + "loss": 2.319, + "step": 15430 + }, + { + "epoch": 0.71, + "learning_rate": 4.658107922559385e-05, + "loss": 2.3461, + "step": 15440 + }, + { + "epoch": 0.71, + "learning_rate": 4.6578790791340566e-05, + "loss": 2.2342, + "step": 15450 + }, + { + "epoch": 0.71, + "learning_rate": 4.657650235708728e-05, + "loss": 2.3677, + "step": 15460 + }, + { + "epoch": 0.71, + "learning_rate": 4.6574213922834e-05, + "loss": 2.2846, + "step": 15470 + }, + { + "epoch": 0.71, + "learning_rate": 4.6571925488580715e-05, + "loss": 2.4242, + "step": 15480 + }, + { + "epoch": 0.71, + "learning_rate": 4.656963705432743e-05, + "loss": 2.432, + "step": 15490 + }, + { + "epoch": 0.71, + "learning_rate": 4.656734862007415e-05, + "loss": 2.3391, + "step": 15500 + }, + { + "epoch": 0.71, + "learning_rate": 4.6565060185820865e-05, + "loss": 2.5358, + "step": 15510 + }, + { + "epoch": 0.71, + "learning_rate": 4.656277175156758e-05, + "loss": 2.44, + "step": 15520 + }, + { + "epoch": 0.71, + "learning_rate": 4.65604833173143e-05, + "loss": 2.4573, + "step": 15530 + }, + { + "epoch": 0.71, + "learning_rate": 4.6558194883061014e-05, + "loss": 2.3064, + "step": 15540 + }, + { + "epoch": 0.71, + "learning_rate": 4.655590644880773e-05, + "loss": 2.3635, + "step": 15550 + }, + { + "epoch": 0.71, + "learning_rate": 4.655361801455445e-05, + "loss": 2.481, + "step": 15560 + }, + { + "epoch": 0.71, + "learning_rate": 4.655132958030116e-05, + "loss": 2.3738, + "step": 15570 + }, + { + "epoch": 0.71, + "learning_rate": 4.654904114604788e-05, + "loss": 2.3046, + "step": 15580 + }, + { + "epoch": 0.71, + "learning_rate": 4.654675271179459e-05, + "loss": 2.4667, + "step": 15590 + }, + { + "epoch": 0.71, + "learning_rate": 4.654446427754131e-05, + "loss": 2.5539, + "step": 15600 + }, + { + "epoch": 0.71, + "learning_rate": 4.6542175843288026e-05, + "loss": 2.3635, + "step": 15610 + }, + { + "epoch": 0.71, + "learning_rate": 4.653988740903474e-05, + "loss": 2.3257, + "step": 15620 + }, + { + "epoch": 0.71, + "learning_rate": 4.6537598974781454e-05, + "loss": 2.4459, + "step": 15630 + }, + { + "epoch": 0.71, + "learning_rate": 4.653531054052817e-05, + "loss": 2.3885, + "step": 15640 + }, + { + "epoch": 0.71, + "learning_rate": 4.653302210627489e-05, + "loss": 2.3926, + "step": 15650 + }, + { + "epoch": 0.72, + "learning_rate": 4.6530733672021604e-05, + "loss": 2.2393, + "step": 15660 + }, + { + "epoch": 0.72, + "learning_rate": 4.652844523776832e-05, + "loss": 2.4107, + "step": 15670 + }, + { + "epoch": 0.72, + "learning_rate": 4.652615680351504e-05, + "loss": 2.4162, + "step": 15680 + }, + { + "epoch": 0.72, + "learning_rate": 4.652386836926175e-05, + "loss": 2.2627, + "step": 15690 + }, + { + "epoch": 0.72, + "learning_rate": 4.652157993500847e-05, + "loss": 2.308, + "step": 15700 + }, + { + "epoch": 0.72, + "learning_rate": 4.651929150075519e-05, + "loss": 2.349, + "step": 15710 + }, + { + "epoch": 0.72, + "learning_rate": 4.65170030665019e-05, + "loss": 2.4102, + "step": 15720 + }, + { + "epoch": 0.72, + "learning_rate": 4.6514714632248616e-05, + "loss": 2.4103, + "step": 15730 + }, + { + "epoch": 0.72, + "learning_rate": 4.651242619799534e-05, + "loss": 2.3012, + "step": 15740 + }, + { + "epoch": 0.72, + "learning_rate": 4.651013776374205e-05, + "loss": 2.3481, + "step": 15750 + }, + { + "epoch": 0.72, + "learning_rate": 4.6507849329488765e-05, + "loss": 2.3335, + "step": 15760 + }, + { + "epoch": 0.72, + "learning_rate": 4.6505560895235486e-05, + "loss": 2.4007, + "step": 15770 + }, + { + "epoch": 0.72, + "learning_rate": 4.65032724609822e-05, + "loss": 2.3978, + "step": 15780 + }, + { + "epoch": 0.72, + "learning_rate": 4.6500984026728914e-05, + "loss": 2.2898, + "step": 15790 + }, + { + "epoch": 0.72, + "learning_rate": 4.6498695592475635e-05, + "loss": 2.5494, + "step": 15800 + }, + { + "epoch": 0.72, + "learning_rate": 4.649640715822235e-05, + "loss": 2.3529, + "step": 15810 + }, + { + "epoch": 0.72, + "learning_rate": 4.649411872396906e-05, + "loss": 2.5162, + "step": 15820 + }, + { + "epoch": 0.72, + "learning_rate": 4.649183028971578e-05, + "loss": 2.3746, + "step": 15830 + }, + { + "epoch": 0.72, + "learning_rate": 4.648954185546249e-05, + "loss": 2.3764, + "step": 15840 + }, + { + "epoch": 0.72, + "learning_rate": 4.6487253421209206e-05, + "loss": 2.4426, + "step": 15850 + }, + { + "epoch": 0.72, + "learning_rate": 4.648496498695593e-05, + "loss": 2.4078, + "step": 15860 + }, + { + "epoch": 0.72, + "learning_rate": 4.648267655270264e-05, + "loss": 2.4534, + "step": 15870 + }, + { + "epoch": 0.73, + "learning_rate": 4.6480388118449355e-05, + "loss": 2.2913, + "step": 15880 + }, + { + "epoch": 0.73, + "learning_rate": 4.6478099684196076e-05, + "loss": 2.4197, + "step": 15890 + }, + { + "epoch": 0.73, + "learning_rate": 4.647581124994279e-05, + "loss": 2.486, + "step": 15900 + }, + { + "epoch": 0.73, + "learning_rate": 4.6473522815689504e-05, + "loss": 2.3556, + "step": 15910 + }, + { + "epoch": 0.73, + "learning_rate": 4.6471234381436225e-05, + "loss": 2.41, + "step": 15920 + }, + { + "epoch": 0.73, + "learning_rate": 4.646894594718294e-05, + "loss": 2.4941, + "step": 15930 + }, + { + "epoch": 0.73, + "learning_rate": 4.6466657512929654e-05, + "loss": 2.384, + "step": 15940 + }, + { + "epoch": 0.73, + "learning_rate": 4.6464369078676374e-05, + "loss": 2.3485, + "step": 15950 + }, + { + "epoch": 0.73, + "learning_rate": 4.646208064442309e-05, + "loss": 2.3617, + "step": 15960 + }, + { + "epoch": 0.73, + "learning_rate": 4.64597922101698e-05, + "loss": 2.1509, + "step": 15970 + }, + { + "epoch": 0.73, + "learning_rate": 4.6457503775916524e-05, + "loss": 2.3831, + "step": 15980 + }, + { + "epoch": 0.73, + "learning_rate": 4.645521534166324e-05, + "loss": 2.303, + "step": 15990 + }, + { + "epoch": 0.73, + "learning_rate": 4.645292690740995e-05, + "loss": 2.2721, + "step": 16000 + }, + { + "epoch": 0.73, + "learning_rate": 4.6450638473156666e-05, + "loss": 2.3479, + "step": 16010 + }, + { + "epoch": 0.73, + "learning_rate": 4.644835003890338e-05, + "loss": 2.3042, + "step": 16020 + }, + { + "epoch": 0.73, + "learning_rate": 4.64460616046501e-05, + "loss": 2.5601, + "step": 16030 + }, + { + "epoch": 0.73, + "learning_rate": 4.6443773170396815e-05, + "loss": 2.2304, + "step": 16040 + }, + { + "epoch": 0.73, + "learning_rate": 4.644148473614353e-05, + "loss": 2.2836, + "step": 16050 + }, + { + "epoch": 0.73, + "learning_rate": 4.643919630189025e-05, + "loss": 2.3329, + "step": 16060 + }, + { + "epoch": 0.73, + "learning_rate": 4.6436907867636964e-05, + "loss": 2.3403, + "step": 16070 + }, + { + "epoch": 0.73, + "learning_rate": 4.643461943338368e-05, + "loss": 2.4404, + "step": 16080 + }, + { + "epoch": 0.73, + "learning_rate": 4.64323309991304e-05, + "loss": 2.3952, + "step": 16090 + }, + { + "epoch": 0.74, + "learning_rate": 4.6430042564877114e-05, + "loss": 2.1968, + "step": 16100 + }, + { + "epoch": 0.74, + "learning_rate": 4.642775413062383e-05, + "loss": 2.4064, + "step": 16110 + }, + { + "epoch": 0.74, + "learning_rate": 4.642546569637055e-05, + "loss": 2.4074, + "step": 16120 + }, + { + "epoch": 0.74, + "learning_rate": 4.642317726211726e-05, + "loss": 2.3967, + "step": 16130 + }, + { + "epoch": 0.74, + "learning_rate": 4.642088882786398e-05, + "loss": 2.594, + "step": 16140 + }, + { + "epoch": 0.74, + "learning_rate": 4.64186003936107e-05, + "loss": 2.4373, + "step": 16150 + }, + { + "epoch": 0.74, + "learning_rate": 4.641631195935741e-05, + "loss": 2.5472, + "step": 16160 + }, + { + "epoch": 0.74, + "learning_rate": 4.6414023525104126e-05, + "loss": 2.5062, + "step": 16170 + }, + { + "epoch": 0.74, + "learning_rate": 4.641173509085085e-05, + "loss": 2.4266, + "step": 16180 + }, + { + "epoch": 0.74, + "learning_rate": 4.640944665659756e-05, + "loss": 2.4457, + "step": 16190 + }, + { + "epoch": 0.74, + "learning_rate": 4.6407158222344275e-05, + "loss": 2.372, + "step": 16200 + }, + { + "epoch": 0.74, + "learning_rate": 4.640486978809099e-05, + "loss": 2.2929, + "step": 16210 + }, + { + "epoch": 0.74, + "learning_rate": 4.6402581353837704e-05, + "loss": 2.3839, + "step": 16220 + }, + { + "epoch": 0.74, + "learning_rate": 4.640029291958442e-05, + "loss": 2.465, + "step": 16230 + }, + { + "epoch": 0.74, + "learning_rate": 4.639800448533114e-05, + "loss": 2.4987, + "step": 16240 + }, + { + "epoch": 0.74, + "learning_rate": 4.639571605107785e-05, + "loss": 2.3799, + "step": 16250 + }, + { + "epoch": 0.74, + "learning_rate": 4.639342761682457e-05, + "loss": 2.4681, + "step": 16260 + }, + { + "epoch": 0.74, + "learning_rate": 4.639113918257129e-05, + "loss": 2.3439, + "step": 16270 + }, + { + "epoch": 0.74, + "learning_rate": 4.6388850748318e-05, + "loss": 2.4584, + "step": 16280 + }, + { + "epoch": 0.74, + "learning_rate": 4.6386562314064716e-05, + "loss": 2.52, + "step": 16290 + }, + { + "epoch": 0.74, + "learning_rate": 4.638427387981144e-05, + "loss": 2.2984, + "step": 16300 + }, + { + "epoch": 0.74, + "learning_rate": 4.638198544555815e-05, + "loss": 2.3908, + "step": 16310 + }, + { + "epoch": 0.75, + "learning_rate": 4.6379697011304865e-05, + "loss": 2.3904, + "step": 16320 + }, + { + "epoch": 0.75, + "learning_rate": 4.6377408577051586e-05, + "loss": 2.1904, + "step": 16330 + }, + { + "epoch": 0.75, + "learning_rate": 4.63751201427983e-05, + "loss": 2.481, + "step": 16340 + }, + { + "epoch": 0.75, + "learning_rate": 4.6372831708545014e-05, + "loss": 2.3175, + "step": 16350 + }, + { + "epoch": 0.75, + "learning_rate": 4.6370543274291735e-05, + "loss": 2.2616, + "step": 16360 + }, + { + "epoch": 0.75, + "learning_rate": 4.636825484003845e-05, + "loss": 2.3663, + "step": 16370 + }, + { + "epoch": 0.75, + "learning_rate": 4.6365966405785164e-05, + "loss": 2.3417, + "step": 16380 + }, + { + "epoch": 0.75, + "learning_rate": 4.6363677971531884e-05, + "loss": 2.387, + "step": 16390 + }, + { + "epoch": 0.75, + "learning_rate": 4.63613895372786e-05, + "loss": 2.2699, + "step": 16400 + }, + { + "epoch": 0.75, + "learning_rate": 4.635910110302531e-05, + "loss": 2.231, + "step": 16410 + }, + { + "epoch": 0.75, + "learning_rate": 4.635681266877203e-05, + "loss": 2.2993, + "step": 16420 + }, + { + "epoch": 0.75, + "learning_rate": 4.635452423451874e-05, + "loss": 2.408, + "step": 16430 + }, + { + "epoch": 0.75, + "learning_rate": 4.635223580026546e-05, + "loss": 2.4687, + "step": 16440 + }, + { + "epoch": 0.75, + "learning_rate": 4.6349947366012176e-05, + "loss": 2.3699, + "step": 16450 + }, + { + "epoch": 0.75, + "learning_rate": 4.634765893175889e-05, + "loss": 2.3452, + "step": 16460 + }, + { + "epoch": 0.75, + "learning_rate": 4.634537049750561e-05, + "loss": 2.5306, + "step": 16470 + }, + { + "epoch": 0.75, + "learning_rate": 4.6343082063252325e-05, + "loss": 2.4026, + "step": 16480 + }, + { + "epoch": 0.75, + "learning_rate": 4.634079362899904e-05, + "loss": 2.3577, + "step": 16490 + }, + { + "epoch": 0.75, + "learning_rate": 4.633850519474576e-05, + "loss": 2.4862, + "step": 16500 + }, + { + "epoch": 0.75, + "learning_rate": 4.6336216760492474e-05, + "loss": 2.4687, + "step": 16510 + }, + { + "epoch": 0.75, + "learning_rate": 4.633392832623919e-05, + "loss": 2.3897, + "step": 16520 + }, + { + "epoch": 0.75, + "learning_rate": 4.633163989198591e-05, + "loss": 2.4917, + "step": 16530 + }, + { + "epoch": 0.76, + "learning_rate": 4.6329351457732624e-05, + "loss": 2.4958, + "step": 16540 + }, + { + "epoch": 0.76, + "learning_rate": 4.632706302347934e-05, + "loss": 2.3769, + "step": 16550 + }, + { + "epoch": 0.76, + "learning_rate": 4.632477458922606e-05, + "loss": 2.2684, + "step": 16560 + }, + { + "epoch": 0.76, + "learning_rate": 4.632248615497277e-05, + "loss": 2.3025, + "step": 16570 + }, + { + "epoch": 0.76, + "learning_rate": 4.632019772071949e-05, + "loss": 2.3771, + "step": 16580 + }, + { + "epoch": 0.76, + "learning_rate": 4.631790928646621e-05, + "loss": 2.4185, + "step": 16590 + }, + { + "epoch": 0.76, + "learning_rate": 4.631562085221292e-05, + "loss": 2.5521, + "step": 16600 + }, + { + "epoch": 0.76, + "learning_rate": 4.631333241795963e-05, + "loss": 2.4028, + "step": 16610 + }, + { + "epoch": 0.76, + "learning_rate": 4.631104398370635e-05, + "loss": 2.2996, + "step": 16620 + }, + { + "epoch": 0.76, + "learning_rate": 4.6308755549453064e-05, + "loss": 2.3649, + "step": 16630 + }, + { + "epoch": 0.76, + "learning_rate": 4.630646711519978e-05, + "loss": 2.2891, + "step": 16640 + }, + { + "epoch": 0.76, + "learning_rate": 4.63041786809465e-05, + "loss": 2.3216, + "step": 16650 + }, + { + "epoch": 0.76, + "learning_rate": 4.6301890246693213e-05, + "loss": 2.1903, + "step": 16660 + }, + { + "epoch": 0.76, + "learning_rate": 4.629960181243993e-05, + "loss": 2.4027, + "step": 16670 + }, + { + "epoch": 0.76, + "learning_rate": 4.629731337818665e-05, + "loss": 2.4524, + "step": 16680 + }, + { + "epoch": 0.76, + "learning_rate": 4.629502494393336e-05, + "loss": 2.3274, + "step": 16690 + }, + { + "epoch": 0.76, + "learning_rate": 4.629273650968008e-05, + "loss": 2.3721, + "step": 16700 + }, + { + "epoch": 0.76, + "learning_rate": 4.62904480754268e-05, + "loss": 2.2037, + "step": 16710 + }, + { + "epoch": 0.76, + "learning_rate": 4.628815964117351e-05, + "loss": 2.3744, + "step": 16720 + }, + { + "epoch": 0.76, + "learning_rate": 4.6285871206920226e-05, + "loss": 2.2048, + "step": 16730 + }, + { + "epoch": 0.76, + "learning_rate": 4.628358277266695e-05, + "loss": 2.1571, + "step": 16740 + }, + { + "epoch": 0.76, + "learning_rate": 4.628129433841366e-05, + "loss": 2.3747, + "step": 16750 + }, + { + "epoch": 0.77, + "learning_rate": 4.6279005904160375e-05, + "loss": 2.263, + "step": 16760 + }, + { + "epoch": 0.77, + "learning_rate": 4.6276717469907096e-05, + "loss": 2.5846, + "step": 16770 + }, + { + "epoch": 0.77, + "learning_rate": 4.627442903565381e-05, + "loss": 2.272, + "step": 16780 + }, + { + "epoch": 0.77, + "learning_rate": 4.6272140601400524e-05, + "loss": 2.2812, + "step": 16790 + }, + { + "epoch": 0.77, + "learning_rate": 4.626985216714724e-05, + "loss": 2.2774, + "step": 16800 + }, + { + "epoch": 0.77, + "learning_rate": 4.626756373289395e-05, + "loss": 2.5418, + "step": 16810 + }, + { + "epoch": 0.77, + "learning_rate": 4.626527529864067e-05, + "loss": 2.206, + "step": 16820 + }, + { + "epoch": 0.77, + "learning_rate": 4.626298686438739e-05, + "loss": 2.3299, + "step": 16830 + }, + { + "epoch": 0.77, + "learning_rate": 4.62606984301341e-05, + "loss": 2.4003, + "step": 16840 + }, + { + "epoch": 0.77, + "learning_rate": 4.6258409995880816e-05, + "loss": 2.303, + "step": 16850 + }, + { + "epoch": 0.77, + "learning_rate": 4.625612156162754e-05, + "loss": 2.1617, + "step": 16860 + }, + { + "epoch": 0.77, + "learning_rate": 4.625383312737425e-05, + "loss": 2.2616, + "step": 16870 + }, + { + "epoch": 0.77, + "learning_rate": 4.6251544693120965e-05, + "loss": 2.4261, + "step": 16880 + }, + { + "epoch": 0.77, + "learning_rate": 4.6249256258867686e-05, + "loss": 2.4306, + "step": 16890 + }, + { + "epoch": 0.77, + "learning_rate": 4.62469678246144e-05, + "loss": 2.3649, + "step": 16900 + }, + { + "epoch": 0.77, + "learning_rate": 4.6244679390361114e-05, + "loss": 2.3116, + "step": 16910 + }, + { + "epoch": 0.77, + "learning_rate": 4.6242390956107835e-05, + "loss": 2.2467, + "step": 16920 + }, + { + "epoch": 0.77, + "learning_rate": 4.624010252185455e-05, + "loss": 2.4557, + "step": 16930 + }, + { + "epoch": 0.77, + "learning_rate": 4.6237814087601263e-05, + "loss": 2.328, + "step": 16940 + }, + { + "epoch": 0.77, + "learning_rate": 4.6235525653347984e-05, + "loss": 2.3345, + "step": 16950 + }, + { + "epoch": 0.77, + "learning_rate": 4.62332372190947e-05, + "loss": 2.2343, + "step": 16960 + }, + { + "epoch": 0.77, + "learning_rate": 4.623094878484141e-05, + "loss": 2.3054, + "step": 16970 + }, + { + "epoch": 0.78, + "learning_rate": 4.6228660350588134e-05, + "loss": 2.4167, + "step": 16980 + }, + { + "epoch": 0.78, + "learning_rate": 4.622637191633485e-05, + "loss": 2.4329, + "step": 16990 + }, + { + "epoch": 0.78, + "learning_rate": 4.622408348208156e-05, + "loss": 2.2721, + "step": 17000 + }, + { + "epoch": 0.78, + "learning_rate": 4.6221795047828276e-05, + "loss": 2.3178, + "step": 17010 + }, + { + "epoch": 0.78, + "learning_rate": 4.621950661357499e-05, + "loss": 2.3451, + "step": 17020 + }, + { + "epoch": 0.78, + "learning_rate": 4.621721817932171e-05, + "loss": 2.2038, + "step": 17030 + }, + { + "epoch": 0.78, + "learning_rate": 4.6214929745068425e-05, + "loss": 2.3536, + "step": 17040 + }, + { + "epoch": 0.78, + "learning_rate": 4.621264131081514e-05, + "loss": 2.259, + "step": 17050 + }, + { + "epoch": 0.78, + "learning_rate": 4.621035287656186e-05, + "loss": 2.4451, + "step": 17060 + }, + { + "epoch": 0.78, + "learning_rate": 4.6208064442308574e-05, + "loss": 2.4755, + "step": 17070 + }, + { + "epoch": 0.78, + "learning_rate": 4.620577600805529e-05, + "loss": 2.2509, + "step": 17080 + }, + { + "epoch": 0.78, + "learning_rate": 4.620348757380201e-05, + "loss": 2.4435, + "step": 17090 + }, + { + "epoch": 0.78, + "learning_rate": 4.6201199139548723e-05, + "loss": 2.3517, + "step": 17100 + }, + { + "epoch": 0.78, + "learning_rate": 4.619891070529544e-05, + "loss": 2.3907, + "step": 17110 + }, + { + "epoch": 0.78, + "learning_rate": 4.619662227104216e-05, + "loss": 2.4024, + "step": 17120 + }, + { + "epoch": 0.78, + "learning_rate": 4.619433383678887e-05, + "loss": 2.5077, + "step": 17130 + }, + { + "epoch": 0.78, + "learning_rate": 4.619204540253559e-05, + "loss": 2.5415, + "step": 17140 + }, + { + "epoch": 0.78, + "learning_rate": 4.618975696828231e-05, + "loss": 2.1163, + "step": 17150 + }, + { + "epoch": 0.78, + "learning_rate": 4.618746853402902e-05, + "loss": 2.485, + "step": 17160 + }, + { + "epoch": 0.78, + "learning_rate": 4.6185180099775736e-05, + "loss": 2.2067, + "step": 17170 + }, + { + "epoch": 0.78, + "learning_rate": 4.618289166552246e-05, + "loss": 2.3718, + "step": 17180 + }, + { + "epoch": 0.78, + "learning_rate": 4.618060323126917e-05, + "loss": 2.3559, + "step": 17190 + }, + { + "epoch": 0.79, + "learning_rate": 4.617831479701588e-05, + "loss": 2.2452, + "step": 17200 + }, + { + "epoch": 0.79, + "learning_rate": 4.61760263627626e-05, + "loss": 2.2834, + "step": 17210 + }, + { + "epoch": 0.79, + "learning_rate": 4.6173737928509313e-05, + "loss": 2.3021, + "step": 17220 + }, + { + "epoch": 0.79, + "learning_rate": 4.617144949425603e-05, + "loss": 2.4749, + "step": 17230 + }, + { + "epoch": 0.79, + "learning_rate": 4.616916106000275e-05, + "loss": 2.2662, + "step": 17240 + }, + { + "epoch": 0.79, + "learning_rate": 4.616687262574946e-05, + "loss": 2.4053, + "step": 17250 + }, + { + "epoch": 0.79, + "learning_rate": 4.616458419149618e-05, + "loss": 2.4304, + "step": 17260 + }, + { + "epoch": 0.79, + "learning_rate": 4.61622957572429e-05, + "loss": 2.3871, + "step": 17270 + }, + { + "epoch": 0.79, + "learning_rate": 4.616000732298961e-05, + "loss": 2.2039, + "step": 17280 + }, + { + "epoch": 0.79, + "learning_rate": 4.6157718888736326e-05, + "loss": 2.2611, + "step": 17290 + }, + { + "epoch": 0.79, + "learning_rate": 4.615543045448305e-05, + "loss": 2.3669, + "step": 17300 + }, + { + "epoch": 0.79, + "learning_rate": 4.615314202022976e-05, + "loss": 2.336, + "step": 17310 + }, + { + "epoch": 0.79, + "learning_rate": 4.6150853585976475e-05, + "loss": 2.3281, + "step": 17320 + }, + { + "epoch": 0.79, + "learning_rate": 4.6148565151723196e-05, + "loss": 2.2588, + "step": 17330 + }, + { + "epoch": 0.79, + "learning_rate": 4.614627671746991e-05, + "loss": 2.2774, + "step": 17340 + }, + { + "epoch": 0.79, + "learning_rate": 4.6143988283216624e-05, + "loss": 2.3469, + "step": 17350 + }, + { + "epoch": 0.79, + "learning_rate": 4.6141699848963345e-05, + "loss": 2.3594, + "step": 17360 + }, + { + "epoch": 0.79, + "learning_rate": 4.613941141471006e-05, + "loss": 2.3696, + "step": 17370 + }, + { + "epoch": 0.79, + "learning_rate": 4.6137122980456773e-05, + "loss": 2.2175, + "step": 17380 + }, + { + "epoch": 0.79, + "learning_rate": 4.6134834546203494e-05, + "loss": 2.3206, + "step": 17390 + }, + { + "epoch": 0.79, + "learning_rate": 4.61325461119502e-05, + "loss": 2.3904, + "step": 17400 + }, + { + "epoch": 0.79, + "learning_rate": 4.613025767769692e-05, + "loss": 2.4154, + "step": 17410 + }, + { + "epoch": 0.8, + "learning_rate": 4.612796924344364e-05, + "loss": 2.421, + "step": 17420 + }, + { + "epoch": 0.8, + "learning_rate": 4.612568080919035e-05, + "loss": 2.3102, + "step": 17430 + }, + { + "epoch": 0.8, + "learning_rate": 4.612339237493707e-05, + "loss": 2.4184, + "step": 17440 + }, + { + "epoch": 0.8, + "learning_rate": 4.6121103940683786e-05, + "loss": 2.3762, + "step": 17450 + }, + { + "epoch": 0.8, + "learning_rate": 4.61188155064305e-05, + "loss": 2.4048, + "step": 17460 + }, + { + "epoch": 0.8, + "learning_rate": 4.611652707217722e-05, + "loss": 2.3142, + "step": 17470 + }, + { + "epoch": 0.8, + "learning_rate": 4.6114238637923935e-05, + "loss": 2.5533, + "step": 17480 + }, + { + "epoch": 0.8, + "learning_rate": 4.611195020367065e-05, + "loss": 2.2675, + "step": 17490 + }, + { + "epoch": 0.8, + "learning_rate": 4.610966176941737e-05, + "loss": 2.4015, + "step": 17500 + }, + { + "epoch": 0.8, + "learning_rate": 4.6107373335164084e-05, + "loss": 2.4635, + "step": 17510 + }, + { + "epoch": 0.8, + "learning_rate": 4.61050849009108e-05, + "loss": 2.1911, + "step": 17520 + }, + { + "epoch": 0.8, + "learning_rate": 4.610279646665752e-05, + "loss": 2.3663, + "step": 17530 + }, + { + "epoch": 0.8, + "learning_rate": 4.6100508032404233e-05, + "loss": 2.4052, + "step": 17540 + }, + { + "epoch": 0.8, + "learning_rate": 4.609821959815095e-05, + "loss": 2.425, + "step": 17550 + }, + { + "epoch": 0.8, + "learning_rate": 4.609593116389767e-05, + "loss": 2.3711, + "step": 17560 + }, + { + "epoch": 0.8, + "learning_rate": 4.609364272964438e-05, + "loss": 2.3581, + "step": 17570 + }, + { + "epoch": 0.8, + "learning_rate": 4.60913542953911e-05, + "loss": 2.2151, + "step": 17580 + }, + { + "epoch": 0.8, + "learning_rate": 4.608906586113781e-05, + "loss": 2.2845, + "step": 17590 + }, + { + "epoch": 0.8, + "learning_rate": 4.6086777426884525e-05, + "loss": 2.3327, + "step": 17600 + }, + { + "epoch": 0.8, + "learning_rate": 4.608448899263124e-05, + "loss": 2.3474, + "step": 17610 + }, + { + "epoch": 0.8, + "learning_rate": 4.608220055837796e-05, + "loss": 2.3489, + "step": 17620 + }, + { + "epoch": 0.81, + "learning_rate": 4.6079912124124674e-05, + "loss": 2.2983, + "step": 17630 + }, + { + "epoch": 0.81, + "learning_rate": 4.607762368987139e-05, + "loss": 2.3996, + "step": 17640 + }, + { + "epoch": 0.81, + "learning_rate": 4.607533525561811e-05, + "loss": 2.3051, + "step": 17650 + }, + { + "epoch": 0.81, + "learning_rate": 4.607304682136482e-05, + "loss": 2.1683, + "step": 17660 + }, + { + "epoch": 0.81, + "learning_rate": 4.607075838711154e-05, + "loss": 2.3945, + "step": 17670 + }, + { + "epoch": 0.81, + "learning_rate": 4.606846995285826e-05, + "loss": 2.3088, + "step": 17680 + }, + { + "epoch": 0.81, + "learning_rate": 4.606618151860497e-05, + "loss": 2.3674, + "step": 17690 + }, + { + "epoch": 0.81, + "learning_rate": 4.606389308435169e-05, + "loss": 2.4392, + "step": 17700 + }, + { + "epoch": 0.81, + "learning_rate": 4.606160465009841e-05, + "loss": 2.3587, + "step": 17710 + }, + { + "epoch": 0.81, + "learning_rate": 4.605931621584512e-05, + "loss": 2.288, + "step": 17720 + }, + { + "epoch": 0.81, + "learning_rate": 4.6057027781591836e-05, + "loss": 2.2625, + "step": 17730 + }, + { + "epoch": 0.81, + "learning_rate": 4.605473934733856e-05, + "loss": 2.2856, + "step": 17740 + }, + { + "epoch": 0.81, + "learning_rate": 4.605245091308527e-05, + "loss": 2.2983, + "step": 17750 + }, + { + "epoch": 0.81, + "learning_rate": 4.6050162478831985e-05, + "loss": 2.3513, + "step": 17760 + }, + { + "epoch": 0.81, + "learning_rate": 4.6047874044578706e-05, + "loss": 2.2521, + "step": 17770 + }, + { + "epoch": 0.81, + "learning_rate": 4.604558561032542e-05, + "loss": 2.2695, + "step": 17780 + }, + { + "epoch": 0.81, + "learning_rate": 4.6043297176072134e-05, + "loss": 2.2536, + "step": 17790 + }, + { + "epoch": 0.81, + "learning_rate": 4.604100874181885e-05, + "loss": 2.2978, + "step": 17800 + }, + { + "epoch": 0.81, + "learning_rate": 4.603872030756556e-05, + "loss": 2.32, + "step": 17810 + }, + { + "epoch": 0.81, + "learning_rate": 4.603643187331228e-05, + "loss": 2.3636, + "step": 17820 + }, + { + "epoch": 0.81, + "learning_rate": 4.6034143439059e-05, + "loss": 2.2627, + "step": 17830 + }, + { + "epoch": 0.81, + "learning_rate": 4.603185500480571e-05, + "loss": 2.3688, + "step": 17840 + }, + { + "epoch": 0.82, + "learning_rate": 4.602956657055243e-05, + "loss": 2.5841, + "step": 17850 + }, + { + "epoch": 0.82, + "learning_rate": 4.602727813629915e-05, + "loss": 2.2331, + "step": 17860 + }, + { + "epoch": 0.82, + "learning_rate": 4.602498970204586e-05, + "loss": 2.4572, + "step": 17870 + }, + { + "epoch": 0.82, + "learning_rate": 4.602270126779258e-05, + "loss": 2.3244, + "step": 17880 + }, + { + "epoch": 0.82, + "learning_rate": 4.6020412833539296e-05, + "loss": 2.4307, + "step": 17890 + }, + { + "epoch": 0.82, + "learning_rate": 4.601812439928601e-05, + "loss": 2.4377, + "step": 17900 + }, + { + "epoch": 0.82, + "learning_rate": 4.601583596503273e-05, + "loss": 2.3428, + "step": 17910 + }, + { + "epoch": 0.82, + "learning_rate": 4.6013547530779445e-05, + "loss": 2.2729, + "step": 17920 + }, + { + "epoch": 0.82, + "learning_rate": 4.601125909652616e-05, + "loss": 2.1953, + "step": 17930 + }, + { + "epoch": 0.82, + "learning_rate": 4.600897066227287e-05, + "loss": 2.1959, + "step": 17940 + }, + { + "epoch": 0.82, + "learning_rate": 4.6006682228019594e-05, + "loss": 2.2828, + "step": 17950 + }, + { + "epoch": 0.82, + "learning_rate": 4.600439379376631e-05, + "loss": 2.3928, + "step": 17960 + }, + { + "epoch": 0.82, + "learning_rate": 4.600210535951302e-05, + "loss": 2.3492, + "step": 17970 + }, + { + "epoch": 0.82, + "learning_rate": 4.5999816925259743e-05, + "loss": 2.1146, + "step": 17980 + }, + { + "epoch": 0.82, + "learning_rate": 4.599752849100645e-05, + "loss": 2.3273, + "step": 17990 + }, + { + "epoch": 0.82, + "learning_rate": 4.599524005675317e-05, + "loss": 2.1981, + "step": 18000 + }, + { + "epoch": 0.82, + "learning_rate": 4.5992951622499886e-05, + "loss": 2.4604, + "step": 18010 + }, + { + "epoch": 0.82, + "learning_rate": 4.59906631882466e-05, + "loss": 2.338, + "step": 18020 + }, + { + "epoch": 0.82, + "learning_rate": 4.598837475399332e-05, + "loss": 2.1164, + "step": 18030 + }, + { + "epoch": 0.82, + "learning_rate": 4.5986086319740035e-05, + "loss": 2.3731, + "step": 18040 + }, + { + "epoch": 0.82, + "learning_rate": 4.598379788548675e-05, + "loss": 2.2833, + "step": 18050 + }, + { + "epoch": 0.82, + "learning_rate": 4.598150945123347e-05, + "loss": 2.389, + "step": 18060 + }, + { + "epoch": 0.83, + "learning_rate": 4.5979221016980184e-05, + "loss": 2.3295, + "step": 18070 + }, + { + "epoch": 0.83, + "learning_rate": 4.59769325827269e-05, + "loss": 2.2687, + "step": 18080 + }, + { + "epoch": 0.83, + "learning_rate": 4.597464414847362e-05, + "loss": 2.3204, + "step": 18090 + }, + { + "epoch": 0.83, + "learning_rate": 4.597235571422033e-05, + "loss": 2.1336, + "step": 18100 + }, + { + "epoch": 0.83, + "learning_rate": 4.597006727996705e-05, + "loss": 2.2926, + "step": 18110 + }, + { + "epoch": 0.83, + "learning_rate": 4.596777884571377e-05, + "loss": 2.3188, + "step": 18120 + }, + { + "epoch": 0.83, + "learning_rate": 4.596549041146048e-05, + "loss": 2.3169, + "step": 18130 + }, + { + "epoch": 0.83, + "learning_rate": 4.59632019772072e-05, + "loss": 2.3555, + "step": 18140 + }, + { + "epoch": 0.83, + "learning_rate": 4.596091354295392e-05, + "loss": 2.4652, + "step": 18150 + }, + { + "epoch": 0.83, + "learning_rate": 4.595862510870063e-05, + "loss": 2.3797, + "step": 18160 + }, + { + "epoch": 0.83, + "learning_rate": 4.5956336674447346e-05, + "loss": 2.4016, + "step": 18170 + }, + { + "epoch": 0.83, + "learning_rate": 4.595404824019407e-05, + "loss": 2.3414, + "step": 18180 + }, + { + "epoch": 0.83, + "learning_rate": 4.5951759805940774e-05, + "loss": 2.1794, + "step": 18190 + }, + { + "epoch": 0.83, + "learning_rate": 4.594947137168749e-05, + "loss": 2.5393, + "step": 18200 + }, + { + "epoch": 0.83, + "learning_rate": 4.594718293743421e-05, + "loss": 2.4237, + "step": 18210 + }, + { + "epoch": 0.83, + "learning_rate": 4.594489450318092e-05, + "loss": 2.5043, + "step": 18220 + }, + { + "epoch": 0.83, + "learning_rate": 4.594260606892764e-05, + "loss": 2.3517, + "step": 18230 + }, + { + "epoch": 0.83, + "learning_rate": 4.594031763467436e-05, + "loss": 2.3566, + "step": 18240 + }, + { + "epoch": 0.83, + "learning_rate": 4.593802920042107e-05, + "loss": 2.2744, + "step": 18250 + }, + { + "epoch": 0.83, + "learning_rate": 4.5935740766167787e-05, + "loss": 2.3902, + "step": 18260 + }, + { + "epoch": 0.83, + "learning_rate": 4.593345233191451e-05, + "loss": 2.5577, + "step": 18270 + }, + { + "epoch": 0.83, + "learning_rate": 4.593116389766122e-05, + "loss": 2.48, + "step": 18280 + }, + { + "epoch": 0.84, + "learning_rate": 4.5928875463407936e-05, + "loss": 2.3165, + "step": 18290 + }, + { + "epoch": 0.84, + "learning_rate": 4.592658702915466e-05, + "loss": 2.3504, + "step": 18300 + }, + { + "epoch": 0.84, + "learning_rate": 4.592429859490137e-05, + "loss": 2.5786, + "step": 18310 + }, + { + "epoch": 0.84, + "learning_rate": 4.5922010160648085e-05, + "loss": 2.4614, + "step": 18320 + }, + { + "epoch": 0.84, + "learning_rate": 4.5919721726394806e-05, + "loss": 2.3181, + "step": 18330 + }, + { + "epoch": 0.84, + "learning_rate": 4.591743329214152e-05, + "loss": 2.2768, + "step": 18340 + }, + { + "epoch": 0.84, + "learning_rate": 4.5915144857888234e-05, + "loss": 2.1263, + "step": 18350 + }, + { + "epoch": 0.84, + "learning_rate": 4.5912856423634955e-05, + "loss": 2.2496, + "step": 18360 + }, + { + "epoch": 0.84, + "learning_rate": 4.591056798938167e-05, + "loss": 2.3443, + "step": 18370 + }, + { + "epoch": 0.84, + "learning_rate": 4.590827955512838e-05, + "loss": 2.2266, + "step": 18380 + }, + { + "epoch": 0.84, + "learning_rate": 4.59059911208751e-05, + "loss": 2.5338, + "step": 18390 + }, + { + "epoch": 0.84, + "learning_rate": 4.590370268662181e-05, + "loss": 2.3269, + "step": 18400 + }, + { + "epoch": 0.84, + "learning_rate": 4.590141425236853e-05, + "loss": 2.4094, + "step": 18410 + }, + { + "epoch": 0.84, + "learning_rate": 4.5899125818115247e-05, + "loss": 2.2199, + "step": 18420 + }, + { + "epoch": 0.84, + "learning_rate": 4.589683738386196e-05, + "loss": 2.4159, + "step": 18430 + }, + { + "epoch": 0.84, + "learning_rate": 4.589454894960868e-05, + "loss": 2.2806, + "step": 18440 + }, + { + "epoch": 0.84, + "learning_rate": 4.5892260515355396e-05, + "loss": 2.3626, + "step": 18450 + }, + { + "epoch": 0.84, + "learning_rate": 4.588997208110211e-05, + "loss": 2.2269, + "step": 18460 + }, + { + "epoch": 0.84, + "learning_rate": 4.588768364684883e-05, + "loss": 2.4798, + "step": 18470 + }, + { + "epoch": 0.84, + "learning_rate": 4.5885395212595545e-05, + "loss": 2.3738, + "step": 18480 + }, + { + "epoch": 0.84, + "learning_rate": 4.588310677834226e-05, + "loss": 2.4491, + "step": 18490 + }, + { + "epoch": 0.84, + "learning_rate": 4.588081834408898e-05, + "loss": 2.4643, + "step": 18500 + }, + { + "epoch": 0.85, + "learning_rate": 4.5878529909835694e-05, + "loss": 2.3537, + "step": 18510 + }, + { + "epoch": 0.85, + "learning_rate": 4.587624147558241e-05, + "loss": 2.2546, + "step": 18520 + }, + { + "epoch": 0.85, + "learning_rate": 4.587395304132913e-05, + "loss": 2.3253, + "step": 18530 + }, + { + "epoch": 0.85, + "learning_rate": 4.587166460707584e-05, + "loss": 2.1583, + "step": 18540 + }, + { + "epoch": 0.85, + "learning_rate": 4.586937617282256e-05, + "loss": 2.3538, + "step": 18550 + }, + { + "epoch": 0.85, + "learning_rate": 4.586708773856928e-05, + "loss": 2.2073, + "step": 18560 + }, + { + "epoch": 0.85, + "learning_rate": 4.586479930431599e-05, + "loss": 2.3757, + "step": 18570 + }, + { + "epoch": 0.85, + "learning_rate": 4.58625108700627e-05, + "loss": 2.4133, + "step": 18580 + }, + { + "epoch": 0.85, + "learning_rate": 4.586022243580942e-05, + "loss": 2.2985, + "step": 18590 + }, + { + "epoch": 0.85, + "learning_rate": 4.5857934001556135e-05, + "loss": 2.2218, + "step": 18600 + }, + { + "epoch": 0.85, + "learning_rate": 4.585564556730285e-05, + "loss": 2.332, + "step": 18610 + }, + { + "epoch": 0.85, + "learning_rate": 4.585335713304957e-05, + "loss": 2.2867, + "step": 18620 + }, + { + "epoch": 0.85, + "learning_rate": 4.5851068698796284e-05, + "loss": 2.5229, + "step": 18630 + }, + { + "epoch": 0.85, + "learning_rate": 4.5848780264543e-05, + "loss": 2.0596, + "step": 18640 + }, + { + "epoch": 0.85, + "learning_rate": 4.584649183028972e-05, + "loss": 2.3329, + "step": 18650 + }, + { + "epoch": 0.85, + "learning_rate": 4.584420339603643e-05, + "loss": 2.2265, + "step": 18660 + }, + { + "epoch": 0.85, + "learning_rate": 4.584191496178315e-05, + "loss": 2.2037, + "step": 18670 + }, + { + "epoch": 0.85, + "learning_rate": 4.583962652752987e-05, + "loss": 2.3796, + "step": 18680 + }, + { + "epoch": 0.85, + "learning_rate": 4.583733809327658e-05, + "loss": 2.3182, + "step": 18690 + }, + { + "epoch": 0.85, + "learning_rate": 4.5835049659023297e-05, + "loss": 2.4208, + "step": 18700 + }, + { + "epoch": 0.85, + "learning_rate": 4.583276122477002e-05, + "loss": 2.4165, + "step": 18710 + }, + { + "epoch": 0.85, + "learning_rate": 4.583047279051673e-05, + "loss": 2.3324, + "step": 18720 + }, + { + "epoch": 0.86, + "learning_rate": 4.5828184356263446e-05, + "loss": 2.2363, + "step": 18730 + }, + { + "epoch": 0.86, + "learning_rate": 4.582589592201017e-05, + "loss": 2.2866, + "step": 18740 + }, + { + "epoch": 0.86, + "learning_rate": 4.582360748775688e-05, + "loss": 2.37, + "step": 18750 + }, + { + "epoch": 0.86, + "learning_rate": 4.5821319053503595e-05, + "loss": 2.2878, + "step": 18760 + }, + { + "epoch": 0.86, + "learning_rate": 4.5819030619250316e-05, + "loss": 2.2953, + "step": 18770 + }, + { + "epoch": 0.86, + "learning_rate": 4.581674218499702e-05, + "loss": 2.4315, + "step": 18780 + }, + { + "epoch": 0.86, + "learning_rate": 4.5814453750743744e-05, + "loss": 2.372, + "step": 18790 + }, + { + "epoch": 0.86, + "learning_rate": 4.581216531649046e-05, + "loss": 2.2587, + "step": 18800 + }, + { + "epoch": 0.86, + "learning_rate": 4.580987688223717e-05, + "loss": 2.2833, + "step": 18810 + }, + { + "epoch": 0.86, + "learning_rate": 4.580758844798389e-05, + "loss": 2.3189, + "step": 18820 + }, + { + "epoch": 0.86, + "learning_rate": 4.580530001373061e-05, + "loss": 2.3696, + "step": 18830 + }, + { + "epoch": 0.86, + "learning_rate": 4.580301157947732e-05, + "loss": 2.3561, + "step": 18840 + }, + { + "epoch": 0.86, + "learning_rate": 4.580072314522404e-05, + "loss": 2.129, + "step": 18850 + }, + { + "epoch": 0.86, + "learning_rate": 4.5798434710970757e-05, + "loss": 2.2292, + "step": 18860 + }, + { + "epoch": 0.86, + "learning_rate": 4.579614627671747e-05, + "loss": 2.4677, + "step": 18870 + }, + { + "epoch": 0.86, + "learning_rate": 4.579385784246419e-05, + "loss": 2.4652, + "step": 18880 + }, + { + "epoch": 0.86, + "learning_rate": 4.5791569408210906e-05, + "loss": 2.4207, + "step": 18890 + }, + { + "epoch": 0.86, + "learning_rate": 4.578928097395762e-05, + "loss": 2.2877, + "step": 18900 + }, + { + "epoch": 0.86, + "learning_rate": 4.578699253970434e-05, + "loss": 2.2329, + "step": 18910 + }, + { + "epoch": 0.86, + "learning_rate": 4.5784704105451055e-05, + "loss": 2.3119, + "step": 18920 + }, + { + "epoch": 0.86, + "learning_rate": 4.578241567119777e-05, + "loss": 2.3258, + "step": 18930 + }, + { + "epoch": 0.86, + "learning_rate": 4.578012723694449e-05, + "loss": 2.3482, + "step": 18940 + }, + { + "epoch": 0.87, + "learning_rate": 4.5777838802691204e-05, + "loss": 2.3033, + "step": 18950 + }, + { + "epoch": 0.87, + "learning_rate": 4.577555036843792e-05, + "loss": 2.4461, + "step": 18960 + }, + { + "epoch": 0.87, + "learning_rate": 4.577326193418464e-05, + "loss": 2.2534, + "step": 18970 + }, + { + "epoch": 0.87, + "learning_rate": 4.5770973499931346e-05, + "loss": 2.4482, + "step": 18980 + }, + { + "epoch": 0.87, + "learning_rate": 4.576868506567806e-05, + "loss": 2.2505, + "step": 18990 + }, + { + "epoch": 0.87, + "learning_rate": 4.576639663142478e-05, + "loss": 2.2911, + "step": 19000 + }, + { + "epoch": 0.87, + "learning_rate": 4.5764108197171496e-05, + "loss": 2.3835, + "step": 19010 + }, + { + "epoch": 0.87, + "learning_rate": 4.576181976291821e-05, + "loss": 2.3962, + "step": 19020 + }, + { + "epoch": 0.87, + "learning_rate": 4.575953132866493e-05, + "loss": 2.3376, + "step": 19030 + }, + { + "epoch": 0.87, + "learning_rate": 4.5757242894411645e-05, + "loss": 2.324, + "step": 19040 + }, + { + "epoch": 0.87, + "learning_rate": 4.575495446015836e-05, + "loss": 2.2832, + "step": 19050 + }, + { + "epoch": 0.87, + "learning_rate": 4.575266602590508e-05, + "loss": 2.2894, + "step": 19060 + }, + { + "epoch": 0.87, + "learning_rate": 4.5750377591651794e-05, + "loss": 2.302, + "step": 19070 + }, + { + "epoch": 0.87, + "learning_rate": 4.574808915739851e-05, + "loss": 2.2316, + "step": 19080 + }, + { + "epoch": 0.87, + "learning_rate": 4.574580072314523e-05, + "loss": 2.4414, + "step": 19090 + }, + { + "epoch": 0.87, + "learning_rate": 4.574351228889194e-05, + "loss": 2.2921, + "step": 19100 + }, + { + "epoch": 0.87, + "learning_rate": 4.574122385463866e-05, + "loss": 2.2658, + "step": 19110 + }, + { + "epoch": 0.87, + "learning_rate": 4.573893542038538e-05, + "loss": 2.2008, + "step": 19120 + }, + { + "epoch": 0.87, + "learning_rate": 4.573664698613209e-05, + "loss": 2.297, + "step": 19130 + }, + { + "epoch": 0.87, + "learning_rate": 4.5734358551878807e-05, + "loss": 2.3842, + "step": 19140 + }, + { + "epoch": 0.87, + "learning_rate": 4.573207011762553e-05, + "loss": 2.0722, + "step": 19150 + }, + { + "epoch": 0.87, + "learning_rate": 4.572978168337224e-05, + "loss": 2.2495, + "step": 19160 + }, + { + "epoch": 0.88, + "learning_rate": 4.572749324911895e-05, + "loss": 2.3307, + "step": 19170 + }, + { + "epoch": 0.88, + "learning_rate": 4.572520481486567e-05, + "loss": 2.2726, + "step": 19180 + }, + { + "epoch": 0.88, + "learning_rate": 4.5722916380612384e-05, + "loss": 2.1957, + "step": 19190 + }, + { + "epoch": 0.88, + "learning_rate": 4.57206279463591e-05, + "loss": 2.2915, + "step": 19200 + }, + { + "epoch": 0.88, + "learning_rate": 4.571833951210582e-05, + "loss": 2.4166, + "step": 19210 + }, + { + "epoch": 0.88, + "learning_rate": 4.571605107785253e-05, + "loss": 2.3094, + "step": 19220 + }, + { + "epoch": 0.88, + "learning_rate": 4.571376264359925e-05, + "loss": 2.4369, + "step": 19230 + }, + { + "epoch": 0.88, + "learning_rate": 4.571147420934597e-05, + "loss": 2.1235, + "step": 19240 + }, + { + "epoch": 0.88, + "learning_rate": 4.570918577509268e-05, + "loss": 2.4122, + "step": 19250 + }, + { + "epoch": 0.88, + "learning_rate": 4.5706897340839396e-05, + "loss": 2.6411, + "step": 19260 + }, + { + "epoch": 0.88, + "learning_rate": 4.570460890658612e-05, + "loss": 2.2493, + "step": 19270 + }, + { + "epoch": 0.88, + "learning_rate": 4.570232047233283e-05, + "loss": 2.3754, + "step": 19280 + }, + { + "epoch": 0.88, + "learning_rate": 4.5700032038079546e-05, + "loss": 2.3285, + "step": 19290 + }, + { + "epoch": 0.88, + "learning_rate": 4.5697743603826267e-05, + "loss": 2.3002, + "step": 19300 + }, + { + "epoch": 0.88, + "learning_rate": 4.569545516957298e-05, + "loss": 2.2763, + "step": 19310 + }, + { + "epoch": 0.88, + "learning_rate": 4.5693166735319695e-05, + "loss": 2.3641, + "step": 19320 + }, + { + "epoch": 0.88, + "learning_rate": 4.5690878301066416e-05, + "loss": 2.404, + "step": 19330 + }, + { + "epoch": 0.88, + "learning_rate": 4.568858986681313e-05, + "loss": 2.2606, + "step": 19340 + }, + { + "epoch": 0.88, + "learning_rate": 4.5686301432559844e-05, + "loss": 2.3274, + "step": 19350 + }, + { + "epoch": 0.88, + "learning_rate": 4.5684012998306565e-05, + "loss": 2.3824, + "step": 19360 + }, + { + "epoch": 0.88, + "learning_rate": 4.568172456405327e-05, + "loss": 2.2078, + "step": 19370 + }, + { + "epoch": 0.88, + "learning_rate": 4.567943612979999e-05, + "loss": 2.2986, + "step": 19380 + }, + { + "epoch": 0.89, + "learning_rate": 4.567714769554671e-05, + "loss": 2.2778, + "step": 19390 + }, + { + "epoch": 0.89, + "learning_rate": 4.567485926129342e-05, + "loss": 2.3458, + "step": 19400 + }, + { + "epoch": 0.89, + "learning_rate": 4.567257082704014e-05, + "loss": 2.4335, + "step": 19410 + }, + { + "epoch": 0.89, + "learning_rate": 4.5670282392786856e-05, + "loss": 2.4214, + "step": 19420 + }, + { + "epoch": 0.89, + "learning_rate": 4.566799395853357e-05, + "loss": 2.4537, + "step": 19430 + }, + { + "epoch": 0.89, + "learning_rate": 4.566570552428029e-05, + "loss": 2.416, + "step": 19440 + }, + { + "epoch": 0.89, + "learning_rate": 4.5663417090027006e-05, + "loss": 2.3781, + "step": 19450 + }, + { + "epoch": 0.89, + "learning_rate": 4.566112865577372e-05, + "loss": 2.2515, + "step": 19460 + }, + { + "epoch": 0.89, + "learning_rate": 4.565884022152044e-05, + "loss": 2.1159, + "step": 19470 + }, + { + "epoch": 0.89, + "learning_rate": 4.5656551787267155e-05, + "loss": 2.2833, + "step": 19480 + }, + { + "epoch": 0.89, + "learning_rate": 4.565426335301387e-05, + "loss": 2.1917, + "step": 19490 + }, + { + "epoch": 0.89, + "learning_rate": 4.565197491876059e-05, + "loss": 2.2998, + "step": 19500 + }, + { + "epoch": 0.89, + "learning_rate": 4.5649686484507304e-05, + "loss": 2.3034, + "step": 19510 + }, + { + "epoch": 0.89, + "learning_rate": 4.564739805025402e-05, + "loss": 2.4424, + "step": 19520 + }, + { + "epoch": 0.89, + "learning_rate": 4.564510961600074e-05, + "loss": 2.32, + "step": 19530 + }, + { + "epoch": 0.89, + "learning_rate": 4.564282118174745e-05, + "loss": 2.3027, + "step": 19540 + }, + { + "epoch": 0.89, + "learning_rate": 4.564053274749417e-05, + "loss": 2.2994, + "step": 19550 + }, + { + "epoch": 0.89, + "learning_rate": 4.563824431324089e-05, + "loss": 2.402, + "step": 19560 + }, + { + "epoch": 0.89, + "learning_rate": 4.5635955878987596e-05, + "loss": 2.4273, + "step": 19570 + }, + { + "epoch": 0.89, + "learning_rate": 4.563366744473431e-05, + "loss": 2.1845, + "step": 19580 + }, + { + "epoch": 0.89, + "learning_rate": 4.563137901048103e-05, + "loss": 2.3509, + "step": 19590 + }, + { + "epoch": 0.89, + "learning_rate": 4.5629090576227745e-05, + "loss": 2.4227, + "step": 19600 + }, + { + "epoch": 0.9, + "learning_rate": 4.562680214197446e-05, + "loss": 2.2192, + "step": 19610 + }, + { + "epoch": 0.9, + "learning_rate": 4.562451370772118e-05, + "loss": 2.4397, + "step": 19620 + }, + { + "epoch": 0.9, + "learning_rate": 4.5622225273467894e-05, + "loss": 2.3549, + "step": 19630 + }, + { + "epoch": 0.9, + "learning_rate": 4.561993683921461e-05, + "loss": 2.3323, + "step": 19640 + }, + { + "epoch": 0.9, + "learning_rate": 4.561764840496133e-05, + "loss": 2.3329, + "step": 19650 + }, + { + "epoch": 0.9, + "learning_rate": 4.561535997070804e-05, + "loss": 2.3269, + "step": 19660 + }, + { + "epoch": 0.9, + "learning_rate": 4.561307153645476e-05, + "loss": 2.3676, + "step": 19670 + }, + { + "epoch": 0.9, + "learning_rate": 4.561078310220148e-05, + "loss": 2.4446, + "step": 19680 + }, + { + "epoch": 0.9, + "learning_rate": 4.560849466794819e-05, + "loss": 2.3611, + "step": 19690 + }, + { + "epoch": 0.9, + "learning_rate": 4.5606206233694906e-05, + "loss": 2.2033, + "step": 19700 + }, + { + "epoch": 0.9, + "learning_rate": 4.560391779944163e-05, + "loss": 2.1853, + "step": 19710 + }, + { + "epoch": 0.9, + "learning_rate": 4.560162936518834e-05, + "loss": 2.1611, + "step": 19720 + }, + { + "epoch": 0.9, + "learning_rate": 4.5599340930935056e-05, + "loss": 2.4002, + "step": 19730 + }, + { + "epoch": 0.9, + "learning_rate": 4.5597052496681776e-05, + "loss": 2.2953, + "step": 19740 + }, + { + "epoch": 0.9, + "learning_rate": 4.559476406242849e-05, + "loss": 2.3596, + "step": 19750 + }, + { + "epoch": 0.9, + "learning_rate": 4.5592475628175205e-05, + "loss": 2.5304, + "step": 19760 + }, + { + "epoch": 0.9, + "learning_rate": 4.559018719392192e-05, + "loss": 2.3825, + "step": 19770 + }, + { + "epoch": 0.9, + "learning_rate": 4.558789875966863e-05, + "loss": 2.3109, + "step": 19780 + }, + { + "epoch": 0.9, + "learning_rate": 4.5585610325415354e-05, + "loss": 2.3204, + "step": 19790 + }, + { + "epoch": 0.9, + "learning_rate": 4.558332189116207e-05, + "loss": 2.3479, + "step": 19800 + }, + { + "epoch": 0.9, + "learning_rate": 4.558103345690878e-05, + "loss": 2.3186, + "step": 19810 + }, + { + "epoch": 0.91, + "learning_rate": 4.55787450226555e-05, + "loss": 2.3685, + "step": 19820 + }, + { + "epoch": 0.91, + "learning_rate": 4.557645658840222e-05, + "loss": 2.2943, + "step": 19830 + }, + { + "epoch": 0.91, + "learning_rate": 4.557416815414893e-05, + "loss": 2.3816, + "step": 19840 + }, + { + "epoch": 0.91, + "learning_rate": 4.557187971989565e-05, + "loss": 2.3416, + "step": 19850 + }, + { + "epoch": 0.91, + "learning_rate": 4.5569591285642366e-05, + "loss": 2.3142, + "step": 19860 + }, + { + "epoch": 0.91, + "learning_rate": 4.556730285138908e-05, + "loss": 2.2883, + "step": 19870 + }, + { + "epoch": 0.91, + "learning_rate": 4.55650144171358e-05, + "loss": 2.312, + "step": 19880 + }, + { + "epoch": 0.91, + "learning_rate": 4.5562725982882516e-05, + "loss": 2.4228, + "step": 19890 + }, + { + "epoch": 0.91, + "learning_rate": 4.556043754862923e-05, + "loss": 2.1715, + "step": 19900 + }, + { + "epoch": 0.91, + "learning_rate": 4.555814911437595e-05, + "loss": 2.3988, + "step": 19910 + }, + { + "epoch": 0.91, + "learning_rate": 4.5555860680122665e-05, + "loss": 2.5324, + "step": 19920 + }, + { + "epoch": 0.91, + "learning_rate": 4.555357224586938e-05, + "loss": 2.3425, + "step": 19930 + }, + { + "epoch": 0.91, + "learning_rate": 4.55512838116161e-05, + "loss": 2.5047, + "step": 19940 + }, + { + "epoch": 0.91, + "learning_rate": 4.5548995377362814e-05, + "loss": 2.2874, + "step": 19950 + }, + { + "epoch": 0.91, + "learning_rate": 4.554670694310952e-05, + "loss": 2.3633, + "step": 19960 + }, + { + "epoch": 0.91, + "learning_rate": 4.554441850885624e-05, + "loss": 2.3696, + "step": 19970 + }, + { + "epoch": 0.91, + "learning_rate": 4.5542130074602956e-05, + "loss": 2.2353, + "step": 19980 + }, + { + "epoch": 0.91, + "learning_rate": 4.553984164034967e-05, + "loss": 2.4472, + "step": 19990 + }, + { + "epoch": 0.91, + "learning_rate": 4.553755320609639e-05, + "loss": 2.4039, + "step": 20000 + }, + { + "epoch": 0.91, + "learning_rate": 4.5535264771843106e-05, + "loss": 2.2052, + "step": 20010 + }, + { + "epoch": 0.91, + "learning_rate": 4.553297633758982e-05, + "loss": 2.3342, + "step": 20020 + }, + { + "epoch": 0.91, + "learning_rate": 4.553068790333654e-05, + "loss": 2.3392, + "step": 20030 + }, + { + "epoch": 0.92, + "learning_rate": 4.5528399469083255e-05, + "loss": 2.3362, + "step": 20040 + }, + { + "epoch": 0.92, + "learning_rate": 4.552611103482997e-05, + "loss": 2.2906, + "step": 20050 + }, + { + "epoch": 0.92, + "learning_rate": 4.552382260057669e-05, + "loss": 2.3721, + "step": 20060 + }, + { + "epoch": 0.92, + "learning_rate": 4.5521534166323404e-05, + "loss": 2.3866, + "step": 20070 + }, + { + "epoch": 0.92, + "learning_rate": 4.551924573207012e-05, + "loss": 2.2451, + "step": 20080 + }, + { + "epoch": 0.92, + "learning_rate": 4.551695729781684e-05, + "loss": 2.5169, + "step": 20090 + }, + { + "epoch": 0.92, + "learning_rate": 4.551466886356355e-05, + "loss": 2.4583, + "step": 20100 + }, + { + "epoch": 0.92, + "learning_rate": 4.551238042931027e-05, + "loss": 2.2339, + "step": 20110 + }, + { + "epoch": 0.92, + "learning_rate": 4.551009199505699e-05, + "loss": 2.2359, + "step": 20120 + }, + { + "epoch": 0.92, + "learning_rate": 4.55078035608037e-05, + "loss": 2.3246, + "step": 20130 + }, + { + "epoch": 0.92, + "learning_rate": 4.5505515126550416e-05, + "loss": 2.3768, + "step": 20140 + }, + { + "epoch": 0.92, + "learning_rate": 4.550322669229714e-05, + "loss": 2.3263, + "step": 20150 + }, + { + "epoch": 0.92, + "learning_rate": 4.5500938258043845e-05, + "loss": 2.3563, + "step": 20160 + }, + { + "epoch": 0.92, + "learning_rate": 4.5498649823790566e-05, + "loss": 2.4489, + "step": 20170 + }, + { + "epoch": 0.92, + "learning_rate": 4.549636138953728e-05, + "loss": 2.3039, + "step": 20180 + }, + { + "epoch": 0.92, + "learning_rate": 4.5494072955283994e-05, + "loss": 2.2202, + "step": 20190 + }, + { + "epoch": 0.92, + "learning_rate": 4.5491784521030715e-05, + "loss": 2.3698, + "step": 20200 + }, + { + "epoch": 0.92, + "learning_rate": 4.548949608677743e-05, + "loss": 2.2487, + "step": 20210 + }, + { + "epoch": 0.92, + "learning_rate": 4.548720765252414e-05, + "loss": 2.2787, + "step": 20220 + }, + { + "epoch": 0.92, + "learning_rate": 4.5484919218270864e-05, + "loss": 2.2818, + "step": 20230 + }, + { + "epoch": 0.92, + "learning_rate": 4.548263078401758e-05, + "loss": 2.2863, + "step": 20240 + }, + { + "epoch": 0.92, + "learning_rate": 4.548034234976429e-05, + "loss": 2.2049, + "step": 20250 + }, + { + "epoch": 0.93, + "learning_rate": 4.5478053915511006e-05, + "loss": 2.1635, + "step": 20260 + }, + { + "epoch": 0.93, + "learning_rate": 4.547576548125773e-05, + "loss": 2.2524, + "step": 20270 + }, + { + "epoch": 0.93, + "learning_rate": 4.547347704700444e-05, + "loss": 2.3154, + "step": 20280 + }, + { + "epoch": 0.93, + "learning_rate": 4.5471188612751155e-05, + "loss": 2.4233, + "step": 20290 + }, + { + "epoch": 0.93, + "learning_rate": 4.5468900178497876e-05, + "loss": 2.3096, + "step": 20300 + }, + { + "epoch": 0.93, + "learning_rate": 4.546661174424459e-05, + "loss": 2.2627, + "step": 20310 + }, + { + "epoch": 0.93, + "learning_rate": 4.5464323309991305e-05, + "loss": 2.4268, + "step": 20320 + }, + { + "epoch": 0.93, + "learning_rate": 4.5462034875738026e-05, + "loss": 2.4624, + "step": 20330 + }, + { + "epoch": 0.93, + "learning_rate": 4.545974644148474e-05, + "loss": 2.2327, + "step": 20340 + }, + { + "epoch": 0.93, + "learning_rate": 4.5457458007231454e-05, + "loss": 2.3966, + "step": 20350 + }, + { + "epoch": 0.93, + "learning_rate": 4.545516957297817e-05, + "loss": 2.3885, + "step": 20360 + }, + { + "epoch": 0.93, + "learning_rate": 4.545288113872488e-05, + "loss": 2.2399, + "step": 20370 + }, + { + "epoch": 0.93, + "learning_rate": 4.54505927044716e-05, + "loss": 2.3578, + "step": 20380 + }, + { + "epoch": 0.93, + "learning_rate": 4.544830427021832e-05, + "loss": 2.3703, + "step": 20390 + }, + { + "epoch": 0.93, + "learning_rate": 4.544601583596503e-05, + "loss": 2.4407, + "step": 20400 + }, + { + "epoch": 0.93, + "learning_rate": 4.544372740171175e-05, + "loss": 2.203, + "step": 20410 + }, + { + "epoch": 0.93, + "learning_rate": 4.5441438967458466e-05, + "loss": 2.5203, + "step": 20420 + }, + { + "epoch": 0.93, + "learning_rate": 4.543915053320518e-05, + "loss": 2.3311, + "step": 20430 + }, + { + "epoch": 0.93, + "learning_rate": 4.54368620989519e-05, + "loss": 2.3078, + "step": 20440 + }, + { + "epoch": 0.93, + "learning_rate": 4.5434573664698616e-05, + "loss": 2.3714, + "step": 20450 + }, + { + "epoch": 0.93, + "learning_rate": 4.543228523044533e-05, + "loss": 2.5494, + "step": 20460 + }, + { + "epoch": 0.93, + "learning_rate": 4.542999679619205e-05, + "loss": 2.5469, + "step": 20470 + }, + { + "epoch": 0.94, + "learning_rate": 4.5427708361938765e-05, + "loss": 2.2872, + "step": 20480 + }, + { + "epoch": 0.94, + "learning_rate": 4.542541992768548e-05, + "loss": 2.1746, + "step": 20490 + }, + { + "epoch": 0.94, + "learning_rate": 4.54231314934322e-05, + "loss": 2.3005, + "step": 20500 + }, + { + "epoch": 0.94, + "learning_rate": 4.5420843059178914e-05, + "loss": 2.234, + "step": 20510 + }, + { + "epoch": 0.94, + "learning_rate": 4.541855462492563e-05, + "loss": 2.4276, + "step": 20520 + }, + { + "epoch": 0.94, + "learning_rate": 4.541626619067235e-05, + "loss": 2.4703, + "step": 20530 + }, + { + "epoch": 0.94, + "learning_rate": 4.541397775641906e-05, + "loss": 2.2527, + "step": 20540 + }, + { + "epoch": 0.94, + "learning_rate": 4.541168932216578e-05, + "loss": 2.345, + "step": 20550 + }, + { + "epoch": 0.94, + "learning_rate": 4.540940088791249e-05, + "loss": 2.2503, + "step": 20560 + }, + { + "epoch": 0.94, + "learning_rate": 4.5407112453659205e-05, + "loss": 2.2598, + "step": 20570 + }, + { + "epoch": 0.94, + "learning_rate": 4.540482401940592e-05, + "loss": 2.2499, + "step": 20580 + }, + { + "epoch": 0.94, + "learning_rate": 4.540253558515264e-05, + "loss": 2.2606, + "step": 20590 + }, + { + "epoch": 0.94, + "learning_rate": 4.5400247150899355e-05, + "loss": 2.4059, + "step": 20600 + }, + { + "epoch": 0.94, + "learning_rate": 4.539795871664607e-05, + "loss": 2.2226, + "step": 20610 + }, + { + "epoch": 0.94, + "learning_rate": 4.539567028239279e-05, + "loss": 2.3535, + "step": 20620 + }, + { + "epoch": 0.94, + "learning_rate": 4.5393381848139504e-05, + "loss": 2.3696, + "step": 20630 + }, + { + "epoch": 0.94, + "learning_rate": 4.539109341388622e-05, + "loss": 2.2701, + "step": 20640 + }, + { + "epoch": 0.94, + "learning_rate": 4.538880497963294e-05, + "loss": 2.3742, + "step": 20650 + }, + { + "epoch": 0.94, + "learning_rate": 4.538651654537965e-05, + "loss": 2.3911, + "step": 20660 + }, + { + "epoch": 0.94, + "learning_rate": 4.538422811112637e-05, + "loss": 2.4521, + "step": 20670 + }, + { + "epoch": 0.94, + "learning_rate": 4.538193967687309e-05, + "loss": 2.2415, + "step": 20680 + }, + { + "epoch": 0.94, + "learning_rate": 4.53796512426198e-05, + "loss": 2.1932, + "step": 20690 + }, + { + "epoch": 0.95, + "learning_rate": 4.5377362808366516e-05, + "loss": 2.324, + "step": 20700 + }, + { + "epoch": 0.95, + "learning_rate": 4.537507437411324e-05, + "loss": 2.2722, + "step": 20710 + }, + { + "epoch": 0.95, + "learning_rate": 4.537278593985995e-05, + "loss": 2.2937, + "step": 20720 + }, + { + "epoch": 0.95, + "learning_rate": 4.5370497505606665e-05, + "loss": 2.1996, + "step": 20730 + }, + { + "epoch": 0.95, + "learning_rate": 4.5368209071353386e-05, + "loss": 2.2959, + "step": 20740 + }, + { + "epoch": 0.95, + "learning_rate": 4.5365920637100094e-05, + "loss": 2.2833, + "step": 20750 + }, + { + "epoch": 0.95, + "learning_rate": 4.5363632202846815e-05, + "loss": 2.2439, + "step": 20760 + }, + { + "epoch": 0.95, + "learning_rate": 4.536134376859353e-05, + "loss": 2.3691, + "step": 20770 + }, + { + "epoch": 0.95, + "learning_rate": 4.535905533434024e-05, + "loss": 2.3087, + "step": 20780 + }, + { + "epoch": 0.95, + "learning_rate": 4.5356766900086964e-05, + "loss": 2.3394, + "step": 20790 + }, + { + "epoch": 0.95, + "learning_rate": 4.535447846583368e-05, + "loss": 2.2847, + "step": 20800 + }, + { + "epoch": 0.95, + "learning_rate": 4.535219003158039e-05, + "loss": 2.3594, + "step": 20810 + }, + { + "epoch": 0.95, + "learning_rate": 4.534990159732711e-05, + "loss": 2.3482, + "step": 20820 + }, + { + "epoch": 0.95, + "learning_rate": 4.534761316307383e-05, + "loss": 2.3275, + "step": 20830 + }, + { + "epoch": 0.95, + "learning_rate": 4.534532472882054e-05, + "loss": 2.5, + "step": 20840 + }, + { + "epoch": 0.95, + "learning_rate": 4.534303629456726e-05, + "loss": 2.3864, + "step": 20850 + }, + { + "epoch": 0.95, + "learning_rate": 4.5340747860313976e-05, + "loss": 2.2909, + "step": 20860 + }, + { + "epoch": 0.95, + "learning_rate": 4.533845942606069e-05, + "loss": 2.3373, + "step": 20870 + }, + { + "epoch": 0.95, + "learning_rate": 4.533617099180741e-05, + "loss": 2.3343, + "step": 20880 + }, + { + "epoch": 0.95, + "learning_rate": 4.5333882557554125e-05, + "loss": 2.2317, + "step": 20890 + }, + { + "epoch": 0.95, + "learning_rate": 4.533159412330084e-05, + "loss": 2.5133, + "step": 20900 + }, + { + "epoch": 0.95, + "learning_rate": 4.532930568904756e-05, + "loss": 2.3618, + "step": 20910 + }, + { + "epoch": 0.96, + "learning_rate": 4.5327017254794275e-05, + "loss": 2.2439, + "step": 20920 + }, + { + "epoch": 0.96, + "learning_rate": 4.532472882054099e-05, + "loss": 2.4075, + "step": 20930 + }, + { + "epoch": 0.96, + "learning_rate": 4.532244038628771e-05, + "loss": 2.2629, + "step": 20940 + }, + { + "epoch": 0.96, + "learning_rate": 4.532015195203442e-05, + "loss": 2.3173, + "step": 20950 + }, + { + "epoch": 0.96, + "learning_rate": 4.531786351778113e-05, + "loss": 2.382, + "step": 20960 + }, + { + "epoch": 0.96, + "learning_rate": 4.531557508352785e-05, + "loss": 2.3389, + "step": 20970 + }, + { + "epoch": 0.96, + "learning_rate": 4.5313286649274566e-05, + "loss": 2.3886, + "step": 20980 + }, + { + "epoch": 0.96, + "learning_rate": 4.531099821502128e-05, + "loss": 2.3159, + "step": 20990 + }, + { + "epoch": 0.96, + "learning_rate": 4.5308709780768e-05, + "loss": 2.2627, + "step": 21000 + }, + { + "epoch": 0.96, + "learning_rate": 4.5306421346514715e-05, + "loss": 2.2969, + "step": 21010 + }, + { + "epoch": 0.96, + "learning_rate": 4.530413291226143e-05, + "loss": 2.2948, + "step": 21020 + }, + { + "epoch": 0.96, + "learning_rate": 4.530184447800815e-05, + "loss": 2.3191, + "step": 21030 + }, + { + "epoch": 0.96, + "learning_rate": 4.5299556043754865e-05, + "loss": 2.3351, + "step": 21040 + }, + { + "epoch": 0.96, + "learning_rate": 4.529726760950158e-05, + "loss": 2.3544, + "step": 21050 + }, + { + "epoch": 0.96, + "learning_rate": 4.52949791752483e-05, + "loss": 2.3897, + "step": 21060 + }, + { + "epoch": 0.96, + "learning_rate": 4.5292690740995014e-05, + "loss": 2.4179, + "step": 21070 + }, + { + "epoch": 0.96, + "learning_rate": 4.529040230674173e-05, + "loss": 2.3701, + "step": 21080 + }, + { + "epoch": 0.96, + "learning_rate": 4.528811387248845e-05, + "loss": 2.2162, + "step": 21090 + }, + { + "epoch": 0.96, + "learning_rate": 4.528582543823516e-05, + "loss": 2.3018, + "step": 21100 + }, + { + "epoch": 0.96, + "learning_rate": 4.528353700398188e-05, + "loss": 2.3175, + "step": 21110 + }, + { + "epoch": 0.96, + "learning_rate": 4.52812485697286e-05, + "loss": 2.3634, + "step": 21120 + }, + { + "epoch": 0.96, + "learning_rate": 4.527896013547531e-05, + "loss": 2.3329, + "step": 21130 + }, + { + "epoch": 0.97, + "learning_rate": 4.5276671701222026e-05, + "loss": 2.165, + "step": 21140 + }, + { + "epoch": 0.97, + "learning_rate": 4.527438326696874e-05, + "loss": 2.4515, + "step": 21150 + }, + { + "epoch": 0.97, + "learning_rate": 4.5272094832715455e-05, + "loss": 2.3241, + "step": 21160 + }, + { + "epoch": 0.97, + "learning_rate": 4.5269806398462175e-05, + "loss": 2.4588, + "step": 21170 + }, + { + "epoch": 0.97, + "learning_rate": 4.526751796420889e-05, + "loss": 2.4249, + "step": 21180 + }, + { + "epoch": 0.97, + "learning_rate": 4.5265229529955604e-05, + "loss": 2.2971, + "step": 21190 + }, + { + "epoch": 0.97, + "learning_rate": 4.5262941095702325e-05, + "loss": 2.3665, + "step": 21200 + }, + { + "epoch": 0.97, + "learning_rate": 4.526065266144904e-05, + "loss": 2.2997, + "step": 21210 + }, + { + "epoch": 0.97, + "learning_rate": 4.525836422719575e-05, + "loss": 2.3432, + "step": 21220 + }, + { + "epoch": 0.97, + "learning_rate": 4.5256075792942474e-05, + "loss": 2.1502, + "step": 21230 + }, + { + "epoch": 0.97, + "learning_rate": 4.525378735868919e-05, + "loss": 2.2443, + "step": 21240 + }, + { + "epoch": 0.97, + "learning_rate": 4.52514989244359e-05, + "loss": 2.6131, + "step": 21250 + }, + { + "epoch": 0.97, + "learning_rate": 4.524921049018262e-05, + "loss": 2.3306, + "step": 21260 + }, + { + "epoch": 0.97, + "learning_rate": 4.524692205592934e-05, + "loss": 2.231, + "step": 21270 + }, + { + "epoch": 0.97, + "learning_rate": 4.524463362167605e-05, + "loss": 2.2551, + "step": 21280 + }, + { + "epoch": 0.97, + "learning_rate": 4.524234518742277e-05, + "loss": 2.223, + "step": 21290 + }, + { + "epoch": 0.97, + "learning_rate": 4.5240056753169486e-05, + "loss": 2.2982, + "step": 21300 + }, + { + "epoch": 0.97, + "learning_rate": 4.52377683189162e-05, + "loss": 2.246, + "step": 21310 + }, + { + "epoch": 0.97, + "learning_rate": 4.523547988466292e-05, + "loss": 2.3108, + "step": 21320 + }, + { + "epoch": 0.97, + "learning_rate": 4.5233191450409635e-05, + "loss": 2.3256, + "step": 21330 + }, + { + "epoch": 0.97, + "learning_rate": 4.523090301615635e-05, + "loss": 2.2823, + "step": 21340 + }, + { + "epoch": 0.97, + "learning_rate": 4.5228614581903064e-05, + "loss": 2.1821, + "step": 21350 + }, + { + "epoch": 0.98, + "learning_rate": 4.522632614764978e-05, + "loss": 2.2537, + "step": 21360 + }, + { + "epoch": 0.98, + "learning_rate": 4.522403771339649e-05, + "loss": 2.2024, + "step": 21370 + }, + { + "epoch": 0.98, + "learning_rate": 4.522174927914321e-05, + "loss": 2.3644, + "step": 21380 + }, + { + "epoch": 0.98, + "learning_rate": 4.521946084488993e-05, + "loss": 2.4402, + "step": 21390 + }, + { + "epoch": 0.98, + "learning_rate": 4.521717241063664e-05, + "loss": 2.4038, + "step": 21400 + }, + { + "epoch": 0.98, + "learning_rate": 4.521488397638336e-05, + "loss": 2.3156, + "step": 21410 + }, + { + "epoch": 0.98, + "learning_rate": 4.5212595542130076e-05, + "loss": 2.3538, + "step": 21420 + }, + { + "epoch": 0.98, + "learning_rate": 4.521030710787679e-05, + "loss": 2.3233, + "step": 21430 + }, + { + "epoch": 0.98, + "learning_rate": 4.520801867362351e-05, + "loss": 2.2834, + "step": 21440 + }, + { + "epoch": 0.98, + "learning_rate": 4.5205730239370225e-05, + "loss": 2.3718, + "step": 21450 + }, + { + "epoch": 0.98, + "learning_rate": 4.520344180511694e-05, + "loss": 2.1681, + "step": 21460 + }, + { + "epoch": 0.98, + "learning_rate": 4.520115337086366e-05, + "loss": 2.4196, + "step": 21470 + }, + { + "epoch": 0.98, + "learning_rate": 4.5198864936610375e-05, + "loss": 2.3599, + "step": 21480 + }, + { + "epoch": 0.98, + "learning_rate": 4.519657650235709e-05, + "loss": 2.5683, + "step": 21490 + }, + { + "epoch": 0.98, + "learning_rate": 4.519428806810381e-05, + "loss": 2.2748, + "step": 21500 + }, + { + "epoch": 0.98, + "learning_rate": 4.5191999633850524e-05, + "loss": 2.3012, + "step": 21510 + }, + { + "epoch": 0.98, + "learning_rate": 4.518971119959724e-05, + "loss": 2.1967, + "step": 21520 + }, + { + "epoch": 0.98, + "learning_rate": 4.518742276534396e-05, + "loss": 2.3538, + "step": 21530 + }, + { + "epoch": 0.98, + "learning_rate": 4.5185134331090666e-05, + "loss": 2.2923, + "step": 21540 + }, + { + "epoch": 0.98, + "learning_rate": 4.518284589683738e-05, + "loss": 2.1332, + "step": 21550 + }, + { + "epoch": 0.98, + "learning_rate": 4.51805574625841e-05, + "loss": 2.2812, + "step": 21560 + }, + { + "epoch": 0.98, + "learning_rate": 4.5178269028330815e-05, + "loss": 2.5015, + "step": 21570 + }, + { + "epoch": 0.99, + "learning_rate": 4.517598059407753e-05, + "loss": 2.1773, + "step": 21580 + }, + { + "epoch": 0.99, + "learning_rate": 4.517369215982425e-05, + "loss": 2.1177, + "step": 21590 + }, + { + "epoch": 0.99, + "learning_rate": 4.5171403725570964e-05, + "loss": 2.2354, + "step": 21600 + }, + { + "epoch": 0.99, + "learning_rate": 4.516911529131768e-05, + "loss": 2.3471, + "step": 21610 + }, + { + "epoch": 0.99, + "learning_rate": 4.51668268570644e-05, + "loss": 2.2099, + "step": 21620 + }, + { + "epoch": 0.99, + "learning_rate": 4.5164538422811114e-05, + "loss": 2.2166, + "step": 21630 + }, + { + "epoch": 0.99, + "learning_rate": 4.516224998855783e-05, + "loss": 2.3403, + "step": 21640 + }, + { + "epoch": 0.99, + "learning_rate": 4.515996155430455e-05, + "loss": 2.4002, + "step": 21650 + }, + { + "epoch": 0.99, + "learning_rate": 4.515767312005126e-05, + "loss": 2.4118, + "step": 21660 + }, + { + "epoch": 0.99, + "learning_rate": 4.515538468579798e-05, + "loss": 2.078, + "step": 21670 + }, + { + "epoch": 0.99, + "learning_rate": 4.51530962515447e-05, + "loss": 2.3969, + "step": 21680 + }, + { + "epoch": 0.99, + "learning_rate": 4.515080781729141e-05, + "loss": 2.4961, + "step": 21690 + }, + { + "epoch": 0.99, + "learning_rate": 4.5148519383038126e-05, + "loss": 2.3624, + "step": 21700 + }, + { + "epoch": 0.99, + "learning_rate": 4.514623094878485e-05, + "loss": 2.4318, + "step": 21710 + }, + { + "epoch": 0.99, + "learning_rate": 4.514394251453156e-05, + "loss": 2.3499, + "step": 21720 + }, + { + "epoch": 0.99, + "learning_rate": 4.5141654080278275e-05, + "loss": 2.3653, + "step": 21730 + }, + { + "epoch": 0.99, + "learning_rate": 4.513936564602499e-05, + "loss": 2.177, + "step": 21740 + }, + { + "epoch": 0.99, + "learning_rate": 4.5137077211771704e-05, + "loss": 2.391, + "step": 21750 + }, + { + "epoch": 0.99, + "learning_rate": 4.5134788777518424e-05, + "loss": 2.2926, + "step": 21760 + }, + { + "epoch": 0.99, + "learning_rate": 4.513250034326514e-05, + "loss": 2.3313, + "step": 21770 + }, + { + "epoch": 0.99, + "learning_rate": 4.513021190901185e-05, + "loss": 2.3778, + "step": 21780 + }, + { + "epoch": 0.99, + "learning_rate": 4.5127923474758574e-05, + "loss": 2.3978, + "step": 21790 + }, + { + "epoch": 1.0, + "learning_rate": 4.512563504050529e-05, + "loss": 2.3711, + "step": 21800 + }, + { + "epoch": 1.0, + "learning_rate": 4.5123346606252e-05, + "loss": 2.231, + "step": 21810 + }, + { + "epoch": 1.0, + "learning_rate": 4.512105817199872e-05, + "loss": 2.3072, + "step": 21820 + }, + { + "epoch": 1.0, + "learning_rate": 4.511876973774544e-05, + "loss": 2.3983, + "step": 21830 + }, + { + "epoch": 1.0, + "learning_rate": 4.511648130349215e-05, + "loss": 2.5131, + "step": 21840 + }, + { + "epoch": 1.0, + "learning_rate": 4.511419286923887e-05, + "loss": 2.3321, + "step": 21850 + }, + { + "epoch": 1.0, + "learning_rate": 4.5111904434985586e-05, + "loss": 2.2685, + "step": 21860 + }, + { + "epoch": 1.0, + "learning_rate": 4.51096160007323e-05, + "loss": 2.29, + "step": 21870 + }, + { + "epoch": 1.0, + "learning_rate": 4.510732756647902e-05, + "loss": 2.2824, + "step": 21880 + }, + { + "epoch": 1.0, + "learning_rate": 4.5105039132225735e-05, + "loss": 2.4528, + "step": 21890 + }, + { + "epoch": 1.0, + "eval_cer": 0.6811390598773349, + "eval_em": 0.00728476821192053, + "eval_f1": 0.00728476821192053, + "eval_loss": 2.216672658920288, + "eval_runtime": 2775.0638, + "eval_samples_per_second": 3.809, + "eval_steps_per_second": 1.904, + "eval_wer": 0.9927152317880795, + "step": 21899 + }, + { + "epoch": 1.0, + "learning_rate": 4.510275069797245e-05, + "loss": 2.2786, + "step": 21900 + }, + { + "epoch": 1.0, + "learning_rate": 4.510046226371917e-05, + "loss": 2.3984, + "step": 21910 + }, + { + "epoch": 1.0, + "learning_rate": 4.5098173829465885e-05, + "loss": 2.3169, + "step": 21920 + }, + { + "epoch": 1.0, + "learning_rate": 4.50958853952126e-05, + "loss": 2.1992, + "step": 21930 + }, + { + "epoch": 1.0, + "learning_rate": 4.509359696095931e-05, + "loss": 2.4089, + "step": 21940 + }, + { + "epoch": 1.0, + "learning_rate": 4.509130852670603e-05, + "loss": 2.3878, + "step": 21950 + }, + { + "epoch": 1.0, + "learning_rate": 4.508902009245274e-05, + "loss": 2.2885, + "step": 21960 + }, + { + "epoch": 1.0, + "learning_rate": 4.508673165819946e-05, + "loss": 2.2094, + "step": 21970 + }, + { + "epoch": 1.0, + "learning_rate": 4.5084443223946176e-05, + "loss": 2.2895, + "step": 21980 + }, + { + "epoch": 1.0, + "learning_rate": 4.508215478969289e-05, + "loss": 2.3139, + "step": 21990 + }, + { + "epoch": 1.0, + "learning_rate": 4.507986635543961e-05, + "loss": 2.3456, + "step": 22000 + }, + { + "epoch": 1.01, + "learning_rate": 4.5077577921186325e-05, + "loss": 2.3049, + "step": 22010 + }, + { + "epoch": 1.01, + "learning_rate": 4.507528948693304e-05, + "loss": 2.3241, + "step": 22020 + }, + { + "epoch": 1.01, + "learning_rate": 4.507300105267976e-05, + "loss": 2.3352, + "step": 22030 + }, + { + "epoch": 1.01, + "learning_rate": 4.5070712618426474e-05, + "loss": 2.2437, + "step": 22040 + }, + { + "epoch": 1.01, + "learning_rate": 4.506842418417319e-05, + "loss": 2.3977, + "step": 22050 + }, + { + "epoch": 1.01, + "learning_rate": 4.506613574991991e-05, + "loss": 2.3157, + "step": 22060 + }, + { + "epoch": 1.01, + "learning_rate": 4.5063847315666624e-05, + "loss": 2.2105, + "step": 22070 + }, + { + "epoch": 1.01, + "learning_rate": 4.506155888141334e-05, + "loss": 2.2588, + "step": 22080 + }, + { + "epoch": 1.01, + "learning_rate": 4.505927044716006e-05, + "loss": 2.3755, + "step": 22090 + }, + { + "epoch": 1.01, + "learning_rate": 4.505698201290677e-05, + "loss": 2.3414, + "step": 22100 + }, + { + "epoch": 1.01, + "learning_rate": 4.505469357865349e-05, + "loss": 2.3759, + "step": 22110 + }, + { + "epoch": 1.01, + "learning_rate": 4.505240514440021e-05, + "loss": 2.2631, + "step": 22120 + }, + { + "epoch": 1.01, + "learning_rate": 4.505011671014692e-05, + "loss": 2.3923, + "step": 22130 + }, + { + "epoch": 1.01, + "learning_rate": 4.5047828275893636e-05, + "loss": 2.404, + "step": 22140 + }, + { + "epoch": 1.01, + "learning_rate": 4.504553984164035e-05, + "loss": 2.1583, + "step": 22150 + }, + { + "epoch": 1.01, + "learning_rate": 4.5043251407387064e-05, + "loss": 2.2416, + "step": 22160 + }, + { + "epoch": 1.01, + "learning_rate": 4.5040962973133785e-05, + "loss": 2.2574, + "step": 22170 + }, + { + "epoch": 1.01, + "learning_rate": 4.50386745388805e-05, + "loss": 2.1974, + "step": 22180 + }, + { + "epoch": 1.01, + "learning_rate": 4.5036386104627214e-05, + "loss": 2.2919, + "step": 22190 + }, + { + "epoch": 1.01, + "learning_rate": 4.5034097670373934e-05, + "loss": 2.1923, + "step": 22200 + }, + { + "epoch": 1.01, + "learning_rate": 4.503180923612065e-05, + "loss": 2.4379, + "step": 22210 + }, + { + "epoch": 1.01, + "learning_rate": 4.502952080186736e-05, + "loss": 2.2519, + "step": 22220 + }, + { + "epoch": 1.02, + "learning_rate": 4.5027232367614084e-05, + "loss": 2.4246, + "step": 22230 + }, + { + "epoch": 1.02, + "learning_rate": 4.50249439333608e-05, + "loss": 2.2347, + "step": 22240 + }, + { + "epoch": 1.02, + "learning_rate": 4.502265549910751e-05, + "loss": 2.1819, + "step": 22250 + }, + { + "epoch": 1.02, + "learning_rate": 4.502036706485423e-05, + "loss": 2.1016, + "step": 22260 + }, + { + "epoch": 1.02, + "learning_rate": 4.501807863060095e-05, + "loss": 2.3312, + "step": 22270 + }, + { + "epoch": 1.02, + "learning_rate": 4.501579019634766e-05, + "loss": 2.3409, + "step": 22280 + }, + { + "epoch": 1.02, + "learning_rate": 4.501350176209438e-05, + "loss": 2.2873, + "step": 22290 + }, + { + "epoch": 1.02, + "learning_rate": 4.5011213327841096e-05, + "loss": 2.1861, + "step": 22300 + }, + { + "epoch": 1.02, + "learning_rate": 4.500892489358781e-05, + "loss": 2.1877, + "step": 22310 + }, + { + "epoch": 1.02, + "learning_rate": 4.500663645933453e-05, + "loss": 2.1805, + "step": 22320 + }, + { + "epoch": 1.02, + "learning_rate": 4.500434802508124e-05, + "loss": 2.3718, + "step": 22330 + }, + { + "epoch": 1.02, + "learning_rate": 4.500205959082795e-05, + "loss": 2.3247, + "step": 22340 + }, + { + "epoch": 1.02, + "learning_rate": 4.4999771156574674e-05, + "loss": 2.1953, + "step": 22350 + }, + { + "epoch": 1.02, + "learning_rate": 4.499748272232139e-05, + "loss": 2.2575, + "step": 22360 + }, + { + "epoch": 1.02, + "learning_rate": 4.49951942880681e-05, + "loss": 2.4066, + "step": 22370 + }, + { + "epoch": 1.02, + "learning_rate": 4.499290585381482e-05, + "loss": 2.3497, + "step": 22380 + }, + { + "epoch": 1.02, + "learning_rate": 4.499061741956154e-05, + "loss": 2.3043, + "step": 22390 + }, + { + "epoch": 1.02, + "learning_rate": 4.498832898530825e-05, + "loss": 2.4081, + "step": 22400 + }, + { + "epoch": 1.02, + "learning_rate": 4.498604055105497e-05, + "loss": 2.237, + "step": 22410 + }, + { + "epoch": 1.02, + "learning_rate": 4.4983752116801686e-05, + "loss": 2.4168, + "step": 22420 + }, + { + "epoch": 1.02, + "learning_rate": 4.49814636825484e-05, + "loss": 2.2873, + "step": 22430 + }, + { + "epoch": 1.02, + "learning_rate": 4.497917524829512e-05, + "loss": 2.2442, + "step": 22440 + }, + { + "epoch": 1.03, + "learning_rate": 4.4976886814041835e-05, + "loss": 2.5326, + "step": 22450 + }, + { + "epoch": 1.03, + "learning_rate": 4.497459837978855e-05, + "loss": 2.1366, + "step": 22460 + }, + { + "epoch": 1.03, + "learning_rate": 4.497230994553527e-05, + "loss": 2.3103, + "step": 22470 + }, + { + "epoch": 1.03, + "learning_rate": 4.4970021511281984e-05, + "loss": 2.3425, + "step": 22480 + }, + { + "epoch": 1.03, + "learning_rate": 4.49677330770287e-05, + "loss": 2.3082, + "step": 22490 + }, + { + "epoch": 1.03, + "learning_rate": 4.496544464277542e-05, + "loss": 2.2169, + "step": 22500 + }, + { + "epoch": 1.03, + "learning_rate": 4.4963156208522134e-05, + "loss": 2.3988, + "step": 22510 + }, + { + "epoch": 1.03, + "learning_rate": 4.496086777426885e-05, + "loss": 2.2196, + "step": 22520 + }, + { + "epoch": 1.03, + "learning_rate": 4.495857934001556e-05, + "loss": 2.2251, + "step": 22530 + }, + { + "epoch": 1.03, + "learning_rate": 4.4956290905762276e-05, + "loss": 2.4297, + "step": 22540 + }, + { + "epoch": 1.03, + "learning_rate": 4.4954002471509e-05, + "loss": 2.1783, + "step": 22550 + }, + { + "epoch": 1.03, + "learning_rate": 4.495171403725571e-05, + "loss": 2.234, + "step": 22560 + }, + { + "epoch": 1.03, + "learning_rate": 4.4949425603002425e-05, + "loss": 2.3437, + "step": 22570 + }, + { + "epoch": 1.03, + "learning_rate": 4.4947137168749146e-05, + "loss": 2.3595, + "step": 22580 + }, + { + "epoch": 1.03, + "learning_rate": 4.494484873449586e-05, + "loss": 2.3109, + "step": 22590 + }, + { + "epoch": 1.03, + "learning_rate": 4.4942560300242574e-05, + "loss": 2.1046, + "step": 22600 + }, + { + "epoch": 1.03, + "learning_rate": 4.494027186598929e-05, + "loss": 2.2919, + "step": 22610 + }, + { + "epoch": 1.03, + "learning_rate": 4.493798343173601e-05, + "loss": 2.2704, + "step": 22620 + }, + { + "epoch": 1.03, + "learning_rate": 4.4935694997482724e-05, + "loss": 2.295, + "step": 22630 + }, + { + "epoch": 1.03, + "learning_rate": 4.493340656322944e-05, + "loss": 2.3103, + "step": 22640 + }, + { + "epoch": 1.03, + "learning_rate": 4.493111812897616e-05, + "loss": 2.3212, + "step": 22650 + }, + { + "epoch": 1.03, + "learning_rate": 4.492882969472287e-05, + "loss": 2.244, + "step": 22660 + }, + { + "epoch": 1.04, + "learning_rate": 4.492654126046959e-05, + "loss": 2.4235, + "step": 22670 + }, + { + "epoch": 1.04, + "learning_rate": 4.492425282621631e-05, + "loss": 2.3367, + "step": 22680 + }, + { + "epoch": 1.04, + "learning_rate": 4.492196439196302e-05, + "loss": 2.1472, + "step": 22690 + }, + { + "epoch": 1.04, + "learning_rate": 4.4919675957709736e-05, + "loss": 2.2936, + "step": 22700 + }, + { + "epoch": 1.04, + "learning_rate": 4.491738752345646e-05, + "loss": 2.321, + "step": 22710 + }, + { + "epoch": 1.04, + "learning_rate": 4.491509908920317e-05, + "loss": 2.3483, + "step": 22720 + }, + { + "epoch": 1.04, + "learning_rate": 4.4912810654949885e-05, + "loss": 2.2685, + "step": 22730 + }, + { + "epoch": 1.04, + "learning_rate": 4.49105222206966e-05, + "loss": 2.2381, + "step": 22740 + }, + { + "epoch": 1.04, + "learning_rate": 4.4908233786443313e-05, + "loss": 2.1682, + "step": 22750 + }, + { + "epoch": 1.04, + "learning_rate": 4.4905945352190034e-05, + "loss": 2.3424, + "step": 22760 + }, + { + "epoch": 1.04, + "learning_rate": 4.490365691793675e-05, + "loss": 2.2654, + "step": 22770 + }, + { + "epoch": 1.04, + "learning_rate": 4.490136848368346e-05, + "loss": 2.2483, + "step": 22780 + }, + { + "epoch": 1.04, + "learning_rate": 4.4899080049430184e-05, + "loss": 2.2085, + "step": 22790 + }, + { + "epoch": 1.04, + "learning_rate": 4.48967916151769e-05, + "loss": 2.2406, + "step": 22800 + }, + { + "epoch": 1.04, + "learning_rate": 4.489450318092361e-05, + "loss": 2.1984, + "step": 22810 + }, + { + "epoch": 1.04, + "learning_rate": 4.489221474667033e-05, + "loss": 2.3007, + "step": 22820 + }, + { + "epoch": 1.04, + "learning_rate": 4.488992631241705e-05, + "loss": 2.2391, + "step": 22830 + }, + { + "epoch": 1.04, + "learning_rate": 4.488763787816376e-05, + "loss": 2.2319, + "step": 22840 + }, + { + "epoch": 1.04, + "learning_rate": 4.488534944391048e-05, + "loss": 2.3318, + "step": 22850 + }, + { + "epoch": 1.04, + "learning_rate": 4.4883061009657196e-05, + "loss": 2.2977, + "step": 22860 + }, + { + "epoch": 1.04, + "learning_rate": 4.488077257540391e-05, + "loss": 2.2229, + "step": 22870 + }, + { + "epoch": 1.04, + "learning_rate": 4.487848414115063e-05, + "loss": 2.2238, + "step": 22880 + }, + { + "epoch": 1.05, + "learning_rate": 4.4876195706897345e-05, + "loss": 2.3456, + "step": 22890 + }, + { + "epoch": 1.05, + "learning_rate": 4.487390727264406e-05, + "loss": 2.2562, + "step": 22900 + }, + { + "epoch": 1.05, + "learning_rate": 4.487161883839078e-05, + "loss": 2.244, + "step": 22910 + }, + { + "epoch": 1.05, + "learning_rate": 4.4869330404137494e-05, + "loss": 2.3936, + "step": 22920 + }, + { + "epoch": 1.05, + "learning_rate": 4.48670419698842e-05, + "loss": 2.3535, + "step": 22930 + }, + { + "epoch": 1.05, + "learning_rate": 4.486475353563092e-05, + "loss": 2.215, + "step": 22940 + }, + { + "epoch": 1.05, + "learning_rate": 4.486246510137764e-05, + "loss": 2.2646, + "step": 22950 + }, + { + "epoch": 1.05, + "learning_rate": 4.486017666712435e-05, + "loss": 2.3649, + "step": 22960 + }, + { + "epoch": 1.05, + "learning_rate": 4.485788823287107e-05, + "loss": 2.2424, + "step": 22970 + }, + { + "epoch": 1.05, + "learning_rate": 4.4855599798617786e-05, + "loss": 2.2997, + "step": 22980 + }, + { + "epoch": 1.05, + "learning_rate": 4.48533113643645e-05, + "loss": 2.2354, + "step": 22990 + }, + { + "epoch": 1.05, + "learning_rate": 4.485102293011122e-05, + "loss": 2.2004, + "step": 23000 + }, + { + "epoch": 1.05, + "learning_rate": 4.4848734495857935e-05, + "loss": 2.3738, + "step": 23010 + }, + { + "epoch": 1.05, + "learning_rate": 4.484644606160465e-05, + "loss": 2.2414, + "step": 23020 + }, + { + "epoch": 1.05, + "learning_rate": 4.484415762735137e-05, + "loss": 2.2734, + "step": 23030 + }, + { + "epoch": 1.05, + "learning_rate": 4.4841869193098084e-05, + "loss": 2.2958, + "step": 23040 + }, + { + "epoch": 1.05, + "learning_rate": 4.48395807588448e-05, + "loss": 2.5156, + "step": 23050 + }, + { + "epoch": 1.05, + "learning_rate": 4.483729232459152e-05, + "loss": 2.3496, + "step": 23060 + }, + { + "epoch": 1.05, + "learning_rate": 4.4835003890338233e-05, + "loss": 2.3029, + "step": 23070 + }, + { + "epoch": 1.05, + "learning_rate": 4.483271545608495e-05, + "loss": 2.3776, + "step": 23080 + }, + { + "epoch": 1.05, + "learning_rate": 4.483042702183167e-05, + "loss": 2.3026, + "step": 23090 + }, + { + "epoch": 1.05, + "learning_rate": 4.482813858757838e-05, + "loss": 2.2881, + "step": 23100 + }, + { + "epoch": 1.06, + "learning_rate": 4.48258501533251e-05, + "loss": 2.2495, + "step": 23110 + }, + { + "epoch": 1.06, + "learning_rate": 4.482356171907181e-05, + "loss": 2.366, + "step": 23120 + }, + { + "epoch": 1.06, + "learning_rate": 4.4821273284818525e-05, + "loss": 2.1953, + "step": 23130 + }, + { + "epoch": 1.06, + "learning_rate": 4.4818984850565246e-05, + "loss": 2.1652, + "step": 23140 + }, + { + "epoch": 1.06, + "learning_rate": 4.481669641631196e-05, + "loss": 2.1998, + "step": 23150 + }, + { + "epoch": 1.06, + "learning_rate": 4.4814407982058674e-05, + "loss": 2.4456, + "step": 23160 + }, + { + "epoch": 1.06, + "learning_rate": 4.4812119547805395e-05, + "loss": 2.2987, + "step": 23170 + }, + { + "epoch": 1.06, + "learning_rate": 4.480983111355211e-05, + "loss": 2.4834, + "step": 23180 + }, + { + "epoch": 1.06, + "learning_rate": 4.4807542679298823e-05, + "loss": 2.2657, + "step": 23190 + }, + { + "epoch": 1.06, + "learning_rate": 4.4805254245045544e-05, + "loss": 2.2543, + "step": 23200 + }, + { + "epoch": 1.06, + "learning_rate": 4.480296581079226e-05, + "loss": 2.2808, + "step": 23210 + }, + { + "epoch": 1.06, + "learning_rate": 4.480067737653897e-05, + "loss": 2.2906, + "step": 23220 + }, + { + "epoch": 1.06, + "learning_rate": 4.4798388942285694e-05, + "loss": 2.3544, + "step": 23230 + }, + { + "epoch": 1.06, + "learning_rate": 4.479610050803241e-05, + "loss": 2.1754, + "step": 23240 + }, + { + "epoch": 1.06, + "learning_rate": 4.479381207377912e-05, + "loss": 2.6805, + "step": 23250 + }, + { + "epoch": 1.06, + "learning_rate": 4.479152363952584e-05, + "loss": 2.3576, + "step": 23260 + }, + { + "epoch": 1.06, + "learning_rate": 4.478923520527256e-05, + "loss": 2.4817, + "step": 23270 + }, + { + "epoch": 1.06, + "learning_rate": 4.478694677101927e-05, + "loss": 2.2975, + "step": 23280 + }, + { + "epoch": 1.06, + "learning_rate": 4.478465833676599e-05, + "loss": 2.1194, + "step": 23290 + }, + { + "epoch": 1.06, + "learning_rate": 4.4782369902512706e-05, + "loss": 2.2762, + "step": 23300 + }, + { + "epoch": 1.06, + "learning_rate": 4.478008146825942e-05, + "loss": 2.3417, + "step": 23310 + }, + { + "epoch": 1.06, + "learning_rate": 4.4777793034006134e-05, + "loss": 2.3347, + "step": 23320 + }, + { + "epoch": 1.07, + "learning_rate": 4.477550459975285e-05, + "loss": 2.1626, + "step": 23330 + }, + { + "epoch": 1.07, + "learning_rate": 4.477321616549956e-05, + "loss": 2.2611, + "step": 23340 + }, + { + "epoch": 1.07, + "learning_rate": 4.4770927731246283e-05, + "loss": 2.2869, + "step": 23350 + }, + { + "epoch": 1.07, + "learning_rate": 4.4768639296993e-05, + "loss": 2.356, + "step": 23360 + }, + { + "epoch": 1.07, + "learning_rate": 4.476635086273971e-05, + "loss": 2.2403, + "step": 23370 + }, + { + "epoch": 1.07, + "learning_rate": 4.476406242848643e-05, + "loss": 2.3383, + "step": 23380 + }, + { + "epoch": 1.07, + "learning_rate": 4.476177399423315e-05, + "loss": 2.2679, + "step": 23390 + }, + { + "epoch": 1.07, + "learning_rate": 4.475948555997986e-05, + "loss": 2.2827, + "step": 23400 + }, + { + "epoch": 1.07, + "learning_rate": 4.475719712572658e-05, + "loss": 2.2793, + "step": 23410 + }, + { + "epoch": 1.07, + "learning_rate": 4.4754908691473296e-05, + "loss": 2.1258, + "step": 23420 + }, + { + "epoch": 1.07, + "learning_rate": 4.475262025722001e-05, + "loss": 2.1554, + "step": 23430 + }, + { + "epoch": 1.07, + "learning_rate": 4.475033182296673e-05, + "loss": 2.2401, + "step": 23440 + }, + { + "epoch": 1.07, + "learning_rate": 4.4748043388713445e-05, + "loss": 2.397, + "step": 23450 + }, + { + "epoch": 1.07, + "learning_rate": 4.474575495446016e-05, + "loss": 2.4403, + "step": 23460 + }, + { + "epoch": 1.07, + "learning_rate": 4.474346652020688e-05, + "loss": 2.1008, + "step": 23470 + }, + { + "epoch": 1.07, + "learning_rate": 4.4741178085953594e-05, + "loss": 2.1997, + "step": 23480 + }, + { + "epoch": 1.07, + "learning_rate": 4.473888965170031e-05, + "loss": 2.2415, + "step": 23490 + }, + { + "epoch": 1.07, + "learning_rate": 4.473660121744703e-05, + "loss": 2.299, + "step": 23500 + }, + { + "epoch": 1.07, + "learning_rate": 4.4734312783193743e-05, + "loss": 2.3859, + "step": 23510 + }, + { + "epoch": 1.07, + "learning_rate": 4.473202434894046e-05, + "loss": 2.2103, + "step": 23520 + }, + { + "epoch": 1.07, + "learning_rate": 4.472973591468717e-05, + "loss": 2.3727, + "step": 23530 + }, + { + "epoch": 1.07, + "learning_rate": 4.4727447480433886e-05, + "loss": 2.2905, + "step": 23540 + }, + { + "epoch": 1.08, + "learning_rate": 4.472515904618061e-05, + "loss": 2.3087, + "step": 23550 + }, + { + "epoch": 1.08, + "learning_rate": 4.472287061192732e-05, + "loss": 2.2841, + "step": 23560 + }, + { + "epoch": 1.08, + "learning_rate": 4.4720582177674035e-05, + "loss": 2.2952, + "step": 23570 + }, + { + "epoch": 1.08, + "learning_rate": 4.4718293743420756e-05, + "loss": 2.3625, + "step": 23580 + }, + { + "epoch": 1.08, + "learning_rate": 4.471600530916747e-05, + "loss": 2.2248, + "step": 23590 + }, + { + "epoch": 1.08, + "learning_rate": 4.4713716874914184e-05, + "loss": 2.3198, + "step": 23600 + }, + { + "epoch": 1.08, + "learning_rate": 4.4711428440660905e-05, + "loss": 2.2969, + "step": 23610 + }, + { + "epoch": 1.08, + "learning_rate": 4.470914000640762e-05, + "loss": 2.379, + "step": 23620 + }, + { + "epoch": 1.08, + "learning_rate": 4.470685157215433e-05, + "loss": 2.3187, + "step": 23630 + }, + { + "epoch": 1.08, + "learning_rate": 4.4704563137901054e-05, + "loss": 2.2606, + "step": 23640 + }, + { + "epoch": 1.08, + "learning_rate": 4.470227470364777e-05, + "loss": 2.2952, + "step": 23650 + }, + { + "epoch": 1.08, + "learning_rate": 4.469998626939448e-05, + "loss": 2.2486, + "step": 23660 + }, + { + "epoch": 1.08, + "learning_rate": 4.4697697835141203e-05, + "loss": 2.3264, + "step": 23670 + }, + { + "epoch": 1.08, + "learning_rate": 4.469540940088792e-05, + "loss": 2.2853, + "step": 23680 + }, + { + "epoch": 1.08, + "learning_rate": 4.469312096663463e-05, + "loss": 2.1832, + "step": 23690 + }, + { + "epoch": 1.08, + "learning_rate": 4.469083253238135e-05, + "loss": 2.273, + "step": 23700 + }, + { + "epoch": 1.08, + "learning_rate": 4.468854409812807e-05, + "loss": 2.2548, + "step": 23710 + }, + { + "epoch": 1.08, + "learning_rate": 4.4686255663874774e-05, + "loss": 2.2789, + "step": 23720 + }, + { + "epoch": 1.08, + "learning_rate": 4.4683967229621495e-05, + "loss": 2.2469, + "step": 23730 + }, + { + "epoch": 1.08, + "learning_rate": 4.468167879536821e-05, + "loss": 2.286, + "step": 23740 + }, + { + "epoch": 1.08, + "learning_rate": 4.467939036111492e-05, + "loss": 2.4641, + "step": 23750 + }, + { + "epoch": 1.08, + "learning_rate": 4.4677101926861644e-05, + "loss": 2.186, + "step": 23760 + }, + { + "epoch": 1.09, + "learning_rate": 4.467481349260836e-05, + "loss": 2.405, + "step": 23770 + }, + { + "epoch": 1.09, + "learning_rate": 4.467252505835507e-05, + "loss": 2.2874, + "step": 23780 + }, + { + "epoch": 1.09, + "learning_rate": 4.4670236624101793e-05, + "loss": 2.5123, + "step": 23790 + }, + { + "epoch": 1.09, + "learning_rate": 4.466794818984851e-05, + "loss": 2.2953, + "step": 23800 + }, + { + "epoch": 1.09, + "learning_rate": 4.466565975559522e-05, + "loss": 2.3845, + "step": 23810 + }, + { + "epoch": 1.09, + "learning_rate": 4.466337132134194e-05, + "loss": 2.3188, + "step": 23820 + }, + { + "epoch": 1.09, + "learning_rate": 4.466108288708866e-05, + "loss": 2.3553, + "step": 23830 + }, + { + "epoch": 1.09, + "learning_rate": 4.465879445283537e-05, + "loss": 2.3429, + "step": 23840 + }, + { + "epoch": 1.09, + "learning_rate": 4.465650601858209e-05, + "loss": 2.4726, + "step": 23850 + }, + { + "epoch": 1.09, + "learning_rate": 4.4654217584328806e-05, + "loss": 2.1972, + "step": 23860 + }, + { + "epoch": 1.09, + "learning_rate": 4.465192915007552e-05, + "loss": 2.1696, + "step": 23870 + }, + { + "epoch": 1.09, + "learning_rate": 4.464964071582224e-05, + "loss": 2.1854, + "step": 23880 + }, + { + "epoch": 1.09, + "learning_rate": 4.4647352281568955e-05, + "loss": 2.2547, + "step": 23890 + }, + { + "epoch": 1.09, + "learning_rate": 4.464506384731567e-05, + "loss": 2.2351, + "step": 23900 + }, + { + "epoch": 1.09, + "learning_rate": 4.464277541306238e-05, + "loss": 2.213, + "step": 23910 + }, + { + "epoch": 1.09, + "learning_rate": 4.46404869788091e-05, + "loss": 2.2166, + "step": 23920 + }, + { + "epoch": 1.09, + "learning_rate": 4.463819854455581e-05, + "loss": 2.3071, + "step": 23930 + }, + { + "epoch": 1.09, + "learning_rate": 4.463591011030253e-05, + "loss": 2.3706, + "step": 23940 + }, + { + "epoch": 1.09, + "learning_rate": 4.463362167604925e-05, + "loss": 2.2887, + "step": 23950 + }, + { + "epoch": 1.09, + "learning_rate": 4.463133324179596e-05, + "loss": 2.1549, + "step": 23960 + }, + { + "epoch": 1.09, + "learning_rate": 4.462904480754268e-05, + "loss": 2.4724, + "step": 23970 + }, + { + "epoch": 1.09, + "learning_rate": 4.4626756373289396e-05, + "loss": 2.2536, + "step": 23980 + }, + { + "epoch": 1.1, + "learning_rate": 4.462446793903611e-05, + "loss": 2.2249, + "step": 23990 + }, + { + "epoch": 1.1, + "learning_rate": 4.462217950478283e-05, + "loss": 2.1397, + "step": 24000 + }, + { + "epoch": 1.1, + "learning_rate": 4.4619891070529545e-05, + "loss": 2.2628, + "step": 24010 + }, + { + "epoch": 1.1, + "learning_rate": 4.461760263627626e-05, + "loss": 2.2585, + "step": 24020 + }, + { + "epoch": 1.1, + "learning_rate": 4.461531420202298e-05, + "loss": 2.2245, + "step": 24030 + }, + { + "epoch": 1.1, + "learning_rate": 4.4613025767769694e-05, + "loss": 2.2137, + "step": 24040 + }, + { + "epoch": 1.1, + "learning_rate": 4.461073733351641e-05, + "loss": 2.2693, + "step": 24050 + }, + { + "epoch": 1.1, + "learning_rate": 4.460844889926313e-05, + "loss": 2.3853, + "step": 24060 + }, + { + "epoch": 1.1, + "learning_rate": 4.460616046500984e-05, + "loss": 2.3593, + "step": 24070 + }, + { + "epoch": 1.1, + "learning_rate": 4.460387203075656e-05, + "loss": 2.2257, + "step": 24080 + }, + { + "epoch": 1.1, + "learning_rate": 4.460158359650328e-05, + "loss": 2.373, + "step": 24090 + }, + { + "epoch": 1.1, + "learning_rate": 4.459929516224999e-05, + "loss": 2.1701, + "step": 24100 + }, + { + "epoch": 1.1, + "learning_rate": 4.459700672799671e-05, + "loss": 2.2545, + "step": 24110 + }, + { + "epoch": 1.1, + "learning_rate": 4.459471829374342e-05, + "loss": 2.2352, + "step": 24120 + }, + { + "epoch": 1.1, + "learning_rate": 4.4592429859490135e-05, + "loss": 2.2635, + "step": 24130 + }, + { + "epoch": 1.1, + "learning_rate": 4.4590141425236856e-05, + "loss": 2.3063, + "step": 24140 + }, + { + "epoch": 1.1, + "learning_rate": 4.458785299098357e-05, + "loss": 2.3205, + "step": 24150 + }, + { + "epoch": 1.1, + "learning_rate": 4.4585564556730284e-05, + "loss": 2.2775, + "step": 24160 + }, + { + "epoch": 1.1, + "learning_rate": 4.4583276122477005e-05, + "loss": 2.2944, + "step": 24170 + }, + { + "epoch": 1.1, + "learning_rate": 4.458098768822372e-05, + "loss": 2.3028, + "step": 24180 + }, + { + "epoch": 1.1, + "learning_rate": 4.457869925397043e-05, + "loss": 2.3336, + "step": 24190 + }, + { + "epoch": 1.11, + "learning_rate": 4.4576410819717154e-05, + "loss": 2.2674, + "step": 24200 + }, + { + "epoch": 1.11, + "learning_rate": 4.457412238546387e-05, + "loss": 2.2639, + "step": 24210 + }, + { + "epoch": 1.11, + "learning_rate": 4.457183395121058e-05, + "loss": 2.3059, + "step": 24220 + }, + { + "epoch": 1.11, + "learning_rate": 4.45695455169573e-05, + "loss": 2.2512, + "step": 24230 + }, + { + "epoch": 1.11, + "learning_rate": 4.456725708270402e-05, + "loss": 2.2663, + "step": 24240 + }, + { + "epoch": 1.11, + "learning_rate": 4.456496864845073e-05, + "loss": 2.2695, + "step": 24250 + }, + { + "epoch": 1.11, + "learning_rate": 4.456268021419745e-05, + "loss": 2.3058, + "step": 24260 + }, + { + "epoch": 1.11, + "learning_rate": 4.456039177994417e-05, + "loss": 2.1553, + "step": 24270 + }, + { + "epoch": 1.11, + "learning_rate": 4.455810334569088e-05, + "loss": 2.3837, + "step": 24280 + }, + { + "epoch": 1.11, + "learning_rate": 4.45558149114376e-05, + "loss": 2.3801, + "step": 24290 + }, + { + "epoch": 1.11, + "learning_rate": 4.4553526477184316e-05, + "loss": 2.3198, + "step": 24300 + }, + { + "epoch": 1.11, + "learning_rate": 4.455123804293102e-05, + "loss": 2.2181, + "step": 24310 + }, + { + "epoch": 1.11, + "learning_rate": 4.4548949608677744e-05, + "loss": 2.1765, + "step": 24320 + }, + { + "epoch": 1.11, + "learning_rate": 4.454666117442446e-05, + "loss": 2.2311, + "step": 24330 + }, + { + "epoch": 1.11, + "learning_rate": 4.454437274017117e-05, + "loss": 2.3063, + "step": 24340 + }, + { + "epoch": 1.11, + "learning_rate": 4.454208430591789e-05, + "loss": 2.1993, + "step": 24350 + }, + { + "epoch": 1.11, + "learning_rate": 4.453979587166461e-05, + "loss": 2.1779, + "step": 24360 + }, + { + "epoch": 1.11, + "learning_rate": 4.453750743741132e-05, + "loss": 2.2112, + "step": 24370 + }, + { + "epoch": 1.11, + "learning_rate": 4.453521900315804e-05, + "loss": 2.1919, + "step": 24380 + }, + { + "epoch": 1.11, + "learning_rate": 4.453293056890476e-05, + "loss": 2.3088, + "step": 24390 + }, + { + "epoch": 1.11, + "learning_rate": 4.453064213465147e-05, + "loss": 2.2676, + "step": 24400 + }, + { + "epoch": 1.11, + "learning_rate": 4.452835370039819e-05, + "loss": 2.3745, + "step": 24410 + }, + { + "epoch": 1.12, + "learning_rate": 4.4526065266144906e-05, + "loss": 2.1507, + "step": 24420 + }, + { + "epoch": 1.12, + "learning_rate": 4.452377683189162e-05, + "loss": 2.304, + "step": 24430 + }, + { + "epoch": 1.12, + "learning_rate": 4.452148839763834e-05, + "loss": 2.1808, + "step": 24440 + }, + { + "epoch": 1.12, + "learning_rate": 4.4519199963385055e-05, + "loss": 2.2421, + "step": 24450 + }, + { + "epoch": 1.12, + "learning_rate": 4.451691152913177e-05, + "loss": 2.3523, + "step": 24460 + }, + { + "epoch": 1.12, + "learning_rate": 4.451462309487849e-05, + "loss": 2.2287, + "step": 24470 + }, + { + "epoch": 1.12, + "learning_rate": 4.4512334660625204e-05, + "loss": 2.1491, + "step": 24480 + }, + { + "epoch": 1.12, + "learning_rate": 4.451004622637192e-05, + "loss": 2.2709, + "step": 24490 + }, + { + "epoch": 1.12, + "learning_rate": 4.450775779211864e-05, + "loss": 2.124, + "step": 24500 + }, + { + "epoch": 1.12, + "learning_rate": 4.4505469357865347e-05, + "loss": 2.2316, + "step": 24510 + }, + { + "epoch": 1.12, + "learning_rate": 4.450318092361207e-05, + "loss": 2.1693, + "step": 24520 + }, + { + "epoch": 1.12, + "learning_rate": 4.450089248935878e-05, + "loss": 2.2246, + "step": 24530 + }, + { + "epoch": 1.12, + "learning_rate": 4.4498604055105496e-05, + "loss": 2.1091, + "step": 24540 + }, + { + "epoch": 1.12, + "learning_rate": 4.449631562085222e-05, + "loss": 2.2332, + "step": 24550 + }, + { + "epoch": 1.12, + "learning_rate": 4.449402718659893e-05, + "loss": 2.2496, + "step": 24560 + }, + { + "epoch": 1.12, + "learning_rate": 4.4491738752345645e-05, + "loss": 2.2261, + "step": 24570 + }, + { + "epoch": 1.12, + "learning_rate": 4.4489450318092366e-05, + "loss": 2.2377, + "step": 24580 + }, + { + "epoch": 1.12, + "learning_rate": 4.448716188383908e-05, + "loss": 2.3601, + "step": 24590 + }, + { + "epoch": 1.12, + "learning_rate": 4.4484873449585794e-05, + "loss": 2.1195, + "step": 24600 + }, + { + "epoch": 1.12, + "learning_rate": 4.4482585015332515e-05, + "loss": 2.2967, + "step": 24610 + }, + { + "epoch": 1.12, + "learning_rate": 4.448029658107923e-05, + "loss": 2.2698, + "step": 24620 + }, + { + "epoch": 1.12, + "learning_rate": 4.447800814682594e-05, + "loss": 2.2255, + "step": 24630 + }, + { + "epoch": 1.13, + "learning_rate": 4.4475719712572664e-05, + "loss": 2.159, + "step": 24640 + }, + { + "epoch": 1.13, + "learning_rate": 4.447343127831938e-05, + "loss": 2.2749, + "step": 24650 + }, + { + "epoch": 1.13, + "learning_rate": 4.447114284406609e-05, + "loss": 2.1301, + "step": 24660 + }, + { + "epoch": 1.13, + "learning_rate": 4.446885440981281e-05, + "loss": 2.2038, + "step": 24670 + }, + { + "epoch": 1.13, + "learning_rate": 4.446656597555953e-05, + "loss": 2.4247, + "step": 24680 + }, + { + "epoch": 1.13, + "learning_rate": 4.446427754130624e-05, + "loss": 2.2751, + "step": 24690 + }, + { + "epoch": 1.13, + "learning_rate": 4.4461989107052956e-05, + "loss": 2.2593, + "step": 24700 + }, + { + "epoch": 1.13, + "learning_rate": 4.445970067279967e-05, + "loss": 2.448, + "step": 24710 + }, + { + "epoch": 1.13, + "learning_rate": 4.4457412238546384e-05, + "loss": 2.4001, + "step": 24720 + }, + { + "epoch": 1.13, + "learning_rate": 4.4455123804293105e-05, + "loss": 2.2066, + "step": 24730 + }, + { + "epoch": 1.13, + "learning_rate": 4.445283537003982e-05, + "loss": 2.1857, + "step": 24740 + }, + { + "epoch": 1.13, + "learning_rate": 4.445054693578653e-05, + "loss": 2.3528, + "step": 24750 + }, + { + "epoch": 1.13, + "learning_rate": 4.4448258501533254e-05, + "loss": 2.3385, + "step": 24760 + }, + { + "epoch": 1.13, + "learning_rate": 4.444597006727997e-05, + "loss": 2.1986, + "step": 24770 + }, + { + "epoch": 1.13, + "learning_rate": 4.444368163302668e-05, + "loss": 2.4069, + "step": 24780 + }, + { + "epoch": 1.13, + "learning_rate": 4.44413931987734e-05, + "loss": 2.1739, + "step": 24790 + }, + { + "epoch": 1.13, + "learning_rate": 4.443910476452012e-05, + "loss": 2.2184, + "step": 24800 + }, + { + "epoch": 1.13, + "learning_rate": 4.443681633026683e-05, + "loss": 2.302, + "step": 24810 + }, + { + "epoch": 1.13, + "learning_rate": 4.443452789601355e-05, + "loss": 2.4128, + "step": 24820 + }, + { + "epoch": 1.13, + "learning_rate": 4.4432239461760267e-05, + "loss": 2.4487, + "step": 24830 + }, + { + "epoch": 1.13, + "learning_rate": 4.442995102750698e-05, + "loss": 2.3491, + "step": 24840 + }, + { + "epoch": 1.13, + "learning_rate": 4.44276625932537e-05, + "loss": 2.211, + "step": 24850 + }, + { + "epoch": 1.14, + "learning_rate": 4.4425374159000416e-05, + "loss": 2.3704, + "step": 24860 + }, + { + "epoch": 1.14, + "learning_rate": 4.442308572474713e-05, + "loss": 2.0797, + "step": 24870 + }, + { + "epoch": 1.14, + "learning_rate": 4.442079729049385e-05, + "loss": 2.2076, + "step": 24880 + }, + { + "epoch": 1.14, + "learning_rate": 4.4418508856240565e-05, + "loss": 2.2729, + "step": 24890 + }, + { + "epoch": 1.14, + "learning_rate": 4.441622042198728e-05, + "loss": 2.1714, + "step": 24900 + }, + { + "epoch": 1.14, + "learning_rate": 4.441393198773399e-05, + "loss": 2.45, + "step": 24910 + }, + { + "epoch": 1.14, + "learning_rate": 4.441164355348071e-05, + "loss": 2.2123, + "step": 24920 + }, + { + "epoch": 1.14, + "learning_rate": 4.440935511922743e-05, + "loss": 2.249, + "step": 24930 + }, + { + "epoch": 1.14, + "learning_rate": 4.440706668497414e-05, + "loss": 2.2784, + "step": 24940 + }, + { + "epoch": 1.14, + "learning_rate": 4.4404778250720857e-05, + "loss": 2.155, + "step": 24950 + }, + { + "epoch": 1.14, + "learning_rate": 4.440248981646757e-05, + "loss": 2.21, + "step": 24960 + }, + { + "epoch": 1.14, + "learning_rate": 4.440020138221429e-05, + "loss": 2.3263, + "step": 24970 + }, + { + "epoch": 1.14, + "learning_rate": 4.4397912947961006e-05, + "loss": 2.2638, + "step": 24980 + }, + { + "epoch": 1.14, + "learning_rate": 4.439562451370772e-05, + "loss": 2.243, + "step": 24990 + }, + { + "epoch": 1.14, + "learning_rate": 4.439333607945444e-05, + "loss": 2.3748, + "step": 25000 + }, + { + "epoch": 1.14, + "learning_rate": 4.4391047645201155e-05, + "loss": 2.3651, + "step": 25010 + }, + { + "epoch": 1.14, + "learning_rate": 4.438875921094787e-05, + "loss": 2.2125, + "step": 25020 + }, + { + "epoch": 1.14, + "learning_rate": 4.438647077669459e-05, + "loss": 2.4181, + "step": 25030 + }, + { + "epoch": 1.14, + "learning_rate": 4.4384182342441304e-05, + "loss": 2.312, + "step": 25040 + }, + { + "epoch": 1.14, + "learning_rate": 4.438189390818802e-05, + "loss": 2.2971, + "step": 25050 + }, + { + "epoch": 1.14, + "learning_rate": 4.437960547393474e-05, + "loss": 2.239, + "step": 25060 + }, + { + "epoch": 1.14, + "learning_rate": 4.437731703968145e-05, + "loss": 2.1821, + "step": 25070 + }, + { + "epoch": 1.15, + "learning_rate": 4.437502860542817e-05, + "loss": 2.3857, + "step": 25080 + }, + { + "epoch": 1.15, + "learning_rate": 4.437274017117489e-05, + "loss": 2.255, + "step": 25090 + }, + { + "epoch": 1.15, + "learning_rate": 4.4370451736921596e-05, + "loss": 2.1913, + "step": 25100 + }, + { + "epoch": 1.15, + "learning_rate": 4.4368163302668317e-05, + "loss": 2.3231, + "step": 25110 + }, + { + "epoch": 1.15, + "learning_rate": 4.436587486841503e-05, + "loss": 2.2103, + "step": 25120 + }, + { + "epoch": 1.15, + "learning_rate": 4.4363586434161745e-05, + "loss": 2.2058, + "step": 25130 + }, + { + "epoch": 1.15, + "learning_rate": 4.4361297999908466e-05, + "loss": 2.2103, + "step": 25140 + }, + { + "epoch": 1.15, + "learning_rate": 4.435900956565518e-05, + "loss": 2.2999, + "step": 25150 + }, + { + "epoch": 1.15, + "learning_rate": 4.4356721131401894e-05, + "loss": 2.2136, + "step": 25160 + }, + { + "epoch": 1.15, + "learning_rate": 4.4354432697148615e-05, + "loss": 2.4168, + "step": 25170 + }, + { + "epoch": 1.15, + "learning_rate": 4.435214426289533e-05, + "loss": 2.3901, + "step": 25180 + }, + { + "epoch": 1.15, + "learning_rate": 4.434985582864204e-05, + "loss": 2.3331, + "step": 25190 + }, + { + "epoch": 1.15, + "learning_rate": 4.4347567394388764e-05, + "loss": 2.1937, + "step": 25200 + }, + { + "epoch": 1.15, + "learning_rate": 4.434527896013548e-05, + "loss": 2.1437, + "step": 25210 + }, + { + "epoch": 1.15, + "learning_rate": 4.434299052588219e-05, + "loss": 2.2642, + "step": 25220 + }, + { + "epoch": 1.15, + "learning_rate": 4.434070209162891e-05, + "loss": 2.2769, + "step": 25230 + }, + { + "epoch": 1.15, + "learning_rate": 4.433841365737563e-05, + "loss": 2.2554, + "step": 25240 + }, + { + "epoch": 1.15, + "learning_rate": 4.433612522312234e-05, + "loss": 2.259, + "step": 25250 + }, + { + "epoch": 1.15, + "learning_rate": 4.433383678886906e-05, + "loss": 2.1573, + "step": 25260 + }, + { + "epoch": 1.15, + "learning_rate": 4.4331548354615777e-05, + "loss": 2.0877, + "step": 25270 + }, + { + "epoch": 1.15, + "learning_rate": 4.432925992036249e-05, + "loss": 2.2957, + "step": 25280 + }, + { + "epoch": 1.15, + "learning_rate": 4.432697148610921e-05, + "loss": 2.1028, + "step": 25290 + }, + { + "epoch": 1.16, + "learning_rate": 4.432468305185592e-05, + "loss": 2.2047, + "step": 25300 + }, + { + "epoch": 1.16, + "learning_rate": 4.432239461760263e-05, + "loss": 2.2976, + "step": 25310 + }, + { + "epoch": 1.16, + "learning_rate": 4.4320106183349354e-05, + "loss": 2.2646, + "step": 25320 + }, + { + "epoch": 1.16, + "learning_rate": 4.431781774909607e-05, + "loss": 2.3561, + "step": 25330 + }, + { + "epoch": 1.16, + "learning_rate": 4.431552931484278e-05, + "loss": 2.3223, + "step": 25340 + }, + { + "epoch": 1.16, + "learning_rate": 4.43132408805895e-05, + "loss": 2.1644, + "step": 25350 + }, + { + "epoch": 1.16, + "learning_rate": 4.431095244633622e-05, + "loss": 2.2785, + "step": 25360 + }, + { + "epoch": 1.16, + "learning_rate": 4.430866401208293e-05, + "loss": 2.2536, + "step": 25370 + }, + { + "epoch": 1.16, + "learning_rate": 4.430637557782965e-05, + "loss": 2.23, + "step": 25380 + }, + { + "epoch": 1.16, + "learning_rate": 4.4304087143576366e-05, + "loss": 2.262, + "step": 25390 + }, + { + "epoch": 1.16, + "learning_rate": 4.430179870932308e-05, + "loss": 2.1855, + "step": 25400 + }, + { + "epoch": 1.16, + "learning_rate": 4.42995102750698e-05, + "loss": 2.2378, + "step": 25410 + }, + { + "epoch": 1.16, + "learning_rate": 4.4297221840816516e-05, + "loss": 2.4013, + "step": 25420 + }, + { + "epoch": 1.16, + "learning_rate": 4.429493340656323e-05, + "loss": 2.1159, + "step": 25430 + }, + { + "epoch": 1.16, + "learning_rate": 4.429264497230995e-05, + "loss": 2.2293, + "step": 25440 + }, + { + "epoch": 1.16, + "learning_rate": 4.4290356538056665e-05, + "loss": 2.2844, + "step": 25450 + }, + { + "epoch": 1.16, + "learning_rate": 4.428806810380338e-05, + "loss": 2.5541, + "step": 25460 + }, + { + "epoch": 1.16, + "learning_rate": 4.42857796695501e-05, + "loss": 2.2027, + "step": 25470 + }, + { + "epoch": 1.16, + "learning_rate": 4.4283491235296814e-05, + "loss": 2.3265, + "step": 25480 + }, + { + "epoch": 1.16, + "learning_rate": 4.428120280104353e-05, + "loss": 2.1812, + "step": 25490 + }, + { + "epoch": 1.16, + "learning_rate": 4.427891436679024e-05, + "loss": 2.2683, + "step": 25500 + }, + { + "epoch": 1.16, + "learning_rate": 4.4276625932536956e-05, + "loss": 2.2711, + "step": 25510 + }, + { + "epoch": 1.17, + "learning_rate": 4.427433749828368e-05, + "loss": 2.3269, + "step": 25520 + }, + { + "epoch": 1.17, + "learning_rate": 4.427204906403039e-05, + "loss": 2.3256, + "step": 25530 + }, + { + "epoch": 1.17, + "learning_rate": 4.4269760629777106e-05, + "loss": 2.257, + "step": 25540 + }, + { + "epoch": 1.17, + "learning_rate": 4.4267472195523826e-05, + "loss": 2.2607, + "step": 25550 + }, + { + "epoch": 1.17, + "learning_rate": 4.426518376127054e-05, + "loss": 2.2594, + "step": 25560 + }, + { + "epoch": 1.17, + "learning_rate": 4.4262895327017255e-05, + "loss": 2.4334, + "step": 25570 + }, + { + "epoch": 1.17, + "learning_rate": 4.4260606892763976e-05, + "loss": 2.2299, + "step": 25580 + }, + { + "epoch": 1.17, + "learning_rate": 4.425831845851069e-05, + "loss": 2.2076, + "step": 25590 + }, + { + "epoch": 1.17, + "learning_rate": 4.4256030024257404e-05, + "loss": 2.3395, + "step": 25600 + }, + { + "epoch": 1.17, + "learning_rate": 4.4253741590004125e-05, + "loss": 2.1569, + "step": 25610 + }, + { + "epoch": 1.17, + "learning_rate": 4.425145315575084e-05, + "loss": 2.3235, + "step": 25620 + }, + { + "epoch": 1.17, + "learning_rate": 4.424916472149755e-05, + "loss": 2.2791, + "step": 25630 + }, + { + "epoch": 1.17, + "learning_rate": 4.4246876287244274e-05, + "loss": 2.2436, + "step": 25640 + }, + { + "epoch": 1.17, + "learning_rate": 4.424458785299099e-05, + "loss": 2.2923, + "step": 25650 + }, + { + "epoch": 1.17, + "learning_rate": 4.42422994187377e-05, + "loss": 2.2488, + "step": 25660 + }, + { + "epoch": 1.17, + "learning_rate": 4.424001098448442e-05, + "loss": 2.3037, + "step": 25670 + }, + { + "epoch": 1.17, + "learning_rate": 4.423772255023114e-05, + "loss": 2.2168, + "step": 25680 + }, + { + "epoch": 1.17, + "learning_rate": 4.4235434115977845e-05, + "loss": 2.0943, + "step": 25690 + }, + { + "epoch": 1.17, + "learning_rate": 4.4233145681724566e-05, + "loss": 2.2102, + "step": 25700 + }, + { + "epoch": 1.17, + "learning_rate": 4.423085724747128e-05, + "loss": 2.3707, + "step": 25710 + }, + { + "epoch": 1.17, + "learning_rate": 4.4228568813217994e-05, + "loss": 2.3201, + "step": 25720 + }, + { + "epoch": 1.17, + "learning_rate": 4.4226280378964715e-05, + "loss": 2.3987, + "step": 25730 + }, + { + "epoch": 1.18, + "learning_rate": 4.422399194471143e-05, + "loss": 2.279, + "step": 25740 + }, + { + "epoch": 1.18, + "learning_rate": 4.422170351045814e-05, + "loss": 2.2009, + "step": 25750 + }, + { + "epoch": 1.18, + "learning_rate": 4.4219415076204864e-05, + "loss": 2.3064, + "step": 25760 + }, + { + "epoch": 1.18, + "learning_rate": 4.421712664195158e-05, + "loss": 2.1598, + "step": 25770 + }, + { + "epoch": 1.18, + "learning_rate": 4.421483820769829e-05, + "loss": 2.2582, + "step": 25780 + }, + { + "epoch": 1.18, + "learning_rate": 4.421254977344501e-05, + "loss": 2.1256, + "step": 25790 + }, + { + "epoch": 1.18, + "learning_rate": 4.421026133919173e-05, + "loss": 2.2932, + "step": 25800 + }, + { + "epoch": 1.18, + "learning_rate": 4.420797290493844e-05, + "loss": 2.3943, + "step": 25810 + }, + { + "epoch": 1.18, + "learning_rate": 4.420568447068516e-05, + "loss": 2.2726, + "step": 25820 + }, + { + "epoch": 1.18, + "learning_rate": 4.4203396036431876e-05, + "loss": 2.2157, + "step": 25830 + }, + { + "epoch": 1.18, + "learning_rate": 4.420110760217859e-05, + "loss": 2.2888, + "step": 25840 + }, + { + "epoch": 1.18, + "learning_rate": 4.419881916792531e-05, + "loss": 2.4838, + "step": 25850 + }, + { + "epoch": 1.18, + "learning_rate": 4.4196530733672026e-05, + "loss": 2.1625, + "step": 25860 + }, + { + "epoch": 1.18, + "learning_rate": 4.419424229941874e-05, + "loss": 2.2363, + "step": 25870 + }, + { + "epoch": 1.18, + "learning_rate": 4.419195386516546e-05, + "loss": 2.4828, + "step": 25880 + }, + { + "epoch": 1.18, + "learning_rate": 4.418966543091217e-05, + "loss": 2.4822, + "step": 25890 + }, + { + "epoch": 1.18, + "learning_rate": 4.418737699665889e-05, + "loss": 2.302, + "step": 25900 + }, + { + "epoch": 1.18, + "learning_rate": 4.41850885624056e-05, + "loss": 2.1256, + "step": 25910 + }, + { + "epoch": 1.18, + "learning_rate": 4.418280012815232e-05, + "loss": 2.1914, + "step": 25920 + }, + { + "epoch": 1.18, + "learning_rate": 4.418051169389904e-05, + "loss": 2.079, + "step": 25930 + }, + { + "epoch": 1.18, + "learning_rate": 4.417822325964575e-05, + "loss": 2.2081, + "step": 25940 + }, + { + "epoch": 1.18, + "learning_rate": 4.4175934825392466e-05, + "loss": 2.1153, + "step": 25950 + }, + { + "epoch": 1.19, + "learning_rate": 4.417364639113919e-05, + "loss": 2.274, + "step": 25960 + }, + { + "epoch": 1.19, + "learning_rate": 4.41713579568859e-05, + "loss": 2.2522, + "step": 25970 + }, + { + "epoch": 1.19, + "learning_rate": 4.4169069522632616e-05, + "loss": 2.2895, + "step": 25980 + }, + { + "epoch": 1.19, + "learning_rate": 4.4166781088379336e-05, + "loss": 2.4238, + "step": 25990 + }, + { + "epoch": 1.19, + "learning_rate": 4.416449265412605e-05, + "loss": 2.1795, + "step": 26000 + }, + { + "epoch": 1.19, + "learning_rate": 4.4162204219872765e-05, + "loss": 2.237, + "step": 26010 + }, + { + "epoch": 1.19, + "learning_rate": 4.4159915785619486e-05, + "loss": 2.1216, + "step": 26020 + }, + { + "epoch": 1.19, + "learning_rate": 4.41576273513662e-05, + "loss": 2.1635, + "step": 26030 + }, + { + "epoch": 1.19, + "learning_rate": 4.4155338917112914e-05, + "loss": 2.122, + "step": 26040 + }, + { + "epoch": 1.19, + "learning_rate": 4.415305048285963e-05, + "loss": 2.3703, + "step": 26050 + }, + { + "epoch": 1.19, + "learning_rate": 4.415076204860635e-05, + "loss": 2.2444, + "step": 26060 + }, + { + "epoch": 1.19, + "learning_rate": 4.414847361435306e-05, + "loss": 2.2513, + "step": 26070 + }, + { + "epoch": 1.19, + "learning_rate": 4.414618518009978e-05, + "loss": 2.2434, + "step": 26080 + }, + { + "epoch": 1.19, + "learning_rate": 4.414389674584649e-05, + "loss": 2.326, + "step": 26090 + }, + { + "epoch": 1.19, + "learning_rate": 4.4141608311593205e-05, + "loss": 2.1341, + "step": 26100 + }, + { + "epoch": 1.19, + "learning_rate": 4.4139319877339926e-05, + "loss": 2.2537, + "step": 26110 + }, + { + "epoch": 1.19, + "learning_rate": 4.413703144308664e-05, + "loss": 2.4402, + "step": 26120 + }, + { + "epoch": 1.19, + "learning_rate": 4.4134743008833355e-05, + "loss": 2.3245, + "step": 26130 + }, + { + "epoch": 1.19, + "learning_rate": 4.4132454574580076e-05, + "loss": 2.3338, + "step": 26140 + }, + { + "epoch": 1.19, + "learning_rate": 4.413016614032679e-05, + "loss": 2.2362, + "step": 26150 + }, + { + "epoch": 1.19, + "learning_rate": 4.4127877706073504e-05, + "loss": 2.321, + "step": 26160 + }, + { + "epoch": 1.19, + "learning_rate": 4.4125589271820225e-05, + "loss": 2.208, + "step": 26170 + }, + { + "epoch": 1.2, + "learning_rate": 4.412330083756694e-05, + "loss": 2.131, + "step": 26180 + }, + { + "epoch": 1.2, + "learning_rate": 4.412101240331365e-05, + "loss": 2.1384, + "step": 26190 + }, + { + "epoch": 1.2, + "learning_rate": 4.4118723969060374e-05, + "loss": 2.3083, + "step": 26200 + }, + { + "epoch": 1.2, + "learning_rate": 4.411643553480709e-05, + "loss": 2.2815, + "step": 26210 + }, + { + "epoch": 1.2, + "learning_rate": 4.41141471005538e-05, + "loss": 2.2491, + "step": 26220 + }, + { + "epoch": 1.2, + "learning_rate": 4.411185866630052e-05, + "loss": 2.4467, + "step": 26230 + }, + { + "epoch": 1.2, + "learning_rate": 4.410957023204724e-05, + "loss": 2.09, + "step": 26240 + }, + { + "epoch": 1.2, + "learning_rate": 4.410728179779395e-05, + "loss": 2.202, + "step": 26250 + }, + { + "epoch": 1.2, + "learning_rate": 4.410499336354067e-05, + "loss": 2.3052, + "step": 26260 + }, + { + "epoch": 1.2, + "learning_rate": 4.4102704929287386e-05, + "loss": 2.277, + "step": 26270 + }, + { + "epoch": 1.2, + "learning_rate": 4.41004164950341e-05, + "loss": 2.2987, + "step": 26280 + }, + { + "epoch": 1.2, + "learning_rate": 4.4098128060780815e-05, + "loss": 2.1745, + "step": 26290 + }, + { + "epoch": 1.2, + "learning_rate": 4.409583962652753e-05, + "loss": 2.3993, + "step": 26300 + }, + { + "epoch": 1.2, + "learning_rate": 4.409355119227424e-05, + "loss": 2.2328, + "step": 26310 + }, + { + "epoch": 1.2, + "learning_rate": 4.4091262758020964e-05, + "loss": 2.2647, + "step": 26320 + }, + { + "epoch": 1.2, + "learning_rate": 4.408897432376768e-05, + "loss": 2.1995, + "step": 26330 + }, + { + "epoch": 1.2, + "learning_rate": 4.408668588951439e-05, + "loss": 2.2455, + "step": 26340 + }, + { + "epoch": 1.2, + "learning_rate": 4.408439745526111e-05, + "loss": 2.3337, + "step": 26350 + }, + { + "epoch": 1.2, + "learning_rate": 4.408210902100783e-05, + "loss": 2.2002, + "step": 26360 + }, + { + "epoch": 1.2, + "learning_rate": 4.407982058675454e-05, + "loss": 2.2063, + "step": 26370 + }, + { + "epoch": 1.2, + "learning_rate": 4.407753215250126e-05, + "loss": 2.3306, + "step": 26380 + }, + { + "epoch": 1.21, + "learning_rate": 4.4075243718247976e-05, + "loss": 2.1077, + "step": 26390 + }, + { + "epoch": 1.21, + "learning_rate": 4.407295528399469e-05, + "loss": 2.3221, + "step": 26400 + }, + { + "epoch": 1.21, + "learning_rate": 4.407066684974141e-05, + "loss": 2.1849, + "step": 26410 + }, + { + "epoch": 1.21, + "learning_rate": 4.4068378415488126e-05, + "loss": 2.2153, + "step": 26420 + }, + { + "epoch": 1.21, + "learning_rate": 4.406608998123484e-05, + "loss": 2.1272, + "step": 26430 + }, + { + "epoch": 1.21, + "learning_rate": 4.406380154698156e-05, + "loss": 2.0843, + "step": 26440 + }, + { + "epoch": 1.21, + "learning_rate": 4.4061513112728275e-05, + "loss": 2.248, + "step": 26450 + }, + { + "epoch": 1.21, + "learning_rate": 4.405922467847499e-05, + "loss": 2.4257, + "step": 26460 + }, + { + "epoch": 1.21, + "learning_rate": 4.405693624422171e-05, + "loss": 2.2721, + "step": 26470 + }, + { + "epoch": 1.21, + "learning_rate": 4.405464780996842e-05, + "loss": 2.1948, + "step": 26480 + }, + { + "epoch": 1.21, + "learning_rate": 4.405235937571514e-05, + "loss": 2.2881, + "step": 26490 + }, + { + "epoch": 1.21, + "learning_rate": 4.405007094146185e-05, + "loss": 2.3021, + "step": 26500 + }, + { + "epoch": 1.21, + "learning_rate": 4.4047782507208566e-05, + "loss": 2.2531, + "step": 26510 + }, + { + "epoch": 1.21, + "learning_rate": 4.404549407295529e-05, + "loss": 2.3317, + "step": 26520 + }, + { + "epoch": 1.21, + "learning_rate": 4.4043205638702e-05, + "loss": 2.2281, + "step": 26530 + }, + { + "epoch": 1.21, + "learning_rate": 4.4040917204448715e-05, + "loss": 2.3392, + "step": 26540 + }, + { + "epoch": 1.21, + "learning_rate": 4.4038628770195436e-05, + "loss": 2.3488, + "step": 26550 + }, + { + "epoch": 1.21, + "learning_rate": 4.403634033594215e-05, + "loss": 2.1875, + "step": 26560 + }, + { + "epoch": 1.21, + "learning_rate": 4.4034051901688865e-05, + "loss": 2.2004, + "step": 26570 + }, + { + "epoch": 1.21, + "learning_rate": 4.4031763467435586e-05, + "loss": 2.2766, + "step": 26580 + }, + { + "epoch": 1.21, + "learning_rate": 4.40294750331823e-05, + "loss": 2.3332, + "step": 26590 + }, + { + "epoch": 1.21, + "learning_rate": 4.4027186598929014e-05, + "loss": 2.3131, + "step": 26600 + }, + { + "epoch": 1.22, + "learning_rate": 4.4024898164675735e-05, + "loss": 2.1778, + "step": 26610 + }, + { + "epoch": 1.22, + "learning_rate": 4.402260973042245e-05, + "loss": 2.4183, + "step": 26620 + }, + { + "epoch": 1.22, + "learning_rate": 4.402032129616916e-05, + "loss": 2.2488, + "step": 26630 + }, + { + "epoch": 1.22, + "learning_rate": 4.4018032861915884e-05, + "loss": 2.129, + "step": 26640 + }, + { + "epoch": 1.22, + "learning_rate": 4.40157444276626e-05, + "loss": 2.291, + "step": 26650 + }, + { + "epoch": 1.22, + "learning_rate": 4.401345599340931e-05, + "loss": 2.258, + "step": 26660 + }, + { + "epoch": 1.22, + "learning_rate": 4.401116755915603e-05, + "loss": 2.3021, + "step": 26670 + }, + { + "epoch": 1.22, + "learning_rate": 4.400887912490274e-05, + "loss": 2.2252, + "step": 26680 + }, + { + "epoch": 1.22, + "learning_rate": 4.4006590690649455e-05, + "loss": 2.3878, + "step": 26690 + }, + { + "epoch": 1.22, + "learning_rate": 4.4004302256396175e-05, + "loss": 2.107, + "step": 26700 + }, + { + "epoch": 1.22, + "learning_rate": 4.400201382214289e-05, + "loss": 2.3986, + "step": 26710 + }, + { + "epoch": 1.22, + "learning_rate": 4.3999725387889604e-05, + "loss": 2.2451, + "step": 26720 + }, + { + "epoch": 1.22, + "learning_rate": 4.3997436953636325e-05, + "loss": 2.1133, + "step": 26730 + }, + { + "epoch": 1.22, + "learning_rate": 4.399514851938304e-05, + "loss": 2.2691, + "step": 26740 + }, + { + "epoch": 1.22, + "learning_rate": 4.399286008512975e-05, + "loss": 2.1614, + "step": 26750 + }, + { + "epoch": 1.22, + "learning_rate": 4.3990571650876474e-05, + "loss": 2.1668, + "step": 26760 + }, + { + "epoch": 1.22, + "learning_rate": 4.398828321662319e-05, + "loss": 2.167, + "step": 26770 + }, + { + "epoch": 1.22, + "learning_rate": 4.39859947823699e-05, + "loss": 2.3436, + "step": 26780 + }, + { + "epoch": 1.22, + "learning_rate": 4.398370634811662e-05, + "loss": 2.1062, + "step": 26790 + }, + { + "epoch": 1.22, + "learning_rate": 4.398141791386334e-05, + "loss": 2.1801, + "step": 26800 + }, + { + "epoch": 1.22, + "learning_rate": 4.397912947961005e-05, + "loss": 2.2421, + "step": 26810 + }, + { + "epoch": 1.22, + "learning_rate": 4.397684104535677e-05, + "loss": 2.3968, + "step": 26820 + }, + { + "epoch": 1.23, + "learning_rate": 4.3974552611103486e-05, + "loss": 2.1781, + "step": 26830 + }, + { + "epoch": 1.23, + "learning_rate": 4.39722641768502e-05, + "loss": 2.2151, + "step": 26840 + }, + { + "epoch": 1.23, + "learning_rate": 4.396997574259692e-05, + "loss": 2.1584, + "step": 26850 + }, + { + "epoch": 1.23, + "learning_rate": 4.3967687308343635e-05, + "loss": 2.0782, + "step": 26860 + }, + { + "epoch": 1.23, + "learning_rate": 4.396539887409035e-05, + "loss": 2.3362, + "step": 26870 + }, + { + "epoch": 1.23, + "learning_rate": 4.3963110439837064e-05, + "loss": 2.2685, + "step": 26880 + }, + { + "epoch": 1.23, + "learning_rate": 4.396082200558378e-05, + "loss": 2.2134, + "step": 26890 + }, + { + "epoch": 1.23, + "learning_rate": 4.39585335713305e-05, + "loss": 2.2323, + "step": 26900 + }, + { + "epoch": 1.23, + "learning_rate": 4.395624513707721e-05, + "loss": 2.125, + "step": 26910 + }, + { + "epoch": 1.23, + "learning_rate": 4.395395670282393e-05, + "loss": 2.4238, + "step": 26920 + }, + { + "epoch": 1.23, + "learning_rate": 4.395166826857065e-05, + "loss": 2.3486, + "step": 26930 + }, + { + "epoch": 1.23, + "learning_rate": 4.394937983431736e-05, + "loss": 2.2849, + "step": 26940 + }, + { + "epoch": 1.23, + "learning_rate": 4.3947091400064076e-05, + "loss": 2.2468, + "step": 26950 + }, + { + "epoch": 1.23, + "learning_rate": 4.39448029658108e-05, + "loss": 2.2602, + "step": 26960 + }, + { + "epoch": 1.23, + "learning_rate": 4.394251453155751e-05, + "loss": 2.2907, + "step": 26970 + }, + { + "epoch": 1.23, + "learning_rate": 4.3940226097304225e-05, + "loss": 2.2525, + "step": 26980 + }, + { + "epoch": 1.23, + "learning_rate": 4.3937937663050946e-05, + "loss": 2.3345, + "step": 26990 + }, + { + "epoch": 1.23, + "learning_rate": 4.393564922879766e-05, + "loss": 2.2611, + "step": 27000 + }, + { + "epoch": 1.23, + "learning_rate": 4.3933360794544375e-05, + "loss": 2.2594, + "step": 27010 + }, + { + "epoch": 1.23, + "learning_rate": 4.3931072360291096e-05, + "loss": 2.1907, + "step": 27020 + }, + { + "epoch": 1.23, + "learning_rate": 4.392878392603781e-05, + "loss": 2.2139, + "step": 27030 + }, + { + "epoch": 1.23, + "learning_rate": 4.3926495491784524e-05, + "loss": 2.2012, + "step": 27040 + }, + { + "epoch": 1.24, + "learning_rate": 4.3924207057531245e-05, + "loss": 2.1965, + "step": 27050 + }, + { + "epoch": 1.24, + "learning_rate": 4.392191862327796e-05, + "loss": 2.3018, + "step": 27060 + }, + { + "epoch": 1.24, + "learning_rate": 4.391963018902467e-05, + "loss": 2.1636, + "step": 27070 + }, + { + "epoch": 1.24, + "learning_rate": 4.391734175477139e-05, + "loss": 2.136, + "step": 27080 + }, + { + "epoch": 1.24, + "learning_rate": 4.39150533205181e-05, + "loss": 2.4316, + "step": 27090 + }, + { + "epoch": 1.24, + "learning_rate": 4.3912764886264815e-05, + "loss": 2.3352, + "step": 27100 + }, + { + "epoch": 1.24, + "learning_rate": 4.3910476452011536e-05, + "loss": 2.2044, + "step": 27110 + }, + { + "epoch": 1.24, + "learning_rate": 4.390818801775825e-05, + "loss": 2.113, + "step": 27120 + }, + { + "epoch": 1.24, + "learning_rate": 4.3905899583504965e-05, + "loss": 2.262, + "step": 27130 + }, + { + "epoch": 1.24, + "learning_rate": 4.3903611149251685e-05, + "loss": 2.2298, + "step": 27140 + }, + { + "epoch": 1.24, + "learning_rate": 4.39013227149984e-05, + "loss": 2.2533, + "step": 27150 + }, + { + "epoch": 1.24, + "learning_rate": 4.3899034280745114e-05, + "loss": 2.4151, + "step": 27160 + }, + { + "epoch": 1.24, + "learning_rate": 4.3896745846491835e-05, + "loss": 2.229, + "step": 27170 + }, + { + "epoch": 1.24, + "learning_rate": 4.389445741223855e-05, + "loss": 2.3317, + "step": 27180 + }, + { + "epoch": 1.24, + "learning_rate": 4.389216897798526e-05, + "loss": 2.257, + "step": 27190 + }, + { + "epoch": 1.24, + "learning_rate": 4.3889880543731984e-05, + "loss": 2.2413, + "step": 27200 + }, + { + "epoch": 1.24, + "learning_rate": 4.38875921094787e-05, + "loss": 2.2385, + "step": 27210 + }, + { + "epoch": 1.24, + "learning_rate": 4.388530367522541e-05, + "loss": 2.5024, + "step": 27220 + }, + { + "epoch": 1.24, + "learning_rate": 4.388301524097213e-05, + "loss": 2.3374, + "step": 27230 + }, + { + "epoch": 1.24, + "learning_rate": 4.388072680671885e-05, + "loss": 2.2077, + "step": 27240 + }, + { + "epoch": 1.24, + "learning_rate": 4.387843837246556e-05, + "loss": 2.2619, + "step": 27250 + }, + { + "epoch": 1.24, + "learning_rate": 4.387614993821228e-05, + "loss": 2.2026, + "step": 27260 + }, + { + "epoch": 1.25, + "learning_rate": 4.387386150395899e-05, + "loss": 2.3982, + "step": 27270 + }, + { + "epoch": 1.25, + "learning_rate": 4.3871573069705704e-05, + "loss": 2.3381, + "step": 27280 + }, + { + "epoch": 1.25, + "learning_rate": 4.3869284635452425e-05, + "loss": 2.3795, + "step": 27290 + }, + { + "epoch": 1.25, + "learning_rate": 4.386699620119914e-05, + "loss": 2.2448, + "step": 27300 + }, + { + "epoch": 1.25, + "learning_rate": 4.386470776694585e-05, + "loss": 2.3164, + "step": 27310 + }, + { + "epoch": 1.25, + "learning_rate": 4.3862419332692574e-05, + "loss": 2.197, + "step": 27320 + }, + { + "epoch": 1.25, + "learning_rate": 4.386013089843929e-05, + "loss": 2.3321, + "step": 27330 + }, + { + "epoch": 1.25, + "learning_rate": 4.3857842464186e-05, + "loss": 2.147, + "step": 27340 + }, + { + "epoch": 1.25, + "learning_rate": 4.385555402993272e-05, + "loss": 2.2972, + "step": 27350 + }, + { + "epoch": 1.25, + "learning_rate": 4.385326559567944e-05, + "loss": 2.1299, + "step": 27360 + }, + { + "epoch": 1.25, + "learning_rate": 4.385097716142615e-05, + "loss": 2.1349, + "step": 27370 + }, + { + "epoch": 1.25, + "learning_rate": 4.384868872717287e-05, + "loss": 2.3583, + "step": 27380 + }, + { + "epoch": 1.25, + "learning_rate": 4.3846400292919586e-05, + "loss": 2.2612, + "step": 27390 + }, + { + "epoch": 1.25, + "learning_rate": 4.38441118586663e-05, + "loss": 2.3364, + "step": 27400 + }, + { + "epoch": 1.25, + "learning_rate": 4.384182342441302e-05, + "loss": 2.1991, + "step": 27410 + }, + { + "epoch": 1.25, + "learning_rate": 4.3839534990159735e-05, + "loss": 2.3132, + "step": 27420 + }, + { + "epoch": 1.25, + "learning_rate": 4.383724655590645e-05, + "loss": 2.2604, + "step": 27430 + }, + { + "epoch": 1.25, + "learning_rate": 4.383495812165317e-05, + "loss": 2.1945, + "step": 27440 + }, + { + "epoch": 1.25, + "learning_rate": 4.3832669687399885e-05, + "loss": 2.1017, + "step": 27450 + }, + { + "epoch": 1.25, + "learning_rate": 4.38303812531466e-05, + "loss": 2.199, + "step": 27460 + }, + { + "epoch": 1.25, + "learning_rate": 4.382809281889331e-05, + "loss": 2.2346, + "step": 27470 + }, + { + "epoch": 1.25, + "learning_rate": 4.382580438464003e-05, + "loss": 2.2823, + "step": 27480 + }, + { + "epoch": 1.26, + "learning_rate": 4.382351595038675e-05, + "loss": 2.1686, + "step": 27490 + }, + { + "epoch": 1.26, + "learning_rate": 4.382122751613346e-05, + "loss": 2.3466, + "step": 27500 + }, + { + "epoch": 1.26, + "learning_rate": 4.3818939081880176e-05, + "loss": 2.3026, + "step": 27510 + }, + { + "epoch": 1.26, + "learning_rate": 4.38166506476269e-05, + "loss": 2.3596, + "step": 27520 + }, + { + "epoch": 1.26, + "learning_rate": 4.381436221337361e-05, + "loss": 2.3329, + "step": 27530 + }, + { + "epoch": 1.26, + "learning_rate": 4.3812073779120325e-05, + "loss": 2.2289, + "step": 27540 + }, + { + "epoch": 1.26, + "learning_rate": 4.3809785344867046e-05, + "loss": 2.1802, + "step": 27550 + }, + { + "epoch": 1.26, + "learning_rate": 4.380749691061376e-05, + "loss": 2.45, + "step": 27560 + }, + { + "epoch": 1.26, + "learning_rate": 4.3805208476360474e-05, + "loss": 2.0827, + "step": 27570 + }, + { + "epoch": 1.26, + "learning_rate": 4.3802920042107195e-05, + "loss": 2.2093, + "step": 27580 + }, + { + "epoch": 1.26, + "learning_rate": 4.380063160785391e-05, + "loss": 2.4665, + "step": 27590 + }, + { + "epoch": 1.26, + "learning_rate": 4.3798343173600624e-05, + "loss": 2.1467, + "step": 27600 + }, + { + "epoch": 1.26, + "learning_rate": 4.3796054739347345e-05, + "loss": 2.149, + "step": 27610 + }, + { + "epoch": 1.26, + "learning_rate": 4.379376630509406e-05, + "loss": 2.1548, + "step": 27620 + }, + { + "epoch": 1.26, + "learning_rate": 4.379147787084077e-05, + "loss": 2.2776, + "step": 27630 + }, + { + "epoch": 1.26, + "learning_rate": 4.3789189436587494e-05, + "loss": 2.3913, + "step": 27640 + }, + { + "epoch": 1.26, + "learning_rate": 4.378690100233421e-05, + "loss": 2.1884, + "step": 27650 + }, + { + "epoch": 1.26, + "learning_rate": 4.378461256808092e-05, + "loss": 2.1749, + "step": 27660 + }, + { + "epoch": 1.26, + "learning_rate": 4.3782324133827636e-05, + "loss": 2.176, + "step": 27670 + }, + { + "epoch": 1.26, + "learning_rate": 4.378003569957435e-05, + "loss": 2.4215, + "step": 27680 + }, + { + "epoch": 1.26, + "learning_rate": 4.3777747265321064e-05, + "loss": 2.2409, + "step": 27690 + }, + { + "epoch": 1.26, + "learning_rate": 4.3775458831067785e-05, + "loss": 2.2433, + "step": 27700 + }, + { + "epoch": 1.27, + "learning_rate": 4.37731703968145e-05, + "loss": 2.2805, + "step": 27710 + }, + { + "epoch": 1.27, + "learning_rate": 4.3770881962561214e-05, + "loss": 2.2067, + "step": 27720 + }, + { + "epoch": 1.27, + "learning_rate": 4.3768593528307935e-05, + "loss": 2.2702, + "step": 27730 + }, + { + "epoch": 1.27, + "learning_rate": 4.376630509405465e-05, + "loss": 2.4218, + "step": 27740 + }, + { + "epoch": 1.27, + "learning_rate": 4.376401665980136e-05, + "loss": 2.3797, + "step": 27750 + }, + { + "epoch": 1.27, + "learning_rate": 4.3761728225548084e-05, + "loss": 2.188, + "step": 27760 + }, + { + "epoch": 1.27, + "learning_rate": 4.37594397912948e-05, + "loss": 2.3258, + "step": 27770 + }, + { + "epoch": 1.27, + "learning_rate": 4.375715135704151e-05, + "loss": 2.2971, + "step": 27780 + }, + { + "epoch": 1.27, + "learning_rate": 4.375486292278823e-05, + "loss": 2.1436, + "step": 27790 + }, + { + "epoch": 1.27, + "learning_rate": 4.375257448853495e-05, + "loss": 2.3227, + "step": 27800 + }, + { + "epoch": 1.27, + "learning_rate": 4.375028605428166e-05, + "loss": 2.2492, + "step": 27810 + }, + { + "epoch": 1.27, + "learning_rate": 4.374799762002838e-05, + "loss": 2.2638, + "step": 27820 + }, + { + "epoch": 1.27, + "learning_rate": 4.3745709185775096e-05, + "loss": 2.3175, + "step": 27830 + }, + { + "epoch": 1.27, + "learning_rate": 4.374342075152181e-05, + "loss": 2.2288, + "step": 27840 + }, + { + "epoch": 1.27, + "learning_rate": 4.374113231726853e-05, + "loss": 2.333, + "step": 27850 + }, + { + "epoch": 1.27, + "learning_rate": 4.3738843883015245e-05, + "loss": 2.2562, + "step": 27860 + }, + { + "epoch": 1.27, + "learning_rate": 4.373655544876196e-05, + "loss": 2.28, + "step": 27870 + }, + { + "epoch": 1.27, + "learning_rate": 4.3734267014508674e-05, + "loss": 2.2299, + "step": 27880 + }, + { + "epoch": 1.27, + "learning_rate": 4.373197858025539e-05, + "loss": 2.0271, + "step": 27890 + }, + { + "epoch": 1.27, + "learning_rate": 4.372969014600211e-05, + "loss": 2.1865, + "step": 27900 + }, + { + "epoch": 1.27, + "learning_rate": 4.372740171174882e-05, + "loss": 2.3976, + "step": 27910 + }, + { + "epoch": 1.27, + "learning_rate": 4.372511327749554e-05, + "loss": 2.3263, + "step": 27920 + }, + { + "epoch": 1.28, + "learning_rate": 4.372282484324226e-05, + "loss": 2.114, + "step": 27930 + }, + { + "epoch": 1.28, + "learning_rate": 4.372053640898897e-05, + "loss": 2.3048, + "step": 27940 + }, + { + "epoch": 1.28, + "learning_rate": 4.3718247974735686e-05, + "loss": 2.2542, + "step": 27950 + }, + { + "epoch": 1.28, + "learning_rate": 4.371595954048241e-05, + "loss": 2.135, + "step": 27960 + }, + { + "epoch": 1.28, + "learning_rate": 4.371367110622912e-05, + "loss": 2.2208, + "step": 27970 + }, + { + "epoch": 1.28, + "learning_rate": 4.3711382671975835e-05, + "loss": 2.266, + "step": 27980 + }, + { + "epoch": 1.28, + "learning_rate": 4.3709094237722556e-05, + "loss": 2.2769, + "step": 27990 + }, + { + "epoch": 1.28, + "learning_rate": 4.370680580346927e-05, + "loss": 2.1441, + "step": 28000 + }, + { + "epoch": 1.28, + "learning_rate": 4.3704517369215984e-05, + "loss": 2.2935, + "step": 28010 + }, + { + "epoch": 1.28, + "learning_rate": 4.3702228934962705e-05, + "loss": 2.401, + "step": 28020 + }, + { + "epoch": 1.28, + "learning_rate": 4.369994050070942e-05, + "loss": 2.2079, + "step": 28030 + }, + { + "epoch": 1.28, + "learning_rate": 4.3697652066456134e-05, + "loss": 2.1913, + "step": 28040 + }, + { + "epoch": 1.28, + "learning_rate": 4.3695363632202855e-05, + "loss": 2.2576, + "step": 28050 + }, + { + "epoch": 1.28, + "learning_rate": 4.369307519794956e-05, + "loss": 2.2689, + "step": 28060 + }, + { + "epoch": 1.28, + "learning_rate": 4.3690786763696276e-05, + "loss": 2.1698, + "step": 28070 + }, + { + "epoch": 1.28, + "learning_rate": 4.3688498329443e-05, + "loss": 2.1253, + "step": 28080 + }, + { + "epoch": 1.28, + "learning_rate": 4.368620989518971e-05, + "loss": 2.2868, + "step": 28090 + }, + { + "epoch": 1.28, + "learning_rate": 4.3683921460936425e-05, + "loss": 2.0501, + "step": 28100 + }, + { + "epoch": 1.28, + "learning_rate": 4.3681633026683146e-05, + "loss": 2.3896, + "step": 28110 + }, + { + "epoch": 1.28, + "learning_rate": 4.367934459242986e-05, + "loss": 2.1717, + "step": 28120 + }, + { + "epoch": 1.28, + "learning_rate": 4.3677056158176574e-05, + "loss": 2.2957, + "step": 28130 + }, + { + "epoch": 1.28, + "learning_rate": 4.3674767723923295e-05, + "loss": 2.2337, + "step": 28140 + }, + { + "epoch": 1.29, + "learning_rate": 4.367247928967001e-05, + "loss": 2.3352, + "step": 28150 + }, + { + "epoch": 1.29, + "learning_rate": 4.3670190855416724e-05, + "loss": 2.2068, + "step": 28160 + }, + { + "epoch": 1.29, + "learning_rate": 4.3667902421163444e-05, + "loss": 2.3666, + "step": 28170 + }, + { + "epoch": 1.29, + "learning_rate": 4.366561398691016e-05, + "loss": 2.306, + "step": 28180 + }, + { + "epoch": 1.29, + "learning_rate": 4.366332555265687e-05, + "loss": 2.2115, + "step": 28190 + }, + { + "epoch": 1.29, + "learning_rate": 4.3661037118403594e-05, + "loss": 2.1288, + "step": 28200 + }, + { + "epoch": 1.29, + "learning_rate": 4.365874868415031e-05, + "loss": 2.0608, + "step": 28210 + }, + { + "epoch": 1.29, + "learning_rate": 4.365646024989702e-05, + "loss": 2.3001, + "step": 28220 + }, + { + "epoch": 1.29, + "learning_rate": 4.365417181564374e-05, + "loss": 2.3103, + "step": 28230 + }, + { + "epoch": 1.29, + "learning_rate": 4.365188338139046e-05, + "loss": 2.239, + "step": 28240 + }, + { + "epoch": 1.29, + "learning_rate": 4.364959494713717e-05, + "loss": 2.1624, + "step": 28250 + }, + { + "epoch": 1.29, + "learning_rate": 4.3647306512883885e-05, + "loss": 2.2734, + "step": 28260 + }, + { + "epoch": 1.29, + "learning_rate": 4.36450180786306e-05, + "loss": 2.1851, + "step": 28270 + }, + { + "epoch": 1.29, + "learning_rate": 4.364272964437732e-05, + "loss": 2.3625, + "step": 28280 + }, + { + "epoch": 1.29, + "learning_rate": 4.3640441210124034e-05, + "loss": 2.3119, + "step": 28290 + }, + { + "epoch": 1.29, + "learning_rate": 4.363815277587075e-05, + "loss": 2.3616, + "step": 28300 + }, + { + "epoch": 1.29, + "learning_rate": 4.363586434161747e-05, + "loss": 2.0218, + "step": 28310 + }, + { + "epoch": 1.29, + "learning_rate": 4.3633575907364184e-05, + "loss": 2.2025, + "step": 28320 + }, + { + "epoch": 1.29, + "learning_rate": 4.36312874731109e-05, + "loss": 2.2558, + "step": 28330 + }, + { + "epoch": 1.29, + "learning_rate": 4.362899903885762e-05, + "loss": 2.2127, + "step": 28340 + }, + { + "epoch": 1.29, + "learning_rate": 4.362671060460433e-05, + "loss": 2.3427, + "step": 28350 + }, + { + "epoch": 1.29, + "learning_rate": 4.362442217035105e-05, + "loss": 2.1714, + "step": 28360 + }, + { + "epoch": 1.3, + "learning_rate": 4.362213373609777e-05, + "loss": 2.1931, + "step": 28370 + }, + { + "epoch": 1.3, + "learning_rate": 4.361984530184448e-05, + "loss": 2.3745, + "step": 28380 + }, + { + "epoch": 1.3, + "learning_rate": 4.3617556867591196e-05, + "loss": 2.1743, + "step": 28390 + }, + { + "epoch": 1.3, + "learning_rate": 4.361526843333791e-05, + "loss": 2.3113, + "step": 28400 + }, + { + "epoch": 1.3, + "learning_rate": 4.361297999908463e-05, + "loss": 2.3448, + "step": 28410 + }, + { + "epoch": 1.3, + "learning_rate": 4.3610691564831345e-05, + "loss": 2.1585, + "step": 28420 + }, + { + "epoch": 1.3, + "learning_rate": 4.360840313057806e-05, + "loss": 2.1739, + "step": 28430 + }, + { + "epoch": 1.3, + "learning_rate": 4.360611469632478e-05, + "loss": 2.2331, + "step": 28440 + }, + { + "epoch": 1.3, + "learning_rate": 4.3603826262071494e-05, + "loss": 2.2834, + "step": 28450 + }, + { + "epoch": 1.3, + "learning_rate": 4.360153782781821e-05, + "loss": 2.2659, + "step": 28460 + }, + { + "epoch": 1.3, + "learning_rate": 4.359924939356492e-05, + "loss": 2.173, + "step": 28470 + }, + { + "epoch": 1.3, + "learning_rate": 4.359696095931164e-05, + "loss": 2.3407, + "step": 28480 + }, + { + "epoch": 1.3, + "learning_rate": 4.359467252505836e-05, + "loss": 2.1362, + "step": 28490 + }, + { + "epoch": 1.3, + "learning_rate": 4.359238409080507e-05, + "loss": 2.2518, + "step": 28500 + }, + { + "epoch": 1.3, + "learning_rate": 4.3590095656551786e-05, + "loss": 2.2939, + "step": 28510 + }, + { + "epoch": 1.3, + "learning_rate": 4.358780722229851e-05, + "loss": 2.243, + "step": 28520 + }, + { + "epoch": 1.3, + "learning_rate": 4.358551878804522e-05, + "loss": 2.2809, + "step": 28530 + }, + { + "epoch": 1.3, + "learning_rate": 4.3583230353791935e-05, + "loss": 2.1699, + "step": 28540 + }, + { + "epoch": 1.3, + "learning_rate": 4.3580941919538656e-05, + "loss": 2.1813, + "step": 28550 + }, + { + "epoch": 1.3, + "learning_rate": 4.357865348528537e-05, + "loss": 2.2395, + "step": 28560 + }, + { + "epoch": 1.3, + "learning_rate": 4.3576365051032084e-05, + "loss": 2.1388, + "step": 28570 + }, + { + "epoch": 1.31, + "learning_rate": 4.3574076616778805e-05, + "loss": 2.3555, + "step": 28580 + }, + { + "epoch": 1.31, + "learning_rate": 4.357178818252552e-05, + "loss": 2.2796, + "step": 28590 + }, + { + "epoch": 1.31, + "learning_rate": 4.3569499748272234e-05, + "loss": 2.355, + "step": 28600 + }, + { + "epoch": 1.31, + "learning_rate": 4.3567211314018954e-05, + "loss": 2.1346, + "step": 28610 + }, + { + "epoch": 1.31, + "learning_rate": 4.356492287976567e-05, + "loss": 2.1793, + "step": 28620 + }, + { + "epoch": 1.31, + "learning_rate": 4.356263444551238e-05, + "loss": 2.2621, + "step": 28630 + }, + { + "epoch": 1.31, + "learning_rate": 4.3560346011259104e-05, + "loss": 2.1665, + "step": 28640 + }, + { + "epoch": 1.31, + "learning_rate": 4.355805757700582e-05, + "loss": 2.3427, + "step": 28650 + }, + { + "epoch": 1.31, + "learning_rate": 4.3555769142752525e-05, + "loss": 2.2908, + "step": 28660 + }, + { + "epoch": 1.31, + "learning_rate": 4.3553480708499246e-05, + "loss": 2.1994, + "step": 28670 + }, + { + "epoch": 1.31, + "learning_rate": 4.355119227424596e-05, + "loss": 2.181, + "step": 28680 + }, + { + "epoch": 1.31, + "learning_rate": 4.3548903839992674e-05, + "loss": 2.2747, + "step": 28690 + }, + { + "epoch": 1.31, + "learning_rate": 4.3546615405739395e-05, + "loss": 2.2688, + "step": 28700 + }, + { + "epoch": 1.31, + "learning_rate": 4.354432697148611e-05, + "loss": 2.1805, + "step": 28710 + }, + { + "epoch": 1.31, + "learning_rate": 4.3542038537232823e-05, + "loss": 2.3188, + "step": 28720 + }, + { + "epoch": 1.31, + "learning_rate": 4.3539750102979544e-05, + "loss": 2.2117, + "step": 28730 + }, + { + "epoch": 1.31, + "learning_rate": 4.353746166872626e-05, + "loss": 2.194, + "step": 28740 + }, + { + "epoch": 1.31, + "learning_rate": 4.353517323447297e-05, + "loss": 2.222, + "step": 28750 + }, + { + "epoch": 1.31, + "learning_rate": 4.3532884800219694e-05, + "loss": 2.0096, + "step": 28760 + }, + { + "epoch": 1.31, + "learning_rate": 4.353059636596641e-05, + "loss": 2.2394, + "step": 28770 + }, + { + "epoch": 1.31, + "learning_rate": 4.352830793171312e-05, + "loss": 2.2936, + "step": 28780 + }, + { + "epoch": 1.31, + "learning_rate": 4.352601949745984e-05, + "loss": 2.2695, + "step": 28790 + }, + { + "epoch": 1.32, + "learning_rate": 4.352373106320656e-05, + "loss": 2.2188, + "step": 28800 + }, + { + "epoch": 1.32, + "learning_rate": 4.352144262895327e-05, + "loss": 2.1946, + "step": 28810 + }, + { + "epoch": 1.32, + "learning_rate": 4.351915419469999e-05, + "loss": 2.2908, + "step": 28820 + }, + { + "epoch": 1.32, + "learning_rate": 4.3516865760446706e-05, + "loss": 2.197, + "step": 28830 + }, + { + "epoch": 1.32, + "learning_rate": 4.351457732619342e-05, + "loss": 2.385, + "step": 28840 + }, + { + "epoch": 1.32, + "learning_rate": 4.3512288891940134e-05, + "loss": 2.284, + "step": 28850 + }, + { + "epoch": 1.32, + "learning_rate": 4.351000045768685e-05, + "loss": 2.1954, + "step": 28860 + }, + { + "epoch": 1.32, + "learning_rate": 4.350771202343357e-05, + "loss": 2.1071, + "step": 28870 + }, + { + "epoch": 1.32, + "learning_rate": 4.3505423589180283e-05, + "loss": 2.2875, + "step": 28880 + }, + { + "epoch": 1.32, + "learning_rate": 4.3503135154927e-05, + "loss": 2.2441, + "step": 28890 + }, + { + "epoch": 1.32, + "learning_rate": 4.350084672067372e-05, + "loss": 2.1706, + "step": 28900 + }, + { + "epoch": 1.32, + "learning_rate": 4.349855828642043e-05, + "loss": 2.3903, + "step": 28910 + }, + { + "epoch": 1.32, + "learning_rate": 4.349626985216715e-05, + "loss": 2.2887, + "step": 28920 + }, + { + "epoch": 1.32, + "learning_rate": 4.349398141791387e-05, + "loss": 2.2433, + "step": 28930 + }, + { + "epoch": 1.32, + "learning_rate": 4.349169298366058e-05, + "loss": 2.3196, + "step": 28940 + }, + { + "epoch": 1.32, + "learning_rate": 4.3489404549407296e-05, + "loss": 2.2339, + "step": 28950 + }, + { + "epoch": 1.32, + "learning_rate": 4.348711611515402e-05, + "loss": 2.179, + "step": 28960 + }, + { + "epoch": 1.32, + "learning_rate": 4.348482768090073e-05, + "loss": 2.2136, + "step": 28970 + }, + { + "epoch": 1.32, + "learning_rate": 4.3482539246647445e-05, + "loss": 2.2694, + "step": 28980 + }, + { + "epoch": 1.32, + "learning_rate": 4.3480250812394166e-05, + "loss": 2.2348, + "step": 28990 + }, + { + "epoch": 1.32, + "learning_rate": 4.347796237814088e-05, + "loss": 2.3096, + "step": 29000 + }, + { + "epoch": 1.32, + "learning_rate": 4.3475673943887594e-05, + "loss": 2.2403, + "step": 29010 + }, + { + "epoch": 1.33, + "learning_rate": 4.3473385509634315e-05, + "loss": 2.2729, + "step": 29020 + }, + { + "epoch": 1.33, + "learning_rate": 4.347109707538103e-05, + "loss": 2.1817, + "step": 29030 + }, + { + "epoch": 1.33, + "learning_rate": 4.3468808641127744e-05, + "loss": 2.182, + "step": 29040 + }, + { + "epoch": 1.33, + "learning_rate": 4.346652020687446e-05, + "loss": 2.2399, + "step": 29050 + }, + { + "epoch": 1.33, + "learning_rate": 4.346423177262117e-05, + "loss": 2.1898, + "step": 29060 + }, + { + "epoch": 1.33, + "learning_rate": 4.3461943338367886e-05, + "loss": 2.3003, + "step": 29070 + }, + { + "epoch": 1.33, + "learning_rate": 4.345965490411461e-05, + "loss": 2.3884, + "step": 29080 + }, + { + "epoch": 1.33, + "learning_rate": 4.345736646986132e-05, + "loss": 2.1533, + "step": 29090 + }, + { + "epoch": 1.33, + "learning_rate": 4.3455078035608035e-05, + "loss": 2.2165, + "step": 29100 + }, + { + "epoch": 1.33, + "learning_rate": 4.3452789601354756e-05, + "loss": 2.0636, + "step": 29110 + }, + { + "epoch": 1.33, + "learning_rate": 4.345050116710147e-05, + "loss": 2.2275, + "step": 29120 + }, + { + "epoch": 1.33, + "learning_rate": 4.3448212732848184e-05, + "loss": 2.3564, + "step": 29130 + }, + { + "epoch": 1.33, + "learning_rate": 4.3445924298594905e-05, + "loss": 2.2038, + "step": 29140 + }, + { + "epoch": 1.33, + "learning_rate": 4.344363586434162e-05, + "loss": 2.1875, + "step": 29150 + }, + { + "epoch": 1.33, + "learning_rate": 4.3441347430088333e-05, + "loss": 2.4906, + "step": 29160 + }, + { + "epoch": 1.33, + "learning_rate": 4.3439058995835054e-05, + "loss": 2.1997, + "step": 29170 + }, + { + "epoch": 1.33, + "learning_rate": 4.343677056158177e-05, + "loss": 2.3372, + "step": 29180 + }, + { + "epoch": 1.33, + "learning_rate": 4.343448212732848e-05, + "loss": 2.2557, + "step": 29190 + }, + { + "epoch": 1.33, + "learning_rate": 4.3432193693075204e-05, + "loss": 2.3428, + "step": 29200 + }, + { + "epoch": 1.33, + "learning_rate": 4.342990525882192e-05, + "loss": 2.0956, + "step": 29210 + }, + { + "epoch": 1.33, + "learning_rate": 4.342761682456863e-05, + "loss": 2.347, + "step": 29220 + }, + { + "epoch": 1.33, + "learning_rate": 4.342532839031535e-05, + "loss": 2.3811, + "step": 29230 + }, + { + "epoch": 1.34, + "learning_rate": 4.342303995606207e-05, + "loss": 2.2781, + "step": 29240 + }, + { + "epoch": 1.34, + "learning_rate": 4.342075152180878e-05, + "loss": 2.2374, + "step": 29250 + }, + { + "epoch": 1.34, + "learning_rate": 4.3418463087555495e-05, + "loss": 2.1602, + "step": 29260 + }, + { + "epoch": 1.34, + "learning_rate": 4.341617465330221e-05, + "loss": 2.1937, + "step": 29270 + }, + { + "epoch": 1.34, + "learning_rate": 4.341388621904893e-05, + "loss": 2.3525, + "step": 29280 + }, + { + "epoch": 1.34, + "learning_rate": 4.3411597784795644e-05, + "loss": 2.1304, + "step": 29290 + }, + { + "epoch": 1.34, + "learning_rate": 4.340930935054236e-05, + "loss": 2.0873, + "step": 29300 + }, + { + "epoch": 1.34, + "learning_rate": 4.340702091628908e-05, + "loss": 2.2035, + "step": 29310 + }, + { + "epoch": 1.34, + "learning_rate": 4.3404732482035793e-05, + "loss": 2.3351, + "step": 29320 + }, + { + "epoch": 1.34, + "learning_rate": 4.340244404778251e-05, + "loss": 2.1116, + "step": 29330 + }, + { + "epoch": 1.34, + "learning_rate": 4.340015561352923e-05, + "loss": 2.2325, + "step": 29340 + }, + { + "epoch": 1.34, + "learning_rate": 4.339786717927594e-05, + "loss": 2.2103, + "step": 29350 + }, + { + "epoch": 1.34, + "learning_rate": 4.339557874502266e-05, + "loss": 2.2155, + "step": 29360 + }, + { + "epoch": 1.34, + "learning_rate": 4.339329031076938e-05, + "loss": 2.2223, + "step": 29370 + }, + { + "epoch": 1.34, + "learning_rate": 4.339100187651609e-05, + "loss": 2.2957, + "step": 29380 + }, + { + "epoch": 1.34, + "learning_rate": 4.3388713442262806e-05, + "loss": 2.1328, + "step": 29390 + }, + { + "epoch": 1.34, + "learning_rate": 4.338642500800953e-05, + "loss": 2.3111, + "step": 29400 + }, + { + "epoch": 1.34, + "learning_rate": 4.338413657375624e-05, + "loss": 2.2328, + "step": 29410 + }, + { + "epoch": 1.34, + "learning_rate": 4.3381848139502955e-05, + "loss": 2.3519, + "step": 29420 + }, + { + "epoch": 1.34, + "learning_rate": 4.3379559705249676e-05, + "loss": 2.2333, + "step": 29430 + }, + { + "epoch": 1.34, + "learning_rate": 4.337727127099639e-05, + "loss": 2.3311, + "step": 29440 + }, + { + "epoch": 1.34, + "learning_rate": 4.33749828367431e-05, + "loss": 2.1258, + "step": 29450 + }, + { + "epoch": 1.35, + "learning_rate": 4.337269440248982e-05, + "loss": 2.1338, + "step": 29460 + }, + { + "epoch": 1.35, + "learning_rate": 4.337040596823653e-05, + "loss": 2.1954, + "step": 29470 + }, + { + "epoch": 1.35, + "learning_rate": 4.336811753398325e-05, + "loss": 2.5336, + "step": 29480 + }, + { + "epoch": 1.35, + "learning_rate": 4.336582909972997e-05, + "loss": 2.3028, + "step": 29490 + }, + { + "epoch": 1.35, + "learning_rate": 4.336354066547668e-05, + "loss": 2.3005, + "step": 29500 + }, + { + "epoch": 1.35, + "learning_rate": 4.3361252231223396e-05, + "loss": 2.2478, + "step": 29510 + }, + { + "epoch": 1.35, + "learning_rate": 4.335896379697012e-05, + "loss": 2.4291, + "step": 29520 + }, + { + "epoch": 1.35, + "learning_rate": 4.335667536271683e-05, + "loss": 2.392, + "step": 29530 + }, + { + "epoch": 1.35, + "learning_rate": 4.3354386928463545e-05, + "loss": 2.1811, + "step": 29540 + }, + { + "epoch": 1.35, + "learning_rate": 4.3352098494210266e-05, + "loss": 2.1566, + "step": 29550 + }, + { + "epoch": 1.35, + "learning_rate": 4.334981005995698e-05, + "loss": 2.3832, + "step": 29560 + }, + { + "epoch": 1.35, + "learning_rate": 4.3347521625703694e-05, + "loss": 2.2504, + "step": 29570 + }, + { + "epoch": 1.35, + "learning_rate": 4.3345233191450415e-05, + "loss": 2.4139, + "step": 29580 + }, + { + "epoch": 1.35, + "learning_rate": 4.334294475719713e-05, + "loss": 2.1897, + "step": 29590 + }, + { + "epoch": 1.35, + "learning_rate": 4.3340656322943843e-05, + "loss": 2.2243, + "step": 29600 + }, + { + "epoch": 1.35, + "learning_rate": 4.3338367888690564e-05, + "loss": 2.1772, + "step": 29610 + }, + { + "epoch": 1.35, + "learning_rate": 4.333607945443728e-05, + "loss": 2.2222, + "step": 29620 + }, + { + "epoch": 1.35, + "learning_rate": 4.333379102018399e-05, + "loss": 2.1948, + "step": 29630 + }, + { + "epoch": 1.35, + "learning_rate": 4.333150258593071e-05, + "loss": 2.1693, + "step": 29640 + }, + { + "epoch": 1.35, + "learning_rate": 4.332921415167742e-05, + "loss": 2.162, + "step": 29650 + }, + { + "epoch": 1.35, + "learning_rate": 4.3326925717424135e-05, + "loss": 2.2364, + "step": 29660 + }, + { + "epoch": 1.35, + "learning_rate": 4.3324637283170856e-05, + "loss": 2.3002, + "step": 29670 + }, + { + "epoch": 1.36, + "learning_rate": 4.332234884891757e-05, + "loss": 2.2576, + "step": 29680 + }, + { + "epoch": 1.36, + "learning_rate": 4.3320060414664284e-05, + "loss": 2.2445, + "step": 29690 + }, + { + "epoch": 1.36, + "learning_rate": 4.3317771980411005e-05, + "loss": 2.1332, + "step": 29700 + }, + { + "epoch": 1.36, + "learning_rate": 4.331548354615772e-05, + "loss": 2.1975, + "step": 29710 + }, + { + "epoch": 1.36, + "learning_rate": 4.331319511190443e-05, + "loss": 2.2195, + "step": 29720 + }, + { + "epoch": 1.36, + "learning_rate": 4.3310906677651154e-05, + "loss": 2.2482, + "step": 29730 + }, + { + "epoch": 1.36, + "learning_rate": 4.330861824339787e-05, + "loss": 2.1857, + "step": 29740 + }, + { + "epoch": 1.36, + "learning_rate": 4.330632980914458e-05, + "loss": 2.2443, + "step": 29750 + }, + { + "epoch": 1.36, + "learning_rate": 4.3304041374891303e-05, + "loss": 2.1629, + "step": 29760 + }, + { + "epoch": 1.36, + "learning_rate": 4.330175294063802e-05, + "loss": 2.196, + "step": 29770 + }, + { + "epoch": 1.36, + "learning_rate": 4.329946450638473e-05, + "loss": 2.174, + "step": 29780 + }, + { + "epoch": 1.36, + "learning_rate": 4.329717607213145e-05, + "loss": 2.3651, + "step": 29790 + }, + { + "epoch": 1.36, + "learning_rate": 4.329488763787817e-05, + "loss": 2.2519, + "step": 29800 + }, + { + "epoch": 1.36, + "learning_rate": 4.329259920362488e-05, + "loss": 2.2075, + "step": 29810 + }, + { + "epoch": 1.36, + "learning_rate": 4.32903107693716e-05, + "loss": 2.2942, + "step": 29820 + }, + { + "epoch": 1.36, + "learning_rate": 4.3288022335118316e-05, + "loss": 2.3036, + "step": 29830 + }, + { + "epoch": 1.36, + "learning_rate": 4.328573390086503e-05, + "loss": 2.184, + "step": 29840 + }, + { + "epoch": 1.36, + "learning_rate": 4.3283445466611744e-05, + "loss": 2.0728, + "step": 29850 + }, + { + "epoch": 1.36, + "learning_rate": 4.328115703235846e-05, + "loss": 2.0687, + "step": 29860 + }, + { + "epoch": 1.36, + "learning_rate": 4.327886859810518e-05, + "loss": 2.3478, + "step": 29870 + }, + { + "epoch": 1.36, + "learning_rate": 4.327658016385189e-05, + "loss": 2.0224, + "step": 29880 + }, + { + "epoch": 1.36, + "learning_rate": 4.327429172959861e-05, + "loss": 2.1782, + "step": 29890 + }, + { + "epoch": 1.37, + "learning_rate": 4.327200329534533e-05, + "loss": 2.2575, + "step": 29900 + }, + { + "epoch": 1.37, + "learning_rate": 4.326971486109204e-05, + "loss": 2.1917, + "step": 29910 + }, + { + "epoch": 1.37, + "learning_rate": 4.326742642683876e-05, + "loss": 2.2868, + "step": 29920 + }, + { + "epoch": 1.37, + "learning_rate": 4.326513799258548e-05, + "loss": 2.1408, + "step": 29930 + }, + { + "epoch": 1.37, + "learning_rate": 4.326284955833219e-05, + "loss": 2.0978, + "step": 29940 + }, + { + "epoch": 1.37, + "learning_rate": 4.3260561124078906e-05, + "loss": 2.102, + "step": 29950 + }, + { + "epoch": 1.37, + "learning_rate": 4.325827268982563e-05, + "loss": 2.0969, + "step": 29960 + }, + { + "epoch": 1.37, + "learning_rate": 4.325598425557234e-05, + "loss": 2.1811, + "step": 29970 + }, + { + "epoch": 1.37, + "learning_rate": 4.3253695821319055e-05, + "loss": 2.1494, + "step": 29980 + }, + { + "epoch": 1.37, + "learning_rate": 4.3251407387065776e-05, + "loss": 2.1645, + "step": 29990 + }, + { + "epoch": 1.37, + "learning_rate": 4.324911895281249e-05, + "loss": 2.4111, + "step": 30000 + }, + { + "epoch": 1.37, + "learning_rate": 4.3246830518559204e-05, + "loss": 2.1847, + "step": 30010 + }, + { + "epoch": 1.37, + "learning_rate": 4.3244542084305925e-05, + "loss": 2.154, + "step": 30020 + }, + { + "epoch": 1.37, + "learning_rate": 4.324225365005264e-05, + "loss": 2.1531, + "step": 30030 + }, + { + "epoch": 1.37, + "learning_rate": 4.3239965215799347e-05, + "loss": 2.2329, + "step": 30040 + }, + { + "epoch": 1.37, + "learning_rate": 4.323767678154607e-05, + "loss": 2.3304, + "step": 30050 + }, + { + "epoch": 1.37, + "learning_rate": 4.323538834729278e-05, + "loss": 2.2206, + "step": 30060 + }, + { + "epoch": 1.37, + "learning_rate": 4.3233099913039496e-05, + "loss": 2.0979, + "step": 30070 + }, + { + "epoch": 1.37, + "learning_rate": 4.323081147878622e-05, + "loss": 2.2306, + "step": 30080 + }, + { + "epoch": 1.37, + "learning_rate": 4.322852304453293e-05, + "loss": 2.2456, + "step": 30090 + }, + { + "epoch": 1.37, + "learning_rate": 4.3226234610279645e-05, + "loss": 2.248, + "step": 30100 + }, + { + "epoch": 1.37, + "learning_rate": 4.3223946176026366e-05, + "loss": 2.3657, + "step": 30110 + }, + { + "epoch": 1.38, + "learning_rate": 4.322165774177308e-05, + "loss": 2.3088, + "step": 30120 + }, + { + "epoch": 1.38, + "learning_rate": 4.3219369307519794e-05, + "loss": 2.0992, + "step": 30130 + }, + { + "epoch": 1.38, + "learning_rate": 4.3217080873266515e-05, + "loss": 2.1818, + "step": 30140 + }, + { + "epoch": 1.38, + "learning_rate": 4.321479243901323e-05, + "loss": 2.174, + "step": 30150 + }, + { + "epoch": 1.38, + "learning_rate": 4.321250400475994e-05, + "loss": 2.2604, + "step": 30160 + }, + { + "epoch": 1.38, + "learning_rate": 4.3210215570506664e-05, + "loss": 2.0914, + "step": 30170 + }, + { + "epoch": 1.38, + "learning_rate": 4.320792713625338e-05, + "loss": 2.1598, + "step": 30180 + }, + { + "epoch": 1.38, + "learning_rate": 4.320563870200009e-05, + "loss": 2.2641, + "step": 30190 + }, + { + "epoch": 1.38, + "learning_rate": 4.3203350267746813e-05, + "loss": 2.2908, + "step": 30200 + }, + { + "epoch": 1.38, + "learning_rate": 4.320106183349353e-05, + "loss": 2.1448, + "step": 30210 + }, + { + "epoch": 1.38, + "learning_rate": 4.319877339924024e-05, + "loss": 2.205, + "step": 30220 + }, + { + "epoch": 1.38, + "learning_rate": 4.319648496498696e-05, + "loss": 2.3599, + "step": 30230 + }, + { + "epoch": 1.38, + "learning_rate": 4.319419653073367e-05, + "loss": 2.1949, + "step": 30240 + }, + { + "epoch": 1.38, + "learning_rate": 4.319190809648039e-05, + "loss": 2.2394, + "step": 30250 + }, + { + "epoch": 1.38, + "learning_rate": 4.3189619662227105e-05, + "loss": 2.227, + "step": 30260 + }, + { + "epoch": 1.38, + "learning_rate": 4.318733122797382e-05, + "loss": 2.2041, + "step": 30270 + }, + { + "epoch": 1.38, + "learning_rate": 4.318504279372054e-05, + "loss": 2.2031, + "step": 30280 + }, + { + "epoch": 1.38, + "learning_rate": 4.3182754359467254e-05, + "loss": 2.2333, + "step": 30290 + }, + { + "epoch": 1.38, + "learning_rate": 4.318046592521397e-05, + "loss": 2.2222, + "step": 30300 + }, + { + "epoch": 1.38, + "learning_rate": 4.317817749096069e-05, + "loss": 2.2026, + "step": 30310 + }, + { + "epoch": 1.38, + "learning_rate": 4.31758890567074e-05, + "loss": 2.2933, + "step": 30320 + }, + { + "epoch": 1.38, + "learning_rate": 4.317360062245412e-05, + "loss": 2.2006, + "step": 30330 + }, + { + "epoch": 1.39, + "learning_rate": 4.317131218820084e-05, + "loss": 2.212, + "step": 30340 + }, + { + "epoch": 1.39, + "learning_rate": 4.316902375394755e-05, + "loss": 2.1924, + "step": 30350 + }, + { + "epoch": 1.39, + "learning_rate": 4.316673531969427e-05, + "loss": 2.1161, + "step": 30360 + }, + { + "epoch": 1.39, + "learning_rate": 4.316444688544099e-05, + "loss": 2.2663, + "step": 30370 + }, + { + "epoch": 1.39, + "learning_rate": 4.31621584511877e-05, + "loss": 2.1717, + "step": 30380 + }, + { + "epoch": 1.39, + "learning_rate": 4.3159870016934416e-05, + "loss": 2.3292, + "step": 30390 + }, + { + "epoch": 1.39, + "learning_rate": 4.315758158268114e-05, + "loss": 2.1534, + "step": 30400 + }, + { + "epoch": 1.39, + "learning_rate": 4.315529314842785e-05, + "loss": 2.1418, + "step": 30410 + }, + { + "epoch": 1.39, + "learning_rate": 4.3153004714174565e-05, + "loss": 2.2721, + "step": 30420 + }, + { + "epoch": 1.39, + "learning_rate": 4.315071627992128e-05, + "loss": 2.1498, + "step": 30430 + }, + { + "epoch": 1.39, + "learning_rate": 4.314842784566799e-05, + "loss": 2.2679, + "step": 30440 + }, + { + "epoch": 1.39, + "learning_rate": 4.314613941141471e-05, + "loss": 2.1634, + "step": 30450 + }, + { + "epoch": 1.39, + "learning_rate": 4.314385097716143e-05, + "loss": 2.0886, + "step": 30460 + }, + { + "epoch": 1.39, + "learning_rate": 4.314156254290814e-05, + "loss": 1.9656, + "step": 30470 + }, + { + "epoch": 1.39, + "learning_rate": 4.3139274108654857e-05, + "loss": 2.1539, + "step": 30480 + }, + { + "epoch": 1.39, + "learning_rate": 4.313698567440158e-05, + "loss": 2.4574, + "step": 30490 + }, + { + "epoch": 1.39, + "learning_rate": 4.313469724014829e-05, + "loss": 2.3302, + "step": 30500 + }, + { + "epoch": 1.39, + "learning_rate": 4.3132408805895006e-05, + "loss": 2.1459, + "step": 30510 + }, + { + "epoch": 1.39, + "learning_rate": 4.313012037164173e-05, + "loss": 2.1973, + "step": 30520 + }, + { + "epoch": 1.39, + "learning_rate": 4.312783193738844e-05, + "loss": 2.088, + "step": 30530 + }, + { + "epoch": 1.39, + "learning_rate": 4.3125543503135155e-05, + "loss": 2.2457, + "step": 30540 + }, + { + "epoch": 1.39, + "learning_rate": 4.3123255068881876e-05, + "loss": 2.1393, + "step": 30550 + }, + { + "epoch": 1.4, + "learning_rate": 4.312096663462859e-05, + "loss": 2.2307, + "step": 30560 + }, + { + "epoch": 1.4, + "learning_rate": 4.3118678200375304e-05, + "loss": 2.3452, + "step": 30570 + }, + { + "epoch": 1.4, + "learning_rate": 4.3116389766122025e-05, + "loss": 2.3315, + "step": 30580 + }, + { + "epoch": 1.4, + "learning_rate": 4.311410133186874e-05, + "loss": 2.3237, + "step": 30590 + }, + { + "epoch": 1.4, + "learning_rate": 4.311181289761545e-05, + "loss": 2.3163, + "step": 30600 + }, + { + "epoch": 1.4, + "learning_rate": 4.3109524463362174e-05, + "loss": 2.1789, + "step": 30610 + }, + { + "epoch": 1.4, + "learning_rate": 4.310723602910889e-05, + "loss": 2.214, + "step": 30620 + }, + { + "epoch": 1.4, + "learning_rate": 4.31049475948556e-05, + "loss": 2.2299, + "step": 30630 + }, + { + "epoch": 1.4, + "learning_rate": 4.3102659160602317e-05, + "loss": 2.218, + "step": 30640 + }, + { + "epoch": 1.4, + "learning_rate": 4.310037072634903e-05, + "loss": 2.2392, + "step": 30650 + }, + { + "epoch": 1.4, + "learning_rate": 4.309808229209575e-05, + "loss": 2.1551, + "step": 30660 + }, + { + "epoch": 1.4, + "learning_rate": 4.3095793857842466e-05, + "loss": 2.0402, + "step": 30670 + }, + { + "epoch": 1.4, + "learning_rate": 4.309350542358918e-05, + "loss": 2.219, + "step": 30680 + }, + { + "epoch": 1.4, + "learning_rate": 4.30912169893359e-05, + "loss": 2.3105, + "step": 30690 + }, + { + "epoch": 1.4, + "learning_rate": 4.3088928555082615e-05, + "loss": 2.3078, + "step": 30700 + }, + { + "epoch": 1.4, + "learning_rate": 4.308664012082933e-05, + "loss": 2.2992, + "step": 30710 + }, + { + "epoch": 1.4, + "learning_rate": 4.308435168657604e-05, + "loss": 2.0893, + "step": 30720 + }, + { + "epoch": 1.4, + "learning_rate": 4.3082063252322764e-05, + "loss": 2.1845, + "step": 30730 + }, + { + "epoch": 1.4, + "learning_rate": 4.307977481806948e-05, + "loss": 2.3656, + "step": 30740 + }, + { + "epoch": 1.4, + "learning_rate": 4.307748638381619e-05, + "loss": 2.1835, + "step": 30750 + }, + { + "epoch": 1.4, + "learning_rate": 4.307519794956291e-05, + "loss": 2.1121, + "step": 30760 + }, + { + "epoch": 1.41, + "learning_rate": 4.307290951530963e-05, + "loss": 2.2853, + "step": 30770 + }, + { + "epoch": 1.41, + "learning_rate": 4.307062108105634e-05, + "loss": 2.2598, + "step": 30780 + }, + { + "epoch": 1.41, + "learning_rate": 4.306833264680306e-05, + "loss": 2.2574, + "step": 30790 + }, + { + "epoch": 1.41, + "learning_rate": 4.3066044212549777e-05, + "loss": 2.143, + "step": 30800 + }, + { + "epoch": 1.41, + "learning_rate": 4.306375577829649e-05, + "loss": 2.1987, + "step": 30810 + }, + { + "epoch": 1.41, + "learning_rate": 4.306146734404321e-05, + "loss": 2.2918, + "step": 30820 + }, + { + "epoch": 1.41, + "learning_rate": 4.305917890978992e-05, + "loss": 2.1506, + "step": 30830 + }, + { + "epoch": 1.41, + "learning_rate": 4.305689047553664e-05, + "loss": 2.3496, + "step": 30840 + }, + { + "epoch": 1.41, + "learning_rate": 4.3054602041283354e-05, + "loss": 2.2199, + "step": 30850 + }, + { + "epoch": 1.41, + "learning_rate": 4.305231360703007e-05, + "loss": 2.1885, + "step": 30860 + }, + { + "epoch": 1.41, + "learning_rate": 4.305002517277679e-05, + "loss": 2.1256, + "step": 30870 + }, + { + "epoch": 1.41, + "learning_rate": 4.30477367385235e-05, + "loss": 2.2452, + "step": 30880 + }, + { + "epoch": 1.41, + "learning_rate": 4.304544830427022e-05, + "loss": 2.198, + "step": 30890 + }, + { + "epoch": 1.41, + "learning_rate": 4.304315987001694e-05, + "loss": 2.1818, + "step": 30900 + }, + { + "epoch": 1.41, + "learning_rate": 4.304087143576365e-05, + "loss": 2.3153, + "step": 30910 + }, + { + "epoch": 1.41, + "learning_rate": 4.3038583001510367e-05, + "loss": 2.311, + "step": 30920 + }, + { + "epoch": 1.41, + "learning_rate": 4.303629456725709e-05, + "loss": 2.1966, + "step": 30930 + }, + { + "epoch": 1.41, + "learning_rate": 4.30340061330038e-05, + "loss": 2.289, + "step": 30940 + }, + { + "epoch": 1.41, + "learning_rate": 4.3031717698750516e-05, + "loss": 2.2648, + "step": 30950 + }, + { + "epoch": 1.41, + "learning_rate": 4.302942926449724e-05, + "loss": 2.1904, + "step": 30960 + }, + { + "epoch": 1.41, + "learning_rate": 4.302714083024395e-05, + "loss": 2.3601, + "step": 30970 + }, + { + "epoch": 1.41, + "learning_rate": 4.3024852395990665e-05, + "loss": 2.2747, + "step": 30980 + }, + { + "epoch": 1.42, + "learning_rate": 4.3022563961737386e-05, + "loss": 2.2631, + "step": 30990 + }, + { + "epoch": 1.42, + "learning_rate": 4.30202755274841e-05, + "loss": 2.2069, + "step": 31000 + }, + { + "epoch": 1.42, + "learning_rate": 4.3017987093230814e-05, + "loss": 2.3265, + "step": 31010 + }, + { + "epoch": 1.42, + "learning_rate": 4.3015698658977535e-05, + "loss": 2.4389, + "step": 31020 + }, + { + "epoch": 1.42, + "learning_rate": 4.301341022472424e-05, + "loss": 2.2436, + "step": 31030 + }, + { + "epoch": 1.42, + "learning_rate": 4.3011121790470956e-05, + "loss": 2.1312, + "step": 31040 + }, + { + "epoch": 1.42, + "learning_rate": 4.300883335621768e-05, + "loss": 2.3308, + "step": 31050 + }, + { + "epoch": 1.42, + "learning_rate": 4.300654492196439e-05, + "loss": 2.114, + "step": 31060 + }, + { + "epoch": 1.42, + "learning_rate": 4.3004256487711106e-05, + "loss": 2.1551, + "step": 31070 + }, + { + "epoch": 1.42, + "learning_rate": 4.3001968053457827e-05, + "loss": 2.1645, + "step": 31080 + }, + { + "epoch": 1.42, + "learning_rate": 4.299967961920454e-05, + "loss": 2.3614, + "step": 31090 + }, + { + "epoch": 1.42, + "learning_rate": 4.2997391184951255e-05, + "loss": 2.2709, + "step": 31100 + }, + { + "epoch": 1.42, + "learning_rate": 4.2995102750697976e-05, + "loss": 2.3112, + "step": 31110 + }, + { + "epoch": 1.42, + "learning_rate": 4.299281431644469e-05, + "loss": 2.4847, + "step": 31120 + }, + { + "epoch": 1.42, + "learning_rate": 4.2990525882191404e-05, + "loss": 2.1995, + "step": 31130 + }, + { + "epoch": 1.42, + "learning_rate": 4.2988237447938125e-05, + "loss": 2.3218, + "step": 31140 + }, + { + "epoch": 1.42, + "learning_rate": 4.298594901368484e-05, + "loss": 1.9954, + "step": 31150 + }, + { + "epoch": 1.42, + "learning_rate": 4.298366057943155e-05, + "loss": 2.1424, + "step": 31160 + }, + { + "epoch": 1.42, + "learning_rate": 4.2981372145178274e-05, + "loss": 2.0122, + "step": 31170 + }, + { + "epoch": 1.42, + "learning_rate": 4.297908371092499e-05, + "loss": 2.2239, + "step": 31180 + }, + { + "epoch": 1.42, + "learning_rate": 4.29767952766717e-05, + "loss": 2.2821, + "step": 31190 + }, + { + "epoch": 1.42, + "learning_rate": 4.297450684241842e-05, + "loss": 2.274, + "step": 31200 + }, + { + "epoch": 1.43, + "learning_rate": 4.297221840816514e-05, + "loss": 2.2991, + "step": 31210 + }, + { + "epoch": 1.43, + "learning_rate": 4.296992997391185e-05, + "loss": 2.1071, + "step": 31220 + }, + { + "epoch": 1.43, + "learning_rate": 4.2967641539658566e-05, + "loss": 2.3295, + "step": 31230 + }, + { + "epoch": 1.43, + "learning_rate": 4.296535310540528e-05, + "loss": 2.2285, + "step": 31240 + }, + { + "epoch": 1.43, + "learning_rate": 4.2963064671152e-05, + "loss": 1.9909, + "step": 31250 + }, + { + "epoch": 1.43, + "learning_rate": 4.2960776236898715e-05, + "loss": 2.1087, + "step": 31260 + }, + { + "epoch": 1.43, + "learning_rate": 4.295848780264543e-05, + "loss": 2.1625, + "step": 31270 + }, + { + "epoch": 1.43, + "learning_rate": 4.295619936839215e-05, + "loss": 2.1927, + "step": 31280 + }, + { + "epoch": 1.43, + "learning_rate": 4.2953910934138864e-05, + "loss": 2.1889, + "step": 31290 + }, + { + "epoch": 1.43, + "learning_rate": 4.295162249988558e-05, + "loss": 2.2267, + "step": 31300 + }, + { + "epoch": 1.43, + "learning_rate": 4.29493340656323e-05, + "loss": 2.2689, + "step": 31310 + }, + { + "epoch": 1.43, + "learning_rate": 4.294704563137901e-05, + "loss": 2.2412, + "step": 31320 + }, + { + "epoch": 1.43, + "learning_rate": 4.294475719712573e-05, + "loss": 2.4487, + "step": 31330 + }, + { + "epoch": 1.43, + "learning_rate": 4.294246876287245e-05, + "loss": 2.3758, + "step": 31340 + }, + { + "epoch": 1.43, + "learning_rate": 4.294018032861916e-05, + "loss": 2.1497, + "step": 31350 + }, + { + "epoch": 1.43, + "learning_rate": 4.2937891894365876e-05, + "loss": 2.1315, + "step": 31360 + }, + { + "epoch": 1.43, + "learning_rate": 4.29356034601126e-05, + "loss": 2.3703, + "step": 31370 + }, + { + "epoch": 1.43, + "learning_rate": 4.293331502585931e-05, + "loss": 2.2968, + "step": 31380 + }, + { + "epoch": 1.43, + "learning_rate": 4.2931026591606026e-05, + "loss": 2.3343, + "step": 31390 + }, + { + "epoch": 1.43, + "learning_rate": 4.2928738157352747e-05, + "loss": 2.2466, + "step": 31400 + }, + { + "epoch": 1.43, + "learning_rate": 4.292644972309946e-05, + "loss": 2.387, + "step": 31410 + }, + { + "epoch": 1.43, + "learning_rate": 4.292416128884617e-05, + "loss": 2.2064, + "step": 31420 + }, + { + "epoch": 1.44, + "learning_rate": 4.292187285459289e-05, + "loss": 2.2675, + "step": 31430 + }, + { + "epoch": 1.44, + "learning_rate": 4.29195844203396e-05, + "loss": 2.1657, + "step": 31440 + }, + { + "epoch": 1.44, + "learning_rate": 4.291729598608632e-05, + "loss": 2.1825, + "step": 31450 + }, + { + "epoch": 1.44, + "learning_rate": 4.291500755183304e-05, + "loss": 2.33, + "step": 31460 + }, + { + "epoch": 1.44, + "learning_rate": 4.291271911757975e-05, + "loss": 2.2375, + "step": 31470 + }, + { + "epoch": 1.44, + "learning_rate": 4.2910430683326466e-05, + "loss": 2.3336, + "step": 31480 + }, + { + "epoch": 1.44, + "learning_rate": 4.290814224907319e-05, + "loss": 2.1744, + "step": 31490 + }, + { + "epoch": 1.44, + "learning_rate": 4.29058538148199e-05, + "loss": 2.1692, + "step": 31500 + }, + { + "epoch": 1.44, + "learning_rate": 4.2903565380566616e-05, + "loss": 2.2882, + "step": 31510 + }, + { + "epoch": 1.44, + "learning_rate": 4.2901276946313337e-05, + "loss": 2.0783, + "step": 31520 + }, + { + "epoch": 1.44, + "learning_rate": 4.289898851206005e-05, + "loss": 2.2574, + "step": 31530 + }, + { + "epoch": 1.44, + "learning_rate": 4.2896700077806765e-05, + "loss": 2.2084, + "step": 31540 + }, + { + "epoch": 1.44, + "learning_rate": 4.2894411643553486e-05, + "loss": 2.2735, + "step": 31550 + }, + { + "epoch": 1.44, + "learning_rate": 4.28921232093002e-05, + "loss": 2.1513, + "step": 31560 + }, + { + "epoch": 1.44, + "learning_rate": 4.2889834775046914e-05, + "loss": 2.118, + "step": 31570 + }, + { + "epoch": 1.44, + "learning_rate": 4.2887546340793635e-05, + "loss": 2.2275, + "step": 31580 + }, + { + "epoch": 1.44, + "learning_rate": 4.288525790654035e-05, + "loss": 2.0872, + "step": 31590 + }, + { + "epoch": 1.44, + "learning_rate": 4.288296947228706e-05, + "loss": 2.2174, + "step": 31600 + }, + { + "epoch": 1.44, + "learning_rate": 4.2880681038033784e-05, + "loss": 2.1212, + "step": 31610 + }, + { + "epoch": 1.44, + "learning_rate": 4.287839260378049e-05, + "loss": 2.3599, + "step": 31620 + }, + { + "epoch": 1.44, + "learning_rate": 4.287610416952721e-05, + "loss": 2.2492, + "step": 31630 + }, + { + "epoch": 1.44, + "learning_rate": 4.2873815735273926e-05, + "loss": 2.2715, + "step": 31640 + }, + { + "epoch": 1.45, + "learning_rate": 4.287152730102064e-05, + "loss": 2.2337, + "step": 31650 + }, + { + "epoch": 1.45, + "learning_rate": 4.286923886676736e-05, + "loss": 2.173, + "step": 31660 + }, + { + "epoch": 1.45, + "learning_rate": 4.2866950432514076e-05, + "loss": 2.2317, + "step": 31670 + }, + { + "epoch": 1.45, + "learning_rate": 4.286466199826079e-05, + "loss": 2.2065, + "step": 31680 + }, + { + "epoch": 1.45, + "learning_rate": 4.286237356400751e-05, + "loss": 2.3149, + "step": 31690 + }, + { + "epoch": 1.45, + "learning_rate": 4.2860085129754225e-05, + "loss": 2.3307, + "step": 31700 + }, + { + "epoch": 1.45, + "learning_rate": 4.285779669550094e-05, + "loss": 2.31, + "step": 31710 + }, + { + "epoch": 1.45, + "learning_rate": 4.285550826124766e-05, + "loss": 2.145, + "step": 31720 + }, + { + "epoch": 1.45, + "learning_rate": 4.2853219826994374e-05, + "loss": 2.2352, + "step": 31730 + }, + { + "epoch": 1.45, + "learning_rate": 4.285093139274109e-05, + "loss": 2.2666, + "step": 31740 + }, + { + "epoch": 1.45, + "learning_rate": 4.284864295848781e-05, + "loss": 2.1685, + "step": 31750 + }, + { + "epoch": 1.45, + "learning_rate": 4.284635452423452e-05, + "loss": 2.1901, + "step": 31760 + }, + { + "epoch": 1.45, + "learning_rate": 4.284406608998124e-05, + "loss": 2.2585, + "step": 31770 + }, + { + "epoch": 1.45, + "learning_rate": 4.284177765572796e-05, + "loss": 2.0612, + "step": 31780 + }, + { + "epoch": 1.45, + "learning_rate": 4.283948922147467e-05, + "loss": 2.1315, + "step": 31790 + }, + { + "epoch": 1.45, + "learning_rate": 4.2837200787221386e-05, + "loss": 2.166, + "step": 31800 + }, + { + "epoch": 1.45, + "learning_rate": 4.283491235296811e-05, + "loss": 2.2068, + "step": 31810 + }, + { + "epoch": 1.45, + "learning_rate": 4.2832623918714815e-05, + "loss": 2.3342, + "step": 31820 + }, + { + "epoch": 1.45, + "learning_rate": 4.283033548446153e-05, + "loss": 2.1124, + "step": 31830 + }, + { + "epoch": 1.45, + "learning_rate": 4.282804705020825e-05, + "loss": 2.3784, + "step": 31840 + }, + { + "epoch": 1.45, + "learning_rate": 4.2825758615954964e-05, + "loss": 2.3891, + "step": 31850 + }, + { + "epoch": 1.45, + "learning_rate": 4.282347018170168e-05, + "loss": 2.11, + "step": 31860 + }, + { + "epoch": 1.46, + "learning_rate": 4.28211817474484e-05, + "loss": 2.2112, + "step": 31870 + }, + { + "epoch": 1.46, + "learning_rate": 4.281889331319511e-05, + "loss": 2.2235, + "step": 31880 + }, + { + "epoch": 1.46, + "learning_rate": 4.281660487894183e-05, + "loss": 2.289, + "step": 31890 + }, + { + "epoch": 1.46, + "learning_rate": 4.281431644468855e-05, + "loss": 2.3127, + "step": 31900 + }, + { + "epoch": 1.46, + "learning_rate": 4.281202801043526e-05, + "loss": 2.2106, + "step": 31910 + }, + { + "epoch": 1.46, + "learning_rate": 4.2809739576181976e-05, + "loss": 2.1248, + "step": 31920 + }, + { + "epoch": 1.46, + "learning_rate": 4.28074511419287e-05, + "loss": 2.3596, + "step": 31930 + }, + { + "epoch": 1.46, + "learning_rate": 4.280516270767541e-05, + "loss": 2.3839, + "step": 31940 + }, + { + "epoch": 1.46, + "learning_rate": 4.2802874273422126e-05, + "loss": 2.2054, + "step": 31950 + }, + { + "epoch": 1.46, + "learning_rate": 4.2800585839168846e-05, + "loss": 2.1694, + "step": 31960 + }, + { + "epoch": 1.46, + "learning_rate": 4.279829740491556e-05, + "loss": 2.4445, + "step": 31970 + }, + { + "epoch": 1.46, + "learning_rate": 4.2796008970662275e-05, + "loss": 2.2234, + "step": 31980 + }, + { + "epoch": 1.46, + "learning_rate": 4.2793720536408996e-05, + "loss": 2.2469, + "step": 31990 + }, + { + "epoch": 1.46, + "learning_rate": 4.279143210215571e-05, + "loss": 2.2219, + "step": 32000 + }, + { + "epoch": 1.46, + "learning_rate": 4.278914366790242e-05, + "loss": 2.0641, + "step": 32010 + }, + { + "epoch": 1.46, + "learning_rate": 4.278685523364914e-05, + "loss": 2.2913, + "step": 32020 + }, + { + "epoch": 1.46, + "learning_rate": 4.278456679939585e-05, + "loss": 2.2551, + "step": 32030 + }, + { + "epoch": 1.46, + "learning_rate": 4.2782278365142566e-05, + "loss": 2.1322, + "step": 32040 + }, + { + "epoch": 1.46, + "learning_rate": 4.277998993088929e-05, + "loss": 2.2193, + "step": 32050 + }, + { + "epoch": 1.46, + "learning_rate": 4.2777701496636e-05, + "loss": 2.2741, + "step": 32060 + }, + { + "epoch": 1.46, + "learning_rate": 4.2775413062382716e-05, + "loss": 2.1849, + "step": 32070 + }, + { + "epoch": 1.46, + "learning_rate": 4.2773124628129436e-05, + "loss": 2.2053, + "step": 32080 + }, + { + "epoch": 1.47, + "learning_rate": 4.277083619387615e-05, + "loss": 2.1136, + "step": 32090 + }, + { + "epoch": 1.47, + "learning_rate": 4.2768547759622865e-05, + "loss": 2.3134, + "step": 32100 + }, + { + "epoch": 1.47, + "learning_rate": 4.2766259325369586e-05, + "loss": 2.2523, + "step": 32110 + }, + { + "epoch": 1.47, + "learning_rate": 4.27639708911163e-05, + "loss": 2.2155, + "step": 32120 + }, + { + "epoch": 1.47, + "learning_rate": 4.2761682456863014e-05, + "loss": 2.2054, + "step": 32130 + }, + { + "epoch": 1.47, + "learning_rate": 4.2759394022609735e-05, + "loss": 2.3381, + "step": 32140 + }, + { + "epoch": 1.47, + "learning_rate": 4.275710558835645e-05, + "loss": 2.2805, + "step": 32150 + }, + { + "epoch": 1.47, + "learning_rate": 4.275481715410316e-05, + "loss": 2.1963, + "step": 32160 + }, + { + "epoch": 1.47, + "learning_rate": 4.2752528719849884e-05, + "loss": 2.2045, + "step": 32170 + }, + { + "epoch": 1.47, + "learning_rate": 4.27502402855966e-05, + "loss": 2.1517, + "step": 32180 + }, + { + "epoch": 1.47, + "learning_rate": 4.274795185134331e-05, + "loss": 2.1496, + "step": 32190 + }, + { + "epoch": 1.47, + "learning_rate": 4.274566341709003e-05, + "loss": 2.3078, + "step": 32200 + }, + { + "epoch": 1.47, + "learning_rate": 4.274337498283674e-05, + "loss": 2.0796, + "step": 32210 + }, + { + "epoch": 1.47, + "learning_rate": 4.274108654858346e-05, + "loss": 2.3359, + "step": 32220 + }, + { + "epoch": 1.47, + "learning_rate": 4.2738798114330176e-05, + "loss": 2.1233, + "step": 32230 + }, + { + "epoch": 1.47, + "learning_rate": 4.273650968007689e-05, + "loss": 2.2771, + "step": 32240 + }, + { + "epoch": 1.47, + "learning_rate": 4.273422124582361e-05, + "loss": 2.1216, + "step": 32250 + }, + { + "epoch": 1.47, + "learning_rate": 4.2731932811570325e-05, + "loss": 2.2559, + "step": 32260 + }, + { + "epoch": 1.47, + "learning_rate": 4.272964437731704e-05, + "loss": 2.2291, + "step": 32270 + }, + { + "epoch": 1.47, + "learning_rate": 4.272735594306376e-05, + "loss": 2.1884, + "step": 32280 + }, + { + "epoch": 1.47, + "learning_rate": 4.2725067508810474e-05, + "loss": 2.0338, + "step": 32290 + }, + { + "epoch": 1.47, + "learning_rate": 4.272277907455719e-05, + "loss": 2.162, + "step": 32300 + }, + { + "epoch": 1.48, + "learning_rate": 4.272049064030391e-05, + "loss": 2.153, + "step": 32310 + }, + { + "epoch": 1.48, + "learning_rate": 4.271820220605062e-05, + "loss": 2.2952, + "step": 32320 + }, + { + "epoch": 1.48, + "learning_rate": 4.271591377179734e-05, + "loss": 2.3006, + "step": 32330 + }, + { + "epoch": 1.48, + "learning_rate": 4.271362533754406e-05, + "loss": 2.1166, + "step": 32340 + }, + { + "epoch": 1.48, + "learning_rate": 4.271133690329077e-05, + "loss": 2.2669, + "step": 32350 + }, + { + "epoch": 1.48, + "learning_rate": 4.2709048469037486e-05, + "loss": 2.1376, + "step": 32360 + }, + { + "epoch": 1.48, + "learning_rate": 4.270676003478421e-05, + "loss": 2.1991, + "step": 32370 + }, + { + "epoch": 1.48, + "learning_rate": 4.270447160053092e-05, + "loss": 2.0621, + "step": 32380 + }, + { + "epoch": 1.48, + "learning_rate": 4.2702183166277636e-05, + "loss": 2.4651, + "step": 32390 + }, + { + "epoch": 1.48, + "learning_rate": 4.2699894732024356e-05, + "loss": 2.3465, + "step": 32400 + }, + { + "epoch": 1.48, + "learning_rate": 4.2697606297771064e-05, + "loss": 2.3222, + "step": 32410 + }, + { + "epoch": 1.48, + "learning_rate": 4.269531786351778e-05, + "loss": 2.0481, + "step": 32420 + }, + { + "epoch": 1.48, + "learning_rate": 4.26930294292645e-05, + "loss": 2.2245, + "step": 32430 + }, + { + "epoch": 1.48, + "learning_rate": 4.269074099501121e-05, + "loss": 2.1254, + "step": 32440 + }, + { + "epoch": 1.48, + "learning_rate": 4.268845256075793e-05, + "loss": 2.2384, + "step": 32450 + }, + { + "epoch": 1.48, + "learning_rate": 4.268616412650465e-05, + "loss": 2.1082, + "step": 32460 + }, + { + "epoch": 1.48, + "learning_rate": 4.268387569225136e-05, + "loss": 2.1515, + "step": 32470 + }, + { + "epoch": 1.48, + "learning_rate": 4.2681587257998076e-05, + "loss": 2.1837, + "step": 32480 + }, + { + "epoch": 1.48, + "learning_rate": 4.26792988237448e-05, + "loss": 2.254, + "step": 32490 + }, + { + "epoch": 1.48, + "learning_rate": 4.267701038949151e-05, + "loss": 2.3806, + "step": 32500 + }, + { + "epoch": 1.48, + "learning_rate": 4.2674721955238225e-05, + "loss": 2.1795, + "step": 32510 + }, + { + "epoch": 1.48, + "learning_rate": 4.2672433520984946e-05, + "loss": 2.1577, + "step": 32520 + }, + { + "epoch": 1.49, + "learning_rate": 4.267014508673166e-05, + "loss": 2.1453, + "step": 32530 + }, + { + "epoch": 1.49, + "learning_rate": 4.2667856652478375e-05, + "loss": 2.1211, + "step": 32540 + }, + { + "epoch": 1.49, + "learning_rate": 4.2665568218225096e-05, + "loss": 2.3648, + "step": 32550 + }, + { + "epoch": 1.49, + "learning_rate": 4.266327978397181e-05, + "loss": 2.2016, + "step": 32560 + }, + { + "epoch": 1.49, + "learning_rate": 4.2660991349718524e-05, + "loss": 2.2246, + "step": 32570 + }, + { + "epoch": 1.49, + "learning_rate": 4.2658702915465245e-05, + "loss": 2.2008, + "step": 32580 + }, + { + "epoch": 1.49, + "learning_rate": 4.265641448121196e-05, + "loss": 2.0195, + "step": 32590 + }, + { + "epoch": 1.49, + "learning_rate": 4.265412604695867e-05, + "loss": 2.3262, + "step": 32600 + }, + { + "epoch": 1.49, + "learning_rate": 4.265183761270539e-05, + "loss": 2.2448, + "step": 32610 + }, + { + "epoch": 1.49, + "learning_rate": 4.26495491784521e-05, + "loss": 2.1386, + "step": 32620 + }, + { + "epoch": 1.49, + "learning_rate": 4.264726074419882e-05, + "loss": 2.1245, + "step": 32630 + }, + { + "epoch": 1.49, + "learning_rate": 4.2644972309945536e-05, + "loss": 2.3334, + "step": 32640 + }, + { + "epoch": 1.49, + "learning_rate": 4.264268387569225e-05, + "loss": 2.2253, + "step": 32650 + }, + { + "epoch": 1.49, + "learning_rate": 4.264039544143897e-05, + "loss": 2.2033, + "step": 32660 + }, + { + "epoch": 1.49, + "learning_rate": 4.2638107007185685e-05, + "loss": 2.2252, + "step": 32670 + }, + { + "epoch": 1.49, + "learning_rate": 4.26358185729324e-05, + "loss": 2.2026, + "step": 32680 + }, + { + "epoch": 1.49, + "learning_rate": 4.263353013867912e-05, + "loss": 2.0728, + "step": 32690 + }, + { + "epoch": 1.49, + "learning_rate": 4.2631241704425835e-05, + "loss": 2.2078, + "step": 32700 + }, + { + "epoch": 1.49, + "learning_rate": 4.262895327017255e-05, + "loss": 2.3617, + "step": 32710 + }, + { + "epoch": 1.49, + "learning_rate": 4.262666483591927e-05, + "loss": 2.1821, + "step": 32720 + }, + { + "epoch": 1.49, + "learning_rate": 4.2624376401665984e-05, + "loss": 2.247, + "step": 32730 + }, + { + "epoch": 1.49, + "learning_rate": 4.26220879674127e-05, + "loss": 2.1951, + "step": 32740 + }, + { + "epoch": 1.5, + "learning_rate": 4.261979953315942e-05, + "loss": 2.1331, + "step": 32750 + }, + { + "epoch": 1.5, + "learning_rate": 4.261751109890613e-05, + "loss": 2.1596, + "step": 32760 + }, + { + "epoch": 1.5, + "learning_rate": 4.261522266465285e-05, + "loss": 2.1194, + "step": 32770 + }, + { + "epoch": 1.5, + "learning_rate": 4.261293423039957e-05, + "loss": 2.3136, + "step": 32780 + }, + { + "epoch": 1.5, + "learning_rate": 4.261064579614628e-05, + "loss": 2.2408, + "step": 32790 + }, + { + "epoch": 1.5, + "learning_rate": 4.260835736189299e-05, + "loss": 2.1984, + "step": 32800 + }, + { + "epoch": 1.5, + "learning_rate": 4.260606892763971e-05, + "loss": 2.2737, + "step": 32810 + }, + { + "epoch": 1.5, + "learning_rate": 4.2603780493386425e-05, + "loss": 2.0658, + "step": 32820 + }, + { + "epoch": 1.5, + "learning_rate": 4.260149205913314e-05, + "loss": 2.1589, + "step": 32830 + }, + { + "epoch": 1.5, + "learning_rate": 4.259920362487986e-05, + "loss": 2.0915, + "step": 32840 + }, + { + "epoch": 1.5, + "learning_rate": 4.2596915190626574e-05, + "loss": 2.3036, + "step": 32850 + }, + { + "epoch": 1.5, + "learning_rate": 4.259462675637329e-05, + "loss": 2.3611, + "step": 32860 + }, + { + "epoch": 1.5, + "learning_rate": 4.259233832212001e-05, + "loss": 2.3285, + "step": 32870 + }, + { + "epoch": 1.5, + "learning_rate": 4.259004988786672e-05, + "loss": 2.2063, + "step": 32880 + }, + { + "epoch": 1.5, + "learning_rate": 4.258776145361344e-05, + "loss": 2.2708, + "step": 32890 + }, + { + "epoch": 1.5, + "learning_rate": 4.258547301936016e-05, + "loss": 2.1761, + "step": 32900 + }, + { + "epoch": 1.5, + "learning_rate": 4.258318458510687e-05, + "loss": 2.1592, + "step": 32910 + }, + { + "epoch": 1.5, + "learning_rate": 4.2580896150853586e-05, + "loss": 2.2499, + "step": 32920 + }, + { + "epoch": 1.5, + "learning_rate": 4.257860771660031e-05, + "loss": 2.2562, + "step": 32930 + }, + { + "epoch": 1.5, + "learning_rate": 4.257631928234702e-05, + "loss": 2.2092, + "step": 32940 + }, + { + "epoch": 1.5, + "learning_rate": 4.2574030848093735e-05, + "loss": 2.1096, + "step": 32950 + }, + { + "epoch": 1.51, + "learning_rate": 4.2571742413840456e-05, + "loss": 2.2354, + "step": 32960 + }, + { + "epoch": 1.51, + "learning_rate": 4.256945397958717e-05, + "loss": 2.2108, + "step": 32970 + }, + { + "epoch": 1.51, + "learning_rate": 4.2567165545333885e-05, + "loss": 2.287, + "step": 32980 + }, + { + "epoch": 1.51, + "learning_rate": 4.2564877111080606e-05, + "loss": 2.2402, + "step": 32990 + }, + { + "epoch": 1.51, + "learning_rate": 4.256258867682731e-05, + "loss": 2.3406, + "step": 33000 + }, + { + "epoch": 1.51, + "learning_rate": 4.2560300242574034e-05, + "loss": 2.3176, + "step": 33010 + }, + { + "epoch": 1.51, + "learning_rate": 4.255801180832075e-05, + "loss": 2.161, + "step": 33020 + }, + { + "epoch": 1.51, + "learning_rate": 4.255572337406746e-05, + "loss": 2.1451, + "step": 33030 + }, + { + "epoch": 1.51, + "learning_rate": 4.255343493981418e-05, + "loss": 2.235, + "step": 33040 + }, + { + "epoch": 1.51, + "learning_rate": 4.25511465055609e-05, + "loss": 2.1922, + "step": 33050 + }, + { + "epoch": 1.51, + "learning_rate": 4.254885807130761e-05, + "loss": 2.2576, + "step": 33060 + }, + { + "epoch": 1.51, + "learning_rate": 4.2546569637054325e-05, + "loss": 2.1162, + "step": 33070 + }, + { + "epoch": 1.51, + "learning_rate": 4.2544281202801046e-05, + "loss": 2.3137, + "step": 33080 + }, + { + "epoch": 1.51, + "learning_rate": 4.254199276854776e-05, + "loss": 2.2528, + "step": 33090 + }, + { + "epoch": 1.51, + "learning_rate": 4.2539704334294475e-05, + "loss": 2.1501, + "step": 33100 + }, + { + "epoch": 1.51, + "learning_rate": 4.2537415900041195e-05, + "loss": 2.0808, + "step": 33110 + }, + { + "epoch": 1.51, + "learning_rate": 4.253512746578791e-05, + "loss": 2.0351, + "step": 33120 + }, + { + "epoch": 1.51, + "learning_rate": 4.2532839031534624e-05, + "loss": 2.2041, + "step": 33130 + }, + { + "epoch": 1.51, + "learning_rate": 4.2530550597281345e-05, + "loss": 2.2334, + "step": 33140 + }, + { + "epoch": 1.51, + "learning_rate": 4.252826216302806e-05, + "loss": 2.347, + "step": 33150 + }, + { + "epoch": 1.51, + "learning_rate": 4.252597372877477e-05, + "loss": 2.1335, + "step": 33160 + }, + { + "epoch": 1.51, + "learning_rate": 4.2523685294521494e-05, + "loss": 2.1263, + "step": 33170 + }, + { + "epoch": 1.52, + "learning_rate": 4.252139686026821e-05, + "loss": 2.3133, + "step": 33180 + }, + { + "epoch": 1.52, + "learning_rate": 4.251910842601492e-05, + "loss": 2.4409, + "step": 33190 + }, + { + "epoch": 1.52, + "learning_rate": 4.2516819991761636e-05, + "loss": 2.1563, + "step": 33200 + }, + { + "epoch": 1.52, + "learning_rate": 4.251453155750835e-05, + "loss": 2.1349, + "step": 33210 + }, + { + "epoch": 1.52, + "learning_rate": 4.251224312325507e-05, + "loss": 2.1547, + "step": 33220 + }, + { + "epoch": 1.52, + "learning_rate": 4.2509954689001785e-05, + "loss": 2.0367, + "step": 33230 + }, + { + "epoch": 1.52, + "learning_rate": 4.25076662547485e-05, + "loss": 2.245, + "step": 33240 + }, + { + "epoch": 1.52, + "learning_rate": 4.250537782049522e-05, + "loss": 2.1288, + "step": 33250 + }, + { + "epoch": 1.52, + "learning_rate": 4.2503089386241935e-05, + "loss": 2.2669, + "step": 33260 + }, + { + "epoch": 1.52, + "learning_rate": 4.250080095198865e-05, + "loss": 2.0192, + "step": 33270 + }, + { + "epoch": 1.52, + "learning_rate": 4.249851251773537e-05, + "loss": 2.2243, + "step": 33280 + }, + { + "epoch": 1.52, + "learning_rate": 4.2496224083482084e-05, + "loss": 2.3135, + "step": 33290 + }, + { + "epoch": 1.52, + "learning_rate": 4.24939356492288e-05, + "loss": 2.1103, + "step": 33300 + }, + { + "epoch": 1.52, + "learning_rate": 4.249164721497552e-05, + "loss": 2.1592, + "step": 33310 + }, + { + "epoch": 1.52, + "learning_rate": 4.248935878072223e-05, + "loss": 2.1977, + "step": 33320 + }, + { + "epoch": 1.52, + "learning_rate": 4.248707034646895e-05, + "loss": 2.2873, + "step": 33330 + }, + { + "epoch": 1.52, + "learning_rate": 4.248478191221567e-05, + "loss": 2.2804, + "step": 33340 + }, + { + "epoch": 1.52, + "learning_rate": 4.248249347796238e-05, + "loss": 2.268, + "step": 33350 + }, + { + "epoch": 1.52, + "learning_rate": 4.2480205043709096e-05, + "loss": 2.0027, + "step": 33360 + }, + { + "epoch": 1.52, + "learning_rate": 4.247791660945582e-05, + "loss": 2.2616, + "step": 33370 + }, + { + "epoch": 1.52, + "learning_rate": 4.247562817520253e-05, + "loss": 2.1679, + "step": 33380 + }, + { + "epoch": 1.52, + "learning_rate": 4.2473339740949245e-05, + "loss": 2.0082, + "step": 33390 + }, + { + "epoch": 1.53, + "learning_rate": 4.247105130669596e-05, + "loss": 2.1035, + "step": 33400 + }, + { + "epoch": 1.53, + "learning_rate": 4.2468762872442674e-05, + "loss": 2.2817, + "step": 33410 + }, + { + "epoch": 1.53, + "learning_rate": 4.246647443818939e-05, + "loss": 2.2879, + "step": 33420 + }, + { + "epoch": 1.53, + "learning_rate": 4.246418600393611e-05, + "loss": 2.1779, + "step": 33430 + }, + { + "epoch": 1.53, + "learning_rate": 4.246189756968282e-05, + "loss": 2.3953, + "step": 33440 + }, + { + "epoch": 1.53, + "learning_rate": 4.245960913542954e-05, + "loss": 2.1585, + "step": 33450 + }, + { + "epoch": 1.53, + "learning_rate": 4.245732070117626e-05, + "loss": 2.2779, + "step": 33460 + }, + { + "epoch": 1.53, + "learning_rate": 4.245503226692297e-05, + "loss": 2.228, + "step": 33470 + }, + { + "epoch": 1.53, + "learning_rate": 4.2452743832669686e-05, + "loss": 2.1003, + "step": 33480 + }, + { + "epoch": 1.53, + "learning_rate": 4.245045539841641e-05, + "loss": 2.2574, + "step": 33490 + }, + { + "epoch": 1.53, + "learning_rate": 4.244816696416312e-05, + "loss": 2.0659, + "step": 33500 + }, + { + "epoch": 1.53, + "learning_rate": 4.2445878529909835e-05, + "loss": 2.2704, + "step": 33510 + }, + { + "epoch": 1.53, + "learning_rate": 4.2443590095656556e-05, + "loss": 2.2114, + "step": 33520 + }, + { + "epoch": 1.53, + "learning_rate": 4.244130166140327e-05, + "loss": 2.3033, + "step": 33530 + }, + { + "epoch": 1.53, + "learning_rate": 4.2439013227149985e-05, + "loss": 2.0605, + "step": 33540 + }, + { + "epoch": 1.53, + "learning_rate": 4.2436724792896705e-05, + "loss": 2.0271, + "step": 33550 + }, + { + "epoch": 1.53, + "learning_rate": 4.243443635864342e-05, + "loss": 2.305, + "step": 33560 + }, + { + "epoch": 1.53, + "learning_rate": 4.2432147924390134e-05, + "loss": 2.2433, + "step": 33570 + }, + { + "epoch": 1.53, + "learning_rate": 4.2429859490136855e-05, + "loss": 2.2109, + "step": 33580 + }, + { + "epoch": 1.53, + "learning_rate": 4.242757105588356e-05, + "loss": 2.1544, + "step": 33590 + }, + { + "epoch": 1.53, + "learning_rate": 4.242528262163028e-05, + "loss": 2.1839, + "step": 33600 + }, + { + "epoch": 1.53, + "learning_rate": 4.2422994187377e-05, + "loss": 2.2283, + "step": 33610 + }, + { + "epoch": 1.54, + "learning_rate": 4.242070575312371e-05, + "loss": 2.2768, + "step": 33620 + }, + { + "epoch": 1.54, + "learning_rate": 4.241841731887043e-05, + "loss": 2.0847, + "step": 33630 + }, + { + "epoch": 1.54, + "learning_rate": 4.2416128884617146e-05, + "loss": 2.0689, + "step": 33640 + }, + { + "epoch": 1.54, + "learning_rate": 4.241384045036386e-05, + "loss": 2.2655, + "step": 33650 + }, + { + "epoch": 1.54, + "learning_rate": 4.241155201611058e-05, + "loss": 2.2705, + "step": 33660 + }, + { + "epoch": 1.54, + "learning_rate": 4.2409263581857295e-05, + "loss": 2.2235, + "step": 33670 + }, + { + "epoch": 1.54, + "learning_rate": 4.240697514760401e-05, + "loss": 2.251, + "step": 33680 + }, + { + "epoch": 1.54, + "learning_rate": 4.240468671335073e-05, + "loss": 2.1739, + "step": 33690 + }, + { + "epoch": 1.54, + "learning_rate": 4.2402398279097445e-05, + "loss": 2.2423, + "step": 33700 + }, + { + "epoch": 1.54, + "learning_rate": 4.240010984484416e-05, + "loss": 2.1197, + "step": 33710 + }, + { + "epoch": 1.54, + "learning_rate": 4.239782141059088e-05, + "loss": 2.2901, + "step": 33720 + }, + { + "epoch": 1.54, + "learning_rate": 4.2395532976337594e-05, + "loss": 2.186, + "step": 33730 + }, + { + "epoch": 1.54, + "learning_rate": 4.239324454208431e-05, + "loss": 2.0998, + "step": 33740 + }, + { + "epoch": 1.54, + "learning_rate": 4.239095610783103e-05, + "loss": 2.1911, + "step": 33750 + }, + { + "epoch": 1.54, + "learning_rate": 4.238866767357774e-05, + "loss": 2.308, + "step": 33760 + }, + { + "epoch": 1.54, + "learning_rate": 4.238637923932446e-05, + "loss": 2.2337, + "step": 33770 + }, + { + "epoch": 1.54, + "learning_rate": 4.238409080507118e-05, + "loss": 2.1828, + "step": 33780 + }, + { + "epoch": 1.54, + "learning_rate": 4.2381802370817885e-05, + "loss": 2.1256, + "step": 33790 + }, + { + "epoch": 1.54, + "learning_rate": 4.23795139365646e-05, + "loss": 2.1925, + "step": 33800 + }, + { + "epoch": 1.54, + "learning_rate": 4.237722550231132e-05, + "loss": 2.3036, + "step": 33810 + }, + { + "epoch": 1.54, + "learning_rate": 4.2374937068058034e-05, + "loss": 2.2225, + "step": 33820 + }, + { + "epoch": 1.54, + "learning_rate": 4.237264863380475e-05, + "loss": 2.1024, + "step": 33830 + }, + { + "epoch": 1.55, + "learning_rate": 4.237036019955147e-05, + "loss": 2.2119, + "step": 33840 + }, + { + "epoch": 1.55, + "learning_rate": 4.2368071765298184e-05, + "loss": 2.2511, + "step": 33850 + }, + { + "epoch": 1.55, + "learning_rate": 4.23657833310449e-05, + "loss": 2.2003, + "step": 33860 + }, + { + "epoch": 1.55, + "learning_rate": 4.236349489679162e-05, + "loss": 2.0514, + "step": 33870 + }, + { + "epoch": 1.55, + "learning_rate": 4.236120646253833e-05, + "loss": 2.2364, + "step": 33880 + }, + { + "epoch": 1.55, + "learning_rate": 4.235891802828505e-05, + "loss": 2.2835, + "step": 33890 + }, + { + "epoch": 1.55, + "learning_rate": 4.235662959403177e-05, + "loss": 2.043, + "step": 33900 + }, + { + "epoch": 1.55, + "learning_rate": 4.235434115977848e-05, + "loss": 2.3057, + "step": 33910 + }, + { + "epoch": 1.55, + "learning_rate": 4.2352052725525196e-05, + "loss": 2.2361, + "step": 33920 + }, + { + "epoch": 1.55, + "learning_rate": 4.234976429127192e-05, + "loss": 2.3479, + "step": 33930 + }, + { + "epoch": 1.55, + "learning_rate": 4.234747585701863e-05, + "loss": 2.2349, + "step": 33940 + }, + { + "epoch": 1.55, + "learning_rate": 4.2345187422765345e-05, + "loss": 2.2076, + "step": 33950 + }, + { + "epoch": 1.55, + "learning_rate": 4.2342898988512066e-05, + "loss": 2.1177, + "step": 33960 + }, + { + "epoch": 1.55, + "learning_rate": 4.234061055425878e-05, + "loss": 2.2607, + "step": 33970 + }, + { + "epoch": 1.55, + "learning_rate": 4.2338322120005494e-05, + "loss": 2.271, + "step": 33980 + }, + { + "epoch": 1.55, + "learning_rate": 4.233603368575221e-05, + "loss": 2.2826, + "step": 33990 + }, + { + "epoch": 1.55, + "learning_rate": 4.233374525149892e-05, + "loss": 2.2464, + "step": 34000 + }, + { + "epoch": 1.55, + "learning_rate": 4.2331456817245644e-05, + "loss": 2.1265, + "step": 34010 + }, + { + "epoch": 1.55, + "learning_rate": 4.232916838299236e-05, + "loss": 2.1062, + "step": 34020 + }, + { + "epoch": 1.55, + "learning_rate": 4.232687994873907e-05, + "loss": 2.1647, + "step": 34030 + }, + { + "epoch": 1.55, + "learning_rate": 4.232459151448579e-05, + "loss": 2.1633, + "step": 34040 + }, + { + "epoch": 1.55, + "learning_rate": 4.232230308023251e-05, + "loss": 2.2624, + "step": 34050 + }, + { + "epoch": 1.56, + "learning_rate": 4.232001464597922e-05, + "loss": 2.3734, + "step": 34060 + }, + { + "epoch": 1.56, + "learning_rate": 4.231772621172594e-05, + "loss": 2.2075, + "step": 34070 + }, + { + "epoch": 1.56, + "learning_rate": 4.2315437777472656e-05, + "loss": 2.2135, + "step": 34080 + }, + { + "epoch": 1.56, + "learning_rate": 4.231314934321937e-05, + "loss": 2.2792, + "step": 34090 + }, + { + "epoch": 1.56, + "learning_rate": 4.231086090896609e-05, + "loss": 2.1923, + "step": 34100 + }, + { + "epoch": 1.56, + "learning_rate": 4.2308572474712805e-05, + "loss": 2.2663, + "step": 34110 + }, + { + "epoch": 1.56, + "learning_rate": 4.230628404045952e-05, + "loss": 2.2341, + "step": 34120 + }, + { + "epoch": 1.56, + "learning_rate": 4.230399560620624e-05, + "loss": 2.1135, + "step": 34130 + }, + { + "epoch": 1.56, + "learning_rate": 4.2301707171952955e-05, + "loss": 2.1812, + "step": 34140 + }, + { + "epoch": 1.56, + "learning_rate": 4.229941873769967e-05, + "loss": 2.2677, + "step": 34150 + }, + { + "epoch": 1.56, + "learning_rate": 4.229713030344639e-05, + "loss": 2.1136, + "step": 34160 + }, + { + "epoch": 1.56, + "learning_rate": 4.2294841869193104e-05, + "loss": 2.4226, + "step": 34170 + }, + { + "epoch": 1.56, + "learning_rate": 4.229255343493982e-05, + "loss": 2.2432, + "step": 34180 + }, + { + "epoch": 1.56, + "learning_rate": 4.229026500068653e-05, + "loss": 2.1768, + "step": 34190 + }, + { + "epoch": 1.56, + "learning_rate": 4.2287976566433246e-05, + "loss": 2.2269, + "step": 34200 + }, + { + "epoch": 1.56, + "learning_rate": 4.228568813217996e-05, + "loss": 2.222, + "step": 34210 + }, + { + "epoch": 1.56, + "learning_rate": 4.228339969792668e-05, + "loss": 2.1754, + "step": 34220 + }, + { + "epoch": 1.56, + "learning_rate": 4.2281111263673395e-05, + "loss": 2.3036, + "step": 34230 + }, + { + "epoch": 1.56, + "learning_rate": 4.227882282942011e-05, + "loss": 2.2271, + "step": 34240 + }, + { + "epoch": 1.56, + "learning_rate": 4.227653439516683e-05, + "loss": 2.2327, + "step": 34250 + }, + { + "epoch": 1.56, + "learning_rate": 4.2274245960913544e-05, + "loss": 2.2574, + "step": 34260 + }, + { + "epoch": 1.56, + "learning_rate": 4.227195752666026e-05, + "loss": 2.2664, + "step": 34270 + }, + { + "epoch": 1.57, + "learning_rate": 4.226966909240698e-05, + "loss": 2.2366, + "step": 34280 + }, + { + "epoch": 1.57, + "learning_rate": 4.2267380658153694e-05, + "loss": 2.1192, + "step": 34290 + }, + { + "epoch": 1.57, + "learning_rate": 4.226509222390041e-05, + "loss": 2.1495, + "step": 34300 + }, + { + "epoch": 1.57, + "learning_rate": 4.226280378964713e-05, + "loss": 2.2084, + "step": 34310 + }, + { + "epoch": 1.57, + "learning_rate": 4.226051535539384e-05, + "loss": 2.1518, + "step": 34320 + }, + { + "epoch": 1.57, + "learning_rate": 4.225822692114056e-05, + "loss": 2.4809, + "step": 34330 + }, + { + "epoch": 1.57, + "learning_rate": 4.225593848688728e-05, + "loss": 2.2692, + "step": 34340 + }, + { + "epoch": 1.57, + "learning_rate": 4.225365005263399e-05, + "loss": 2.1326, + "step": 34350 + }, + { + "epoch": 1.57, + "learning_rate": 4.2251361618380706e-05, + "loss": 2.1786, + "step": 34360 + }, + { + "epoch": 1.57, + "learning_rate": 4.224907318412743e-05, + "loss": 2.1346, + "step": 34370 + }, + { + "epoch": 1.57, + "learning_rate": 4.2246784749874134e-05, + "loss": 2.1845, + "step": 34380 + }, + { + "epoch": 1.57, + "learning_rate": 4.224449631562085e-05, + "loss": 2.4012, + "step": 34390 + }, + { + "epoch": 1.57, + "learning_rate": 4.224220788136757e-05, + "loss": 2.2577, + "step": 34400 + }, + { + "epoch": 1.57, + "learning_rate": 4.2239919447114284e-05, + "loss": 2.1266, + "step": 34410 + }, + { + "epoch": 1.57, + "learning_rate": 4.2237631012861e-05, + "loss": 2.18, + "step": 34420 + }, + { + "epoch": 1.57, + "learning_rate": 4.223534257860772e-05, + "loss": 2.1683, + "step": 34430 + }, + { + "epoch": 1.57, + "learning_rate": 4.223305414435443e-05, + "loss": 2.1835, + "step": 34440 + }, + { + "epoch": 1.57, + "learning_rate": 4.223076571010115e-05, + "loss": 2.0724, + "step": 34450 + }, + { + "epoch": 1.57, + "learning_rate": 4.222847727584787e-05, + "loss": 2.1576, + "step": 34460 + }, + { + "epoch": 1.57, + "learning_rate": 4.222618884159458e-05, + "loss": 2.2414, + "step": 34470 + }, + { + "epoch": 1.57, + "learning_rate": 4.2223900407341296e-05, + "loss": 2.2214, + "step": 34480 + }, + { + "epoch": 1.57, + "learning_rate": 4.222161197308802e-05, + "loss": 2.2895, + "step": 34490 + }, + { + "epoch": 1.58, + "learning_rate": 4.221932353883473e-05, + "loss": 2.1561, + "step": 34500 + }, + { + "epoch": 1.58, + "learning_rate": 4.2217035104581445e-05, + "loss": 2.1956, + "step": 34510 + }, + { + "epoch": 1.58, + "learning_rate": 4.2214746670328166e-05, + "loss": 2.3574, + "step": 34520 + }, + { + "epoch": 1.58, + "learning_rate": 4.221245823607488e-05, + "loss": 2.2993, + "step": 34530 + }, + { + "epoch": 1.58, + "learning_rate": 4.2210169801821594e-05, + "loss": 2.1817, + "step": 34540 + }, + { + "epoch": 1.58, + "learning_rate": 4.2207881367568315e-05, + "loss": 2.2136, + "step": 34550 + }, + { + "epoch": 1.58, + "learning_rate": 4.220559293331503e-05, + "loss": 2.1727, + "step": 34560 + }, + { + "epoch": 1.58, + "learning_rate": 4.2203304499061744e-05, + "loss": 2.0491, + "step": 34570 + }, + { + "epoch": 1.58, + "learning_rate": 4.220101606480846e-05, + "loss": 2.1265, + "step": 34580 + }, + { + "epoch": 1.58, + "learning_rate": 4.219872763055517e-05, + "loss": 2.2473, + "step": 34590 + }, + { + "epoch": 1.58, + "learning_rate": 4.219643919630189e-05, + "loss": 2.1427, + "step": 34600 + }, + { + "epoch": 1.58, + "learning_rate": 4.219415076204861e-05, + "loss": 2.3436, + "step": 34610 + }, + { + "epoch": 1.58, + "learning_rate": 4.219186232779532e-05, + "loss": 2.1976, + "step": 34620 + }, + { + "epoch": 1.58, + "learning_rate": 4.218957389354204e-05, + "loss": 2.0964, + "step": 34630 + }, + { + "epoch": 1.58, + "learning_rate": 4.2187285459288756e-05, + "loss": 2.1698, + "step": 34640 + }, + { + "epoch": 1.58, + "learning_rate": 4.218499702503547e-05, + "loss": 2.3433, + "step": 34650 + }, + { + "epoch": 1.58, + "learning_rate": 4.218270859078219e-05, + "loss": 2.0955, + "step": 34660 + }, + { + "epoch": 1.58, + "learning_rate": 4.2180420156528905e-05, + "loss": 2.2234, + "step": 34670 + }, + { + "epoch": 1.58, + "learning_rate": 4.217813172227562e-05, + "loss": 2.2557, + "step": 34680 + }, + { + "epoch": 1.58, + "learning_rate": 4.217584328802234e-05, + "loss": 2.3399, + "step": 34690 + }, + { + "epoch": 1.58, + "learning_rate": 4.2173554853769054e-05, + "loss": 2.0831, + "step": 34700 + }, + { + "epoch": 1.58, + "learning_rate": 4.217126641951577e-05, + "loss": 2.2014, + "step": 34710 + }, + { + "epoch": 1.59, + "learning_rate": 4.216897798526249e-05, + "loss": 2.1051, + "step": 34720 + }, + { + "epoch": 1.59, + "learning_rate": 4.2166689551009204e-05, + "loss": 2.2036, + "step": 34730 + }, + { + "epoch": 1.59, + "learning_rate": 4.216440111675592e-05, + "loss": 2.1197, + "step": 34740 + }, + { + "epoch": 1.59, + "learning_rate": 4.216211268250264e-05, + "loss": 2.357, + "step": 34750 + }, + { + "epoch": 1.59, + "learning_rate": 4.215982424824935e-05, + "loss": 2.2071, + "step": 34760 + }, + { + "epoch": 1.59, + "learning_rate": 4.215753581399607e-05, + "loss": 2.2252, + "step": 34770 + }, + { + "epoch": 1.59, + "learning_rate": 4.215524737974278e-05, + "loss": 2.0906, + "step": 34780 + }, + { + "epoch": 1.59, + "learning_rate": 4.2152958945489495e-05, + "loss": 2.2578, + "step": 34790 + }, + { + "epoch": 1.59, + "learning_rate": 4.215067051123621e-05, + "loss": 1.9847, + "step": 34800 + }, + { + "epoch": 1.59, + "learning_rate": 4.214838207698293e-05, + "loss": 2.2064, + "step": 34810 + }, + { + "epoch": 1.59, + "learning_rate": 4.2146093642729644e-05, + "loss": 2.3614, + "step": 34820 + }, + { + "epoch": 1.59, + "learning_rate": 4.214380520847636e-05, + "loss": 2.3928, + "step": 34830 + }, + { + "epoch": 1.59, + "learning_rate": 4.214151677422308e-05, + "loss": 2.265, + "step": 34840 + }, + { + "epoch": 1.59, + "learning_rate": 4.2139228339969794e-05, + "loss": 2.1797, + "step": 34850 + }, + { + "epoch": 1.59, + "learning_rate": 4.213693990571651e-05, + "loss": 2.2691, + "step": 34860 + }, + { + "epoch": 1.59, + "learning_rate": 4.213465147146323e-05, + "loss": 2.347, + "step": 34870 + }, + { + "epoch": 1.59, + "learning_rate": 4.213236303720994e-05, + "loss": 2.0507, + "step": 34880 + }, + { + "epoch": 1.59, + "learning_rate": 4.213007460295666e-05, + "loss": 2.1732, + "step": 34890 + }, + { + "epoch": 1.59, + "learning_rate": 4.212778616870338e-05, + "loss": 2.0792, + "step": 34900 + }, + { + "epoch": 1.59, + "learning_rate": 4.212549773445009e-05, + "loss": 2.2117, + "step": 34910 + }, + { + "epoch": 1.59, + "learning_rate": 4.2123209300196806e-05, + "loss": 2.2029, + "step": 34920 + }, + { + "epoch": 1.59, + "learning_rate": 4.212092086594353e-05, + "loss": 2.1016, + "step": 34930 + }, + { + "epoch": 1.6, + "learning_rate": 4.211863243169024e-05, + "loss": 2.1843, + "step": 34940 + }, + { + "epoch": 1.6, + "learning_rate": 4.2116343997436955e-05, + "loss": 2.24, + "step": 34950 + }, + { + "epoch": 1.6, + "learning_rate": 4.2114055563183676e-05, + "loss": 2.0722, + "step": 34960 + }, + { + "epoch": 1.6, + "learning_rate": 4.211176712893039e-05, + "loss": 2.1436, + "step": 34970 + }, + { + "epoch": 1.6, + "learning_rate": 4.2109478694677104e-05, + "loss": 2.1719, + "step": 34980 + }, + { + "epoch": 1.6, + "learning_rate": 4.210719026042382e-05, + "loss": 2.1263, + "step": 34990 + }, + { + "epoch": 1.6, + "learning_rate": 4.210490182617053e-05, + "loss": 2.2023, + "step": 35000 + }, + { + "epoch": 1.6, + "learning_rate": 4.2102613391917254e-05, + "loss": 2.2307, + "step": 35010 + }, + { + "epoch": 1.6, + "learning_rate": 4.210032495766397e-05, + "loss": 2.2277, + "step": 35020 + }, + { + "epoch": 1.6, + "learning_rate": 4.209803652341068e-05, + "loss": 2.1839, + "step": 35030 + }, + { + "epoch": 1.6, + "learning_rate": 4.20957480891574e-05, + "loss": 2.2606, + "step": 35040 + }, + { + "epoch": 1.6, + "learning_rate": 4.209345965490412e-05, + "loss": 2.0376, + "step": 35050 + }, + { + "epoch": 1.6, + "learning_rate": 4.209117122065083e-05, + "loss": 2.1165, + "step": 35060 + }, + { + "epoch": 1.6, + "learning_rate": 4.208888278639755e-05, + "loss": 2.3497, + "step": 35070 + }, + { + "epoch": 1.6, + "learning_rate": 4.2086594352144266e-05, + "loss": 2.3684, + "step": 35080 + }, + { + "epoch": 1.6, + "learning_rate": 4.208430591789098e-05, + "loss": 2.1159, + "step": 35090 + }, + { + "epoch": 1.6, + "learning_rate": 4.20820174836377e-05, + "loss": 2.2071, + "step": 35100 + }, + { + "epoch": 1.6, + "learning_rate": 4.2079729049384415e-05, + "loss": 2.2192, + "step": 35110 + }, + { + "epoch": 1.6, + "learning_rate": 4.207744061513113e-05, + "loss": 2.303, + "step": 35120 + }, + { + "epoch": 1.6, + "learning_rate": 4.207515218087785e-05, + "loss": 2.2169, + "step": 35130 + }, + { + "epoch": 1.6, + "learning_rate": 4.2072863746624564e-05, + "loss": 2.209, + "step": 35140 + }, + { + "epoch": 1.61, + "learning_rate": 4.207057531237128e-05, + "loss": 2.2105, + "step": 35150 + }, + { + "epoch": 1.61, + "learning_rate": 4.2068286878118e-05, + "loss": 2.392, + "step": 35160 + }, + { + "epoch": 1.61, + "learning_rate": 4.206599844386471e-05, + "loss": 2.2361, + "step": 35170 + }, + { + "epoch": 1.61, + "learning_rate": 4.206371000961142e-05, + "loss": 2.148, + "step": 35180 + }, + { + "epoch": 1.61, + "learning_rate": 4.206142157535814e-05, + "loss": 2.2382, + "step": 35190 + }, + { + "epoch": 1.61, + "learning_rate": 4.2059133141104856e-05, + "loss": 2.2803, + "step": 35200 + }, + { + "epoch": 1.61, + "learning_rate": 4.205684470685157e-05, + "loss": 2.0674, + "step": 35210 + }, + { + "epoch": 1.61, + "learning_rate": 4.205455627259829e-05, + "loss": 2.23, + "step": 35220 + }, + { + "epoch": 1.61, + "learning_rate": 4.2052267838345005e-05, + "loss": 2.1598, + "step": 35230 + }, + { + "epoch": 1.61, + "learning_rate": 4.204997940409172e-05, + "loss": 2.2057, + "step": 35240 + }, + { + "epoch": 1.61, + "learning_rate": 4.204769096983844e-05, + "loss": 2.2194, + "step": 35250 + }, + { + "epoch": 1.61, + "learning_rate": 4.2045402535585154e-05, + "loss": 2.1509, + "step": 35260 + }, + { + "epoch": 1.61, + "learning_rate": 4.204311410133187e-05, + "loss": 2.2268, + "step": 35270 + }, + { + "epoch": 1.61, + "learning_rate": 4.204082566707859e-05, + "loss": 2.303, + "step": 35280 + }, + { + "epoch": 1.61, + "learning_rate": 4.2038537232825303e-05, + "loss": 2.1539, + "step": 35290 + }, + { + "epoch": 1.61, + "learning_rate": 4.203624879857202e-05, + "loss": 2.3462, + "step": 35300 + }, + { + "epoch": 1.61, + "learning_rate": 4.203396036431874e-05, + "loss": 2.2761, + "step": 35310 + }, + { + "epoch": 1.61, + "learning_rate": 4.203167193006545e-05, + "loss": 2.194, + "step": 35320 + }, + { + "epoch": 1.61, + "learning_rate": 4.202938349581217e-05, + "loss": 2.2376, + "step": 35330 + }, + { + "epoch": 1.61, + "learning_rate": 4.202709506155889e-05, + "loss": 2.2305, + "step": 35340 + }, + { + "epoch": 1.61, + "learning_rate": 4.20248066273056e-05, + "loss": 2.3042, + "step": 35350 + }, + { + "epoch": 1.61, + "learning_rate": 4.2022518193052316e-05, + "loss": 2.0659, + "step": 35360 + }, + { + "epoch": 1.62, + "learning_rate": 4.202022975879903e-05, + "loss": 2.0786, + "step": 35370 + }, + { + "epoch": 1.62, + "learning_rate": 4.2017941324545744e-05, + "loss": 2.2156, + "step": 35380 + }, + { + "epoch": 1.62, + "learning_rate": 4.2015652890292465e-05, + "loss": 2.1824, + "step": 35390 + }, + { + "epoch": 1.62, + "learning_rate": 4.201336445603918e-05, + "loss": 2.2179, + "step": 35400 + }, + { + "epoch": 1.62, + "learning_rate": 4.2011076021785893e-05, + "loss": 2.1806, + "step": 35410 + }, + { + "epoch": 1.62, + "learning_rate": 4.200878758753261e-05, + "loss": 2.192, + "step": 35420 + }, + { + "epoch": 1.62, + "learning_rate": 4.200649915327933e-05, + "loss": 2.2702, + "step": 35430 + }, + { + "epoch": 1.62, + "learning_rate": 4.200421071902604e-05, + "loss": 2.1344, + "step": 35440 + }, + { + "epoch": 1.62, + "learning_rate": 4.200192228477276e-05, + "loss": 2.1847, + "step": 35450 + }, + { + "epoch": 1.62, + "learning_rate": 4.199963385051948e-05, + "loss": 2.2211, + "step": 35460 + }, + { + "epoch": 1.62, + "learning_rate": 4.199734541626619e-05, + "loss": 2.2877, + "step": 35470 + }, + { + "epoch": 1.62, + "learning_rate": 4.1995056982012906e-05, + "loss": 2.2058, + "step": 35480 + }, + { + "epoch": 1.62, + "learning_rate": 4.199276854775963e-05, + "loss": 2.2641, + "step": 35490 + }, + { + "epoch": 1.62, + "learning_rate": 4.199048011350634e-05, + "loss": 2.1562, + "step": 35500 + }, + { + "epoch": 1.62, + "learning_rate": 4.1988191679253055e-05, + "loss": 2.0076, + "step": 35510 + }, + { + "epoch": 1.62, + "learning_rate": 4.1985903244999776e-05, + "loss": 2.1856, + "step": 35520 + }, + { + "epoch": 1.62, + "learning_rate": 4.198361481074649e-05, + "loss": 2.3047, + "step": 35530 + }, + { + "epoch": 1.62, + "learning_rate": 4.1981326376493204e-05, + "loss": 2.4964, + "step": 35540 + }, + { + "epoch": 1.62, + "learning_rate": 4.1979037942239925e-05, + "loss": 2.1926, + "step": 35550 + }, + { + "epoch": 1.62, + "learning_rate": 4.197674950798664e-05, + "loss": 2.1895, + "step": 35560 + }, + { + "epoch": 1.62, + "learning_rate": 4.1974461073733353e-05, + "loss": 2.2181, + "step": 35570 + }, + { + "epoch": 1.62, + "learning_rate": 4.197217263948007e-05, + "loss": 2.2334, + "step": 35580 + }, + { + "epoch": 1.63, + "learning_rate": 4.196988420522678e-05, + "loss": 2.2817, + "step": 35590 + }, + { + "epoch": 1.63, + "learning_rate": 4.19675957709735e-05, + "loss": 2.2428, + "step": 35600 + }, + { + "epoch": 1.63, + "learning_rate": 4.196530733672022e-05, + "loss": 1.9813, + "step": 35610 + }, + { + "epoch": 1.63, + "learning_rate": 4.196301890246693e-05, + "loss": 2.4459, + "step": 35620 + }, + { + "epoch": 1.63, + "learning_rate": 4.196073046821365e-05, + "loss": 2.0868, + "step": 35630 + }, + { + "epoch": 1.63, + "learning_rate": 4.1958442033960366e-05, + "loss": 2.1707, + "step": 35640 + }, + { + "epoch": 1.63, + "learning_rate": 4.195615359970708e-05, + "loss": 2.2882, + "step": 35650 + }, + { + "epoch": 1.63, + "learning_rate": 4.19538651654538e-05, + "loss": 2.267, + "step": 35660 + }, + { + "epoch": 1.63, + "learning_rate": 4.1951576731200515e-05, + "loss": 2.1949, + "step": 35670 + }, + { + "epoch": 1.63, + "learning_rate": 4.194928829694723e-05, + "loss": 2.2133, + "step": 35680 + }, + { + "epoch": 1.63, + "learning_rate": 4.194699986269395e-05, + "loss": 2.2678, + "step": 35690 + }, + { + "epoch": 1.63, + "learning_rate": 4.1944711428440664e-05, + "loss": 2.2512, + "step": 35700 + }, + { + "epoch": 1.63, + "learning_rate": 4.194242299418738e-05, + "loss": 2.2948, + "step": 35710 + }, + { + "epoch": 1.63, + "learning_rate": 4.19401345599341e-05, + "loss": 2.1776, + "step": 35720 + }, + { + "epoch": 1.63, + "learning_rate": 4.1937846125680813e-05, + "loss": 2.172, + "step": 35730 + }, + { + "epoch": 1.63, + "learning_rate": 4.193555769142753e-05, + "loss": 2.1015, + "step": 35740 + }, + { + "epoch": 1.63, + "learning_rate": 4.193326925717425e-05, + "loss": 2.1385, + "step": 35750 + }, + { + "epoch": 1.63, + "learning_rate": 4.193098082292096e-05, + "loss": 2.1631, + "step": 35760 + }, + { + "epoch": 1.63, + "learning_rate": 4.192869238866767e-05, + "loss": 2.1801, + "step": 35770 + }, + { + "epoch": 1.63, + "learning_rate": 4.192640395441439e-05, + "loss": 2.1689, + "step": 35780 + }, + { + "epoch": 1.63, + "learning_rate": 4.1924115520161105e-05, + "loss": 2.4101, + "step": 35790 + }, + { + "epoch": 1.63, + "learning_rate": 4.192182708590782e-05, + "loss": 2.1634, + "step": 35800 + }, + { + "epoch": 1.64, + "learning_rate": 4.191953865165454e-05, + "loss": 2.196, + "step": 35810 + }, + { + "epoch": 1.64, + "learning_rate": 4.1917250217401254e-05, + "loss": 2.0348, + "step": 35820 + }, + { + "epoch": 1.64, + "learning_rate": 4.191496178314797e-05, + "loss": 2.0357, + "step": 35830 + }, + { + "epoch": 1.64, + "learning_rate": 4.191267334889469e-05, + "loss": 2.1229, + "step": 35840 + }, + { + "epoch": 1.64, + "learning_rate": 4.19103849146414e-05, + "loss": 2.1913, + "step": 35850 + }, + { + "epoch": 1.64, + "learning_rate": 4.190809648038812e-05, + "loss": 2.1357, + "step": 35860 + }, + { + "epoch": 1.64, + "learning_rate": 4.190580804613484e-05, + "loss": 1.9988, + "step": 35870 + }, + { + "epoch": 1.64, + "learning_rate": 4.190351961188155e-05, + "loss": 2.1352, + "step": 35880 + }, + { + "epoch": 1.64, + "learning_rate": 4.190123117762827e-05, + "loss": 2.013, + "step": 35890 + }, + { + "epoch": 1.64, + "learning_rate": 4.189894274337499e-05, + "loss": 2.3225, + "step": 35900 + }, + { + "epoch": 1.64, + "learning_rate": 4.18966543091217e-05, + "loss": 2.0352, + "step": 35910 + }, + { + "epoch": 1.64, + "learning_rate": 4.1894365874868416e-05, + "loss": 2.2108, + "step": 35920 + }, + { + "epoch": 1.64, + "learning_rate": 4.189207744061514e-05, + "loss": 2.2225, + "step": 35930 + }, + { + "epoch": 1.64, + "learning_rate": 4.188978900636185e-05, + "loss": 2.2175, + "step": 35940 + }, + { + "epoch": 1.64, + "learning_rate": 4.1887500572108565e-05, + "loss": 2.1259, + "step": 35950 + }, + { + "epoch": 1.64, + "learning_rate": 4.1885212137855286e-05, + "loss": 2.2423, + "step": 35960 + }, + { + "epoch": 1.64, + "learning_rate": 4.188292370360199e-05, + "loss": 2.2987, + "step": 35970 + }, + { + "epoch": 1.64, + "learning_rate": 4.1880635269348714e-05, + "loss": 2.1954, + "step": 35980 + }, + { + "epoch": 1.64, + "learning_rate": 4.187834683509543e-05, + "loss": 2.2845, + "step": 35990 + }, + { + "epoch": 1.64, + "learning_rate": 4.187605840084214e-05, + "loss": 2.1676, + "step": 36000 + }, + { + "epoch": 1.64, + "learning_rate": 4.1873769966588863e-05, + "loss": 2.2662, + "step": 36010 + }, + { + "epoch": 1.64, + "learning_rate": 4.187148153233558e-05, + "loss": 2.204, + "step": 36020 + }, + { + "epoch": 1.65, + "learning_rate": 4.186919309808229e-05, + "loss": 2.3134, + "step": 36030 + }, + { + "epoch": 1.65, + "learning_rate": 4.186690466382901e-05, + "loss": 2.1039, + "step": 36040 + }, + { + "epoch": 1.65, + "learning_rate": 4.186461622957573e-05, + "loss": 2.2618, + "step": 36050 + }, + { + "epoch": 1.65, + "learning_rate": 4.186232779532244e-05, + "loss": 2.1634, + "step": 36060 + }, + { + "epoch": 1.65, + "learning_rate": 4.186003936106916e-05, + "loss": 2.1834, + "step": 36070 + }, + { + "epoch": 1.65, + "learning_rate": 4.1857750926815876e-05, + "loss": 2.2815, + "step": 36080 + }, + { + "epoch": 1.65, + "learning_rate": 4.185546249256259e-05, + "loss": 2.2876, + "step": 36090 + }, + { + "epoch": 1.65, + "learning_rate": 4.185317405830931e-05, + "loss": 2.3654, + "step": 36100 + }, + { + "epoch": 1.65, + "learning_rate": 4.1850885624056025e-05, + "loss": 2.3615, + "step": 36110 + }, + { + "epoch": 1.65, + "learning_rate": 4.184859718980274e-05, + "loss": 2.2244, + "step": 36120 + }, + { + "epoch": 1.65, + "learning_rate": 4.184630875554946e-05, + "loss": 2.1728, + "step": 36130 + }, + { + "epoch": 1.65, + "learning_rate": 4.1844020321296174e-05, + "loss": 2.1475, + "step": 36140 + }, + { + "epoch": 1.65, + "learning_rate": 4.184173188704289e-05, + "loss": 2.1333, + "step": 36150 + }, + { + "epoch": 1.65, + "learning_rate": 4.18394434527896e-05, + "loss": 2.2034, + "step": 36160 + }, + { + "epoch": 1.65, + "learning_rate": 4.183715501853632e-05, + "loss": 2.1358, + "step": 36170 + }, + { + "epoch": 1.65, + "learning_rate": 4.183486658428303e-05, + "loss": 2.1884, + "step": 36180 + }, + { + "epoch": 1.65, + "learning_rate": 4.183257815002975e-05, + "loss": 2.1662, + "step": 36190 + }, + { + "epoch": 1.65, + "learning_rate": 4.1830289715776466e-05, + "loss": 2.1639, + "step": 36200 + }, + { + "epoch": 1.65, + "learning_rate": 4.182800128152318e-05, + "loss": 2.0841, + "step": 36210 + }, + { + "epoch": 1.65, + "learning_rate": 4.18257128472699e-05, + "loss": 2.1795, + "step": 36220 + }, + { + "epoch": 1.65, + "learning_rate": 4.1823424413016615e-05, + "loss": 2.14, + "step": 36230 + }, + { + "epoch": 1.65, + "learning_rate": 4.182113597876333e-05, + "loss": 2.1729, + "step": 36240 + }, + { + "epoch": 1.66, + "learning_rate": 4.181884754451005e-05, + "loss": 2.1571, + "step": 36250 + }, + { + "epoch": 1.66, + "learning_rate": 4.1816559110256764e-05, + "loss": 2.1301, + "step": 36260 + }, + { + "epoch": 1.66, + "learning_rate": 4.181427067600348e-05, + "loss": 2.2749, + "step": 36270 + }, + { + "epoch": 1.66, + "learning_rate": 4.18119822417502e-05, + "loss": 2.138, + "step": 36280 + }, + { + "epoch": 1.66, + "learning_rate": 4.180969380749691e-05, + "loss": 2.3211, + "step": 36290 + }, + { + "epoch": 1.66, + "learning_rate": 4.180740537324363e-05, + "loss": 2.1484, + "step": 36300 + }, + { + "epoch": 1.66, + "learning_rate": 4.180511693899035e-05, + "loss": 2.22, + "step": 36310 + }, + { + "epoch": 1.66, + "learning_rate": 4.180282850473706e-05, + "loss": 2.0964, + "step": 36320 + }, + { + "epoch": 1.66, + "learning_rate": 4.180054007048378e-05, + "loss": 2.2091, + "step": 36330 + }, + { + "epoch": 1.66, + "learning_rate": 4.17982516362305e-05, + "loss": 2.3755, + "step": 36340 + }, + { + "epoch": 1.66, + "learning_rate": 4.179596320197721e-05, + "loss": 2.1484, + "step": 36350 + }, + { + "epoch": 1.66, + "learning_rate": 4.1793674767723926e-05, + "loss": 2.1213, + "step": 36360 + }, + { + "epoch": 1.66, + "learning_rate": 4.179138633347064e-05, + "loss": 2.1509, + "step": 36370 + }, + { + "epoch": 1.66, + "learning_rate": 4.1789097899217354e-05, + "loss": 2.2193, + "step": 36380 + }, + { + "epoch": 1.66, + "learning_rate": 4.1786809464964075e-05, + "loss": 2.2811, + "step": 36390 + }, + { + "epoch": 1.66, + "learning_rate": 4.178452103071079e-05, + "loss": 2.1019, + "step": 36400 + }, + { + "epoch": 1.66, + "learning_rate": 4.17822325964575e-05, + "loss": 2.2255, + "step": 36410 + }, + { + "epoch": 1.66, + "learning_rate": 4.1779944162204224e-05, + "loss": 2.2865, + "step": 36420 + }, + { + "epoch": 1.66, + "learning_rate": 4.177765572795094e-05, + "loss": 2.2252, + "step": 36430 + }, + { + "epoch": 1.66, + "learning_rate": 4.177536729369765e-05, + "loss": 2.2945, + "step": 36440 + }, + { + "epoch": 1.66, + "learning_rate": 4.177307885944437e-05, + "loss": 2.2925, + "step": 36450 + }, + { + "epoch": 1.66, + "learning_rate": 4.177079042519109e-05, + "loss": 2.1438, + "step": 36460 + }, + { + "epoch": 1.67, + "learning_rate": 4.17685019909378e-05, + "loss": 2.3323, + "step": 36470 + }, + { + "epoch": 1.67, + "learning_rate": 4.176621355668452e-05, + "loss": 2.3067, + "step": 36480 + }, + { + "epoch": 1.67, + "learning_rate": 4.176392512243124e-05, + "loss": 2.1549, + "step": 36490 + }, + { + "epoch": 1.67, + "learning_rate": 4.176163668817795e-05, + "loss": 2.3096, + "step": 36500 + }, + { + "epoch": 1.67, + "learning_rate": 4.1759348253924665e-05, + "loss": 2.2461, + "step": 36510 + }, + { + "epoch": 1.67, + "learning_rate": 4.1757059819671386e-05, + "loss": 2.2352, + "step": 36520 + }, + { + "epoch": 1.67, + "learning_rate": 4.17547713854181e-05, + "loss": 2.2697, + "step": 36530 + }, + { + "epoch": 1.67, + "learning_rate": 4.1752482951164814e-05, + "loss": 2.1806, + "step": 36540 + }, + { + "epoch": 1.67, + "learning_rate": 4.1750194516911535e-05, + "loss": 2.3018, + "step": 36550 + }, + { + "epoch": 1.67, + "learning_rate": 4.174790608265824e-05, + "loss": 2.2201, + "step": 36560 + }, + { + "epoch": 1.67, + "learning_rate": 4.174561764840496e-05, + "loss": 2.0023, + "step": 36570 + }, + { + "epoch": 1.67, + "learning_rate": 4.174332921415168e-05, + "loss": 2.3606, + "step": 36580 + }, + { + "epoch": 1.67, + "learning_rate": 4.174104077989839e-05, + "loss": 2.0941, + "step": 36590 + }, + { + "epoch": 1.67, + "learning_rate": 4.173875234564511e-05, + "loss": 2.1995, + "step": 36600 + }, + { + "epoch": 1.67, + "learning_rate": 4.1736463911391827e-05, + "loss": 2.2288, + "step": 36610 + }, + { + "epoch": 1.67, + "learning_rate": 4.173417547713854e-05, + "loss": 2.1888, + "step": 36620 + }, + { + "epoch": 1.67, + "learning_rate": 4.173188704288526e-05, + "loss": 2.156, + "step": 36630 + }, + { + "epoch": 1.67, + "learning_rate": 4.1729598608631976e-05, + "loss": 2.1314, + "step": 36640 + }, + { + "epoch": 1.67, + "learning_rate": 4.172731017437869e-05, + "loss": 2.2787, + "step": 36650 + }, + { + "epoch": 1.67, + "learning_rate": 4.172502174012541e-05, + "loss": 1.9101, + "step": 36660 + }, + { + "epoch": 1.67, + "learning_rate": 4.1722733305872125e-05, + "loss": 2.1467, + "step": 36670 + }, + { + "epoch": 1.67, + "learning_rate": 4.172044487161884e-05, + "loss": 2.1305, + "step": 36680 + }, + { + "epoch": 1.68, + "learning_rate": 4.171815643736556e-05, + "loss": 2.182, + "step": 36690 + }, + { + "epoch": 1.68, + "learning_rate": 4.1715868003112274e-05, + "loss": 2.3326, + "step": 36700 + }, + { + "epoch": 1.68, + "learning_rate": 4.171357956885899e-05, + "loss": 2.2136, + "step": 36710 + }, + { + "epoch": 1.68, + "learning_rate": 4.171129113460571e-05, + "loss": 2.2463, + "step": 36720 + }, + { + "epoch": 1.68, + "learning_rate": 4.170900270035242e-05, + "loss": 2.1664, + "step": 36730 + }, + { + "epoch": 1.68, + "learning_rate": 4.170671426609914e-05, + "loss": 2.271, + "step": 36740 + }, + { + "epoch": 1.68, + "learning_rate": 4.170442583184586e-05, + "loss": 2.2753, + "step": 36750 + }, + { + "epoch": 1.68, + "learning_rate": 4.1702137397592566e-05, + "loss": 2.1586, + "step": 36760 + }, + { + "epoch": 1.68, + "learning_rate": 4.169984896333928e-05, + "loss": 2.1588, + "step": 36770 + }, + { + "epoch": 1.68, + "learning_rate": 4.1697560529086e-05, + "loss": 2.1353, + "step": 36780 + }, + { + "epoch": 1.68, + "learning_rate": 4.1695272094832715e-05, + "loss": 2.0176, + "step": 36790 + }, + { + "epoch": 1.68, + "learning_rate": 4.169298366057943e-05, + "loss": 2.0487, + "step": 36800 + }, + { + "epoch": 1.68, + "learning_rate": 4.169069522632615e-05, + "loss": 2.1775, + "step": 36810 + }, + { + "epoch": 1.68, + "learning_rate": 4.1688406792072864e-05, + "loss": 2.1976, + "step": 36820 + }, + { + "epoch": 1.68, + "learning_rate": 4.168611835781958e-05, + "loss": 2.2163, + "step": 36830 + }, + { + "epoch": 1.68, + "learning_rate": 4.16838299235663e-05, + "loss": 2.1888, + "step": 36840 + }, + { + "epoch": 1.68, + "learning_rate": 4.168154148931301e-05, + "loss": 2.2368, + "step": 36850 + }, + { + "epoch": 1.68, + "learning_rate": 4.167925305505973e-05, + "loss": 2.2871, + "step": 36860 + }, + { + "epoch": 1.68, + "learning_rate": 4.167696462080645e-05, + "loss": 2.2263, + "step": 36870 + }, + { + "epoch": 1.68, + "learning_rate": 4.167467618655316e-05, + "loss": 2.3149, + "step": 36880 + }, + { + "epoch": 1.68, + "learning_rate": 4.1672387752299877e-05, + "loss": 2.2872, + "step": 36890 + }, + { + "epoch": 1.68, + "learning_rate": 4.16700993180466e-05, + "loss": 2.1909, + "step": 36900 + }, + { + "epoch": 1.69, + "learning_rate": 4.166781088379331e-05, + "loss": 2.3224, + "step": 36910 + }, + { + "epoch": 1.69, + "learning_rate": 4.1665522449540026e-05, + "loss": 2.2413, + "step": 36920 + }, + { + "epoch": 1.69, + "learning_rate": 4.166323401528675e-05, + "loss": 2.1985, + "step": 36930 + }, + { + "epoch": 1.69, + "learning_rate": 4.166094558103346e-05, + "loss": 2.1627, + "step": 36940 + }, + { + "epoch": 1.69, + "learning_rate": 4.1658657146780175e-05, + "loss": 2.1385, + "step": 36950 + }, + { + "epoch": 1.69, + "learning_rate": 4.165636871252689e-05, + "loss": 2.2795, + "step": 36960 + }, + { + "epoch": 1.69, + "learning_rate": 4.16540802782736e-05, + "loss": 2.2094, + "step": 36970 + }, + { + "epoch": 1.69, + "learning_rate": 4.1651791844020324e-05, + "loss": 2.1953, + "step": 36980 + }, + { + "epoch": 1.69, + "learning_rate": 4.164950340976704e-05, + "loss": 2.2623, + "step": 36990 + }, + { + "epoch": 1.69, + "learning_rate": 4.164721497551375e-05, + "loss": 2.2628, + "step": 37000 + }, + { + "epoch": 1.69, + "learning_rate": 4.164492654126047e-05, + "loss": 2.2957, + "step": 37010 + }, + { + "epoch": 1.69, + "learning_rate": 4.164263810700719e-05, + "loss": 2.2331, + "step": 37020 + }, + { + "epoch": 1.69, + "learning_rate": 4.16403496727539e-05, + "loss": 2.1442, + "step": 37030 + }, + { + "epoch": 1.69, + "learning_rate": 4.163806123850062e-05, + "loss": 2.1199, + "step": 37040 + }, + { + "epoch": 1.69, + "learning_rate": 4.1635772804247337e-05, + "loss": 2.1521, + "step": 37050 + }, + { + "epoch": 1.69, + "learning_rate": 4.163348436999405e-05, + "loss": 2.1245, + "step": 37060 + }, + { + "epoch": 1.69, + "learning_rate": 4.163119593574077e-05, + "loss": 2.1625, + "step": 37070 + }, + { + "epoch": 1.69, + "learning_rate": 4.1628907501487486e-05, + "loss": 2.1908, + "step": 37080 + }, + { + "epoch": 1.69, + "learning_rate": 4.16266190672342e-05, + "loss": 2.1789, + "step": 37090 + }, + { + "epoch": 1.69, + "learning_rate": 4.162433063298092e-05, + "loss": 2.0488, + "step": 37100 + }, + { + "epoch": 1.69, + "learning_rate": 4.1622042198727635e-05, + "loss": 2.1799, + "step": 37110 + }, + { + "epoch": 1.69, + "learning_rate": 4.161975376447435e-05, + "loss": 2.2801, + "step": 37120 + }, + { + "epoch": 1.7, + "learning_rate": 4.161746533022107e-05, + "loss": 2.2477, + "step": 37130 + }, + { + "epoch": 1.7, + "learning_rate": 4.1615176895967784e-05, + "loss": 2.1636, + "step": 37140 + }, + { + "epoch": 1.7, + "learning_rate": 4.161288846171449e-05, + "loss": 2.3028, + "step": 37150 + }, + { + "epoch": 1.7, + "learning_rate": 4.161060002746121e-05, + "loss": 2.271, + "step": 37160 + }, + { + "epoch": 1.7, + "learning_rate": 4.1608311593207927e-05, + "loss": 2.3294, + "step": 37170 + }, + { + "epoch": 1.7, + "learning_rate": 4.160602315895464e-05, + "loss": 2.2149, + "step": 37180 + }, + { + "epoch": 1.7, + "learning_rate": 4.160373472470136e-05, + "loss": 2.1625, + "step": 37190 + }, + { + "epoch": 1.7, + "learning_rate": 4.1601446290448076e-05, + "loss": 2.3488, + "step": 37200 + }, + { + "epoch": 1.7, + "learning_rate": 4.159915785619479e-05, + "loss": 2.157, + "step": 37210 + }, + { + "epoch": 1.7, + "learning_rate": 4.159686942194151e-05, + "loss": 2.0363, + "step": 37220 + }, + { + "epoch": 1.7, + "learning_rate": 4.1594580987688225e-05, + "loss": 2.1384, + "step": 37230 + }, + { + "epoch": 1.7, + "learning_rate": 4.159229255343494e-05, + "loss": 2.1258, + "step": 37240 + }, + { + "epoch": 1.7, + "learning_rate": 4.159000411918166e-05, + "loss": 2.2632, + "step": 37250 + }, + { + "epoch": 1.7, + "learning_rate": 4.1587715684928374e-05, + "loss": 2.07, + "step": 37260 + }, + { + "epoch": 1.7, + "learning_rate": 4.158542725067509e-05, + "loss": 2.2473, + "step": 37270 + }, + { + "epoch": 1.7, + "learning_rate": 4.158313881642181e-05, + "loss": 2.1957, + "step": 37280 + }, + { + "epoch": 1.7, + "learning_rate": 4.158085038216852e-05, + "loss": 2.2194, + "step": 37290 + }, + { + "epoch": 1.7, + "learning_rate": 4.157856194791524e-05, + "loss": 2.1726, + "step": 37300 + }, + { + "epoch": 1.7, + "learning_rate": 4.157627351366196e-05, + "loss": 2.0769, + "step": 37310 + }, + { + "epoch": 1.7, + "learning_rate": 4.157398507940867e-05, + "loss": 2.2937, + "step": 37320 + }, + { + "epoch": 1.7, + "learning_rate": 4.1571696645155387e-05, + "loss": 2.1365, + "step": 37330 + }, + { + "epoch": 1.71, + "learning_rate": 4.156940821090211e-05, + "loss": 2.067, + "step": 37340 + }, + { + "epoch": 1.71, + "learning_rate": 4.1567119776648815e-05, + "loss": 2.1474, + "step": 37350 + }, + { + "epoch": 1.71, + "learning_rate": 4.1564831342395536e-05, + "loss": 2.1031, + "step": 37360 + }, + { + "epoch": 1.71, + "learning_rate": 4.156254290814225e-05, + "loss": 2.1677, + "step": 37370 + }, + { + "epoch": 1.71, + "learning_rate": 4.1560254473888964e-05, + "loss": 2.2256, + "step": 37380 + }, + { + "epoch": 1.71, + "learning_rate": 4.1557966039635685e-05, + "loss": 2.3279, + "step": 37390 + }, + { + "epoch": 1.71, + "learning_rate": 4.15556776053824e-05, + "loss": 2.1411, + "step": 37400 + }, + { + "epoch": 1.71, + "learning_rate": 4.155338917112911e-05, + "loss": 2.3298, + "step": 37410 + }, + { + "epoch": 1.71, + "learning_rate": 4.1551100736875834e-05, + "loss": 2.2089, + "step": 37420 + }, + { + "epoch": 1.71, + "learning_rate": 4.154881230262255e-05, + "loss": 2.1381, + "step": 37430 + }, + { + "epoch": 1.71, + "learning_rate": 4.154652386836926e-05, + "loss": 2.1928, + "step": 37440 + }, + { + "epoch": 1.71, + "learning_rate": 4.154423543411598e-05, + "loss": 2.2086, + "step": 37450 + }, + { + "epoch": 1.71, + "learning_rate": 4.15419469998627e-05, + "loss": 2.1065, + "step": 37460 + }, + { + "epoch": 1.71, + "learning_rate": 4.153965856560941e-05, + "loss": 2.2628, + "step": 37470 + }, + { + "epoch": 1.71, + "learning_rate": 4.153737013135613e-05, + "loss": 2.2599, + "step": 37480 + }, + { + "epoch": 1.71, + "learning_rate": 4.1535081697102847e-05, + "loss": 2.1146, + "step": 37490 + }, + { + "epoch": 1.71, + "learning_rate": 4.153279326284956e-05, + "loss": 2.2617, + "step": 37500 + }, + { + "epoch": 1.71, + "learning_rate": 4.153050482859628e-05, + "loss": 2.0535, + "step": 37510 + }, + { + "epoch": 1.71, + "learning_rate": 4.1528216394342996e-05, + "loss": 2.3426, + "step": 37520 + }, + { + "epoch": 1.71, + "learning_rate": 4.152592796008971e-05, + "loss": 2.3054, + "step": 37530 + }, + { + "epoch": 1.71, + "learning_rate": 4.152363952583643e-05, + "loss": 2.1889, + "step": 37540 + }, + { + "epoch": 1.71, + "learning_rate": 4.152135109158314e-05, + "loss": 2.2644, + "step": 37550 + }, + { + "epoch": 1.72, + "learning_rate": 4.151906265732985e-05, + "loss": 2.2223, + "step": 37560 + }, + { + "epoch": 1.72, + "learning_rate": 4.151677422307657e-05, + "loss": 2.2571, + "step": 37570 + }, + { + "epoch": 1.72, + "learning_rate": 4.151448578882329e-05, + "loss": 2.2414, + "step": 37580 + }, + { + "epoch": 1.72, + "learning_rate": 4.151219735457e-05, + "loss": 2.383, + "step": 37590 + }, + { + "epoch": 1.72, + "learning_rate": 4.150990892031672e-05, + "loss": 2.1785, + "step": 37600 + }, + { + "epoch": 1.72, + "learning_rate": 4.1507620486063436e-05, + "loss": 2.0195, + "step": 37610 + }, + { + "epoch": 1.72, + "learning_rate": 4.150533205181015e-05, + "loss": 2.2087, + "step": 37620 + }, + { + "epoch": 1.72, + "learning_rate": 4.150304361755687e-05, + "loss": 2.0984, + "step": 37630 + }, + { + "epoch": 1.72, + "learning_rate": 4.1500755183303586e-05, + "loss": 2.2044, + "step": 37640 + }, + { + "epoch": 1.72, + "learning_rate": 4.14984667490503e-05, + "loss": 2.2106, + "step": 37650 + }, + { + "epoch": 1.72, + "learning_rate": 4.149617831479702e-05, + "loss": 2.1247, + "step": 37660 + }, + { + "epoch": 1.72, + "learning_rate": 4.1493889880543735e-05, + "loss": 2.118, + "step": 37670 + }, + { + "epoch": 1.72, + "learning_rate": 4.149160144629045e-05, + "loss": 2.2166, + "step": 37680 + }, + { + "epoch": 1.72, + "learning_rate": 4.148931301203717e-05, + "loss": 2.2971, + "step": 37690 + }, + { + "epoch": 1.72, + "learning_rate": 4.1487024577783884e-05, + "loss": 2.2142, + "step": 37700 + }, + { + "epoch": 1.72, + "learning_rate": 4.14847361435306e-05, + "loss": 2.214, + "step": 37710 + }, + { + "epoch": 1.72, + "learning_rate": 4.148244770927732e-05, + "loss": 2.2088, + "step": 37720 + }, + { + "epoch": 1.72, + "learning_rate": 4.148015927502403e-05, + "loss": 2.0875, + "step": 37730 + }, + { + "epoch": 1.72, + "learning_rate": 4.147787084077074e-05, + "loss": 2.2364, + "step": 37740 + }, + { + "epoch": 1.72, + "learning_rate": 4.147558240651746e-05, + "loss": 2.1708, + "step": 37750 + }, + { + "epoch": 1.72, + "learning_rate": 4.1473293972264176e-05, + "loss": 2.1383, + "step": 37760 + }, + { + "epoch": 1.72, + "learning_rate": 4.147100553801089e-05, + "loss": 2.0741, + "step": 37770 + }, + { + "epoch": 1.73, + "learning_rate": 4.146871710375761e-05, + "loss": 2.2901, + "step": 37780 + }, + { + "epoch": 1.73, + "learning_rate": 4.1466428669504325e-05, + "loss": 2.1857, + "step": 37790 + }, + { + "epoch": 1.73, + "learning_rate": 4.146414023525104e-05, + "loss": 2.0843, + "step": 37800 + }, + { + "epoch": 1.73, + "learning_rate": 4.146185180099776e-05, + "loss": 2.1526, + "step": 37810 + }, + { + "epoch": 1.73, + "learning_rate": 4.1459563366744474e-05, + "loss": 2.0827, + "step": 37820 + }, + { + "epoch": 1.73, + "learning_rate": 4.145727493249119e-05, + "loss": 2.1325, + "step": 37830 + }, + { + "epoch": 1.73, + "learning_rate": 4.145498649823791e-05, + "loss": 2.2094, + "step": 37840 + }, + { + "epoch": 1.73, + "learning_rate": 4.145269806398462e-05, + "loss": 2.2227, + "step": 37850 + }, + { + "epoch": 1.73, + "learning_rate": 4.145040962973134e-05, + "loss": 2.2341, + "step": 37860 + }, + { + "epoch": 1.73, + "learning_rate": 4.144812119547806e-05, + "loss": 2.1889, + "step": 37870 + }, + { + "epoch": 1.73, + "learning_rate": 4.144583276122477e-05, + "loss": 2.2434, + "step": 37880 + }, + { + "epoch": 1.73, + "learning_rate": 4.1443544326971486e-05, + "loss": 2.114, + "step": 37890 + }, + { + "epoch": 1.73, + "learning_rate": 4.144125589271821e-05, + "loss": 2.3075, + "step": 37900 + }, + { + "epoch": 1.73, + "learning_rate": 4.143896745846492e-05, + "loss": 2.2841, + "step": 37910 + }, + { + "epoch": 1.73, + "learning_rate": 4.1436679024211636e-05, + "loss": 2.1142, + "step": 37920 + }, + { + "epoch": 1.73, + "learning_rate": 4.1434390589958357e-05, + "loss": 2.167, + "step": 37930 + }, + { + "epoch": 1.73, + "learning_rate": 4.1432102155705064e-05, + "loss": 2.1222, + "step": 37940 + }, + { + "epoch": 1.73, + "learning_rate": 4.1429813721451785e-05, + "loss": 2.1025, + "step": 37950 + }, + { + "epoch": 1.73, + "learning_rate": 4.14275252871985e-05, + "loss": 2.241, + "step": 37960 + }, + { + "epoch": 1.73, + "learning_rate": 4.142523685294521e-05, + "loss": 2.1692, + "step": 37970 + }, + { + "epoch": 1.73, + "learning_rate": 4.1422948418691934e-05, + "loss": 2.1924, + "step": 37980 + }, + { + "epoch": 1.73, + "learning_rate": 4.142065998443865e-05, + "loss": 2.2379, + "step": 37990 + }, + { + "epoch": 1.74, + "learning_rate": 4.141837155018536e-05, + "loss": 2.1543, + "step": 38000 + }, + { + "epoch": 1.74, + "learning_rate": 4.141608311593208e-05, + "loss": 2.218, + "step": 38010 + }, + { + "epoch": 1.74, + "learning_rate": 4.14137946816788e-05, + "loss": 2.2276, + "step": 38020 + }, + { + "epoch": 1.74, + "learning_rate": 4.141150624742551e-05, + "loss": 2.1639, + "step": 38030 + }, + { + "epoch": 1.74, + "learning_rate": 4.140921781317223e-05, + "loss": 2.0979, + "step": 38040 + }, + { + "epoch": 1.74, + "learning_rate": 4.1406929378918946e-05, + "loss": 2.0996, + "step": 38050 + }, + { + "epoch": 1.74, + "learning_rate": 4.140464094466566e-05, + "loss": 2.0435, + "step": 38060 + }, + { + "epoch": 1.74, + "learning_rate": 4.140235251041238e-05, + "loss": 2.2641, + "step": 38070 + }, + { + "epoch": 1.74, + "learning_rate": 4.1400064076159096e-05, + "loss": 2.1746, + "step": 38080 + }, + { + "epoch": 1.74, + "learning_rate": 4.139777564190581e-05, + "loss": 2.2327, + "step": 38090 + }, + { + "epoch": 1.74, + "learning_rate": 4.139548720765253e-05, + "loss": 2.2072, + "step": 38100 + }, + { + "epoch": 1.74, + "learning_rate": 4.1393198773399245e-05, + "loss": 2.234, + "step": 38110 + }, + { + "epoch": 1.74, + "learning_rate": 4.139091033914596e-05, + "loss": 2.1555, + "step": 38120 + }, + { + "epoch": 1.74, + "learning_rate": 4.138862190489268e-05, + "loss": 1.9149, + "step": 38130 + }, + { + "epoch": 1.74, + "learning_rate": 4.138633347063939e-05, + "loss": 2.2824, + "step": 38140 + }, + { + "epoch": 1.74, + "learning_rate": 4.13840450363861e-05, + "loss": 2.1314, + "step": 38150 + }, + { + "epoch": 1.74, + "learning_rate": 4.138175660213282e-05, + "loss": 2.1325, + "step": 38160 + }, + { + "epoch": 1.74, + "learning_rate": 4.1379468167879536e-05, + "loss": 2.0525, + "step": 38170 + }, + { + "epoch": 1.74, + "learning_rate": 4.137717973362625e-05, + "loss": 2.228, + "step": 38180 + }, + { + "epoch": 1.74, + "learning_rate": 4.137489129937297e-05, + "loss": 2.1375, + "step": 38190 + }, + { + "epoch": 1.74, + "learning_rate": 4.1372602865119686e-05, + "loss": 2.1777, + "step": 38200 + }, + { + "epoch": 1.74, + "learning_rate": 4.13703144308664e-05, + "loss": 2.0687, + "step": 38210 + }, + { + "epoch": 1.75, + "learning_rate": 4.136802599661312e-05, + "loss": 2.1486, + "step": 38220 + }, + { + "epoch": 1.75, + "learning_rate": 4.1365737562359835e-05, + "loss": 2.2134, + "step": 38230 + }, + { + "epoch": 1.75, + "learning_rate": 4.136344912810655e-05, + "loss": 2.1767, + "step": 38240 + }, + { + "epoch": 1.75, + "learning_rate": 4.136116069385327e-05, + "loss": 2.1079, + "step": 38250 + }, + { + "epoch": 1.75, + "learning_rate": 4.1358872259599984e-05, + "loss": 2.0588, + "step": 38260 + }, + { + "epoch": 1.75, + "learning_rate": 4.13565838253467e-05, + "loss": 2.2988, + "step": 38270 + }, + { + "epoch": 1.75, + "learning_rate": 4.135429539109342e-05, + "loss": 2.1837, + "step": 38280 + }, + { + "epoch": 1.75, + "learning_rate": 4.135200695684013e-05, + "loss": 2.1671, + "step": 38290 + }, + { + "epoch": 1.75, + "learning_rate": 4.134971852258685e-05, + "loss": 2.1453, + "step": 38300 + }, + { + "epoch": 1.75, + "learning_rate": 4.134743008833357e-05, + "loss": 2.141, + "step": 38310 + }, + { + "epoch": 1.75, + "learning_rate": 4.134514165408028e-05, + "loss": 2.2167, + "step": 38320 + }, + { + "epoch": 1.75, + "learning_rate": 4.1342853219826996e-05, + "loss": 2.1915, + "step": 38330 + }, + { + "epoch": 1.75, + "learning_rate": 4.134056478557371e-05, + "loss": 2.18, + "step": 38340 + }, + { + "epoch": 1.75, + "learning_rate": 4.1338276351320425e-05, + "loss": 2.2206, + "step": 38350 + }, + { + "epoch": 1.75, + "learning_rate": 4.1335987917067146e-05, + "loss": 2.2051, + "step": 38360 + }, + { + "epoch": 1.75, + "learning_rate": 4.133369948281386e-05, + "loss": 2.0144, + "step": 38370 + }, + { + "epoch": 1.75, + "learning_rate": 4.1331411048560574e-05, + "loss": 2.1683, + "step": 38380 + }, + { + "epoch": 1.75, + "learning_rate": 4.1329122614307295e-05, + "loss": 2.1199, + "step": 38390 + }, + { + "epoch": 1.75, + "learning_rate": 4.132683418005401e-05, + "loss": 2.2722, + "step": 38400 + }, + { + "epoch": 1.75, + "learning_rate": 4.132454574580072e-05, + "loss": 2.2842, + "step": 38410 + }, + { + "epoch": 1.75, + "learning_rate": 4.1322257311547444e-05, + "loss": 2.0474, + "step": 38420 + }, + { + "epoch": 1.75, + "learning_rate": 4.131996887729416e-05, + "loss": 2.2541, + "step": 38430 + }, + { + "epoch": 1.76, + "learning_rate": 4.131768044304087e-05, + "loss": 2.1254, + "step": 38440 + }, + { + "epoch": 1.76, + "learning_rate": 4.131539200878759e-05, + "loss": 2.3087, + "step": 38450 + }, + { + "epoch": 1.76, + "learning_rate": 4.131310357453431e-05, + "loss": 2.0168, + "step": 38460 + }, + { + "epoch": 1.76, + "learning_rate": 4.131081514028102e-05, + "loss": 2.0471, + "step": 38470 + }, + { + "epoch": 1.76, + "learning_rate": 4.130852670602774e-05, + "loss": 2.3601, + "step": 38480 + }, + { + "epoch": 1.76, + "learning_rate": 4.1306238271774456e-05, + "loss": 2.1867, + "step": 38490 + }, + { + "epoch": 1.76, + "learning_rate": 4.130394983752117e-05, + "loss": 2.192, + "step": 38500 + }, + { + "epoch": 1.76, + "learning_rate": 4.130166140326789e-05, + "loss": 2.145, + "step": 38510 + }, + { + "epoch": 1.76, + "learning_rate": 4.1299372969014606e-05, + "loss": 2.2573, + "step": 38520 + }, + { + "epoch": 1.76, + "learning_rate": 4.129708453476131e-05, + "loss": 2.1622, + "step": 38530 + }, + { + "epoch": 1.76, + "learning_rate": 4.1294796100508034e-05, + "loss": 2.126, + "step": 38540 + }, + { + "epoch": 1.76, + "learning_rate": 4.129250766625475e-05, + "loss": 2.1473, + "step": 38550 + }, + { + "epoch": 1.76, + "learning_rate": 4.129021923200146e-05, + "loss": 2.3138, + "step": 38560 + }, + { + "epoch": 1.76, + "learning_rate": 4.128793079774818e-05, + "loss": 2.2366, + "step": 38570 + }, + { + "epoch": 1.76, + "learning_rate": 4.12856423634949e-05, + "loss": 2.238, + "step": 38580 + }, + { + "epoch": 1.76, + "learning_rate": 4.128335392924161e-05, + "loss": 2.1841, + "step": 38590 + }, + { + "epoch": 1.76, + "learning_rate": 4.128106549498833e-05, + "loss": 2.1524, + "step": 38600 + }, + { + "epoch": 1.76, + "learning_rate": 4.1278777060735046e-05, + "loss": 2.1959, + "step": 38610 + }, + { + "epoch": 1.76, + "learning_rate": 4.127648862648176e-05, + "loss": 2.2588, + "step": 38620 + }, + { + "epoch": 1.76, + "learning_rate": 4.127420019222848e-05, + "loss": 2.3358, + "step": 38630 + }, + { + "epoch": 1.76, + "learning_rate": 4.1271911757975196e-05, + "loss": 2.1211, + "step": 38640 + }, + { + "epoch": 1.76, + "learning_rate": 4.126962332372191e-05, + "loss": 2.199, + "step": 38650 + }, + { + "epoch": 1.77, + "learning_rate": 4.126733488946863e-05, + "loss": 2.1338, + "step": 38660 + }, + { + "epoch": 1.77, + "learning_rate": 4.1265046455215345e-05, + "loss": 2.1359, + "step": 38670 + }, + { + "epoch": 1.77, + "learning_rate": 4.126275802096206e-05, + "loss": 2.2716, + "step": 38680 + }, + { + "epoch": 1.77, + "learning_rate": 4.126046958670878e-05, + "loss": 2.264, + "step": 38690 + }, + { + "epoch": 1.77, + "learning_rate": 4.1258181152455494e-05, + "loss": 2.0558, + "step": 38700 + }, + { + "epoch": 1.77, + "learning_rate": 4.125589271820221e-05, + "loss": 2.3346, + "step": 38710 + }, + { + "epoch": 1.77, + "learning_rate": 4.125360428394893e-05, + "loss": 2.1571, + "step": 38720 + }, + { + "epoch": 1.77, + "learning_rate": 4.1251315849695636e-05, + "loss": 2.1586, + "step": 38730 + }, + { + "epoch": 1.77, + "learning_rate": 4.124902741544236e-05, + "loss": 2.2251, + "step": 38740 + }, + { + "epoch": 1.77, + "learning_rate": 4.124673898118907e-05, + "loss": 2.0975, + "step": 38750 + }, + { + "epoch": 1.77, + "learning_rate": 4.1244450546935785e-05, + "loss": 2.2225, + "step": 38760 + }, + { + "epoch": 1.77, + "learning_rate": 4.1242162112682506e-05, + "loss": 2.1715, + "step": 38770 + }, + { + "epoch": 1.77, + "learning_rate": 4.123987367842922e-05, + "loss": 2.1969, + "step": 38780 + }, + { + "epoch": 1.77, + "learning_rate": 4.1237585244175935e-05, + "loss": 2.134, + "step": 38790 + }, + { + "epoch": 1.77, + "learning_rate": 4.1235296809922656e-05, + "loss": 2.2035, + "step": 38800 + }, + { + "epoch": 1.77, + "learning_rate": 4.123300837566937e-05, + "loss": 2.0994, + "step": 38810 + }, + { + "epoch": 1.77, + "learning_rate": 4.1230719941416084e-05, + "loss": 2.1816, + "step": 38820 + }, + { + "epoch": 1.77, + "learning_rate": 4.1228431507162805e-05, + "loss": 2.1874, + "step": 38830 + }, + { + "epoch": 1.77, + "learning_rate": 4.122614307290952e-05, + "loss": 2.3196, + "step": 38840 + }, + { + "epoch": 1.77, + "learning_rate": 4.122385463865623e-05, + "loss": 2.1777, + "step": 38850 + }, + { + "epoch": 1.77, + "learning_rate": 4.122156620440295e-05, + "loss": 2.2047, + "step": 38860 + }, + { + "epoch": 1.77, + "learning_rate": 4.121927777014967e-05, + "loss": 2.1705, + "step": 38870 + }, + { + "epoch": 1.78, + "learning_rate": 4.121698933589638e-05, + "loss": 2.0595, + "step": 38880 + }, + { + "epoch": 1.78, + "learning_rate": 4.1214700901643096e-05, + "loss": 2.299, + "step": 38890 + }, + { + "epoch": 1.78, + "learning_rate": 4.121241246738982e-05, + "loss": 2.1413, + "step": 38900 + }, + { + "epoch": 1.78, + "learning_rate": 4.121012403313653e-05, + "loss": 2.0972, + "step": 38910 + }, + { + "epoch": 1.78, + "learning_rate": 4.1207835598883245e-05, + "loss": 2.21, + "step": 38920 + }, + { + "epoch": 1.78, + "learning_rate": 4.120554716462996e-05, + "loss": 2.3894, + "step": 38930 + }, + { + "epoch": 1.78, + "learning_rate": 4.1203258730376674e-05, + "loss": 2.1682, + "step": 38940 + }, + { + "epoch": 1.78, + "learning_rate": 4.1200970296123395e-05, + "loss": 2.2609, + "step": 38950 + }, + { + "epoch": 1.78, + "learning_rate": 4.119868186187011e-05, + "loss": 2.1922, + "step": 38960 + }, + { + "epoch": 1.78, + "learning_rate": 4.119639342761682e-05, + "loss": 2.2527, + "step": 38970 + }, + { + "epoch": 1.78, + "learning_rate": 4.1194104993363544e-05, + "loss": 2.1982, + "step": 38980 + }, + { + "epoch": 1.78, + "learning_rate": 4.119181655911026e-05, + "loss": 2.15, + "step": 38990 + }, + { + "epoch": 1.78, + "learning_rate": 4.118952812485697e-05, + "loss": 2.0954, + "step": 39000 + }, + { + "epoch": 1.78, + "learning_rate": 4.118723969060369e-05, + "loss": 2.2751, + "step": 39010 + }, + { + "epoch": 1.78, + "learning_rate": 4.118495125635041e-05, + "loss": 2.2016, + "step": 39020 + }, + { + "epoch": 1.78, + "learning_rate": 4.118266282209712e-05, + "loss": 2.1387, + "step": 39030 + }, + { + "epoch": 1.78, + "learning_rate": 4.118037438784384e-05, + "loss": 2.2879, + "step": 39040 + }, + { + "epoch": 1.78, + "learning_rate": 4.1178085953590556e-05, + "loss": 2.1391, + "step": 39050 + }, + { + "epoch": 1.78, + "learning_rate": 4.117579751933727e-05, + "loss": 2.0418, + "step": 39060 + }, + { + "epoch": 1.78, + "learning_rate": 4.117350908508399e-05, + "loss": 2.2904, + "step": 39070 + }, + { + "epoch": 1.78, + "learning_rate": 4.1171220650830705e-05, + "loss": 2.188, + "step": 39080 + }, + { + "epoch": 1.78, + "learning_rate": 4.116893221657742e-05, + "loss": 2.06, + "step": 39090 + }, + { + "epoch": 1.79, + "learning_rate": 4.116664378232414e-05, + "loss": 2.0672, + "step": 39100 + }, + { + "epoch": 1.79, + "learning_rate": 4.1164355348070855e-05, + "loss": 2.2613, + "step": 39110 + }, + { + "epoch": 1.79, + "learning_rate": 4.116206691381757e-05, + "loss": 2.1939, + "step": 39120 + }, + { + "epoch": 1.79, + "learning_rate": 4.115977847956428e-05, + "loss": 2.2416, + "step": 39130 + }, + { + "epoch": 1.79, + "learning_rate": 4.1157490045311e-05, + "loss": 2.2868, + "step": 39140 + }, + { + "epoch": 1.79, + "learning_rate": 4.115520161105771e-05, + "loss": 2.2093, + "step": 39150 + }, + { + "epoch": 1.79, + "learning_rate": 4.115291317680443e-05, + "loss": 2.1679, + "step": 39160 + }, + { + "epoch": 1.79, + "learning_rate": 4.1150624742551146e-05, + "loss": 2.0698, + "step": 39170 + }, + { + "epoch": 1.79, + "learning_rate": 4.114833630829786e-05, + "loss": 2.1162, + "step": 39180 + }, + { + "epoch": 1.79, + "learning_rate": 4.114604787404458e-05, + "loss": 2.2055, + "step": 39190 + }, + { + "epoch": 1.79, + "learning_rate": 4.1143759439791295e-05, + "loss": 2.1557, + "step": 39200 + }, + { + "epoch": 1.79, + "learning_rate": 4.114147100553801e-05, + "loss": 2.2299, + "step": 39210 + }, + { + "epoch": 1.79, + "learning_rate": 4.113918257128473e-05, + "loss": 2.2486, + "step": 39220 + }, + { + "epoch": 1.79, + "learning_rate": 4.1136894137031445e-05, + "loss": 2.2951, + "step": 39230 + }, + { + "epoch": 1.79, + "learning_rate": 4.113460570277816e-05, + "loss": 2.1963, + "step": 39240 + }, + { + "epoch": 1.79, + "learning_rate": 4.113231726852488e-05, + "loss": 2.3398, + "step": 39250 + }, + { + "epoch": 1.79, + "learning_rate": 4.1130028834271594e-05, + "loss": 2.1408, + "step": 39260 + }, + { + "epoch": 1.79, + "learning_rate": 4.112774040001831e-05, + "loss": 2.2601, + "step": 39270 + }, + { + "epoch": 1.79, + "learning_rate": 4.112545196576503e-05, + "loss": 2.31, + "step": 39280 + }, + { + "epoch": 1.79, + "learning_rate": 4.112316353151174e-05, + "loss": 2.1315, + "step": 39290 + }, + { + "epoch": 1.79, + "learning_rate": 4.112087509725846e-05, + "loss": 2.0875, + "step": 39300 + }, + { + "epoch": 1.79, + "learning_rate": 4.111858666300518e-05, + "loss": 2.1373, + "step": 39310 + }, + { + "epoch": 1.8, + "learning_rate": 4.1116298228751885e-05, + "loss": 2.1675, + "step": 39320 + }, + { + "epoch": 1.8, + "learning_rate": 4.1114009794498606e-05, + "loss": 2.0679, + "step": 39330 + }, + { + "epoch": 1.8, + "learning_rate": 4.111172136024532e-05, + "loss": 2.2478, + "step": 39340 + }, + { + "epoch": 1.8, + "learning_rate": 4.1109432925992035e-05, + "loss": 2.1606, + "step": 39350 + }, + { + "epoch": 1.8, + "learning_rate": 4.1107144491738755e-05, + "loss": 2.1569, + "step": 39360 + }, + { + "epoch": 1.8, + "learning_rate": 4.110485605748547e-05, + "loss": 2.1405, + "step": 39370 + }, + { + "epoch": 1.8, + "learning_rate": 4.1102567623232184e-05, + "loss": 2.0152, + "step": 39380 + }, + { + "epoch": 1.8, + "learning_rate": 4.1100279188978905e-05, + "loss": 2.1416, + "step": 39390 + }, + { + "epoch": 1.8, + "learning_rate": 4.109799075472562e-05, + "loss": 2.1287, + "step": 39400 + }, + { + "epoch": 1.8, + "learning_rate": 4.109570232047233e-05, + "loss": 2.257, + "step": 39410 + }, + { + "epoch": 1.8, + "learning_rate": 4.1093413886219054e-05, + "loss": 2.3246, + "step": 39420 + }, + { + "epoch": 1.8, + "learning_rate": 4.109112545196577e-05, + "loss": 2.1754, + "step": 39430 + }, + { + "epoch": 1.8, + "learning_rate": 4.108883701771248e-05, + "loss": 2.2396, + "step": 39440 + }, + { + "epoch": 1.8, + "learning_rate": 4.10865485834592e-05, + "loss": 2.1357, + "step": 39450 + }, + { + "epoch": 1.8, + "learning_rate": 4.108426014920592e-05, + "loss": 2.2138, + "step": 39460 + }, + { + "epoch": 1.8, + "learning_rate": 4.108197171495263e-05, + "loss": 2.154, + "step": 39470 + }, + { + "epoch": 1.8, + "learning_rate": 4.107968328069935e-05, + "loss": 2.2399, + "step": 39480 + }, + { + "epoch": 1.8, + "learning_rate": 4.1077394846446066e-05, + "loss": 2.2361, + "step": 39490 + }, + { + "epoch": 1.8, + "learning_rate": 4.107510641219278e-05, + "loss": 2.256, + "step": 39500 + }, + { + "epoch": 1.8, + "learning_rate": 4.10728179779395e-05, + "loss": 2.3559, + "step": 39510 + }, + { + "epoch": 1.8, + "learning_rate": 4.107052954368621e-05, + "loss": 2.1513, + "step": 39520 + }, + { + "epoch": 1.81, + "learning_rate": 4.106824110943292e-05, + "loss": 2.1737, + "step": 39530 + }, + { + "epoch": 1.81, + "learning_rate": 4.1065952675179644e-05, + "loss": 2.255, + "step": 39540 + }, + { + "epoch": 1.81, + "learning_rate": 4.106366424092636e-05, + "loss": 2.2463, + "step": 39550 + }, + { + "epoch": 1.81, + "learning_rate": 4.106137580667307e-05, + "loss": 2.1985, + "step": 39560 + }, + { + "epoch": 1.81, + "learning_rate": 4.105908737241979e-05, + "loss": 1.9957, + "step": 39570 + }, + { + "epoch": 1.81, + "learning_rate": 4.105679893816651e-05, + "loss": 2.2229, + "step": 39580 + }, + { + "epoch": 1.81, + "learning_rate": 4.105451050391322e-05, + "loss": 2.2627, + "step": 39590 + }, + { + "epoch": 1.81, + "learning_rate": 4.105222206965994e-05, + "loss": 2.1253, + "step": 39600 + }, + { + "epoch": 1.81, + "learning_rate": 4.1049933635406656e-05, + "loss": 2.1862, + "step": 39610 + }, + { + "epoch": 1.81, + "learning_rate": 4.104764520115337e-05, + "loss": 2.2142, + "step": 39620 + }, + { + "epoch": 1.81, + "learning_rate": 4.104535676690009e-05, + "loss": 2.241, + "step": 39630 + }, + { + "epoch": 1.81, + "learning_rate": 4.1043068332646805e-05, + "loss": 2.3106, + "step": 39640 + }, + { + "epoch": 1.81, + "learning_rate": 4.104077989839352e-05, + "loss": 2.0868, + "step": 39650 + }, + { + "epoch": 1.81, + "learning_rate": 4.103849146414024e-05, + "loss": 2.093, + "step": 39660 + }, + { + "epoch": 1.81, + "learning_rate": 4.1036203029886955e-05, + "loss": 2.337, + "step": 39670 + }, + { + "epoch": 1.81, + "learning_rate": 4.103391459563367e-05, + "loss": 2.1856, + "step": 39680 + }, + { + "epoch": 1.81, + "learning_rate": 4.103162616138039e-05, + "loss": 2.2584, + "step": 39690 + }, + { + "epoch": 1.81, + "learning_rate": 4.1029337727127104e-05, + "loss": 2.2237, + "step": 39700 + }, + { + "epoch": 1.81, + "learning_rate": 4.102704929287382e-05, + "loss": 2.1445, + "step": 39710 + }, + { + "epoch": 1.81, + "learning_rate": 4.102476085862053e-05, + "loss": 2.1212, + "step": 39720 + }, + { + "epoch": 1.81, + "learning_rate": 4.1022472424367246e-05, + "loss": 2.3353, + "step": 39730 + }, + { + "epoch": 1.81, + "learning_rate": 4.102018399011397e-05, + "loss": 2.299, + "step": 39740 + }, + { + "epoch": 1.82, + "learning_rate": 4.101789555586068e-05, + "loss": 2.1968, + "step": 39750 + }, + { + "epoch": 1.82, + "learning_rate": 4.1015607121607395e-05, + "loss": 2.2367, + "step": 39760 + }, + { + "epoch": 1.82, + "learning_rate": 4.1013318687354116e-05, + "loss": 2.1524, + "step": 39770 + }, + { + "epoch": 1.82, + "learning_rate": 4.101103025310083e-05, + "loss": 2.0985, + "step": 39780 + }, + { + "epoch": 1.82, + "learning_rate": 4.1008741818847544e-05, + "loss": 2.2321, + "step": 39790 + }, + { + "epoch": 1.82, + "learning_rate": 4.1006453384594265e-05, + "loss": 2.2173, + "step": 39800 + }, + { + "epoch": 1.82, + "learning_rate": 4.100416495034098e-05, + "loss": 2.1457, + "step": 39810 + }, + { + "epoch": 1.82, + "learning_rate": 4.1001876516087694e-05, + "loss": 2.1098, + "step": 39820 + }, + { + "epoch": 1.82, + "learning_rate": 4.0999588081834415e-05, + "loss": 2.1585, + "step": 39830 + }, + { + "epoch": 1.82, + "learning_rate": 4.099729964758113e-05, + "loss": 2.1385, + "step": 39840 + }, + { + "epoch": 1.82, + "learning_rate": 4.099501121332784e-05, + "loss": 2.2087, + "step": 39850 + }, + { + "epoch": 1.82, + "learning_rate": 4.0992722779074564e-05, + "loss": 2.1661, + "step": 39860 + }, + { + "epoch": 1.82, + "learning_rate": 4.099043434482128e-05, + "loss": 2.3008, + "step": 39870 + }, + { + "epoch": 1.82, + "learning_rate": 4.098814591056799e-05, + "loss": 2.1823, + "step": 39880 + }, + { + "epoch": 1.82, + "learning_rate": 4.098585747631471e-05, + "loss": 2.1454, + "step": 39890 + }, + { + "epoch": 1.82, + "learning_rate": 4.098356904206143e-05, + "loss": 2.4635, + "step": 39900 + }, + { + "epoch": 1.82, + "learning_rate": 4.098128060780814e-05, + "loss": 2.1268, + "step": 39910 + }, + { + "epoch": 1.82, + "learning_rate": 4.0978992173554855e-05, + "loss": 2.2736, + "step": 39920 + }, + { + "epoch": 1.82, + "learning_rate": 4.097670373930157e-05, + "loss": 2.1329, + "step": 39930 + }, + { + "epoch": 1.82, + "learning_rate": 4.0974415305048284e-05, + "loss": 2.1597, + "step": 39940 + }, + { + "epoch": 1.82, + "learning_rate": 4.0972126870795005e-05, + "loss": 2.2167, + "step": 39950 + }, + { + "epoch": 1.82, + "learning_rate": 4.096983843654172e-05, + "loss": 2.2999, + "step": 39960 + }, + { + "epoch": 1.83, + "learning_rate": 4.096755000228843e-05, + "loss": 2.1745, + "step": 39970 + }, + { + "epoch": 1.83, + "learning_rate": 4.0965261568035154e-05, + "loss": 2.1963, + "step": 39980 + }, + { + "epoch": 1.83, + "learning_rate": 4.096297313378187e-05, + "loss": 2.2926, + "step": 39990 + }, + { + "epoch": 1.83, + "learning_rate": 4.096068469952858e-05, + "loss": 2.2, + "step": 40000 + }, + { + "epoch": 1.83, + "learning_rate": 4.09583962652753e-05, + "loss": 2.3226, + "step": 40010 + }, + { + "epoch": 1.83, + "learning_rate": 4.095610783102202e-05, + "loss": 2.0848, + "step": 40020 + }, + { + "epoch": 1.83, + "learning_rate": 4.095381939676873e-05, + "loss": 2.0445, + "step": 40030 + }, + { + "epoch": 1.83, + "learning_rate": 4.095153096251545e-05, + "loss": 2.0948, + "step": 40040 + }, + { + "epoch": 1.83, + "learning_rate": 4.0949242528262166e-05, + "loss": 2.3205, + "step": 40050 + }, + { + "epoch": 1.83, + "learning_rate": 4.094695409400888e-05, + "loss": 2.2164, + "step": 40060 + }, + { + "epoch": 1.83, + "learning_rate": 4.09446656597556e-05, + "loss": 2.2814, + "step": 40070 + }, + { + "epoch": 1.83, + "learning_rate": 4.0942377225502315e-05, + "loss": 2.0768, + "step": 40080 + }, + { + "epoch": 1.83, + "learning_rate": 4.094008879124903e-05, + "loss": 2.2799, + "step": 40090 + }, + { + "epoch": 1.83, + "learning_rate": 4.093780035699575e-05, + "loss": 2.1764, + "step": 40100 + }, + { + "epoch": 1.83, + "learning_rate": 4.093551192274246e-05, + "loss": 2.2403, + "step": 40110 + }, + { + "epoch": 1.83, + "learning_rate": 4.093322348848917e-05, + "loss": 2.1449, + "step": 40120 + }, + { + "epoch": 1.83, + "learning_rate": 4.093093505423589e-05, + "loss": 2.0456, + "step": 40130 + }, + { + "epoch": 1.83, + "learning_rate": 4.092864661998261e-05, + "loss": 2.2687, + "step": 40140 + }, + { + "epoch": 1.83, + "learning_rate": 4.092635818572932e-05, + "loss": 2.1992, + "step": 40150 + }, + { + "epoch": 1.83, + "learning_rate": 4.092406975147604e-05, + "loss": 2.083, + "step": 40160 + }, + { + "epoch": 1.83, + "learning_rate": 4.0921781317222756e-05, + "loss": 2.2419, + "step": 40170 + }, + { + "epoch": 1.83, + "learning_rate": 4.091949288296947e-05, + "loss": 2.376, + "step": 40180 + }, + { + "epoch": 1.84, + "learning_rate": 4.091720444871619e-05, + "loss": 2.2283, + "step": 40190 + }, + { + "epoch": 1.84, + "learning_rate": 4.0914916014462905e-05, + "loss": 2.1643, + "step": 40200 + }, + { + "epoch": 1.84, + "learning_rate": 4.091262758020962e-05, + "loss": 2.1368, + "step": 40210 + }, + { + "epoch": 1.84, + "learning_rate": 4.091033914595634e-05, + "loss": 2.1561, + "step": 40220 + }, + { + "epoch": 1.84, + "learning_rate": 4.0908050711703054e-05, + "loss": 2.1866, + "step": 40230 + }, + { + "epoch": 1.84, + "learning_rate": 4.090576227744977e-05, + "loss": 2.1337, + "step": 40240 + }, + { + "epoch": 1.84, + "learning_rate": 4.090347384319649e-05, + "loss": 2.3041, + "step": 40250 + }, + { + "epoch": 1.84, + "learning_rate": 4.0901185408943204e-05, + "loss": 2.2959, + "step": 40260 + }, + { + "epoch": 1.84, + "learning_rate": 4.089889697468992e-05, + "loss": 2.2023, + "step": 40270 + }, + { + "epoch": 1.84, + "learning_rate": 4.089660854043664e-05, + "loss": 2.2773, + "step": 40280 + }, + { + "epoch": 1.84, + "learning_rate": 4.089432010618335e-05, + "loss": 2.1231, + "step": 40290 + }, + { + "epoch": 1.84, + "learning_rate": 4.089203167193007e-05, + "loss": 2.2303, + "step": 40300 + }, + { + "epoch": 1.84, + "learning_rate": 4.088974323767678e-05, + "loss": 2.239, + "step": 40310 + }, + { + "epoch": 1.84, + "learning_rate": 4.0887454803423495e-05, + "loss": 2.1627, + "step": 40320 + }, + { + "epoch": 1.84, + "learning_rate": 4.0885166369170216e-05, + "loss": 2.1551, + "step": 40330 + }, + { + "epoch": 1.84, + "learning_rate": 4.088287793491693e-05, + "loss": 2.2244, + "step": 40340 + }, + { + "epoch": 1.84, + "learning_rate": 4.0880589500663644e-05, + "loss": 2.0253, + "step": 40350 + }, + { + "epoch": 1.84, + "learning_rate": 4.0878301066410365e-05, + "loss": 2.1681, + "step": 40360 + }, + { + "epoch": 1.84, + "learning_rate": 4.087601263215708e-05, + "loss": 2.0612, + "step": 40370 + }, + { + "epoch": 1.84, + "learning_rate": 4.0873724197903794e-05, + "loss": 2.1681, + "step": 40380 + }, + { + "epoch": 1.84, + "learning_rate": 4.0871435763650514e-05, + "loss": 2.1546, + "step": 40390 + }, + { + "epoch": 1.84, + "learning_rate": 4.086914732939723e-05, + "loss": 2.2376, + "step": 40400 + }, + { + "epoch": 1.85, + "learning_rate": 4.086685889514394e-05, + "loss": 2.0168, + "step": 40410 + }, + { + "epoch": 1.85, + "learning_rate": 4.0864570460890664e-05, + "loss": 2.2137, + "step": 40420 + }, + { + "epoch": 1.85, + "learning_rate": 4.086228202663738e-05, + "loss": 2.0738, + "step": 40430 + }, + { + "epoch": 1.85, + "learning_rate": 4.085999359238409e-05, + "loss": 2.2133, + "step": 40440 + }, + { + "epoch": 1.85, + "learning_rate": 4.085770515813081e-05, + "loss": 2.1627, + "step": 40450 + }, + { + "epoch": 1.85, + "learning_rate": 4.085541672387753e-05, + "loss": 2.1162, + "step": 40460 + }, + { + "epoch": 1.85, + "learning_rate": 4.085312828962424e-05, + "loss": 2.2313, + "step": 40470 + }, + { + "epoch": 1.85, + "learning_rate": 4.085083985537096e-05, + "loss": 2.1265, + "step": 40480 + }, + { + "epoch": 1.85, + "learning_rate": 4.0848551421117676e-05, + "loss": 2.2138, + "step": 40490 + }, + { + "epoch": 1.85, + "learning_rate": 4.084626298686439e-05, + "loss": 2.171, + "step": 40500 + }, + { + "epoch": 1.85, + "learning_rate": 4.0843974552611104e-05, + "loss": 2.3195, + "step": 40510 + }, + { + "epoch": 1.85, + "learning_rate": 4.084168611835782e-05, + "loss": 2.1439, + "step": 40520 + }, + { + "epoch": 1.85, + "learning_rate": 4.083939768410453e-05, + "loss": 2.0584, + "step": 40530 + }, + { + "epoch": 1.85, + "learning_rate": 4.0837109249851254e-05, + "loss": 1.9797, + "step": 40540 + }, + { + "epoch": 1.85, + "learning_rate": 4.083482081559797e-05, + "loss": 2.0572, + "step": 40550 + }, + { + "epoch": 1.85, + "learning_rate": 4.083253238134468e-05, + "loss": 2.146, + "step": 40560 + }, + { + "epoch": 1.85, + "learning_rate": 4.08302439470914e-05, + "loss": 2.1449, + "step": 40570 + }, + { + "epoch": 1.85, + "learning_rate": 4.082795551283812e-05, + "loss": 2.1155, + "step": 40580 + }, + { + "epoch": 1.85, + "learning_rate": 4.082566707858483e-05, + "loss": 2.0917, + "step": 40590 + }, + { + "epoch": 1.85, + "learning_rate": 4.082337864433155e-05, + "loss": 2.2005, + "step": 40600 + }, + { + "epoch": 1.85, + "learning_rate": 4.0821090210078266e-05, + "loss": 2.2283, + "step": 40610 + }, + { + "epoch": 1.85, + "learning_rate": 4.081880177582498e-05, + "loss": 2.0801, + "step": 40620 + }, + { + "epoch": 1.86, + "learning_rate": 4.08165133415717e-05, + "loss": 2.1057, + "step": 40630 + }, + { + "epoch": 1.86, + "learning_rate": 4.0814224907318415e-05, + "loss": 2.1016, + "step": 40640 + }, + { + "epoch": 1.86, + "learning_rate": 4.081193647306513e-05, + "loss": 2.315, + "step": 40650 + }, + { + "epoch": 1.86, + "learning_rate": 4.080964803881185e-05, + "loss": 2.1776, + "step": 40660 + }, + { + "epoch": 1.86, + "learning_rate": 4.0807359604558564e-05, + "loss": 2.2283, + "step": 40670 + }, + { + "epoch": 1.86, + "learning_rate": 4.080507117030528e-05, + "loss": 2.208, + "step": 40680 + }, + { + "epoch": 1.86, + "learning_rate": 4.0802782736052e-05, + "loss": 2.1618, + "step": 40690 + }, + { + "epoch": 1.86, + "learning_rate": 4.0800494301798714e-05, + "loss": 2.2496, + "step": 40700 + }, + { + "epoch": 1.86, + "learning_rate": 4.079820586754543e-05, + "loss": 2.217, + "step": 40710 + }, + { + "epoch": 1.86, + "learning_rate": 4.079591743329214e-05, + "loss": 2.2432, + "step": 40720 + }, + { + "epoch": 1.86, + "learning_rate": 4.0793628999038856e-05, + "loss": 2.369, + "step": 40730 + }, + { + "epoch": 1.86, + "learning_rate": 4.079134056478558e-05, + "loss": 2.1811, + "step": 40740 + }, + { + "epoch": 1.86, + "learning_rate": 4.078905213053229e-05, + "loss": 2.2291, + "step": 40750 + }, + { + "epoch": 1.86, + "learning_rate": 4.0786763696279005e-05, + "loss": 2.1766, + "step": 40760 + }, + { + "epoch": 1.86, + "learning_rate": 4.0784475262025726e-05, + "loss": 2.2116, + "step": 40770 + }, + { + "epoch": 1.86, + "learning_rate": 4.078218682777244e-05, + "loss": 2.1091, + "step": 40780 + }, + { + "epoch": 1.86, + "learning_rate": 4.0779898393519154e-05, + "loss": 2.1593, + "step": 40790 + }, + { + "epoch": 1.86, + "learning_rate": 4.0777609959265875e-05, + "loss": 2.0119, + "step": 40800 + }, + { + "epoch": 1.86, + "learning_rate": 4.077532152501259e-05, + "loss": 2.0798, + "step": 40810 + }, + { + "epoch": 1.86, + "learning_rate": 4.0773033090759304e-05, + "loss": 2.159, + "step": 40820 + }, + { + "epoch": 1.86, + "learning_rate": 4.0770744656506024e-05, + "loss": 2.0073, + "step": 40830 + }, + { + "epoch": 1.86, + "learning_rate": 4.076845622225274e-05, + "loss": 2.2946, + "step": 40840 + }, + { + "epoch": 1.87, + "learning_rate": 4.076616778799945e-05, + "loss": 2.168, + "step": 40850 + }, + { + "epoch": 1.87, + "learning_rate": 4.0763879353746174e-05, + "loss": 2.3139, + "step": 40860 + }, + { + "epoch": 1.87, + "learning_rate": 4.076159091949289e-05, + "loss": 2.194, + "step": 40870 + }, + { + "epoch": 1.87, + "learning_rate": 4.07593024852396e-05, + "loss": 2.1492, + "step": 40880 + }, + { + "epoch": 1.87, + "learning_rate": 4.075701405098632e-05, + "loss": 2.1483, + "step": 40890 + }, + { + "epoch": 1.87, + "learning_rate": 4.075472561673303e-05, + "loss": 2.2923, + "step": 40900 + }, + { + "epoch": 1.87, + "learning_rate": 4.0752437182479744e-05, + "loss": 1.9377, + "step": 40910 + }, + { + "epoch": 1.87, + "learning_rate": 4.0750148748226465e-05, + "loss": 2.2351, + "step": 40920 + }, + { + "epoch": 1.87, + "learning_rate": 4.074786031397318e-05, + "loss": 2.1274, + "step": 40930 + }, + { + "epoch": 1.87, + "learning_rate": 4.0745571879719893e-05, + "loss": 2.1766, + "step": 40940 + }, + { + "epoch": 1.87, + "learning_rate": 4.0743283445466614e-05, + "loss": 2.1575, + "step": 40950 + }, + { + "epoch": 1.87, + "learning_rate": 4.074099501121333e-05, + "loss": 2.1387, + "step": 40960 + }, + { + "epoch": 1.87, + "learning_rate": 4.073870657696004e-05, + "loss": 2.2917, + "step": 40970 + }, + { + "epoch": 1.87, + "learning_rate": 4.0736418142706764e-05, + "loss": 2.0953, + "step": 40980 + }, + { + "epoch": 1.87, + "learning_rate": 4.073412970845348e-05, + "loss": 2.1902, + "step": 40990 + }, + { + "epoch": 1.87, + "learning_rate": 4.073184127420019e-05, + "loss": 2.1813, + "step": 41000 + }, + { + "epoch": 1.87, + "learning_rate": 4.072955283994691e-05, + "loss": 2.2685, + "step": 41010 + }, + { + "epoch": 1.87, + "learning_rate": 4.072726440569363e-05, + "loss": 2.0449, + "step": 41020 + }, + { + "epoch": 1.87, + "learning_rate": 4.072497597144034e-05, + "loss": 2.3287, + "step": 41030 + }, + { + "epoch": 1.87, + "learning_rate": 4.072268753718706e-05, + "loss": 2.0608, + "step": 41040 + }, + { + "epoch": 1.87, + "learning_rate": 4.0720399102933776e-05, + "loss": 2.103, + "step": 41050 + }, + { + "epoch": 1.87, + "learning_rate": 4.071811066868049e-05, + "loss": 2.1679, + "step": 41060 + }, + { + "epoch": 1.88, + "learning_rate": 4.071582223442721e-05, + "loss": 2.0936, + "step": 41070 + }, + { + "epoch": 1.88, + "learning_rate": 4.0713533800173925e-05, + "loss": 2.074, + "step": 41080 + }, + { + "epoch": 1.88, + "learning_rate": 4.071124536592064e-05, + "loss": 2.1266, + "step": 41090 + }, + { + "epoch": 1.88, + "learning_rate": 4.0708956931667353e-05, + "loss": 2.078, + "step": 41100 + }, + { + "epoch": 1.88, + "learning_rate": 4.070666849741407e-05, + "loss": 2.1907, + "step": 41110 + }, + { + "epoch": 1.88, + "learning_rate": 4.070438006316079e-05, + "loss": 2.209, + "step": 41120 + }, + { + "epoch": 1.88, + "learning_rate": 4.07020916289075e-05, + "loss": 2.1901, + "step": 41130 + }, + { + "epoch": 1.88, + "learning_rate": 4.069980319465422e-05, + "loss": 2.132, + "step": 41140 + }, + { + "epoch": 1.88, + "learning_rate": 4.069751476040094e-05, + "loss": 2.1383, + "step": 41150 + }, + { + "epoch": 1.88, + "learning_rate": 4.069522632614765e-05, + "loss": 2.1652, + "step": 41160 + }, + { + "epoch": 1.88, + "learning_rate": 4.0692937891894366e-05, + "loss": 2.0922, + "step": 41170 + }, + { + "epoch": 1.88, + "learning_rate": 4.069064945764109e-05, + "loss": 2.2562, + "step": 41180 + }, + { + "epoch": 1.88, + "learning_rate": 4.06883610233878e-05, + "loss": 2.2509, + "step": 41190 + }, + { + "epoch": 1.88, + "learning_rate": 4.0686072589134515e-05, + "loss": 2.1104, + "step": 41200 + }, + { + "epoch": 1.88, + "learning_rate": 4.068378415488123e-05, + "loss": 2.1704, + "step": 41210 + }, + { + "epoch": 1.88, + "learning_rate": 4.068149572062795e-05, + "loss": 2.1106, + "step": 41220 + }, + { + "epoch": 1.88, + "learning_rate": 4.0679207286374664e-05, + "loss": 2.1684, + "step": 41230 + }, + { + "epoch": 1.88, + "learning_rate": 4.067691885212138e-05, + "loss": 2.0325, + "step": 41240 + }, + { + "epoch": 1.88, + "learning_rate": 4.06746304178681e-05, + "loss": 2.1113, + "step": 41250 + }, + { + "epoch": 1.88, + "learning_rate": 4.0672341983614813e-05, + "loss": 2.2521, + "step": 41260 + }, + { + "epoch": 1.88, + "learning_rate": 4.067005354936153e-05, + "loss": 2.3279, + "step": 41270 + }, + { + "epoch": 1.88, + "learning_rate": 4.066776511510825e-05, + "loss": 2.2887, + "step": 41280 + }, + { + "epoch": 1.89, + "learning_rate": 4.066547668085496e-05, + "loss": 2.1898, + "step": 41290 + }, + { + "epoch": 1.89, + "learning_rate": 4.066318824660168e-05, + "loss": 2.0979, + "step": 41300 + }, + { + "epoch": 1.89, + "learning_rate": 4.066089981234839e-05, + "loss": 2.1183, + "step": 41310 + }, + { + "epoch": 1.89, + "learning_rate": 4.0658611378095105e-05, + "loss": 2.0995, + "step": 41320 + }, + { + "epoch": 1.89, + "learning_rate": 4.0656322943841826e-05, + "loss": 2.0172, + "step": 41330 + }, + { + "epoch": 1.89, + "learning_rate": 4.065403450958854e-05, + "loss": 2.1919, + "step": 41340 + }, + { + "epoch": 1.89, + "learning_rate": 4.0651746075335254e-05, + "loss": 2.1032, + "step": 41350 + }, + { + "epoch": 1.89, + "learning_rate": 4.0649457641081975e-05, + "loss": 2.1869, + "step": 41360 + }, + { + "epoch": 1.89, + "learning_rate": 4.064716920682869e-05, + "loss": 2.1594, + "step": 41370 + }, + { + "epoch": 1.89, + "learning_rate": 4.0644880772575403e-05, + "loss": 2.1721, + "step": 41380 + }, + { + "epoch": 1.89, + "learning_rate": 4.0642592338322124e-05, + "loss": 2.2419, + "step": 41390 + }, + { + "epoch": 1.89, + "learning_rate": 4.064030390406884e-05, + "loss": 2.1785, + "step": 41400 + }, + { + "epoch": 1.89, + "learning_rate": 4.063801546981555e-05, + "loss": 2.1928, + "step": 41410 + }, + { + "epoch": 1.89, + "learning_rate": 4.0635727035562274e-05, + "loss": 2.2218, + "step": 41420 + }, + { + "epoch": 1.89, + "learning_rate": 4.063343860130899e-05, + "loss": 2.2249, + "step": 41430 + }, + { + "epoch": 1.89, + "learning_rate": 4.06311501670557e-05, + "loss": 2.3241, + "step": 41440 + }, + { + "epoch": 1.89, + "learning_rate": 4.062886173280242e-05, + "loss": 1.9521, + "step": 41450 + }, + { + "epoch": 1.89, + "learning_rate": 4.062657329854914e-05, + "loss": 2.1092, + "step": 41460 + }, + { + "epoch": 1.89, + "learning_rate": 4.062428486429585e-05, + "loss": 1.9185, + "step": 41470 + }, + { + "epoch": 1.89, + "learning_rate": 4.062199643004257e-05, + "loss": 2.2682, + "step": 41480 + }, + { + "epoch": 1.89, + "learning_rate": 4.0619707995789286e-05, + "loss": 2.0564, + "step": 41490 + }, + { + "epoch": 1.89, + "learning_rate": 4.061741956153599e-05, + "loss": 2.0998, + "step": 41500 + }, + { + "epoch": 1.9, + "learning_rate": 4.0615131127282714e-05, + "loss": 2.0906, + "step": 41510 + }, + { + "epoch": 1.9, + "learning_rate": 4.061284269302943e-05, + "loss": 2.1912, + "step": 41520 + }, + { + "epoch": 1.9, + "learning_rate": 4.061055425877614e-05, + "loss": 2.157, + "step": 41530 + }, + { + "epoch": 1.9, + "learning_rate": 4.0608265824522863e-05, + "loss": 2.2169, + "step": 41540 + }, + { + "epoch": 1.9, + "learning_rate": 4.060597739026958e-05, + "loss": 2.1296, + "step": 41550 + }, + { + "epoch": 1.9, + "learning_rate": 4.060368895601629e-05, + "loss": 2.1135, + "step": 41560 + }, + { + "epoch": 1.9, + "learning_rate": 4.060140052176301e-05, + "loss": 2.0258, + "step": 41570 + }, + { + "epoch": 1.9, + "learning_rate": 4.059911208750973e-05, + "loss": 2.213, + "step": 41580 + }, + { + "epoch": 1.9, + "learning_rate": 4.059682365325644e-05, + "loss": 2.1745, + "step": 41590 + }, + { + "epoch": 1.9, + "learning_rate": 4.059453521900316e-05, + "loss": 2.2026, + "step": 41600 + }, + { + "epoch": 1.9, + "learning_rate": 4.0592246784749876e-05, + "loss": 2.2578, + "step": 41610 + }, + { + "epoch": 1.9, + "learning_rate": 4.058995835049659e-05, + "loss": 2.2816, + "step": 41620 + }, + { + "epoch": 1.9, + "learning_rate": 4.058766991624331e-05, + "loss": 2.0974, + "step": 41630 + }, + { + "epoch": 1.9, + "learning_rate": 4.0585381481990025e-05, + "loss": 2.2395, + "step": 41640 + }, + { + "epoch": 1.9, + "learning_rate": 4.058309304773674e-05, + "loss": 2.0941, + "step": 41650 + }, + { + "epoch": 1.9, + "learning_rate": 4.058080461348346e-05, + "loss": 1.9998, + "step": 41660 + }, + { + "epoch": 1.9, + "learning_rate": 4.0578516179230174e-05, + "loss": 2.2752, + "step": 41670 + }, + { + "epoch": 1.9, + "learning_rate": 4.057622774497689e-05, + "loss": 2.2109, + "step": 41680 + }, + { + "epoch": 1.9, + "learning_rate": 4.05739393107236e-05, + "loss": 2.163, + "step": 41690 + }, + { + "epoch": 1.9, + "learning_rate": 4.057165087647032e-05, + "loss": 2.3022, + "step": 41700 + }, + { + "epoch": 1.9, + "learning_rate": 4.056936244221704e-05, + "loss": 2.2563, + "step": 41710 + }, + { + "epoch": 1.91, + "learning_rate": 4.056707400796375e-05, + "loss": 2.3956, + "step": 41720 + }, + { + "epoch": 1.91, + "learning_rate": 4.0564785573710466e-05, + "loss": 2.2816, + "step": 41730 + }, + { + "epoch": 1.91, + "learning_rate": 4.056249713945719e-05, + "loss": 2.104, + "step": 41740 + }, + { + "epoch": 1.91, + "learning_rate": 4.05602087052039e-05, + "loss": 2.1239, + "step": 41750 + }, + { + "epoch": 1.91, + "learning_rate": 4.0557920270950615e-05, + "loss": 2.2892, + "step": 41760 + }, + { + "epoch": 1.91, + "learning_rate": 4.0555631836697336e-05, + "loss": 2.1902, + "step": 41770 + }, + { + "epoch": 1.91, + "learning_rate": 4.055334340244405e-05, + "loss": 2.1027, + "step": 41780 + }, + { + "epoch": 1.91, + "learning_rate": 4.0551054968190764e-05, + "loss": 2.2769, + "step": 41790 + }, + { + "epoch": 1.91, + "learning_rate": 4.0548766533937485e-05, + "loss": 2.2215, + "step": 41800 + }, + { + "epoch": 1.91, + "learning_rate": 4.05464780996842e-05, + "loss": 2.1912, + "step": 41810 + }, + { + "epoch": 1.91, + "learning_rate": 4.0544189665430913e-05, + "loss": 2.1736, + "step": 41820 + }, + { + "epoch": 1.91, + "learning_rate": 4.0541901231177634e-05, + "loss": 2.1454, + "step": 41830 + }, + { + "epoch": 1.91, + "learning_rate": 4.053961279692435e-05, + "loss": 2.1703, + "step": 41840 + }, + { + "epoch": 1.91, + "learning_rate": 4.053732436267106e-05, + "loss": 1.9324, + "step": 41850 + }, + { + "epoch": 1.91, + "learning_rate": 4.0535035928417783e-05, + "loss": 2.1962, + "step": 41860 + }, + { + "epoch": 1.91, + "learning_rate": 4.05327474941645e-05, + "loss": 2.296, + "step": 41870 + }, + { + "epoch": 1.91, + "learning_rate": 4.053045905991121e-05, + "loss": 2.2037, + "step": 41880 + }, + { + "epoch": 1.91, + "learning_rate": 4.0528170625657926e-05, + "loss": 2.0787, + "step": 41890 + }, + { + "epoch": 1.91, + "learning_rate": 4.052588219140464e-05, + "loss": 2.1846, + "step": 41900 + }, + { + "epoch": 1.91, + "learning_rate": 4.0523593757151354e-05, + "loss": 2.1403, + "step": 41910 + }, + { + "epoch": 1.91, + "learning_rate": 4.0521305322898075e-05, + "loss": 2.2452, + "step": 41920 + }, + { + "epoch": 1.91, + "learning_rate": 4.051901688864479e-05, + "loss": 2.1944, + "step": 41930 + }, + { + "epoch": 1.92, + "learning_rate": 4.05167284543915e-05, + "loss": 2.1472, + "step": 41940 + }, + { + "epoch": 1.92, + "learning_rate": 4.0514440020138224e-05, + "loss": 2.2134, + "step": 41950 + }, + { + "epoch": 1.92, + "learning_rate": 4.051215158588494e-05, + "loss": 2.1777, + "step": 41960 + }, + { + "epoch": 1.92, + "learning_rate": 4.050986315163165e-05, + "loss": 2.1252, + "step": 41970 + }, + { + "epoch": 1.92, + "learning_rate": 4.0507574717378373e-05, + "loss": 2.2653, + "step": 41980 + }, + { + "epoch": 1.92, + "learning_rate": 4.050528628312509e-05, + "loss": 2.3367, + "step": 41990 + }, + { + "epoch": 1.92, + "learning_rate": 4.05029978488718e-05, + "loss": 2.1017, + "step": 42000 + }, + { + "epoch": 1.92, + "learning_rate": 4.050070941461852e-05, + "loss": 2.2148, + "step": 42010 + }, + { + "epoch": 1.92, + "learning_rate": 4.049842098036524e-05, + "loss": 2.1486, + "step": 42020 + }, + { + "epoch": 1.92, + "learning_rate": 4.049613254611195e-05, + "loss": 2.2734, + "step": 42030 + }, + { + "epoch": 1.92, + "learning_rate": 4.049384411185867e-05, + "loss": 2.1611, + "step": 42040 + }, + { + "epoch": 1.92, + "learning_rate": 4.0491555677605386e-05, + "loss": 2.0876, + "step": 42050 + }, + { + "epoch": 1.92, + "learning_rate": 4.04892672433521e-05, + "loss": 2.1585, + "step": 42060 + }, + { + "epoch": 1.92, + "learning_rate": 4.048697880909882e-05, + "loss": 2.1434, + "step": 42070 + }, + { + "epoch": 1.92, + "learning_rate": 4.0484690374845535e-05, + "loss": 2.224, + "step": 42080 + }, + { + "epoch": 1.92, + "learning_rate": 4.048240194059225e-05, + "loss": 2.1643, + "step": 42090 + }, + { + "epoch": 1.92, + "learning_rate": 4.048011350633896e-05, + "loss": 2.0263, + "step": 42100 + }, + { + "epoch": 1.92, + "learning_rate": 4.047782507208568e-05, + "loss": 2.1302, + "step": 42110 + }, + { + "epoch": 1.92, + "learning_rate": 4.04755366378324e-05, + "loss": 2.1036, + "step": 42120 + }, + { + "epoch": 1.92, + "learning_rate": 4.047324820357911e-05, + "loss": 2.2291, + "step": 42130 + }, + { + "epoch": 1.92, + "learning_rate": 4.047095976932583e-05, + "loss": 2.0645, + "step": 42140 + }, + { + "epoch": 1.92, + "learning_rate": 4.046867133507255e-05, + "loss": 2.0748, + "step": 42150 + }, + { + "epoch": 1.93, + "learning_rate": 4.046638290081926e-05, + "loss": 2.1632, + "step": 42160 + }, + { + "epoch": 1.93, + "learning_rate": 4.0464094466565976e-05, + "loss": 2.1606, + "step": 42170 + }, + { + "epoch": 1.93, + "learning_rate": 4.04618060323127e-05, + "loss": 2.1809, + "step": 42180 + }, + { + "epoch": 1.93, + "learning_rate": 4.045951759805941e-05, + "loss": 2.2701, + "step": 42190 + }, + { + "epoch": 1.93, + "learning_rate": 4.0457229163806125e-05, + "loss": 2.2346, + "step": 42200 + }, + { + "epoch": 1.93, + "learning_rate": 4.0454940729552846e-05, + "loss": 2.0805, + "step": 42210 + }, + { + "epoch": 1.93, + "learning_rate": 4.045265229529956e-05, + "loss": 2.244, + "step": 42220 + }, + { + "epoch": 1.93, + "learning_rate": 4.0450363861046274e-05, + "loss": 2.2349, + "step": 42230 + }, + { + "epoch": 1.93, + "learning_rate": 4.0448075426792995e-05, + "loss": 2.15, + "step": 42240 + }, + { + "epoch": 1.93, + "learning_rate": 4.044578699253971e-05, + "loss": 2.1146, + "step": 42250 + }, + { + "epoch": 1.93, + "learning_rate": 4.044349855828642e-05, + "loss": 2.1266, + "step": 42260 + }, + { + "epoch": 1.93, + "learning_rate": 4.0441210124033144e-05, + "loss": 2.1063, + "step": 42270 + }, + { + "epoch": 1.93, + "learning_rate": 4.043892168977986e-05, + "loss": 2.1298, + "step": 42280 + }, + { + "epoch": 1.93, + "learning_rate": 4.0436633255526566e-05, + "loss": 2.0812, + "step": 42290 + }, + { + "epoch": 1.93, + "learning_rate": 4.043434482127329e-05, + "loss": 2.3123, + "step": 42300 + }, + { + "epoch": 1.93, + "learning_rate": 4.043205638702e-05, + "loss": 2.2748, + "step": 42310 + }, + { + "epoch": 1.93, + "learning_rate": 4.0429767952766715e-05, + "loss": 2.201, + "step": 42320 + }, + { + "epoch": 1.93, + "learning_rate": 4.0427479518513436e-05, + "loss": 2.0585, + "step": 42330 + }, + { + "epoch": 1.93, + "learning_rate": 4.042519108426015e-05, + "loss": 2.1917, + "step": 42340 + }, + { + "epoch": 1.93, + "learning_rate": 4.0422902650006864e-05, + "loss": 2.1519, + "step": 42350 + }, + { + "epoch": 1.93, + "learning_rate": 4.0420614215753585e-05, + "loss": 2.1532, + "step": 42360 + }, + { + "epoch": 1.93, + "learning_rate": 4.04183257815003e-05, + "loss": 2.2369, + "step": 42370 + }, + { + "epoch": 1.94, + "learning_rate": 4.041603734724701e-05, + "loss": 2.0533, + "step": 42380 + }, + { + "epoch": 1.94, + "learning_rate": 4.0413748912993734e-05, + "loss": 2.0653, + "step": 42390 + }, + { + "epoch": 1.94, + "learning_rate": 4.041146047874045e-05, + "loss": 2.1549, + "step": 42400 + }, + { + "epoch": 1.94, + "learning_rate": 4.040917204448716e-05, + "loss": 2.1142, + "step": 42410 + }, + { + "epoch": 1.94, + "learning_rate": 4.040688361023388e-05, + "loss": 2.0491, + "step": 42420 + }, + { + "epoch": 1.94, + "learning_rate": 4.04045951759806e-05, + "loss": 2.1749, + "step": 42430 + }, + { + "epoch": 1.94, + "learning_rate": 4.040230674172731e-05, + "loss": 2.2594, + "step": 42440 + }, + { + "epoch": 1.94, + "learning_rate": 4.040001830747403e-05, + "loss": 2.1713, + "step": 42450 + }, + { + "epoch": 1.94, + "learning_rate": 4.039772987322075e-05, + "loss": 2.1126, + "step": 42460 + }, + { + "epoch": 1.94, + "learning_rate": 4.039544143896746e-05, + "loss": 2.1659, + "step": 42470 + }, + { + "epoch": 1.94, + "learning_rate": 4.0393153004714175e-05, + "loss": 2.0997, + "step": 42480 + }, + { + "epoch": 1.94, + "learning_rate": 4.039086457046089e-05, + "loss": 2.1626, + "step": 42490 + }, + { + "epoch": 1.94, + "learning_rate": 4.03885761362076e-05, + "loss": 2.1459, + "step": 42500 + }, + { + "epoch": 1.94, + "learning_rate": 4.0386287701954324e-05, + "loss": 2.2279, + "step": 42510 + }, + { + "epoch": 1.94, + "learning_rate": 4.038399926770104e-05, + "loss": 2.2609, + "step": 42520 + }, + { + "epoch": 1.94, + "learning_rate": 4.038171083344775e-05, + "loss": 1.9888, + "step": 42530 + }, + { + "epoch": 1.94, + "learning_rate": 4.037942239919447e-05, + "loss": 2.101, + "step": 42540 + }, + { + "epoch": 1.94, + "learning_rate": 4.037713396494119e-05, + "loss": 2.0828, + "step": 42550 + }, + { + "epoch": 1.94, + "learning_rate": 4.03748455306879e-05, + "loss": 2.3087, + "step": 42560 + }, + { + "epoch": 1.94, + "learning_rate": 4.037255709643462e-05, + "loss": 2.0694, + "step": 42570 + }, + { + "epoch": 1.94, + "learning_rate": 4.037026866218134e-05, + "loss": 2.1631, + "step": 42580 + }, + { + "epoch": 1.94, + "learning_rate": 4.036798022792805e-05, + "loss": 2.1853, + "step": 42590 + }, + { + "epoch": 1.95, + "learning_rate": 4.036569179367477e-05, + "loss": 2.329, + "step": 42600 + }, + { + "epoch": 1.95, + "learning_rate": 4.0363403359421486e-05, + "loss": 2.0256, + "step": 42610 + }, + { + "epoch": 1.95, + "learning_rate": 4.03611149251682e-05, + "loss": 2.1325, + "step": 42620 + }, + { + "epoch": 1.95, + "learning_rate": 4.035882649091492e-05, + "loss": 2.2292, + "step": 42630 + }, + { + "epoch": 1.95, + "learning_rate": 4.0356538056661635e-05, + "loss": 2.276, + "step": 42640 + }, + { + "epoch": 1.95, + "learning_rate": 4.035424962240835e-05, + "loss": 2.0988, + "step": 42650 + }, + { + "epoch": 1.95, + "learning_rate": 4.035196118815507e-05, + "loss": 2.1693, + "step": 42660 + }, + { + "epoch": 1.95, + "learning_rate": 4.0349672753901784e-05, + "loss": 2.0843, + "step": 42670 + }, + { + "epoch": 1.95, + "learning_rate": 4.03473843196485e-05, + "loss": 2.0869, + "step": 42680 + }, + { + "epoch": 1.95, + "learning_rate": 4.034509588539521e-05, + "loss": 2.3715, + "step": 42690 + }, + { + "epoch": 1.95, + "learning_rate": 4.0342807451141927e-05, + "loss": 2.2155, + "step": 42700 + }, + { + "epoch": 1.95, + "learning_rate": 4.034051901688865e-05, + "loss": 2.0673, + "step": 42710 + }, + { + "epoch": 1.95, + "learning_rate": 4.033823058263536e-05, + "loss": 2.247, + "step": 42720 + }, + { + "epoch": 1.95, + "learning_rate": 4.0335942148382076e-05, + "loss": 2.2714, + "step": 42730 + }, + { + "epoch": 1.95, + "learning_rate": 4.03336537141288e-05, + "loss": 2.2215, + "step": 42740 + }, + { + "epoch": 1.95, + "learning_rate": 4.033136527987551e-05, + "loss": 2.2548, + "step": 42750 + }, + { + "epoch": 1.95, + "learning_rate": 4.0329076845622225e-05, + "loss": 2.1048, + "step": 42760 + }, + { + "epoch": 1.95, + "learning_rate": 4.0326788411368946e-05, + "loss": 2.1272, + "step": 42770 + }, + { + "epoch": 1.95, + "learning_rate": 4.032449997711566e-05, + "loss": 2.2124, + "step": 42780 + }, + { + "epoch": 1.95, + "learning_rate": 4.0322211542862374e-05, + "loss": 2.3664, + "step": 42790 + }, + { + "epoch": 1.95, + "learning_rate": 4.0319923108609095e-05, + "loss": 2.035, + "step": 42800 + }, + { + "epoch": 1.95, + "learning_rate": 4.031763467435581e-05, + "loss": 2.3679, + "step": 42810 + }, + { + "epoch": 1.96, + "learning_rate": 4.031534624010252e-05, + "loss": 2.1594, + "step": 42820 + }, + { + "epoch": 1.96, + "learning_rate": 4.0313057805849244e-05, + "loss": 2.1219, + "step": 42830 + }, + { + "epoch": 1.96, + "learning_rate": 4.031076937159596e-05, + "loss": 2.0039, + "step": 42840 + }, + { + "epoch": 1.96, + "learning_rate": 4.030848093734267e-05, + "loss": 2.0945, + "step": 42850 + }, + { + "epoch": 1.96, + "learning_rate": 4.030619250308939e-05, + "loss": 2.1949, + "step": 42860 + }, + { + "epoch": 1.96, + "learning_rate": 4.030390406883611e-05, + "loss": 2.1756, + "step": 42870 + }, + { + "epoch": 1.96, + "learning_rate": 4.0301615634582815e-05, + "loss": 2.077, + "step": 42880 + }, + { + "epoch": 1.96, + "learning_rate": 4.0299327200329536e-05, + "loss": 2.1333, + "step": 42890 + }, + { + "epoch": 1.96, + "learning_rate": 4.029703876607625e-05, + "loss": 2.0794, + "step": 42900 + }, + { + "epoch": 1.96, + "learning_rate": 4.0294750331822964e-05, + "loss": 2.1696, + "step": 42910 + }, + { + "epoch": 1.96, + "learning_rate": 4.0292461897569685e-05, + "loss": 2.1866, + "step": 42920 + }, + { + "epoch": 1.96, + "learning_rate": 4.02901734633164e-05, + "loss": 1.9593, + "step": 42930 + }, + { + "epoch": 1.96, + "learning_rate": 4.028788502906311e-05, + "loss": 2.1689, + "step": 42940 + }, + { + "epoch": 1.96, + "learning_rate": 4.0285596594809834e-05, + "loss": 2.1732, + "step": 42950 + }, + { + "epoch": 1.96, + "learning_rate": 4.028330816055655e-05, + "loss": 2.0622, + "step": 42960 + }, + { + "epoch": 1.96, + "learning_rate": 4.028101972630326e-05, + "loss": 2.1236, + "step": 42970 + }, + { + "epoch": 1.96, + "learning_rate": 4.027873129204998e-05, + "loss": 2.0977, + "step": 42980 + }, + { + "epoch": 1.96, + "learning_rate": 4.02764428577967e-05, + "loss": 2.1726, + "step": 42990 + }, + { + "epoch": 1.96, + "learning_rate": 4.027415442354341e-05, + "loss": 2.285, + "step": 43000 + }, + { + "epoch": 1.96, + "learning_rate": 4.027186598929013e-05, + "loss": 2.1645, + "step": 43010 + }, + { + "epoch": 1.96, + "learning_rate": 4.0269577555036847e-05, + "loss": 1.9987, + "step": 43020 + }, + { + "epoch": 1.96, + "learning_rate": 4.026728912078356e-05, + "loss": 2.1143, + "step": 43030 + }, + { + "epoch": 1.97, + "learning_rate": 4.026500068653028e-05, + "loss": 2.2973, + "step": 43040 + }, + { + "epoch": 1.97, + "learning_rate": 4.0262712252276996e-05, + "loss": 2.1446, + "step": 43050 + }, + { + "epoch": 1.97, + "learning_rate": 4.026042381802371e-05, + "loss": 2.1876, + "step": 43060 + }, + { + "epoch": 1.97, + "learning_rate": 4.025813538377043e-05, + "loss": 2.1576, + "step": 43070 + }, + { + "epoch": 1.97, + "learning_rate": 4.025584694951714e-05, + "loss": 2.1068, + "step": 43080 + }, + { + "epoch": 1.97, + "learning_rate": 4.025355851526386e-05, + "loss": 2.096, + "step": 43090 + }, + { + "epoch": 1.97, + "learning_rate": 4.025127008101057e-05, + "loss": 2.1522, + "step": 43100 + }, + { + "epoch": 1.97, + "learning_rate": 4.024898164675729e-05, + "loss": 2.2757, + "step": 43110 + }, + { + "epoch": 1.97, + "learning_rate": 4.024669321250401e-05, + "loss": 2.114, + "step": 43120 + }, + { + "epoch": 1.97, + "learning_rate": 4.024440477825072e-05, + "loss": 2.2968, + "step": 43130 + }, + { + "epoch": 1.97, + "learning_rate": 4.0242116343997437e-05, + "loss": 2.1175, + "step": 43140 + }, + { + "epoch": 1.97, + "learning_rate": 4.023982790974416e-05, + "loss": 2.2303, + "step": 43150 + }, + { + "epoch": 1.97, + "learning_rate": 4.023753947549087e-05, + "loss": 2.05, + "step": 43160 + }, + { + "epoch": 1.97, + "learning_rate": 4.0235251041237586e-05, + "loss": 2.2767, + "step": 43170 + }, + { + "epoch": 1.97, + "learning_rate": 4.0232962606984307e-05, + "loss": 2.1456, + "step": 43180 + }, + { + "epoch": 1.97, + "learning_rate": 4.023067417273102e-05, + "loss": 2.2191, + "step": 43190 + }, + { + "epoch": 1.97, + "learning_rate": 4.0228385738477735e-05, + "loss": 2.1719, + "step": 43200 + }, + { + "epoch": 1.97, + "learning_rate": 4.0226097304224456e-05, + "loss": 2.2707, + "step": 43210 + }, + { + "epoch": 1.97, + "learning_rate": 4.022380886997117e-05, + "loss": 2.1729, + "step": 43220 + }, + { + "epoch": 1.97, + "learning_rate": 4.0221520435717884e-05, + "loss": 2.2156, + "step": 43230 + }, + { + "epoch": 1.97, + "learning_rate": 4.0219232001464605e-05, + "loss": 2.1239, + "step": 43240 + }, + { + "epoch": 1.97, + "learning_rate": 4.021694356721132e-05, + "loss": 2.3339, + "step": 43250 + }, + { + "epoch": 1.98, + "learning_rate": 4.021465513295803e-05, + "loss": 2.1526, + "step": 43260 + }, + { + "epoch": 1.98, + "learning_rate": 4.021236669870475e-05, + "loss": 2.1376, + "step": 43270 + }, + { + "epoch": 1.98, + "learning_rate": 4.021007826445146e-05, + "loss": 2.2015, + "step": 43280 + }, + { + "epoch": 1.98, + "learning_rate": 4.0207789830198176e-05, + "loss": 2.0855, + "step": 43290 + }, + { + "epoch": 1.98, + "learning_rate": 4.0205501395944897e-05, + "loss": 2.0382, + "step": 43300 + }, + { + "epoch": 1.98, + "learning_rate": 4.020321296169161e-05, + "loss": 2.1346, + "step": 43310 + }, + { + "epoch": 1.98, + "learning_rate": 4.0200924527438325e-05, + "loss": 2.1081, + "step": 43320 + }, + { + "epoch": 1.98, + "learning_rate": 4.0198636093185046e-05, + "loss": 2.1694, + "step": 43330 + }, + { + "epoch": 1.98, + "learning_rate": 4.019634765893176e-05, + "loss": 2.2509, + "step": 43340 + }, + { + "epoch": 1.98, + "learning_rate": 4.0194059224678474e-05, + "loss": 2.1679, + "step": 43350 + }, + { + "epoch": 1.98, + "learning_rate": 4.0191770790425195e-05, + "loss": 2.2324, + "step": 43360 + }, + { + "epoch": 1.98, + "learning_rate": 4.018948235617191e-05, + "loss": 2.2324, + "step": 43370 + }, + { + "epoch": 1.98, + "learning_rate": 4.018719392191862e-05, + "loss": 2.0115, + "step": 43380 + }, + { + "epoch": 1.98, + "learning_rate": 4.0184905487665344e-05, + "loss": 2.2971, + "step": 43390 + }, + { + "epoch": 1.98, + "learning_rate": 4.018261705341206e-05, + "loss": 2.0845, + "step": 43400 + }, + { + "epoch": 1.98, + "learning_rate": 4.018032861915877e-05, + "loss": 2.3713, + "step": 43410 + }, + { + "epoch": 1.98, + "learning_rate": 4.017804018490549e-05, + "loss": 2.3598, + "step": 43420 + }, + { + "epoch": 1.98, + "learning_rate": 4.017575175065221e-05, + "loss": 1.9928, + "step": 43430 + }, + { + "epoch": 1.98, + "learning_rate": 4.017346331639892e-05, + "loss": 2.1497, + "step": 43440 + }, + { + "epoch": 1.98, + "learning_rate": 4.017117488214564e-05, + "loss": 2.2359, + "step": 43450 + }, + { + "epoch": 1.98, + "learning_rate": 4.0168886447892357e-05, + "loss": 2.1742, + "step": 43460 + }, + { + "epoch": 1.98, + "learning_rate": 4.016659801363907e-05, + "loss": 2.1448, + "step": 43470 + }, + { + "epoch": 1.99, + "learning_rate": 4.0164309579385785e-05, + "loss": 2.1013, + "step": 43480 + }, + { + "epoch": 1.99, + "learning_rate": 4.01620211451325e-05, + "loss": 2.1386, + "step": 43490 + }, + { + "epoch": 1.99, + "learning_rate": 4.015973271087922e-05, + "loss": 2.1619, + "step": 43500 + }, + { + "epoch": 1.99, + "learning_rate": 4.0157444276625934e-05, + "loss": 2.0753, + "step": 43510 + }, + { + "epoch": 1.99, + "learning_rate": 4.015515584237265e-05, + "loss": 2.1612, + "step": 43520 + }, + { + "epoch": 1.99, + "learning_rate": 4.015286740811936e-05, + "loss": 2.2096, + "step": 43530 + }, + { + "epoch": 1.99, + "learning_rate": 4.015057897386608e-05, + "loss": 2.1334, + "step": 43540 + }, + { + "epoch": 1.99, + "learning_rate": 4.01482905396128e-05, + "loss": 2.2236, + "step": 43550 + }, + { + "epoch": 1.99, + "learning_rate": 4.014600210535951e-05, + "loss": 2.2397, + "step": 43560 + }, + { + "epoch": 1.99, + "learning_rate": 4.014371367110623e-05, + "loss": 2.0609, + "step": 43570 + }, + { + "epoch": 1.99, + "learning_rate": 4.0141425236852946e-05, + "loss": 2.1561, + "step": 43580 + }, + { + "epoch": 1.99, + "learning_rate": 4.013913680259966e-05, + "loss": 2.2108, + "step": 43590 + }, + { + "epoch": 1.99, + "learning_rate": 4.013684836834638e-05, + "loss": 2.0336, + "step": 43600 + }, + { + "epoch": 1.99, + "learning_rate": 4.0134559934093096e-05, + "loss": 2.1386, + "step": 43610 + }, + { + "epoch": 1.99, + "learning_rate": 4.013227149983981e-05, + "loss": 2.0658, + "step": 43620 + }, + { + "epoch": 1.99, + "learning_rate": 4.012998306558653e-05, + "loss": 2.0606, + "step": 43630 + }, + { + "epoch": 1.99, + "learning_rate": 4.0127694631333245e-05, + "loss": 2.1555, + "step": 43640 + }, + { + "epoch": 1.99, + "learning_rate": 4.012540619707996e-05, + "loss": 2.2199, + "step": 43650 + }, + { + "epoch": 1.99, + "learning_rate": 4.012311776282668e-05, + "loss": 2.0866, + "step": 43660 + }, + { + "epoch": 1.99, + "learning_rate": 4.012082932857339e-05, + "loss": 2.2384, + "step": 43670 + }, + { + "epoch": 1.99, + "learning_rate": 4.011854089432011e-05, + "loss": 2.2946, + "step": 43680 + }, + { + "epoch": 1.99, + "learning_rate": 4.011625246006682e-05, + "loss": 2.1082, + "step": 43690 + }, + { + "epoch": 2.0, + "learning_rate": 4.0113964025813536e-05, + "loss": 2.2524, + "step": 43700 + }, + { + "epoch": 2.0, + "learning_rate": 4.011167559156026e-05, + "loss": 2.2067, + "step": 43710 + }, + { + "epoch": 2.0, + "learning_rate": 4.010938715730697e-05, + "loss": 2.1595, + "step": 43720 + }, + { + "epoch": 2.0, + "learning_rate": 4.0107098723053686e-05, + "loss": 2.084, + "step": 43730 + }, + { + "epoch": 2.0, + "learning_rate": 4.0104810288800407e-05, + "loss": 2.1358, + "step": 43740 + }, + { + "epoch": 2.0, + "learning_rate": 4.010252185454712e-05, + "loss": 2.082, + "step": 43750 + }, + { + "epoch": 2.0, + "learning_rate": 4.0100233420293835e-05, + "loss": 2.3063, + "step": 43760 + }, + { + "epoch": 2.0, + "learning_rate": 4.0097944986040556e-05, + "loss": 2.1795, + "step": 43770 + }, + { + "epoch": 2.0, + "learning_rate": 4.009565655178727e-05, + "loss": 2.0539, + "step": 43780 + }, + { + "epoch": 2.0, + "learning_rate": 4.0093368117533984e-05, + "loss": 2.1856, + "step": 43790 + }, + { + "epoch": 2.0, + "eval_cer": 0.6803315908802483, + "eval_em": 0.00728476821192053, + "eval_f1": 0.00728476821192053, + "eval_loss": 2.0877115726470947, + "eval_runtime": 2655.7777, + "eval_samples_per_second": 3.98, + "eval_steps_per_second": 1.99, + "eval_wer": 0.9927152317880795, + "step": 43799 + }, + { + "epoch": 2.0, + "learning_rate": 4.0091079683280705e-05, + "loss": 2.068, + "step": 43800 + }, + { + "epoch": 2.0, + "learning_rate": 4.008879124902742e-05, + "loss": 2.0781, + "step": 43810 + }, + { + "epoch": 2.0, + "learning_rate": 4.008650281477413e-05, + "loss": 2.0973, + "step": 43820 + }, + { + "epoch": 2.0, + "learning_rate": 4.0084214380520854e-05, + "loss": 2.2002, + "step": 43830 + }, + { + "epoch": 2.0, + "learning_rate": 4.008192594626757e-05, + "loss": 2.2435, + "step": 43840 + }, + { + "epoch": 2.0, + "learning_rate": 4.007963751201428e-05, + "loss": 2.1009, + "step": 43850 + }, + { + "epoch": 2.0, + "learning_rate": 4.0077349077761e-05, + "loss": 2.3265, + "step": 43860 + }, + { + "epoch": 2.0, + "learning_rate": 4.007506064350771e-05, + "loss": 2.1283, + "step": 43870 + }, + { + "epoch": 2.0, + "learning_rate": 4.0072772209254425e-05, + "loss": 2.1477, + "step": 43880 + }, + { + "epoch": 2.0, + "learning_rate": 4.0070483775001146e-05, + "loss": 2.2385, + "step": 43890 + }, + { + "epoch": 2.0, + "learning_rate": 4.006819534074786e-05, + "loss": 2.1562, + "step": 43900 + }, + { + "epoch": 2.01, + "learning_rate": 4.0065906906494574e-05, + "loss": 2.2254, + "step": 43910 + }, + { + "epoch": 2.01, + "learning_rate": 4.0063618472241295e-05, + "loss": 2.2111, + "step": 43920 + }, + { + "epoch": 2.01, + "learning_rate": 4.006133003798801e-05, + "loss": 2.0934, + "step": 43930 + }, + { + "epoch": 2.01, + "learning_rate": 4.005904160373472e-05, + "loss": 2.0573, + "step": 43940 + }, + { + "epoch": 2.01, + "learning_rate": 4.0056753169481444e-05, + "loss": 2.1335, + "step": 43950 + }, + { + "epoch": 2.01, + "learning_rate": 4.005446473522816e-05, + "loss": 2.2072, + "step": 43960 + }, + { + "epoch": 2.01, + "learning_rate": 4.005217630097487e-05, + "loss": 2.1794, + "step": 43970 + }, + { + "epoch": 2.01, + "learning_rate": 4.004988786672159e-05, + "loss": 1.9912, + "step": 43980 + }, + { + "epoch": 2.01, + "learning_rate": 4.004759943246831e-05, + "loss": 1.9587, + "step": 43990 + }, + { + "epoch": 2.01, + "learning_rate": 4.004531099821502e-05, + "loss": 2.1115, + "step": 44000 + }, + { + "epoch": 2.01, + "learning_rate": 4.004302256396174e-05, + "loss": 2.312, + "step": 44010 + }, + { + "epoch": 2.01, + "learning_rate": 4.0040734129708456e-05, + "loss": 2.1002, + "step": 44020 + }, + { + "epoch": 2.01, + "learning_rate": 4.003844569545517e-05, + "loss": 2.0632, + "step": 44030 + }, + { + "epoch": 2.01, + "learning_rate": 4.003615726120189e-05, + "loss": 2.3169, + "step": 44040 + }, + { + "epoch": 2.01, + "learning_rate": 4.0033868826948606e-05, + "loss": 2.1994, + "step": 44050 + }, + { + "epoch": 2.01, + "learning_rate": 4.003158039269532e-05, + "loss": 2.1358, + "step": 44060 + }, + { + "epoch": 2.01, + "learning_rate": 4.0029291958442034e-05, + "loss": 1.958, + "step": 44070 + }, + { + "epoch": 2.01, + "learning_rate": 4.002700352418875e-05, + "loss": 2.1652, + "step": 44080 + }, + { + "epoch": 2.01, + "learning_rate": 4.002471508993547e-05, + "loss": 2.1721, + "step": 44090 + }, + { + "epoch": 2.01, + "learning_rate": 4.002242665568218e-05, + "loss": 2.1231, + "step": 44100 + }, + { + "epoch": 2.01, + "learning_rate": 4.00201382214289e-05, + "loss": 2.0832, + "step": 44110 + }, + { + "epoch": 2.01, + "learning_rate": 4.001784978717562e-05, + "loss": 2.1321, + "step": 44120 + }, + { + "epoch": 2.02, + "learning_rate": 4.001556135292233e-05, + "loss": 2.2346, + "step": 44130 + }, + { + "epoch": 2.02, + "learning_rate": 4.0013272918669046e-05, + "loss": 2.1374, + "step": 44140 + }, + { + "epoch": 2.02, + "learning_rate": 4.001098448441577e-05, + "loss": 2.049, + "step": 44150 + }, + { + "epoch": 2.02, + "learning_rate": 4.000869605016248e-05, + "loss": 2.1991, + "step": 44160 + }, + { + "epoch": 2.02, + "learning_rate": 4.0006407615909196e-05, + "loss": 2.2057, + "step": 44170 + }, + { + "epoch": 2.02, + "learning_rate": 4.0004119181655916e-05, + "loss": 2.1019, + "step": 44180 + }, + { + "epoch": 2.02, + "learning_rate": 4.000183074740263e-05, + "loss": 2.1055, + "step": 44190 + }, + { + "epoch": 2.02, + "learning_rate": 3.9999542313149345e-05, + "loss": 2.1823, + "step": 44200 + }, + { + "epoch": 2.02, + "learning_rate": 3.9997253878896066e-05, + "loss": 2.346, + "step": 44210 + }, + { + "epoch": 2.02, + "learning_rate": 3.999496544464278e-05, + "loss": 2.0932, + "step": 44220 + }, + { + "epoch": 2.02, + "learning_rate": 3.9992677010389494e-05, + "loss": 2.1734, + "step": 44230 + }, + { + "epoch": 2.02, + "learning_rate": 3.9990388576136215e-05, + "loss": 2.1016, + "step": 44240 + }, + { + "epoch": 2.02, + "learning_rate": 3.998810014188293e-05, + "loss": 2.0696, + "step": 44250 + }, + { + "epoch": 2.02, + "learning_rate": 3.9985811707629636e-05, + "loss": 2.19, + "step": 44260 + }, + { + "epoch": 2.02, + "learning_rate": 3.998352327337636e-05, + "loss": 1.9556, + "step": 44270 + }, + { + "epoch": 2.02, + "learning_rate": 3.998123483912307e-05, + "loss": 2.1191, + "step": 44280 + }, + { + "epoch": 2.02, + "learning_rate": 3.9978946404869785e-05, + "loss": 2.1409, + "step": 44290 + }, + { + "epoch": 2.02, + "learning_rate": 3.9976657970616506e-05, + "loss": 2.1453, + "step": 44300 + }, + { + "epoch": 2.02, + "learning_rate": 3.997436953636322e-05, + "loss": 2.2592, + "step": 44310 + }, + { + "epoch": 2.02, + "learning_rate": 3.9972081102109935e-05, + "loss": 1.9952, + "step": 44320 + }, + { + "epoch": 2.02, + "learning_rate": 3.9969792667856656e-05, + "loss": 2.1462, + "step": 44330 + }, + { + "epoch": 2.02, + "learning_rate": 3.996750423360337e-05, + "loss": 2.1939, + "step": 44340 + }, + { + "epoch": 2.03, + "learning_rate": 3.9965215799350084e-05, + "loss": 2.1064, + "step": 44350 + }, + { + "epoch": 2.03, + "learning_rate": 3.9962927365096805e-05, + "loss": 2.1621, + "step": 44360 + }, + { + "epoch": 2.03, + "learning_rate": 3.996063893084352e-05, + "loss": 2.2163, + "step": 44370 + }, + { + "epoch": 2.03, + "learning_rate": 3.995835049659023e-05, + "loss": 2.0581, + "step": 44380 + }, + { + "epoch": 2.03, + "learning_rate": 3.9956062062336954e-05, + "loss": 2.075, + "step": 44390 + }, + { + "epoch": 2.03, + "learning_rate": 3.995377362808367e-05, + "loss": 2.0796, + "step": 44400 + }, + { + "epoch": 2.03, + "learning_rate": 3.995148519383038e-05, + "loss": 2.0189, + "step": 44410 + }, + { + "epoch": 2.03, + "learning_rate": 3.99491967595771e-05, + "loss": 1.9997, + "step": 44420 + }, + { + "epoch": 2.03, + "learning_rate": 3.994690832532382e-05, + "loss": 2.3219, + "step": 44430 + }, + { + "epoch": 2.03, + "learning_rate": 3.994461989107053e-05, + "loss": 2.0995, + "step": 44440 + }, + { + "epoch": 2.03, + "learning_rate": 3.994233145681725e-05, + "loss": 2.2545, + "step": 44450 + }, + { + "epoch": 2.03, + "learning_rate": 3.994004302256396e-05, + "loss": 2.2726, + "step": 44460 + }, + { + "epoch": 2.03, + "learning_rate": 3.993775458831068e-05, + "loss": 2.3233, + "step": 44470 + }, + { + "epoch": 2.03, + "learning_rate": 3.9935466154057395e-05, + "loss": 2.1245, + "step": 44480 + }, + { + "epoch": 2.03, + "learning_rate": 3.993317771980411e-05, + "loss": 2.0066, + "step": 44490 + }, + { + "epoch": 2.03, + "learning_rate": 3.993088928555083e-05, + "loss": 2.1064, + "step": 44500 + }, + { + "epoch": 2.03, + "learning_rate": 3.9928600851297544e-05, + "loss": 2.1372, + "step": 44510 + }, + { + "epoch": 2.03, + "learning_rate": 3.992631241704426e-05, + "loss": 2.2571, + "step": 44520 + }, + { + "epoch": 2.03, + "learning_rate": 3.992402398279098e-05, + "loss": 2.0638, + "step": 44530 + }, + { + "epoch": 2.03, + "learning_rate": 3.992173554853769e-05, + "loss": 2.1493, + "step": 44540 + }, + { + "epoch": 2.03, + "learning_rate": 3.991944711428441e-05, + "loss": 2.0984, + "step": 44550 + }, + { + "epoch": 2.03, + "learning_rate": 3.991715868003113e-05, + "loss": 2.1422, + "step": 44560 + }, + { + "epoch": 2.04, + "learning_rate": 3.991487024577784e-05, + "loss": 2.1596, + "step": 44570 + }, + { + "epoch": 2.04, + "learning_rate": 3.9912581811524556e-05, + "loss": 2.3457, + "step": 44580 + }, + { + "epoch": 2.04, + "learning_rate": 3.991029337727128e-05, + "loss": 2.1048, + "step": 44590 + }, + { + "epoch": 2.04, + "learning_rate": 3.990800494301799e-05, + "loss": 2.0255, + "step": 44600 + }, + { + "epoch": 2.04, + "learning_rate": 3.9905716508764706e-05, + "loss": 2.1514, + "step": 44610 + }, + { + "epoch": 2.04, + "learning_rate": 3.9903428074511426e-05, + "loss": 2.0737, + "step": 44620 + }, + { + "epoch": 2.04, + "learning_rate": 3.990113964025814e-05, + "loss": 2.1085, + "step": 44630 + }, + { + "epoch": 2.04, + "learning_rate": 3.9898851206004855e-05, + "loss": 1.9769, + "step": 44640 + }, + { + "epoch": 2.04, + "learning_rate": 3.989656277175157e-05, + "loss": 2.0106, + "step": 44650 + }, + { + "epoch": 2.04, + "learning_rate": 3.989427433749828e-05, + "loss": 2.1209, + "step": 44660 + }, + { + "epoch": 2.04, + "learning_rate": 3.9891985903245e-05, + "loss": 2.0847, + "step": 44670 + }, + { + "epoch": 2.04, + "learning_rate": 3.988969746899172e-05, + "loss": 2.157, + "step": 44680 + }, + { + "epoch": 2.04, + "learning_rate": 3.988740903473843e-05, + "loss": 2.1885, + "step": 44690 + }, + { + "epoch": 2.04, + "learning_rate": 3.9885120600485146e-05, + "loss": 2.0697, + "step": 44700 + }, + { + "epoch": 2.04, + "learning_rate": 3.988283216623187e-05, + "loss": 2.1716, + "step": 44710 + }, + { + "epoch": 2.04, + "learning_rate": 3.988054373197858e-05, + "loss": 2.1129, + "step": 44720 + }, + { + "epoch": 2.04, + "learning_rate": 3.9878255297725295e-05, + "loss": 2.0408, + "step": 44730 + }, + { + "epoch": 2.04, + "learning_rate": 3.9875966863472016e-05, + "loss": 1.997, + "step": 44740 + }, + { + "epoch": 2.04, + "learning_rate": 3.987367842921873e-05, + "loss": 2.0894, + "step": 44750 + }, + { + "epoch": 2.04, + "learning_rate": 3.9871389994965445e-05, + "loss": 2.144, + "step": 44760 + }, + { + "epoch": 2.04, + "learning_rate": 3.9869101560712166e-05, + "loss": 2.0767, + "step": 44770 + }, + { + "epoch": 2.04, + "learning_rate": 3.986681312645888e-05, + "loss": 2.1087, + "step": 44780 + }, + { + "epoch": 2.05, + "learning_rate": 3.9864524692205594e-05, + "loss": 2.1549, + "step": 44790 + }, + { + "epoch": 2.05, + "learning_rate": 3.9862236257952315e-05, + "loss": 2.2952, + "step": 44800 + }, + { + "epoch": 2.05, + "learning_rate": 3.985994782369903e-05, + "loss": 2.1928, + "step": 44810 + }, + { + "epoch": 2.05, + "learning_rate": 3.985765938944574e-05, + "loss": 2.1544, + "step": 44820 + }, + { + "epoch": 2.05, + "learning_rate": 3.9855370955192464e-05, + "loss": 2.1965, + "step": 44830 + }, + { + "epoch": 2.05, + "learning_rate": 3.985308252093918e-05, + "loss": 2.059, + "step": 44840 + }, + { + "epoch": 2.05, + "learning_rate": 3.9850794086685885e-05, + "loss": 2.176, + "step": 44850 + }, + { + "epoch": 2.05, + "learning_rate": 3.9848505652432606e-05, + "loss": 2.0571, + "step": 44860 + }, + { + "epoch": 2.05, + "learning_rate": 3.984621721817932e-05, + "loss": 2.1609, + "step": 44870 + }, + { + "epoch": 2.05, + "learning_rate": 3.9843928783926035e-05, + "loss": 2.2207, + "step": 44880 + }, + { + "epoch": 2.05, + "learning_rate": 3.9841640349672755e-05, + "loss": 2.3255, + "step": 44890 + }, + { + "epoch": 2.05, + "learning_rate": 3.983935191541947e-05, + "loss": 2.0508, + "step": 44900 + }, + { + "epoch": 2.05, + "learning_rate": 3.9837063481166184e-05, + "loss": 2.096, + "step": 44910 + }, + { + "epoch": 2.05, + "learning_rate": 3.9834775046912905e-05, + "loss": 2.2331, + "step": 44920 + }, + { + "epoch": 2.05, + "learning_rate": 3.983248661265962e-05, + "loss": 2.2008, + "step": 44930 + }, + { + "epoch": 2.05, + "learning_rate": 3.983019817840633e-05, + "loss": 2.2905, + "step": 44940 + }, + { + "epoch": 2.05, + "learning_rate": 3.9827909744153054e-05, + "loss": 2.0257, + "step": 44950 + }, + { + "epoch": 2.05, + "learning_rate": 3.982562130989977e-05, + "loss": 2.3328, + "step": 44960 + }, + { + "epoch": 2.05, + "learning_rate": 3.982333287564648e-05, + "loss": 2.1619, + "step": 44970 + }, + { + "epoch": 2.05, + "learning_rate": 3.98210444413932e-05, + "loss": 2.2712, + "step": 44980 + }, + { + "epoch": 2.05, + "learning_rate": 3.981875600713992e-05, + "loss": 2.142, + "step": 44990 + }, + { + "epoch": 2.05, + "learning_rate": 3.981646757288663e-05, + "loss": 2.2381, + "step": 45000 + }, + { + "epoch": 2.06, + "learning_rate": 3.981417913863335e-05, + "loss": 2.1213, + "step": 45010 + }, + { + "epoch": 2.06, + "learning_rate": 3.9811890704380066e-05, + "loss": 2.0988, + "step": 45020 + }, + { + "epoch": 2.06, + "learning_rate": 3.980960227012678e-05, + "loss": 2.1052, + "step": 45030 + }, + { + "epoch": 2.06, + "learning_rate": 3.98073138358735e-05, + "loss": 2.1316, + "step": 45040 + }, + { + "epoch": 2.06, + "learning_rate": 3.980502540162021e-05, + "loss": 2.1439, + "step": 45050 + }, + { + "epoch": 2.06, + "learning_rate": 3.980273696736693e-05, + "loss": 2.1584, + "step": 45060 + }, + { + "epoch": 2.06, + "learning_rate": 3.9800448533113644e-05, + "loss": 2.2457, + "step": 45070 + }, + { + "epoch": 2.06, + "learning_rate": 3.979816009886036e-05, + "loss": 2.1363, + "step": 45080 + }, + { + "epoch": 2.06, + "learning_rate": 3.979587166460708e-05, + "loss": 1.9597, + "step": 45090 + }, + { + "epoch": 2.06, + "learning_rate": 3.979358323035379e-05, + "loss": 1.9604, + "step": 45100 + }, + { + "epoch": 2.06, + "learning_rate": 3.979129479610051e-05, + "loss": 2.2077, + "step": 45110 + }, + { + "epoch": 2.06, + "learning_rate": 3.978900636184723e-05, + "loss": 2.1129, + "step": 45120 + }, + { + "epoch": 2.06, + "learning_rate": 3.978671792759394e-05, + "loss": 2.2119, + "step": 45130 + }, + { + "epoch": 2.06, + "learning_rate": 3.9784429493340656e-05, + "loss": 2.148, + "step": 45140 + }, + { + "epoch": 2.06, + "learning_rate": 3.978214105908738e-05, + "loss": 2.0753, + "step": 45150 + }, + { + "epoch": 2.06, + "learning_rate": 3.977985262483409e-05, + "loss": 2.1517, + "step": 45160 + }, + { + "epoch": 2.06, + "learning_rate": 3.9777564190580805e-05, + "loss": 2.2632, + "step": 45170 + }, + { + "epoch": 2.06, + "learning_rate": 3.9775275756327526e-05, + "loss": 2.0414, + "step": 45180 + }, + { + "epoch": 2.06, + "learning_rate": 3.977298732207424e-05, + "loss": 2.1213, + "step": 45190 + }, + { + "epoch": 2.06, + "learning_rate": 3.9770698887820955e-05, + "loss": 2.0444, + "step": 45200 + }, + { + "epoch": 2.06, + "learning_rate": 3.9768410453567676e-05, + "loss": 2.2012, + "step": 45210 + }, + { + "epoch": 2.06, + "learning_rate": 3.976612201931439e-05, + "loss": 2.0186, + "step": 45220 + }, + { + "epoch": 2.07, + "learning_rate": 3.9763833585061104e-05, + "loss": 2.1593, + "step": 45230 + }, + { + "epoch": 2.07, + "learning_rate": 3.9761545150807825e-05, + "loss": 1.9909, + "step": 45240 + }, + { + "epoch": 2.07, + "learning_rate": 3.975925671655453e-05, + "loss": 2.1351, + "step": 45250 + }, + { + "epoch": 2.07, + "learning_rate": 3.9756968282301246e-05, + "loss": 2.078, + "step": 45260 + }, + { + "epoch": 2.07, + "learning_rate": 3.975467984804797e-05, + "loss": 2.1609, + "step": 45270 + }, + { + "epoch": 2.07, + "learning_rate": 3.975239141379468e-05, + "loss": 1.9537, + "step": 45280 + }, + { + "epoch": 2.07, + "learning_rate": 3.9750102979541395e-05, + "loss": 2.1635, + "step": 45290 + }, + { + "epoch": 2.07, + "learning_rate": 3.9747814545288116e-05, + "loss": 2.2814, + "step": 45300 + }, + { + "epoch": 2.07, + "learning_rate": 3.974552611103483e-05, + "loss": 2.264, + "step": 45310 + }, + { + "epoch": 2.07, + "learning_rate": 3.9743237676781545e-05, + "loss": 2.1667, + "step": 45320 + }, + { + "epoch": 2.07, + "learning_rate": 3.9740949242528265e-05, + "loss": 2.1343, + "step": 45330 + }, + { + "epoch": 2.07, + "learning_rate": 3.973866080827498e-05, + "loss": 2.2072, + "step": 45340 + }, + { + "epoch": 2.07, + "learning_rate": 3.9736372374021694e-05, + "loss": 2.1273, + "step": 45350 + }, + { + "epoch": 2.07, + "learning_rate": 3.9734083939768415e-05, + "loss": 2.3258, + "step": 45360 + }, + { + "epoch": 2.07, + "learning_rate": 3.973179550551513e-05, + "loss": 2.0087, + "step": 45370 + }, + { + "epoch": 2.07, + "learning_rate": 3.972950707126184e-05, + "loss": 2.1923, + "step": 45380 + }, + { + "epoch": 2.07, + "learning_rate": 3.9727218637008564e-05, + "loss": 2.049, + "step": 45390 + }, + { + "epoch": 2.07, + "learning_rate": 3.972493020275528e-05, + "loss": 2.2572, + "step": 45400 + }, + { + "epoch": 2.07, + "learning_rate": 3.972264176850199e-05, + "loss": 2.1778, + "step": 45410 + }, + { + "epoch": 2.07, + "learning_rate": 3.972035333424871e-05, + "loss": 2.1429, + "step": 45420 + }, + { + "epoch": 2.07, + "learning_rate": 3.971806489999543e-05, + "loss": 2.0242, + "step": 45430 + }, + { + "epoch": 2.07, + "learning_rate": 3.971577646574214e-05, + "loss": 2.0323, + "step": 45440 + }, + { + "epoch": 2.08, + "learning_rate": 3.9713488031488855e-05, + "loss": 2.0182, + "step": 45450 + }, + { + "epoch": 2.08, + "learning_rate": 3.971119959723557e-05, + "loss": 2.0533, + "step": 45460 + }, + { + "epoch": 2.08, + "learning_rate": 3.970891116298229e-05, + "loss": 2.1162, + "step": 45470 + }, + { + "epoch": 2.08, + "learning_rate": 3.9706622728729005e-05, + "loss": 2.2697, + "step": 45480 + }, + { + "epoch": 2.08, + "learning_rate": 3.970433429447572e-05, + "loss": 2.1407, + "step": 45490 + }, + { + "epoch": 2.08, + "learning_rate": 3.970204586022244e-05, + "loss": 2.129, + "step": 45500 + }, + { + "epoch": 2.08, + "learning_rate": 3.9699757425969154e-05, + "loss": 2.1573, + "step": 45510 + }, + { + "epoch": 2.08, + "learning_rate": 3.969746899171587e-05, + "loss": 2.1791, + "step": 45520 + }, + { + "epoch": 2.08, + "learning_rate": 3.969518055746259e-05, + "loss": 2.0688, + "step": 45530 + }, + { + "epoch": 2.08, + "learning_rate": 3.96928921232093e-05, + "loss": 2.1886, + "step": 45540 + }, + { + "epoch": 2.08, + "learning_rate": 3.969060368895602e-05, + "loss": 2.1915, + "step": 45550 + }, + { + "epoch": 2.08, + "learning_rate": 3.968831525470274e-05, + "loss": 2.1243, + "step": 45560 + }, + { + "epoch": 2.08, + "learning_rate": 3.968602682044945e-05, + "loss": 2.1967, + "step": 45570 + }, + { + "epoch": 2.08, + "learning_rate": 3.9683738386196166e-05, + "loss": 2.2301, + "step": 45580 + }, + { + "epoch": 2.08, + "learning_rate": 3.968144995194289e-05, + "loss": 1.9758, + "step": 45590 + }, + { + "epoch": 2.08, + "learning_rate": 3.96791615176896e-05, + "loss": 2.279, + "step": 45600 + }, + { + "epoch": 2.08, + "learning_rate": 3.9676873083436315e-05, + "loss": 2.1666, + "step": 45610 + }, + { + "epoch": 2.08, + "learning_rate": 3.9674584649183036e-05, + "loss": 2.3857, + "step": 45620 + }, + { + "epoch": 2.08, + "learning_rate": 3.967229621492975e-05, + "loss": 2.1314, + "step": 45630 + }, + { + "epoch": 2.08, + "learning_rate": 3.9670007780676465e-05, + "loss": 2.2925, + "step": 45640 + }, + { + "epoch": 2.08, + "learning_rate": 3.966771934642318e-05, + "loss": 2.1168, + "step": 45650 + }, + { + "epoch": 2.08, + "learning_rate": 3.966543091216989e-05, + "loss": 2.1833, + "step": 45660 + }, + { + "epoch": 2.09, + "learning_rate": 3.966314247791661e-05, + "loss": 2.057, + "step": 45670 + }, + { + "epoch": 2.09, + "learning_rate": 3.966085404366333e-05, + "loss": 2.1769, + "step": 45680 + }, + { + "epoch": 2.09, + "learning_rate": 3.965856560941004e-05, + "loss": 2.0007, + "step": 45690 + }, + { + "epoch": 2.09, + "learning_rate": 3.9656277175156756e-05, + "loss": 2.0927, + "step": 45700 + }, + { + "epoch": 2.09, + "learning_rate": 3.965398874090348e-05, + "loss": 2.0708, + "step": 45710 + }, + { + "epoch": 2.09, + "learning_rate": 3.965170030665019e-05, + "loss": 2.2017, + "step": 45720 + }, + { + "epoch": 2.09, + "learning_rate": 3.9649411872396905e-05, + "loss": 2.1507, + "step": 45730 + }, + { + "epoch": 2.09, + "learning_rate": 3.9647123438143626e-05, + "loss": 2.078, + "step": 45740 + }, + { + "epoch": 2.09, + "learning_rate": 3.964483500389034e-05, + "loss": 2.1122, + "step": 45750 + }, + { + "epoch": 2.09, + "learning_rate": 3.9642546569637055e-05, + "loss": 2.2172, + "step": 45760 + }, + { + "epoch": 2.09, + "learning_rate": 3.9640258135383775e-05, + "loss": 2.039, + "step": 45770 + }, + { + "epoch": 2.09, + "learning_rate": 3.963796970113049e-05, + "loss": 2.1516, + "step": 45780 + }, + { + "epoch": 2.09, + "learning_rate": 3.9635681266877204e-05, + "loss": 2.2883, + "step": 45790 + }, + { + "epoch": 2.09, + "learning_rate": 3.9633392832623925e-05, + "loss": 2.0931, + "step": 45800 + }, + { + "epoch": 2.09, + "learning_rate": 3.963110439837064e-05, + "loss": 2.2002, + "step": 45810 + }, + { + "epoch": 2.09, + "learning_rate": 3.962881596411735e-05, + "loss": 2.223, + "step": 45820 + }, + { + "epoch": 2.09, + "learning_rate": 3.9626527529864074e-05, + "loss": 2.1603, + "step": 45830 + }, + { + "epoch": 2.09, + "learning_rate": 3.962423909561078e-05, + "loss": 2.0405, + "step": 45840 + }, + { + "epoch": 2.09, + "learning_rate": 3.96219506613575e-05, + "loss": 2.1259, + "step": 45850 + }, + { + "epoch": 2.09, + "learning_rate": 3.9619662227104216e-05, + "loss": 2.1603, + "step": 45860 + }, + { + "epoch": 2.09, + "learning_rate": 3.961737379285093e-05, + "loss": 2.0888, + "step": 45870 + }, + { + "epoch": 2.1, + "learning_rate": 3.9615085358597644e-05, + "loss": 2.036, + "step": 45880 + }, + { + "epoch": 2.1, + "learning_rate": 3.9612796924344365e-05, + "loss": 2.0684, + "step": 45890 + }, + { + "epoch": 2.1, + "learning_rate": 3.961050849009108e-05, + "loss": 2.1832, + "step": 45900 + }, + { + "epoch": 2.1, + "learning_rate": 3.9608220055837794e-05, + "loss": 2.1936, + "step": 45910 + }, + { + "epoch": 2.1, + "learning_rate": 3.9605931621584515e-05, + "loss": 2.2078, + "step": 45920 + }, + { + "epoch": 2.1, + "learning_rate": 3.960364318733123e-05, + "loss": 2.0127, + "step": 45930 + }, + { + "epoch": 2.1, + "learning_rate": 3.960135475307794e-05, + "loss": 2.1972, + "step": 45940 + }, + { + "epoch": 2.1, + "learning_rate": 3.9599066318824664e-05, + "loss": 2.1599, + "step": 45950 + }, + { + "epoch": 2.1, + "learning_rate": 3.959677788457138e-05, + "loss": 2.0825, + "step": 45960 + }, + { + "epoch": 2.1, + "learning_rate": 3.959448945031809e-05, + "loss": 2.1752, + "step": 45970 + }, + { + "epoch": 2.1, + "learning_rate": 3.959220101606481e-05, + "loss": 2.0778, + "step": 45980 + }, + { + "epoch": 2.1, + "learning_rate": 3.958991258181153e-05, + "loss": 2.1243, + "step": 45990 + }, + { + "epoch": 2.1, + "learning_rate": 3.958762414755824e-05, + "loss": 2.0431, + "step": 46000 + }, + { + "epoch": 2.1, + "learning_rate": 3.958533571330496e-05, + "loss": 2.088, + "step": 46010 + }, + { + "epoch": 2.1, + "learning_rate": 3.9583047279051676e-05, + "loss": 2.0745, + "step": 46020 + }, + { + "epoch": 2.1, + "learning_rate": 3.958075884479839e-05, + "loss": 2.0858, + "step": 46030 + }, + { + "epoch": 2.1, + "learning_rate": 3.9578470410545104e-05, + "loss": 2.1764, + "step": 46040 + }, + { + "epoch": 2.1, + "learning_rate": 3.957618197629182e-05, + "loss": 2.2337, + "step": 46050 + }, + { + "epoch": 2.1, + "learning_rate": 3.957389354203854e-05, + "loss": 2.0471, + "step": 46060 + }, + { + "epoch": 2.1, + "learning_rate": 3.9571605107785254e-05, + "loss": 1.9367, + "step": 46070 + }, + { + "epoch": 2.1, + "learning_rate": 3.956931667353197e-05, + "loss": 2.0568, + "step": 46080 + }, + { + "epoch": 2.1, + "learning_rate": 3.956702823927869e-05, + "loss": 2.0782, + "step": 46090 + }, + { + "epoch": 2.11, + "learning_rate": 3.95647398050254e-05, + "loss": 1.9797, + "step": 46100 + }, + { + "epoch": 2.11, + "learning_rate": 3.956245137077212e-05, + "loss": 2.0825, + "step": 46110 + }, + { + "epoch": 2.11, + "learning_rate": 3.956016293651884e-05, + "loss": 2.1537, + "step": 46120 + }, + { + "epoch": 2.11, + "learning_rate": 3.955787450226555e-05, + "loss": 2.2567, + "step": 46130 + }, + { + "epoch": 2.11, + "learning_rate": 3.9555586068012266e-05, + "loss": 2.299, + "step": 46140 + }, + { + "epoch": 2.11, + "learning_rate": 3.955329763375899e-05, + "loss": 2.0935, + "step": 46150 + }, + { + "epoch": 2.11, + "learning_rate": 3.95510091995057e-05, + "loss": 2.148, + "step": 46160 + }, + { + "epoch": 2.11, + "learning_rate": 3.9548720765252415e-05, + "loss": 2.2409, + "step": 46170 + }, + { + "epoch": 2.11, + "learning_rate": 3.9546432330999136e-05, + "loss": 2.1539, + "step": 46180 + }, + { + "epoch": 2.11, + "learning_rate": 3.954414389674585e-05, + "loss": 2.0304, + "step": 46190 + }, + { + "epoch": 2.11, + "learning_rate": 3.9541855462492564e-05, + "loss": 2.0006, + "step": 46200 + }, + { + "epoch": 2.11, + "learning_rate": 3.9539567028239285e-05, + "loss": 2.1048, + "step": 46210 + }, + { + "epoch": 2.11, + "learning_rate": 3.9537278593986e-05, + "loss": 2.1856, + "step": 46220 + }, + { + "epoch": 2.11, + "learning_rate": 3.9534990159732714e-05, + "loss": 2.1029, + "step": 46230 + }, + { + "epoch": 2.11, + "learning_rate": 3.953270172547943e-05, + "loss": 2.2018, + "step": 46240 + }, + { + "epoch": 2.11, + "learning_rate": 3.953041329122614e-05, + "loss": 2.2724, + "step": 46250 + }, + { + "epoch": 2.11, + "learning_rate": 3.9528124856972856e-05, + "loss": 1.8865, + "step": 46260 + }, + { + "epoch": 2.11, + "learning_rate": 3.952583642271958e-05, + "loss": 2.1468, + "step": 46270 + }, + { + "epoch": 2.11, + "learning_rate": 3.952354798846629e-05, + "loss": 2.2495, + "step": 46280 + }, + { + "epoch": 2.11, + "learning_rate": 3.9521259554213005e-05, + "loss": 2.1469, + "step": 46290 + }, + { + "epoch": 2.11, + "learning_rate": 3.9518971119959726e-05, + "loss": 2.0946, + "step": 46300 + }, + { + "epoch": 2.11, + "learning_rate": 3.951668268570644e-05, + "loss": 2.1642, + "step": 46310 + }, + { + "epoch": 2.12, + "learning_rate": 3.9514394251453154e-05, + "loss": 2.122, + "step": 46320 + }, + { + "epoch": 2.12, + "learning_rate": 3.9512105817199875e-05, + "loss": 2.1853, + "step": 46330 + }, + { + "epoch": 2.12, + "learning_rate": 3.950981738294659e-05, + "loss": 2.1397, + "step": 46340 + }, + { + "epoch": 2.12, + "learning_rate": 3.9507528948693304e-05, + "loss": 1.9184, + "step": 46350 + }, + { + "epoch": 2.12, + "learning_rate": 3.9505240514440024e-05, + "loss": 2.2584, + "step": 46360 + }, + { + "epoch": 2.12, + "learning_rate": 3.950295208018674e-05, + "loss": 1.9638, + "step": 46370 + }, + { + "epoch": 2.12, + "learning_rate": 3.950066364593345e-05, + "loss": 2.1507, + "step": 46380 + }, + { + "epoch": 2.12, + "learning_rate": 3.9498375211680174e-05, + "loss": 2.111, + "step": 46390 + }, + { + "epoch": 2.12, + "learning_rate": 3.949608677742689e-05, + "loss": 2.0705, + "step": 46400 + }, + { + "epoch": 2.12, + "learning_rate": 3.94937983431736e-05, + "loss": 2.2116, + "step": 46410 + }, + { + "epoch": 2.12, + "learning_rate": 3.949150990892032e-05, + "loss": 2.1783, + "step": 46420 + }, + { + "epoch": 2.12, + "learning_rate": 3.948922147466704e-05, + "loss": 2.2152, + "step": 46430 + }, + { + "epoch": 2.12, + "learning_rate": 3.948693304041375e-05, + "loss": 2.1482, + "step": 46440 + }, + { + "epoch": 2.12, + "learning_rate": 3.9484644606160465e-05, + "loss": 2.1685, + "step": 46450 + }, + { + "epoch": 2.12, + "learning_rate": 3.948235617190718e-05, + "loss": 2.1667, + "step": 46460 + }, + { + "epoch": 2.12, + "learning_rate": 3.94800677376539e-05, + "loss": 2.1654, + "step": 46470 + }, + { + "epoch": 2.12, + "learning_rate": 3.9477779303400614e-05, + "loss": 2.075, + "step": 46480 + }, + { + "epoch": 2.12, + "learning_rate": 3.947549086914733e-05, + "loss": 2.2146, + "step": 46490 + }, + { + "epoch": 2.12, + "learning_rate": 3.947320243489405e-05, + "loss": 2.0415, + "step": 46500 + }, + { + "epoch": 2.12, + "learning_rate": 3.9470914000640764e-05, + "loss": 2.0157, + "step": 46510 + }, + { + "epoch": 2.12, + "learning_rate": 3.946862556638748e-05, + "loss": 2.0698, + "step": 46520 + }, + { + "epoch": 2.12, + "learning_rate": 3.94663371321342e-05, + "loss": 2.1039, + "step": 46530 + }, + { + "epoch": 2.13, + "learning_rate": 3.946404869788091e-05, + "loss": 2.5307, + "step": 46540 + }, + { + "epoch": 2.13, + "learning_rate": 3.946176026362763e-05, + "loss": 2.0225, + "step": 46550 + }, + { + "epoch": 2.13, + "learning_rate": 3.945947182937435e-05, + "loss": 2.1736, + "step": 46560 + }, + { + "epoch": 2.13, + "learning_rate": 3.945718339512106e-05, + "loss": 1.9992, + "step": 46570 + }, + { + "epoch": 2.13, + "learning_rate": 3.9454894960867776e-05, + "loss": 2.1352, + "step": 46580 + }, + { + "epoch": 2.13, + "learning_rate": 3.94526065266145e-05, + "loss": 2.2198, + "step": 46590 + }, + { + "epoch": 2.13, + "learning_rate": 3.945031809236121e-05, + "loss": 2.0998, + "step": 46600 + }, + { + "epoch": 2.13, + "learning_rate": 3.9448029658107925e-05, + "loss": 2.0463, + "step": 46610 + }, + { + "epoch": 2.13, + "learning_rate": 3.9445741223854646e-05, + "loss": 2.1323, + "step": 46620 + }, + { + "epoch": 2.13, + "learning_rate": 3.9443452789601354e-05, + "loss": 2.0414, + "step": 46630 + }, + { + "epoch": 2.13, + "learning_rate": 3.944116435534807e-05, + "loss": 2.1022, + "step": 46640 + }, + { + "epoch": 2.13, + "learning_rate": 3.943887592109479e-05, + "loss": 1.9823, + "step": 46650 + }, + { + "epoch": 2.13, + "learning_rate": 3.94365874868415e-05, + "loss": 2.1385, + "step": 46660 + }, + { + "epoch": 2.13, + "learning_rate": 3.943429905258822e-05, + "loss": 1.9906, + "step": 46670 + }, + { + "epoch": 2.13, + "learning_rate": 3.943201061833494e-05, + "loss": 2.0898, + "step": 46680 + }, + { + "epoch": 2.13, + "learning_rate": 3.942972218408165e-05, + "loss": 2.0516, + "step": 46690 + }, + { + "epoch": 2.13, + "learning_rate": 3.9427433749828366e-05, + "loss": 1.9933, + "step": 46700 + }, + { + "epoch": 2.13, + "learning_rate": 3.942514531557509e-05, + "loss": 2.1243, + "step": 46710 + }, + { + "epoch": 2.13, + "learning_rate": 3.94228568813218e-05, + "loss": 2.1474, + "step": 46720 + }, + { + "epoch": 2.13, + "learning_rate": 3.9420568447068515e-05, + "loss": 2.2017, + "step": 46730 + }, + { + "epoch": 2.13, + "learning_rate": 3.9418280012815236e-05, + "loss": 2.1995, + "step": 46740 + }, + { + "epoch": 2.13, + "learning_rate": 3.941599157856195e-05, + "loss": 2.223, + "step": 46750 + }, + { + "epoch": 2.14, + "learning_rate": 3.9413703144308664e-05, + "loss": 2.0641, + "step": 46760 + }, + { + "epoch": 2.14, + "learning_rate": 3.9411414710055385e-05, + "loss": 2.1539, + "step": 46770 + }, + { + "epoch": 2.14, + "learning_rate": 3.94091262758021e-05, + "loss": 1.962, + "step": 46780 + }, + { + "epoch": 2.14, + "learning_rate": 3.9406837841548814e-05, + "loss": 2.1593, + "step": 46790 + }, + { + "epoch": 2.14, + "learning_rate": 3.9404549407295534e-05, + "loss": 2.0852, + "step": 46800 + }, + { + "epoch": 2.14, + "learning_rate": 3.940226097304225e-05, + "loss": 2.1276, + "step": 46810 + }, + { + "epoch": 2.14, + "learning_rate": 3.939997253878896e-05, + "loss": 2.1777, + "step": 46820 + }, + { + "epoch": 2.14, + "learning_rate": 3.939768410453568e-05, + "loss": 2.1439, + "step": 46830 + }, + { + "epoch": 2.14, + "learning_rate": 3.939539567028239e-05, + "loss": 2.0445, + "step": 46840 + }, + { + "epoch": 2.14, + "learning_rate": 3.939310723602911e-05, + "loss": 2.3366, + "step": 46850 + }, + { + "epoch": 2.14, + "learning_rate": 3.9390818801775826e-05, + "loss": 1.9631, + "step": 46860 + }, + { + "epoch": 2.14, + "learning_rate": 3.938853036752254e-05, + "loss": 2.0999, + "step": 46870 + }, + { + "epoch": 2.14, + "learning_rate": 3.938624193326926e-05, + "loss": 2.1832, + "step": 46880 + }, + { + "epoch": 2.14, + "learning_rate": 3.9383953499015975e-05, + "loss": 2.1353, + "step": 46890 + }, + { + "epoch": 2.14, + "learning_rate": 3.938166506476269e-05, + "loss": 2.2176, + "step": 46900 + }, + { + "epoch": 2.14, + "learning_rate": 3.937937663050941e-05, + "loss": 2.1035, + "step": 46910 + }, + { + "epoch": 2.14, + "learning_rate": 3.9377088196256124e-05, + "loss": 2.0285, + "step": 46920 + }, + { + "epoch": 2.14, + "learning_rate": 3.937479976200284e-05, + "loss": 2.268, + "step": 46930 + }, + { + "epoch": 2.14, + "learning_rate": 3.937251132774956e-05, + "loss": 2.1951, + "step": 46940 + }, + { + "epoch": 2.14, + "learning_rate": 3.9370222893496274e-05, + "loss": 2.0705, + "step": 46950 + }, + { + "epoch": 2.14, + "learning_rate": 3.936793445924299e-05, + "loss": 2.1762, + "step": 46960 + }, + { + "epoch": 2.14, + "learning_rate": 3.93656460249897e-05, + "loss": 2.1088, + "step": 46970 + }, + { + "epoch": 2.15, + "learning_rate": 3.936335759073642e-05, + "loss": 2.2773, + "step": 46980 + }, + { + "epoch": 2.15, + "learning_rate": 3.936106915648314e-05, + "loss": 2.1462, + "step": 46990 + }, + { + "epoch": 2.15, + "learning_rate": 3.935878072222985e-05, + "loss": 2.0329, + "step": 47000 + }, + { + "epoch": 2.15, + "learning_rate": 3.935649228797657e-05, + "loss": 2.1224, + "step": 47010 + }, + { + "epoch": 2.15, + "learning_rate": 3.9354203853723286e-05, + "loss": 2.2246, + "step": 47020 + }, + { + "epoch": 2.15, + "learning_rate": 3.935191541947e-05, + "loss": 2.007, + "step": 47030 + }, + { + "epoch": 2.15, + "learning_rate": 3.9349626985216714e-05, + "loss": 2.2435, + "step": 47040 + }, + { + "epoch": 2.15, + "learning_rate": 3.934733855096343e-05, + "loss": 2.2642, + "step": 47050 + }, + { + "epoch": 2.15, + "learning_rate": 3.934505011671015e-05, + "loss": 2.0647, + "step": 47060 + }, + { + "epoch": 2.15, + "learning_rate": 3.9342761682456864e-05, + "loss": 2.1486, + "step": 47070 + }, + { + "epoch": 2.15, + "learning_rate": 3.934047324820358e-05, + "loss": 2.1652, + "step": 47080 + }, + { + "epoch": 2.15, + "learning_rate": 3.93381848139503e-05, + "loss": 2.1048, + "step": 47090 + }, + { + "epoch": 2.15, + "learning_rate": 3.933589637969701e-05, + "loss": 2.0926, + "step": 47100 + }, + { + "epoch": 2.15, + "learning_rate": 3.933360794544373e-05, + "loss": 2.0286, + "step": 47110 + }, + { + "epoch": 2.15, + "learning_rate": 3.933131951119045e-05, + "loss": 2.2732, + "step": 47120 + }, + { + "epoch": 2.15, + "learning_rate": 3.932903107693716e-05, + "loss": 2.2196, + "step": 47130 + }, + { + "epoch": 2.15, + "learning_rate": 3.9326742642683876e-05, + "loss": 2.0621, + "step": 47140 + }, + { + "epoch": 2.15, + "learning_rate": 3.93244542084306e-05, + "loss": 2.2639, + "step": 47150 + }, + { + "epoch": 2.15, + "learning_rate": 3.932216577417731e-05, + "loss": 2.0945, + "step": 47160 + }, + { + "epoch": 2.15, + "learning_rate": 3.9319877339924025e-05, + "loss": 2.0831, + "step": 47170 + }, + { + "epoch": 2.15, + "learning_rate": 3.9317588905670746e-05, + "loss": 2.1228, + "step": 47180 + }, + { + "epoch": 2.15, + "learning_rate": 3.931530047141746e-05, + "loss": 2.1384, + "step": 47190 + }, + { + "epoch": 2.16, + "learning_rate": 3.9313012037164174e-05, + "loss": 2.1789, + "step": 47200 + }, + { + "epoch": 2.16, + "learning_rate": 3.9310723602910895e-05, + "loss": 2.2875, + "step": 47210 + }, + { + "epoch": 2.16, + "learning_rate": 3.930843516865761e-05, + "loss": 2.0704, + "step": 47220 + }, + { + "epoch": 2.16, + "learning_rate": 3.930614673440432e-05, + "loss": 2.0716, + "step": 47230 + }, + { + "epoch": 2.16, + "learning_rate": 3.930385830015104e-05, + "loss": 2.0722, + "step": 47240 + }, + { + "epoch": 2.16, + "learning_rate": 3.930156986589775e-05, + "loss": 2.1781, + "step": 47250 + }, + { + "epoch": 2.16, + "learning_rate": 3.9299281431644466e-05, + "loss": 2.2662, + "step": 47260 + }, + { + "epoch": 2.16, + "learning_rate": 3.929699299739119e-05, + "loss": 2.1319, + "step": 47270 + }, + { + "epoch": 2.16, + "learning_rate": 3.92947045631379e-05, + "loss": 1.9706, + "step": 47280 + }, + { + "epoch": 2.16, + "learning_rate": 3.9292416128884615e-05, + "loss": 2.0597, + "step": 47290 + }, + { + "epoch": 2.16, + "learning_rate": 3.9290127694631336e-05, + "loss": 2.1308, + "step": 47300 + }, + { + "epoch": 2.16, + "learning_rate": 3.928783926037805e-05, + "loss": 2.1059, + "step": 47310 + }, + { + "epoch": 2.16, + "learning_rate": 3.9285550826124764e-05, + "loss": 2.1393, + "step": 47320 + }, + { + "epoch": 2.16, + "learning_rate": 3.9283262391871485e-05, + "loss": 2.0494, + "step": 47330 + }, + { + "epoch": 2.16, + "learning_rate": 3.92809739576182e-05, + "loss": 2.1099, + "step": 47340 + }, + { + "epoch": 2.16, + "learning_rate": 3.9278685523364913e-05, + "loss": 2.0949, + "step": 47350 + }, + { + "epoch": 2.16, + "learning_rate": 3.9276397089111634e-05, + "loss": 2.0336, + "step": 47360 + }, + { + "epoch": 2.16, + "learning_rate": 3.927410865485835e-05, + "loss": 2.0894, + "step": 47370 + }, + { + "epoch": 2.16, + "learning_rate": 3.927182022060506e-05, + "loss": 2.1, + "step": 47380 + }, + { + "epoch": 2.16, + "learning_rate": 3.9269531786351784e-05, + "loss": 2.0308, + "step": 47390 + }, + { + "epoch": 2.16, + "learning_rate": 3.92672433520985e-05, + "loss": 2.0726, + "step": 47400 + }, + { + "epoch": 2.16, + "learning_rate": 3.926495491784521e-05, + "loss": 2.0852, + "step": 47410 + }, + { + "epoch": 2.17, + "learning_rate": 3.9262666483591926e-05, + "loss": 2.0567, + "step": 47420 + }, + { + "epoch": 2.17, + "learning_rate": 3.926037804933864e-05, + "loss": 2.119, + "step": 47430 + }, + { + "epoch": 2.17, + "learning_rate": 3.925808961508536e-05, + "loss": 2.1944, + "step": 47440 + }, + { + "epoch": 2.17, + "learning_rate": 3.9255801180832075e-05, + "loss": 2.0604, + "step": 47450 + }, + { + "epoch": 2.17, + "learning_rate": 3.925351274657879e-05, + "loss": 2.2514, + "step": 47460 + }, + { + "epoch": 2.17, + "learning_rate": 3.925122431232551e-05, + "loss": 2.1075, + "step": 47470 + }, + { + "epoch": 2.17, + "learning_rate": 3.9248935878072224e-05, + "loss": 2.1664, + "step": 47480 + }, + { + "epoch": 2.17, + "learning_rate": 3.924664744381894e-05, + "loss": 2.081, + "step": 47490 + }, + { + "epoch": 2.17, + "learning_rate": 3.924435900956566e-05, + "loss": 2.1088, + "step": 47500 + }, + { + "epoch": 2.17, + "learning_rate": 3.9242070575312373e-05, + "loss": 2.0347, + "step": 47510 + }, + { + "epoch": 2.17, + "learning_rate": 3.923978214105909e-05, + "loss": 2.0807, + "step": 47520 + }, + { + "epoch": 2.17, + "learning_rate": 3.923749370680581e-05, + "loss": 2.0928, + "step": 47530 + }, + { + "epoch": 2.17, + "learning_rate": 3.923520527255252e-05, + "loss": 2.1616, + "step": 47540 + }, + { + "epoch": 2.17, + "learning_rate": 3.923291683829924e-05, + "loss": 2.0876, + "step": 47550 + }, + { + "epoch": 2.17, + "learning_rate": 3.923062840404596e-05, + "loss": 1.9977, + "step": 47560 + }, + { + "epoch": 2.17, + "learning_rate": 3.922833996979267e-05, + "loss": 2.1167, + "step": 47570 + }, + { + "epoch": 2.17, + "learning_rate": 3.9226051535539386e-05, + "loss": 2.1003, + "step": 47580 + }, + { + "epoch": 2.17, + "learning_rate": 3.922376310128611e-05, + "loss": 2.1648, + "step": 47590 + }, + { + "epoch": 2.17, + "learning_rate": 3.922147466703282e-05, + "loss": 2.1422, + "step": 47600 + }, + { + "epoch": 2.17, + "learning_rate": 3.9219186232779535e-05, + "loss": 2.1331, + "step": 47610 + }, + { + "epoch": 2.17, + "learning_rate": 3.921689779852625e-05, + "loss": 2.0277, + "step": 47620 + }, + { + "epoch": 2.17, + "learning_rate": 3.9214609364272963e-05, + "loss": 2.2131, + "step": 47630 + }, + { + "epoch": 2.18, + "learning_rate": 3.921232093001968e-05, + "loss": 2.1237, + "step": 47640 + }, + { + "epoch": 2.18, + "learning_rate": 3.92100324957664e-05, + "loss": 2.1095, + "step": 47650 + }, + { + "epoch": 2.18, + "learning_rate": 3.920774406151311e-05, + "loss": 2.0905, + "step": 47660 + }, + { + "epoch": 2.18, + "learning_rate": 3.920545562725983e-05, + "loss": 2.0226, + "step": 47670 + }, + { + "epoch": 2.18, + "learning_rate": 3.920316719300655e-05, + "loss": 2.1161, + "step": 47680 + }, + { + "epoch": 2.18, + "learning_rate": 3.920087875875326e-05, + "loss": 2.0459, + "step": 47690 + }, + { + "epoch": 2.18, + "learning_rate": 3.9198590324499976e-05, + "loss": 1.9904, + "step": 47700 + }, + { + "epoch": 2.18, + "learning_rate": 3.91963018902467e-05, + "loss": 2.0351, + "step": 47710 + }, + { + "epoch": 2.18, + "learning_rate": 3.919401345599341e-05, + "loss": 1.9596, + "step": 47720 + }, + { + "epoch": 2.18, + "learning_rate": 3.9191725021740125e-05, + "loss": 2.149, + "step": 47730 + }, + { + "epoch": 2.18, + "learning_rate": 3.9189436587486846e-05, + "loss": 2.3412, + "step": 47740 + }, + { + "epoch": 2.18, + "learning_rate": 3.918714815323356e-05, + "loss": 2.1434, + "step": 47750 + }, + { + "epoch": 2.18, + "learning_rate": 3.9184859718980274e-05, + "loss": 2.1307, + "step": 47760 + }, + { + "epoch": 2.18, + "learning_rate": 3.9182571284726995e-05, + "loss": 2.1789, + "step": 47770 + }, + { + "epoch": 2.18, + "learning_rate": 3.918028285047371e-05, + "loss": 2.067, + "step": 47780 + }, + { + "epoch": 2.18, + "learning_rate": 3.9177994416220423e-05, + "loss": 1.9824, + "step": 47790 + }, + { + "epoch": 2.18, + "learning_rate": 3.9175705981967144e-05, + "loss": 2.1465, + "step": 47800 + }, + { + "epoch": 2.18, + "learning_rate": 3.917341754771386e-05, + "loss": 2.0306, + "step": 47810 + }, + { + "epoch": 2.18, + "learning_rate": 3.917112911346057e-05, + "loss": 2.2313, + "step": 47820 + }, + { + "epoch": 2.18, + "learning_rate": 3.916884067920729e-05, + "loss": 2.2388, + "step": 47830 + }, + { + "epoch": 2.18, + "learning_rate": 3.9166552244954e-05, + "loss": 2.2025, + "step": 47840 + }, + { + "epoch": 2.18, + "learning_rate": 3.916426381070072e-05, + "loss": 2.1539, + "step": 47850 + }, + { + "epoch": 2.19, + "learning_rate": 3.9161975376447436e-05, + "loss": 2.2095, + "step": 47860 + }, + { + "epoch": 2.19, + "learning_rate": 3.915968694219415e-05, + "loss": 2.0798, + "step": 47870 + }, + { + "epoch": 2.19, + "learning_rate": 3.915739850794087e-05, + "loss": 2.0238, + "step": 47880 + }, + { + "epoch": 2.19, + "learning_rate": 3.9155110073687585e-05, + "loss": 2.0203, + "step": 47890 + }, + { + "epoch": 2.19, + "learning_rate": 3.91528216394343e-05, + "loss": 2.2616, + "step": 47900 + }, + { + "epoch": 2.19, + "learning_rate": 3.915053320518102e-05, + "loss": 2.0437, + "step": 47910 + }, + { + "epoch": 2.19, + "learning_rate": 3.9148244770927734e-05, + "loss": 2.2033, + "step": 47920 + }, + { + "epoch": 2.19, + "learning_rate": 3.914595633667445e-05, + "loss": 2.1951, + "step": 47930 + }, + { + "epoch": 2.19, + "learning_rate": 3.914366790242117e-05, + "loss": 2.1867, + "step": 47940 + }, + { + "epoch": 2.19, + "learning_rate": 3.9141379468167883e-05, + "loss": 2.1719, + "step": 47950 + }, + { + "epoch": 2.19, + "learning_rate": 3.91390910339146e-05, + "loss": 2.0207, + "step": 47960 + }, + { + "epoch": 2.19, + "learning_rate": 3.913680259966132e-05, + "loss": 1.9708, + "step": 47970 + }, + { + "epoch": 2.19, + "learning_rate": 3.913451416540803e-05, + "loss": 2.0069, + "step": 47980 + }, + { + "epoch": 2.19, + "learning_rate": 3.913222573115475e-05, + "loss": 1.9804, + "step": 47990 + }, + { + "epoch": 2.19, + "learning_rate": 3.912993729690147e-05, + "loss": 2.1072, + "step": 48000 + }, + { + "epoch": 2.19, + "learning_rate": 3.912764886264818e-05, + "loss": 2.141, + "step": 48010 + }, + { + "epoch": 2.19, + "learning_rate": 3.912536042839489e-05, + "loss": 2.0372, + "step": 48020 + }, + { + "epoch": 2.19, + "learning_rate": 3.912307199414161e-05, + "loss": 1.9954, + "step": 48030 + }, + { + "epoch": 2.19, + "learning_rate": 3.9120783559888324e-05, + "loss": 2.1736, + "step": 48040 + }, + { + "epoch": 2.19, + "learning_rate": 3.911849512563504e-05, + "loss": 2.0503, + "step": 48050 + }, + { + "epoch": 2.19, + "learning_rate": 3.911620669138176e-05, + "loss": 2.0308, + "step": 48060 + }, + { + "epoch": 2.2, + "learning_rate": 3.911391825712847e-05, + "loss": 2.0254, + "step": 48070 + }, + { + "epoch": 2.2, + "learning_rate": 3.911162982287519e-05, + "loss": 2.225, + "step": 48080 + }, + { + "epoch": 2.2, + "learning_rate": 3.910934138862191e-05, + "loss": 2.1156, + "step": 48090 + }, + { + "epoch": 2.2, + "learning_rate": 3.910705295436862e-05, + "loss": 2.0516, + "step": 48100 + }, + { + "epoch": 2.2, + "learning_rate": 3.910476452011534e-05, + "loss": 2.1484, + "step": 48110 + }, + { + "epoch": 2.2, + "learning_rate": 3.910247608586206e-05, + "loss": 2.1058, + "step": 48120 + }, + { + "epoch": 2.2, + "learning_rate": 3.910018765160877e-05, + "loss": 2.2021, + "step": 48130 + }, + { + "epoch": 2.2, + "learning_rate": 3.9097899217355486e-05, + "loss": 2.0625, + "step": 48140 + }, + { + "epoch": 2.2, + "learning_rate": 3.909561078310221e-05, + "loss": 2.1299, + "step": 48150 + }, + { + "epoch": 2.2, + "learning_rate": 3.909332234884892e-05, + "loss": 2.1301, + "step": 48160 + }, + { + "epoch": 2.2, + "learning_rate": 3.9091033914595635e-05, + "loss": 2.0908, + "step": 48170 + }, + { + "epoch": 2.2, + "learning_rate": 3.9088745480342356e-05, + "loss": 2.088, + "step": 48180 + }, + { + "epoch": 2.2, + "learning_rate": 3.908645704608907e-05, + "loss": 2.2526, + "step": 48190 + }, + { + "epoch": 2.2, + "learning_rate": 3.9084168611835784e-05, + "loss": 1.9042, + "step": 48200 + }, + { + "epoch": 2.2, + "learning_rate": 3.90818801775825e-05, + "loss": 2.091, + "step": 48210 + }, + { + "epoch": 2.2, + "learning_rate": 3.907959174332921e-05, + "loss": 2.0557, + "step": 48220 + }, + { + "epoch": 2.2, + "learning_rate": 3.9077303309075927e-05, + "loss": 2.139, + "step": 48230 + }, + { + "epoch": 2.2, + "learning_rate": 3.907501487482265e-05, + "loss": 2.1324, + "step": 48240 + }, + { + "epoch": 2.2, + "learning_rate": 3.907272644056936e-05, + "loss": 2.0967, + "step": 48250 + }, + { + "epoch": 2.2, + "learning_rate": 3.9070438006316076e-05, + "loss": 2.2388, + "step": 48260 + }, + { + "epoch": 2.2, + "learning_rate": 3.90681495720628e-05, + "loss": 2.1678, + "step": 48270 + }, + { + "epoch": 2.2, + "learning_rate": 3.906586113780951e-05, + "loss": 2.158, + "step": 48280 + }, + { + "epoch": 2.21, + "learning_rate": 3.9063572703556225e-05, + "loss": 2.2548, + "step": 48290 + }, + { + "epoch": 2.21, + "learning_rate": 3.9061284269302946e-05, + "loss": 2.0729, + "step": 48300 + }, + { + "epoch": 2.21, + "learning_rate": 3.905899583504966e-05, + "loss": 2.0849, + "step": 48310 + }, + { + "epoch": 2.21, + "learning_rate": 3.9056707400796374e-05, + "loss": 2.1063, + "step": 48320 + }, + { + "epoch": 2.21, + "learning_rate": 3.9054418966543095e-05, + "loss": 2.1273, + "step": 48330 + }, + { + "epoch": 2.21, + "learning_rate": 3.905213053228981e-05, + "loss": 2.2323, + "step": 48340 + }, + { + "epoch": 2.21, + "learning_rate": 3.904984209803652e-05, + "loss": 2.1298, + "step": 48350 + }, + { + "epoch": 2.21, + "learning_rate": 3.9047553663783244e-05, + "loss": 2.203, + "step": 48360 + }, + { + "epoch": 2.21, + "learning_rate": 3.904526522952996e-05, + "loss": 2.1235, + "step": 48370 + }, + { + "epoch": 2.21, + "learning_rate": 3.904297679527667e-05, + "loss": 1.9759, + "step": 48380 + }, + { + "epoch": 2.21, + "learning_rate": 3.9040688361023393e-05, + "loss": 2.153, + "step": 48390 + }, + { + "epoch": 2.21, + "learning_rate": 3.903839992677011e-05, + "loss": 2.1184, + "step": 48400 + }, + { + "epoch": 2.21, + "learning_rate": 3.903611149251682e-05, + "loss": 2.1196, + "step": 48410 + }, + { + "epoch": 2.21, + "learning_rate": 3.9033823058263536e-05, + "loss": 2.1118, + "step": 48420 + }, + { + "epoch": 2.21, + "learning_rate": 3.903153462401025e-05, + "loss": 2.2099, + "step": 48430 + }, + { + "epoch": 2.21, + "learning_rate": 3.902924618975697e-05, + "loss": 2.2601, + "step": 48440 + }, + { + "epoch": 2.21, + "learning_rate": 3.9026957755503685e-05, + "loss": 2.2459, + "step": 48450 + }, + { + "epoch": 2.21, + "learning_rate": 3.90246693212504e-05, + "loss": 2.0059, + "step": 48460 + }, + { + "epoch": 2.21, + "learning_rate": 3.902238088699712e-05, + "loss": 2.2662, + "step": 48470 + }, + { + "epoch": 2.21, + "learning_rate": 3.9020092452743834e-05, + "loss": 2.1621, + "step": 48480 + }, + { + "epoch": 2.21, + "learning_rate": 3.901780401849055e-05, + "loss": 2.0599, + "step": 48490 + }, + { + "epoch": 2.21, + "learning_rate": 3.901551558423727e-05, + "loss": 1.9925, + "step": 48500 + }, + { + "epoch": 2.22, + "learning_rate": 3.901322714998398e-05, + "loss": 1.9964, + "step": 48510 + }, + { + "epoch": 2.22, + "learning_rate": 3.90109387157307e-05, + "loss": 2.136, + "step": 48520 + }, + { + "epoch": 2.22, + "learning_rate": 3.900865028147742e-05, + "loss": 2.1398, + "step": 48530 + }, + { + "epoch": 2.22, + "learning_rate": 3.900636184722413e-05, + "loss": 2.0984, + "step": 48540 + }, + { + "epoch": 2.22, + "learning_rate": 3.900407341297085e-05, + "loss": 2.1067, + "step": 48550 + }, + { + "epoch": 2.22, + "learning_rate": 3.900178497871757e-05, + "loss": 2.149, + "step": 48560 + }, + { + "epoch": 2.22, + "learning_rate": 3.899949654446428e-05, + "loss": 2.0383, + "step": 48570 + }, + { + "epoch": 2.22, + "learning_rate": 3.8997208110210996e-05, + "loss": 2.2194, + "step": 48580 + }, + { + "epoch": 2.22, + "learning_rate": 3.899491967595772e-05, + "loss": 2.0549, + "step": 48590 + }, + { + "epoch": 2.22, + "learning_rate": 3.899263124170443e-05, + "loss": 2.1741, + "step": 48600 + }, + { + "epoch": 2.22, + "learning_rate": 3.899034280745114e-05, + "loss": 2.1794, + "step": 48610 + }, + { + "epoch": 2.22, + "learning_rate": 3.898805437319786e-05, + "loss": 2.1417, + "step": 48620 + }, + { + "epoch": 2.22, + "learning_rate": 3.898576593894457e-05, + "loss": 2.1563, + "step": 48630 + }, + { + "epoch": 2.22, + "learning_rate": 3.898347750469129e-05, + "loss": 2.1586, + "step": 48640 + }, + { + "epoch": 2.22, + "learning_rate": 3.898118907043801e-05, + "loss": 2.0013, + "step": 48650 + }, + { + "epoch": 2.22, + "learning_rate": 3.897890063618472e-05, + "loss": 2.2536, + "step": 48660 + }, + { + "epoch": 2.22, + "learning_rate": 3.8976612201931437e-05, + "loss": 2.1487, + "step": 48670 + }, + { + "epoch": 2.22, + "learning_rate": 3.897432376767816e-05, + "loss": 2.2718, + "step": 48680 + }, + { + "epoch": 2.22, + "learning_rate": 3.897203533342487e-05, + "loss": 2.116, + "step": 48690 + }, + { + "epoch": 2.22, + "learning_rate": 3.8969746899171586e-05, + "loss": 2.0268, + "step": 48700 + }, + { + "epoch": 2.22, + "learning_rate": 3.896745846491831e-05, + "loss": 2.0296, + "step": 48710 + }, + { + "epoch": 2.22, + "learning_rate": 3.896517003066502e-05, + "loss": 2.2047, + "step": 48720 + }, + { + "epoch": 2.23, + "learning_rate": 3.8962881596411735e-05, + "loss": 2.1616, + "step": 48730 + }, + { + "epoch": 2.23, + "learning_rate": 3.8960593162158456e-05, + "loss": 2.1995, + "step": 48740 + }, + { + "epoch": 2.23, + "learning_rate": 3.895830472790517e-05, + "loss": 2.1266, + "step": 48750 + }, + { + "epoch": 2.23, + "learning_rate": 3.8956016293651884e-05, + "loss": 2.1413, + "step": 48760 + }, + { + "epoch": 2.23, + "learning_rate": 3.8953727859398605e-05, + "loss": 2.0414, + "step": 48770 + }, + { + "epoch": 2.23, + "learning_rate": 3.895143942514532e-05, + "loss": 2.1119, + "step": 48780 + }, + { + "epoch": 2.23, + "learning_rate": 3.894915099089203e-05, + "loss": 2.2525, + "step": 48790 + }, + { + "epoch": 2.23, + "learning_rate": 3.8946862556638754e-05, + "loss": 2.1574, + "step": 48800 + }, + { + "epoch": 2.23, + "learning_rate": 3.894457412238546e-05, + "loss": 2.0673, + "step": 48810 + }, + { + "epoch": 2.23, + "learning_rate": 3.894228568813218e-05, + "loss": 2.0946, + "step": 48820 + }, + { + "epoch": 2.23, + "learning_rate": 3.8939997253878897e-05, + "loss": 2.1382, + "step": 48830 + }, + { + "epoch": 2.23, + "learning_rate": 3.893770881962561e-05, + "loss": 2.0205, + "step": 48840 + }, + { + "epoch": 2.23, + "learning_rate": 3.893542038537233e-05, + "loss": 2.0873, + "step": 48850 + }, + { + "epoch": 2.23, + "learning_rate": 3.8933131951119046e-05, + "loss": 2.096, + "step": 48860 + }, + { + "epoch": 2.23, + "learning_rate": 3.893084351686576e-05, + "loss": 2.2307, + "step": 48870 + }, + { + "epoch": 2.23, + "learning_rate": 3.892855508261248e-05, + "loss": 2.1105, + "step": 48880 + }, + { + "epoch": 2.23, + "learning_rate": 3.8926266648359195e-05, + "loss": 2.0669, + "step": 48890 + }, + { + "epoch": 2.23, + "learning_rate": 3.892397821410591e-05, + "loss": 2.0435, + "step": 48900 + }, + { + "epoch": 2.23, + "learning_rate": 3.892168977985263e-05, + "loss": 2.1155, + "step": 48910 + }, + { + "epoch": 2.23, + "learning_rate": 3.8919401345599344e-05, + "loss": 2.2491, + "step": 48920 + }, + { + "epoch": 2.23, + "learning_rate": 3.891711291134606e-05, + "loss": 2.1616, + "step": 48930 + }, + { + "epoch": 2.23, + "learning_rate": 3.891482447709278e-05, + "loss": 2.2592, + "step": 48940 + }, + { + "epoch": 2.24, + "learning_rate": 3.891253604283949e-05, + "loss": 2.1397, + "step": 48950 + }, + { + "epoch": 2.24, + "learning_rate": 3.891024760858621e-05, + "loss": 2.1768, + "step": 48960 + }, + { + "epoch": 2.24, + "learning_rate": 3.890795917433293e-05, + "loss": 2.1034, + "step": 48970 + }, + { + "epoch": 2.24, + "learning_rate": 3.890567074007964e-05, + "loss": 2.2465, + "step": 48980 + }, + { + "epoch": 2.24, + "learning_rate": 3.890338230582636e-05, + "loss": 2.1126, + "step": 48990 + }, + { + "epoch": 2.24, + "learning_rate": 3.890109387157307e-05, + "loss": 2.1296, + "step": 49000 + }, + { + "epoch": 2.24, + "learning_rate": 3.8898805437319785e-05, + "loss": 2.1098, + "step": 49010 + }, + { + "epoch": 2.24, + "learning_rate": 3.88965170030665e-05, + "loss": 2.0403, + "step": 49020 + }, + { + "epoch": 2.24, + "learning_rate": 3.889422856881322e-05, + "loss": 2.1242, + "step": 49030 + }, + { + "epoch": 2.24, + "learning_rate": 3.8891940134559934e-05, + "loss": 1.9809, + "step": 49040 + }, + { + "epoch": 2.24, + "learning_rate": 3.888965170030665e-05, + "loss": 1.9988, + "step": 49050 + }, + { + "epoch": 2.24, + "learning_rate": 3.888736326605337e-05, + "loss": 2.0248, + "step": 49060 + }, + { + "epoch": 2.24, + "learning_rate": 3.888507483180008e-05, + "loss": 2.1326, + "step": 49070 + }, + { + "epoch": 2.24, + "learning_rate": 3.88827863975468e-05, + "loss": 2.1674, + "step": 49080 + }, + { + "epoch": 2.24, + "learning_rate": 3.888049796329352e-05, + "loss": 2.0436, + "step": 49090 + }, + { + "epoch": 2.24, + "learning_rate": 3.887820952904023e-05, + "loss": 2.0515, + "step": 49100 + }, + { + "epoch": 2.24, + "learning_rate": 3.8875921094786947e-05, + "loss": 2.1244, + "step": 49110 + }, + { + "epoch": 2.24, + "learning_rate": 3.887363266053367e-05, + "loss": 2.1201, + "step": 49120 + }, + { + "epoch": 2.24, + "learning_rate": 3.887134422628038e-05, + "loss": 2.1938, + "step": 49130 + }, + { + "epoch": 2.24, + "learning_rate": 3.8869055792027096e-05, + "loss": 2.1548, + "step": 49140 + }, + { + "epoch": 2.24, + "learning_rate": 3.886676735777382e-05, + "loss": 2.0863, + "step": 49150 + }, + { + "epoch": 2.24, + "learning_rate": 3.886447892352053e-05, + "loss": 2.0893, + "step": 49160 + }, + { + "epoch": 2.25, + "learning_rate": 3.8862190489267245e-05, + "loss": 2.1011, + "step": 49170 + }, + { + "epoch": 2.25, + "learning_rate": 3.8859902055013966e-05, + "loss": 2.1514, + "step": 49180 + }, + { + "epoch": 2.25, + "learning_rate": 3.885761362076068e-05, + "loss": 2.0747, + "step": 49190 + }, + { + "epoch": 2.25, + "learning_rate": 3.8855325186507394e-05, + "loss": 2.2197, + "step": 49200 + }, + { + "epoch": 2.25, + "learning_rate": 3.885303675225411e-05, + "loss": 2.0238, + "step": 49210 + }, + { + "epoch": 2.25, + "learning_rate": 3.885074831800082e-05, + "loss": 2.157, + "step": 49220 + }, + { + "epoch": 2.25, + "learning_rate": 3.884845988374754e-05, + "loss": 1.936, + "step": 49230 + }, + { + "epoch": 2.25, + "learning_rate": 3.884617144949426e-05, + "loss": 2.242, + "step": 49240 + }, + { + "epoch": 2.25, + "learning_rate": 3.884388301524097e-05, + "loss": 2.1498, + "step": 49250 + }, + { + "epoch": 2.25, + "learning_rate": 3.884159458098769e-05, + "loss": 2.0668, + "step": 49260 + }, + { + "epoch": 2.25, + "learning_rate": 3.8839306146734407e-05, + "loss": 2.1587, + "step": 49270 + }, + { + "epoch": 2.25, + "learning_rate": 3.883701771248112e-05, + "loss": 2.0574, + "step": 49280 + }, + { + "epoch": 2.25, + "learning_rate": 3.883472927822784e-05, + "loss": 1.98, + "step": 49290 + }, + { + "epoch": 2.25, + "learning_rate": 3.8832440843974556e-05, + "loss": 2.1095, + "step": 49300 + }, + { + "epoch": 2.25, + "learning_rate": 3.883015240972127e-05, + "loss": 2.0554, + "step": 49310 + }, + { + "epoch": 2.25, + "learning_rate": 3.8827863975467984e-05, + "loss": 2.1567, + "step": 49320 + }, + { + "epoch": 2.25, + "learning_rate": 3.8825575541214705e-05, + "loss": 2.0909, + "step": 49330 + }, + { + "epoch": 2.25, + "learning_rate": 3.882328710696142e-05, + "loss": 1.9946, + "step": 49340 + }, + { + "epoch": 2.25, + "learning_rate": 3.882099867270813e-05, + "loss": 2.1261, + "step": 49350 + }, + { + "epoch": 2.25, + "learning_rate": 3.8818710238454854e-05, + "loss": 2.2894, + "step": 49360 + }, + { + "epoch": 2.25, + "learning_rate": 3.881642180420157e-05, + "loss": 2.2577, + "step": 49370 + }, + { + "epoch": 2.25, + "learning_rate": 3.881413336994828e-05, + "loss": 2.0698, + "step": 49380 + }, + { + "epoch": 2.26, + "learning_rate": 3.8811844935695e-05, + "loss": 2.1284, + "step": 49390 + }, + { + "epoch": 2.26, + "learning_rate": 3.880955650144171e-05, + "loss": 2.0674, + "step": 49400 + }, + { + "epoch": 2.26, + "learning_rate": 3.880726806718843e-05, + "loss": 2.1366, + "step": 49410 + }, + { + "epoch": 2.26, + "learning_rate": 3.8804979632935146e-05, + "loss": 2.0845, + "step": 49420 + }, + { + "epoch": 2.26, + "learning_rate": 3.880269119868186e-05, + "loss": 2.1531, + "step": 49430 + }, + { + "epoch": 2.26, + "learning_rate": 3.880040276442858e-05, + "loss": 2.1782, + "step": 49440 + }, + { + "epoch": 2.26, + "learning_rate": 3.8798114330175295e-05, + "loss": 1.9761, + "step": 49450 + }, + { + "epoch": 2.26, + "learning_rate": 3.879582589592201e-05, + "loss": 2.2302, + "step": 49460 + }, + { + "epoch": 2.26, + "learning_rate": 3.879353746166873e-05, + "loss": 2.3322, + "step": 49470 + }, + { + "epoch": 2.26, + "learning_rate": 3.8791249027415444e-05, + "loss": 2.1389, + "step": 49480 + }, + { + "epoch": 2.26, + "learning_rate": 3.878896059316216e-05, + "loss": 2.1608, + "step": 49490 + }, + { + "epoch": 2.26, + "learning_rate": 3.878667215890888e-05, + "loss": 2.1195, + "step": 49500 + }, + { + "epoch": 2.26, + "learning_rate": 3.878438372465559e-05, + "loss": 2.2983, + "step": 49510 + }, + { + "epoch": 2.26, + "learning_rate": 3.878209529040231e-05, + "loss": 2.1761, + "step": 49520 + }, + { + "epoch": 2.26, + "learning_rate": 3.877980685614903e-05, + "loss": 2.161, + "step": 49530 + }, + { + "epoch": 2.26, + "learning_rate": 3.877751842189574e-05, + "loss": 2.0599, + "step": 49540 + }, + { + "epoch": 2.26, + "learning_rate": 3.8775229987642457e-05, + "loss": 2.0318, + "step": 49550 + }, + { + "epoch": 2.26, + "learning_rate": 3.877294155338918e-05, + "loss": 2.2334, + "step": 49560 + }, + { + "epoch": 2.26, + "learning_rate": 3.877065311913589e-05, + "loss": 2.1773, + "step": 49570 + }, + { + "epoch": 2.26, + "learning_rate": 3.8768364684882606e-05, + "loss": 2.258, + "step": 49580 + }, + { + "epoch": 2.26, + "learning_rate": 3.8766076250629327e-05, + "loss": 2.2573, + "step": 49590 + }, + { + "epoch": 2.26, + "learning_rate": 3.8763787816376034e-05, + "loss": 2.3803, + "step": 49600 + }, + { + "epoch": 2.27, + "learning_rate": 3.876149938212275e-05, + "loss": 2.1301, + "step": 49610 + }, + { + "epoch": 2.27, + "learning_rate": 3.875921094786947e-05, + "loss": 2.1291, + "step": 49620 + }, + { + "epoch": 2.27, + "learning_rate": 3.875692251361618e-05, + "loss": 2.2625, + "step": 49630 + }, + { + "epoch": 2.27, + "learning_rate": 3.87546340793629e-05, + "loss": 2.1612, + "step": 49640 + }, + { + "epoch": 2.27, + "learning_rate": 3.875234564510962e-05, + "loss": 2.2474, + "step": 49650 + }, + { + "epoch": 2.27, + "learning_rate": 3.875005721085633e-05, + "loss": 2.1393, + "step": 49660 + }, + { + "epoch": 2.27, + "learning_rate": 3.8747768776603046e-05, + "loss": 2.253, + "step": 49670 + }, + { + "epoch": 2.27, + "learning_rate": 3.874548034234977e-05, + "loss": 2.2509, + "step": 49680 + }, + { + "epoch": 2.27, + "learning_rate": 3.874319190809648e-05, + "loss": 2.1933, + "step": 49690 + }, + { + "epoch": 2.27, + "learning_rate": 3.8740903473843196e-05, + "loss": 2.0035, + "step": 49700 + }, + { + "epoch": 2.27, + "learning_rate": 3.8738615039589917e-05, + "loss": 2.0659, + "step": 49710 + }, + { + "epoch": 2.27, + "learning_rate": 3.873632660533663e-05, + "loss": 2.0804, + "step": 49720 + }, + { + "epoch": 2.27, + "learning_rate": 3.8734038171083345e-05, + "loss": 2.0729, + "step": 49730 + }, + { + "epoch": 2.27, + "learning_rate": 3.8731749736830066e-05, + "loss": 2.0256, + "step": 49740 + }, + { + "epoch": 2.27, + "learning_rate": 3.872946130257678e-05, + "loss": 2.2274, + "step": 49750 + }, + { + "epoch": 2.27, + "learning_rate": 3.8727172868323494e-05, + "loss": 2.1318, + "step": 49760 + }, + { + "epoch": 2.27, + "learning_rate": 3.8724884434070215e-05, + "loss": 2.1691, + "step": 49770 + }, + { + "epoch": 2.27, + "learning_rate": 3.872259599981693e-05, + "loss": 2.1749, + "step": 49780 + }, + { + "epoch": 2.27, + "learning_rate": 3.872030756556364e-05, + "loss": 2.2132, + "step": 49790 + }, + { + "epoch": 2.27, + "learning_rate": 3.871801913131036e-05, + "loss": 2.1117, + "step": 49800 + }, + { + "epoch": 2.27, + "learning_rate": 3.871573069705707e-05, + "loss": 2.2657, + "step": 49810 + }, + { + "epoch": 2.27, + "learning_rate": 3.871344226280379e-05, + "loss": 1.9524, + "step": 49820 + }, + { + "epoch": 2.28, + "learning_rate": 3.8711153828550506e-05, + "loss": 2.102, + "step": 49830 + }, + { + "epoch": 2.28, + "learning_rate": 3.870886539429722e-05, + "loss": 2.0142, + "step": 49840 + }, + { + "epoch": 2.28, + "learning_rate": 3.870657696004394e-05, + "loss": 1.9817, + "step": 49850 + }, + { + "epoch": 2.28, + "learning_rate": 3.8704288525790656e-05, + "loss": 2.1695, + "step": 49860 + }, + { + "epoch": 2.28, + "learning_rate": 3.870200009153737e-05, + "loss": 2.2847, + "step": 49870 + }, + { + "epoch": 2.28, + "learning_rate": 3.869971165728409e-05, + "loss": 2.0244, + "step": 49880 + }, + { + "epoch": 2.28, + "learning_rate": 3.8697423223030805e-05, + "loss": 2.3591, + "step": 49890 + }, + { + "epoch": 2.28, + "learning_rate": 3.869513478877752e-05, + "loss": 2.1937, + "step": 49900 + }, + { + "epoch": 2.28, + "learning_rate": 3.869284635452424e-05, + "loss": 2.1039, + "step": 49910 + }, + { + "epoch": 2.28, + "learning_rate": 3.8690557920270954e-05, + "loss": 2.0935, + "step": 49920 + }, + { + "epoch": 2.28, + "learning_rate": 3.868826948601767e-05, + "loss": 2.2592, + "step": 49930 + }, + { + "epoch": 2.28, + "learning_rate": 3.868598105176439e-05, + "loss": 2.1644, + "step": 49940 + }, + { + "epoch": 2.28, + "learning_rate": 3.86836926175111e-05, + "loss": 2.184, + "step": 49950 + }, + { + "epoch": 2.28, + "learning_rate": 3.868140418325782e-05, + "loss": 2.0912, + "step": 49960 + }, + { + "epoch": 2.28, + "learning_rate": 3.867911574900454e-05, + "loss": 2.1489, + "step": 49970 + }, + { + "epoch": 2.28, + "learning_rate": 3.867682731475125e-05, + "loss": 2.1004, + "step": 49980 + }, + { + "epoch": 2.28, + "learning_rate": 3.867453888049796e-05, + "loss": 2.1049, + "step": 49990 + }, + { + "epoch": 2.28, + "learning_rate": 3.867225044624468e-05, + "loss": 2.1222, + "step": 50000 + }, + { + "epoch": 2.28, + "learning_rate": 3.8669962011991395e-05, + "loss": 2.0034, + "step": 50010 + }, + { + "epoch": 2.28, + "learning_rate": 3.866767357773811e-05, + "loss": 2.2468, + "step": 50020 + }, + { + "epoch": 2.28, + "learning_rate": 3.866538514348483e-05, + "loss": 2.1985, + "step": 50030 + }, + { + "epoch": 2.28, + "learning_rate": 3.8663096709231544e-05, + "loss": 2.2666, + "step": 50040 + }, + { + "epoch": 2.29, + "learning_rate": 3.866080827497826e-05, + "loss": 1.9806, + "step": 50050 + }, + { + "epoch": 2.29, + "learning_rate": 3.865851984072498e-05, + "loss": 2.2344, + "step": 50060 + }, + { + "epoch": 2.29, + "learning_rate": 3.865623140647169e-05, + "loss": 2.1659, + "step": 50070 + }, + { + "epoch": 2.29, + "learning_rate": 3.865394297221841e-05, + "loss": 2.1695, + "step": 50080 + }, + { + "epoch": 2.29, + "learning_rate": 3.865165453796513e-05, + "loss": 1.9414, + "step": 50090 + }, + { + "epoch": 2.29, + "learning_rate": 3.864936610371184e-05, + "loss": 2.0546, + "step": 50100 + }, + { + "epoch": 2.29, + "learning_rate": 3.8647077669458556e-05, + "loss": 2.0776, + "step": 50110 + }, + { + "epoch": 2.29, + "learning_rate": 3.864478923520528e-05, + "loss": 2.1789, + "step": 50120 + }, + { + "epoch": 2.29, + "learning_rate": 3.864250080095199e-05, + "loss": 2.1126, + "step": 50130 + }, + { + "epoch": 2.29, + "learning_rate": 3.8640212366698706e-05, + "loss": 2.1298, + "step": 50140 + }, + { + "epoch": 2.29, + "learning_rate": 3.8637923932445426e-05, + "loss": 2.0951, + "step": 50150 + }, + { + "epoch": 2.29, + "learning_rate": 3.863563549819214e-05, + "loss": 2.1212, + "step": 50160 + }, + { + "epoch": 2.29, + "learning_rate": 3.8633347063938855e-05, + "loss": 2.1231, + "step": 50170 + }, + { + "epoch": 2.29, + "learning_rate": 3.8631058629685576e-05, + "loss": 2.0869, + "step": 50180 + }, + { + "epoch": 2.29, + "learning_rate": 3.862877019543228e-05, + "loss": 2.1711, + "step": 50190 + }, + { + "epoch": 2.29, + "learning_rate": 3.8626481761179004e-05, + "loss": 2.0133, + "step": 50200 + }, + { + "epoch": 2.29, + "learning_rate": 3.862419332692572e-05, + "loss": 2.0554, + "step": 50210 + }, + { + "epoch": 2.29, + "learning_rate": 3.862190489267243e-05, + "loss": 2.189, + "step": 50220 + }, + { + "epoch": 2.29, + "learning_rate": 3.861961645841915e-05, + "loss": 2.0093, + "step": 50230 + }, + { + "epoch": 2.29, + "learning_rate": 3.861732802416587e-05, + "loss": 1.981, + "step": 50240 + }, + { + "epoch": 2.29, + "learning_rate": 3.861503958991258e-05, + "loss": 2.1032, + "step": 50250 + }, + { + "epoch": 2.3, + "learning_rate": 3.86127511556593e-05, + "loss": 1.971, + "step": 50260 + }, + { + "epoch": 2.3, + "learning_rate": 3.8610462721406016e-05, + "loss": 2.11, + "step": 50270 + }, + { + "epoch": 2.3, + "learning_rate": 3.860817428715273e-05, + "loss": 1.9814, + "step": 50280 + }, + { + "epoch": 2.3, + "learning_rate": 3.860588585289945e-05, + "loss": 1.8928, + "step": 50290 + }, + { + "epoch": 2.3, + "learning_rate": 3.8603597418646166e-05, + "loss": 2.1477, + "step": 50300 + }, + { + "epoch": 2.3, + "learning_rate": 3.860130898439288e-05, + "loss": 2.0402, + "step": 50310 + }, + { + "epoch": 2.3, + "learning_rate": 3.85990205501396e-05, + "loss": 2.1096, + "step": 50320 + }, + { + "epoch": 2.3, + "learning_rate": 3.8596732115886315e-05, + "loss": 2.0049, + "step": 50330 + }, + { + "epoch": 2.3, + "learning_rate": 3.859444368163303e-05, + "loss": 2.1294, + "step": 50340 + }, + { + "epoch": 2.3, + "learning_rate": 3.859215524737975e-05, + "loss": 2.163, + "step": 50350 + }, + { + "epoch": 2.3, + "learning_rate": 3.8589866813126464e-05, + "loss": 2.0552, + "step": 50360 + }, + { + "epoch": 2.3, + "learning_rate": 3.858757837887318e-05, + "loss": 2.1217, + "step": 50370 + }, + { + "epoch": 2.3, + "learning_rate": 3.85852899446199e-05, + "loss": 2.2756, + "step": 50380 + }, + { + "epoch": 2.3, + "learning_rate": 3.8583001510366606e-05, + "loss": 2.1641, + "step": 50390 + }, + { + "epoch": 2.3, + "learning_rate": 3.858071307611332e-05, + "loss": 2.0336, + "step": 50400 + }, + { + "epoch": 2.3, + "learning_rate": 3.857842464186004e-05, + "loss": 2.1027, + "step": 50410 + }, + { + "epoch": 2.3, + "learning_rate": 3.8576136207606756e-05, + "loss": 2.0484, + "step": 50420 + }, + { + "epoch": 2.3, + "learning_rate": 3.857384777335347e-05, + "loss": 2.2565, + "step": 50430 + }, + { + "epoch": 2.3, + "learning_rate": 3.857155933910019e-05, + "loss": 2.1195, + "step": 50440 + }, + { + "epoch": 2.3, + "learning_rate": 3.8569270904846905e-05, + "loss": 1.9386, + "step": 50450 + }, + { + "epoch": 2.3, + "learning_rate": 3.856698247059362e-05, + "loss": 2.137, + "step": 50460 + }, + { + "epoch": 2.3, + "learning_rate": 3.856469403634034e-05, + "loss": 2.1935, + "step": 50470 + }, + { + "epoch": 2.31, + "learning_rate": 3.8562405602087054e-05, + "loss": 2.1224, + "step": 50480 + }, + { + "epoch": 2.31, + "learning_rate": 3.856011716783377e-05, + "loss": 2.1669, + "step": 50490 + }, + { + "epoch": 2.31, + "learning_rate": 3.855782873358049e-05, + "loss": 2.1013, + "step": 50500 + }, + { + "epoch": 2.31, + "learning_rate": 3.85555402993272e-05, + "loss": 2.0976, + "step": 50510 + }, + { + "epoch": 2.31, + "learning_rate": 3.855325186507392e-05, + "loss": 2.2225, + "step": 50520 + }, + { + "epoch": 2.31, + "learning_rate": 3.855096343082064e-05, + "loss": 2.0513, + "step": 50530 + }, + { + "epoch": 2.31, + "learning_rate": 3.854867499656735e-05, + "loss": 2.032, + "step": 50540 + }, + { + "epoch": 2.31, + "learning_rate": 3.8546386562314066e-05, + "loss": 2.2771, + "step": 50550 + }, + { + "epoch": 2.31, + "learning_rate": 3.854409812806079e-05, + "loss": 2.1413, + "step": 50560 + }, + { + "epoch": 2.31, + "learning_rate": 3.85418096938075e-05, + "loss": 2.3045, + "step": 50570 + }, + { + "epoch": 2.31, + "learning_rate": 3.853952125955421e-05, + "loss": 2.182, + "step": 50580 + }, + { + "epoch": 2.31, + "learning_rate": 3.853723282530093e-05, + "loss": 2.0662, + "step": 50590 + }, + { + "epoch": 2.31, + "learning_rate": 3.8534944391047644e-05, + "loss": 2.1612, + "step": 50600 + }, + { + "epoch": 2.31, + "learning_rate": 3.853265595679436e-05, + "loss": 2.096, + "step": 50610 + }, + { + "epoch": 2.31, + "learning_rate": 3.853036752254108e-05, + "loss": 2.128, + "step": 50620 + }, + { + "epoch": 2.31, + "learning_rate": 3.852807908828779e-05, + "loss": 2.2188, + "step": 50630 + }, + { + "epoch": 2.31, + "learning_rate": 3.852579065403451e-05, + "loss": 2.1602, + "step": 50640 + }, + { + "epoch": 2.31, + "learning_rate": 3.852350221978123e-05, + "loss": 2.0541, + "step": 50650 + }, + { + "epoch": 2.31, + "learning_rate": 3.852121378552794e-05, + "loss": 2.0798, + "step": 50660 + }, + { + "epoch": 2.31, + "learning_rate": 3.8518925351274656e-05, + "loss": 2.2255, + "step": 50670 + }, + { + "epoch": 2.31, + "learning_rate": 3.851663691702138e-05, + "loss": 1.976, + "step": 50680 + }, + { + "epoch": 2.31, + "learning_rate": 3.851434848276809e-05, + "loss": 2.2851, + "step": 50690 + }, + { + "epoch": 2.32, + "learning_rate": 3.8512060048514805e-05, + "loss": 2.1, + "step": 50700 + }, + { + "epoch": 2.32, + "learning_rate": 3.8509771614261526e-05, + "loss": 2.2103, + "step": 50710 + }, + { + "epoch": 2.32, + "learning_rate": 3.850748318000824e-05, + "loss": 2.049, + "step": 50720 + }, + { + "epoch": 2.32, + "learning_rate": 3.8505194745754955e-05, + "loss": 2.2592, + "step": 50730 + }, + { + "epoch": 2.32, + "learning_rate": 3.8502906311501676e-05, + "loss": 2.0832, + "step": 50740 + }, + { + "epoch": 2.32, + "learning_rate": 3.850061787724839e-05, + "loss": 2.2371, + "step": 50750 + }, + { + "epoch": 2.32, + "learning_rate": 3.8498329442995104e-05, + "loss": 2.1011, + "step": 50760 + }, + { + "epoch": 2.32, + "learning_rate": 3.8496041008741825e-05, + "loss": 2.142, + "step": 50770 + }, + { + "epoch": 2.32, + "learning_rate": 3.849375257448853e-05, + "loss": 2.1086, + "step": 50780 + }, + { + "epoch": 2.32, + "learning_rate": 3.849146414023525e-05, + "loss": 2.1281, + "step": 50790 + }, + { + "epoch": 2.32, + "learning_rate": 3.848917570598197e-05, + "loss": 2.0689, + "step": 50800 + }, + { + "epoch": 2.32, + "learning_rate": 3.848688727172868e-05, + "loss": 2.1096, + "step": 50810 + }, + { + "epoch": 2.32, + "learning_rate": 3.84845988374754e-05, + "loss": 2.108, + "step": 50820 + }, + { + "epoch": 2.32, + "learning_rate": 3.8482310403222116e-05, + "loss": 2.0857, + "step": 50830 + }, + { + "epoch": 2.32, + "learning_rate": 3.848002196896883e-05, + "loss": 2.0518, + "step": 50840 + }, + { + "epoch": 2.32, + "learning_rate": 3.847773353471555e-05, + "loss": 2.1641, + "step": 50850 + }, + { + "epoch": 2.32, + "learning_rate": 3.8475445100462266e-05, + "loss": 2.1298, + "step": 50860 + }, + { + "epoch": 2.32, + "learning_rate": 3.847315666620898e-05, + "loss": 2.0757, + "step": 50870 + }, + { + "epoch": 2.32, + "learning_rate": 3.84708682319557e-05, + "loss": 1.9765, + "step": 50880 + }, + { + "epoch": 2.32, + "learning_rate": 3.8468579797702415e-05, + "loss": 2.0261, + "step": 50890 + }, + { + "epoch": 2.32, + "learning_rate": 3.846629136344913e-05, + "loss": 2.1579, + "step": 50900 + }, + { + "epoch": 2.32, + "learning_rate": 3.846400292919585e-05, + "loss": 2.1198, + "step": 50910 + }, + { + "epoch": 2.33, + "learning_rate": 3.8461714494942564e-05, + "loss": 2.1848, + "step": 50920 + }, + { + "epoch": 2.33, + "learning_rate": 3.845942606068928e-05, + "loss": 2.0203, + "step": 50930 + }, + { + "epoch": 2.33, + "learning_rate": 3.8457137626436e-05, + "loss": 2.2157, + "step": 50940 + }, + { + "epoch": 2.33, + "learning_rate": 3.845484919218271e-05, + "loss": 2.0742, + "step": 50950 + }, + { + "epoch": 2.33, + "learning_rate": 3.845256075792943e-05, + "loss": 2.1496, + "step": 50960 + }, + { + "epoch": 2.33, + "learning_rate": 3.845027232367615e-05, + "loss": 2.2359, + "step": 50970 + }, + { + "epoch": 2.33, + "learning_rate": 3.8447983889422855e-05, + "loss": 2.0105, + "step": 50980 + }, + { + "epoch": 2.33, + "learning_rate": 3.844569545516957e-05, + "loss": 2.0373, + "step": 50990 + }, + { + "epoch": 2.33, + "learning_rate": 3.844340702091629e-05, + "loss": 2.1069, + "step": 51000 + }, + { + "epoch": 2.33, + "learning_rate": 3.8441118586663005e-05, + "loss": 2.0813, + "step": 51010 + }, + { + "epoch": 2.33, + "learning_rate": 3.843883015240972e-05, + "loss": 2.0863, + "step": 51020 + }, + { + "epoch": 2.33, + "learning_rate": 3.843654171815644e-05, + "loss": 2.1473, + "step": 51030 + }, + { + "epoch": 2.33, + "learning_rate": 3.8434253283903154e-05, + "loss": 2.1806, + "step": 51040 + }, + { + "epoch": 2.33, + "learning_rate": 3.843196484964987e-05, + "loss": 2.0446, + "step": 51050 + }, + { + "epoch": 2.33, + "learning_rate": 3.842967641539659e-05, + "loss": 2.3122, + "step": 51060 + }, + { + "epoch": 2.33, + "learning_rate": 3.84273879811433e-05, + "loss": 2.1379, + "step": 51070 + }, + { + "epoch": 2.33, + "learning_rate": 3.842509954689002e-05, + "loss": 1.9715, + "step": 51080 + }, + { + "epoch": 2.33, + "learning_rate": 3.842281111263674e-05, + "loss": 2.0568, + "step": 51090 + }, + { + "epoch": 2.33, + "learning_rate": 3.842052267838345e-05, + "loss": 2.0714, + "step": 51100 + }, + { + "epoch": 2.33, + "learning_rate": 3.8418234244130166e-05, + "loss": 1.9473, + "step": 51110 + }, + { + "epoch": 2.33, + "learning_rate": 3.841594580987689e-05, + "loss": 2.0012, + "step": 51120 + }, + { + "epoch": 2.33, + "learning_rate": 3.84136573756236e-05, + "loss": 2.0751, + "step": 51130 + }, + { + "epoch": 2.34, + "learning_rate": 3.8411368941370315e-05, + "loss": 2.2669, + "step": 51140 + }, + { + "epoch": 2.34, + "learning_rate": 3.8409080507117036e-05, + "loss": 2.3476, + "step": 51150 + }, + { + "epoch": 2.34, + "learning_rate": 3.840679207286375e-05, + "loss": 2.0941, + "step": 51160 + }, + { + "epoch": 2.34, + "learning_rate": 3.8404503638610465e-05, + "loss": 2.1436, + "step": 51170 + }, + { + "epoch": 2.34, + "learning_rate": 3.840221520435718e-05, + "loss": 2.1463, + "step": 51180 + }, + { + "epoch": 2.34, + "learning_rate": 3.839992677010389e-05, + "loss": 2.0512, + "step": 51190 + }, + { + "epoch": 2.34, + "learning_rate": 3.8397638335850614e-05, + "loss": 2.1011, + "step": 51200 + }, + { + "epoch": 2.34, + "learning_rate": 3.839534990159733e-05, + "loss": 2.0, + "step": 51210 + }, + { + "epoch": 2.34, + "learning_rate": 3.839306146734404e-05, + "loss": 2.1432, + "step": 51220 + }, + { + "epoch": 2.34, + "learning_rate": 3.839077303309076e-05, + "loss": 2.1033, + "step": 51230 + }, + { + "epoch": 2.34, + "learning_rate": 3.838848459883748e-05, + "loss": 2.2186, + "step": 51240 + }, + { + "epoch": 2.34, + "learning_rate": 3.838619616458419e-05, + "loss": 2.132, + "step": 51250 + }, + { + "epoch": 2.34, + "learning_rate": 3.838390773033091e-05, + "loss": 2.0954, + "step": 51260 + }, + { + "epoch": 2.34, + "learning_rate": 3.8381619296077626e-05, + "loss": 2.0263, + "step": 51270 + }, + { + "epoch": 2.34, + "learning_rate": 3.837933086182434e-05, + "loss": 2.0679, + "step": 51280 + }, + { + "epoch": 2.34, + "learning_rate": 3.837704242757106e-05, + "loss": 2.1228, + "step": 51290 + }, + { + "epoch": 2.34, + "learning_rate": 3.8374753993317775e-05, + "loss": 2.0461, + "step": 51300 + }, + { + "epoch": 2.34, + "learning_rate": 3.837246555906449e-05, + "loss": 2.0341, + "step": 51310 + }, + { + "epoch": 2.34, + "learning_rate": 3.837017712481121e-05, + "loss": 2.0894, + "step": 51320 + }, + { + "epoch": 2.34, + "learning_rate": 3.8367888690557925e-05, + "loss": 2.1274, + "step": 51330 + }, + { + "epoch": 2.34, + "learning_rate": 3.836560025630464e-05, + "loss": 2.1241, + "step": 51340 + }, + { + "epoch": 2.34, + "learning_rate": 3.836331182205136e-05, + "loss": 1.9904, + "step": 51350 + }, + { + "epoch": 2.35, + "learning_rate": 3.8361023387798074e-05, + "loss": 2.1198, + "step": 51360 + }, + { + "epoch": 2.35, + "learning_rate": 3.835873495354478e-05, + "loss": 2.0308, + "step": 51370 + }, + { + "epoch": 2.35, + "learning_rate": 3.83564465192915e-05, + "loss": 2.2135, + "step": 51380 + }, + { + "epoch": 2.35, + "learning_rate": 3.8354158085038216e-05, + "loss": 2.292, + "step": 51390 + }, + { + "epoch": 2.35, + "learning_rate": 3.835186965078493e-05, + "loss": 2.009, + "step": 51400 + }, + { + "epoch": 2.35, + "learning_rate": 3.834958121653165e-05, + "loss": 2.2131, + "step": 51410 + }, + { + "epoch": 2.35, + "learning_rate": 3.8347292782278365e-05, + "loss": 2.0647, + "step": 51420 + }, + { + "epoch": 2.35, + "learning_rate": 3.834500434802508e-05, + "loss": 1.9326, + "step": 51430 + }, + { + "epoch": 2.35, + "learning_rate": 3.83427159137718e-05, + "loss": 2.2176, + "step": 51440 + }, + { + "epoch": 2.35, + "learning_rate": 3.8340427479518515e-05, + "loss": 2.0945, + "step": 51450 + }, + { + "epoch": 2.35, + "learning_rate": 3.833813904526523e-05, + "loss": 2.0784, + "step": 51460 + }, + { + "epoch": 2.35, + "learning_rate": 3.833585061101195e-05, + "loss": 2.0984, + "step": 51470 + }, + { + "epoch": 2.35, + "learning_rate": 3.8333562176758664e-05, + "loss": 2.1016, + "step": 51480 + }, + { + "epoch": 2.35, + "learning_rate": 3.833127374250538e-05, + "loss": 1.9336, + "step": 51490 + }, + { + "epoch": 2.35, + "learning_rate": 3.83289853082521e-05, + "loss": 1.9483, + "step": 51500 + }, + { + "epoch": 2.35, + "learning_rate": 3.832669687399881e-05, + "loss": 2.1782, + "step": 51510 + }, + { + "epoch": 2.35, + "learning_rate": 3.832440843974553e-05, + "loss": 2.0435, + "step": 51520 + }, + { + "epoch": 2.35, + "learning_rate": 3.832212000549225e-05, + "loss": 2.1124, + "step": 51530 + }, + { + "epoch": 2.35, + "learning_rate": 3.831983157123896e-05, + "loss": 2.1835, + "step": 51540 + }, + { + "epoch": 2.35, + "learning_rate": 3.8317543136985676e-05, + "loss": 2.0966, + "step": 51550 + }, + { + "epoch": 2.35, + "learning_rate": 3.83152547027324e-05, + "loss": 2.0575, + "step": 51560 + }, + { + "epoch": 2.35, + "learning_rate": 3.8312966268479105e-05, + "loss": 2.1056, + "step": 51570 + }, + { + "epoch": 2.36, + "learning_rate": 3.8310677834225825e-05, + "loss": 2.1733, + "step": 51580 + }, + { + "epoch": 2.36, + "learning_rate": 3.830838939997254e-05, + "loss": 2.2471, + "step": 51590 + }, + { + "epoch": 2.36, + "learning_rate": 3.8306100965719254e-05, + "loss": 2.1342, + "step": 51600 + }, + { + "epoch": 2.36, + "learning_rate": 3.8303812531465975e-05, + "loss": 2.106, + "step": 51610 + }, + { + "epoch": 2.36, + "learning_rate": 3.830152409721269e-05, + "loss": 2.006, + "step": 51620 + }, + { + "epoch": 2.36, + "learning_rate": 3.82992356629594e-05, + "loss": 2.2349, + "step": 51630 + }, + { + "epoch": 2.36, + "learning_rate": 3.8296947228706124e-05, + "loss": 2.0237, + "step": 51640 + }, + { + "epoch": 2.36, + "learning_rate": 3.829465879445284e-05, + "loss": 2.1679, + "step": 51650 + }, + { + "epoch": 2.36, + "learning_rate": 3.829237036019955e-05, + "loss": 2.121, + "step": 51660 + }, + { + "epoch": 2.36, + "learning_rate": 3.8290081925946266e-05, + "loss": 2.139, + "step": 51670 + }, + { + "epoch": 2.36, + "learning_rate": 3.828779349169299e-05, + "loss": 1.9834, + "step": 51680 + }, + { + "epoch": 2.36, + "learning_rate": 3.82855050574397e-05, + "loss": 2.1793, + "step": 51690 + }, + { + "epoch": 2.36, + "learning_rate": 3.8283216623186415e-05, + "loss": 2.0125, + "step": 51700 + }, + { + "epoch": 2.36, + "learning_rate": 3.8280928188933136e-05, + "loss": 2.2065, + "step": 51710 + }, + { + "epoch": 2.36, + "learning_rate": 3.827863975467985e-05, + "loss": 2.047, + "step": 51720 + }, + { + "epoch": 2.36, + "learning_rate": 3.8276351320426565e-05, + "loss": 2.3328, + "step": 51730 + }, + { + "epoch": 2.36, + "learning_rate": 3.8274062886173285e-05, + "loss": 2.0506, + "step": 51740 + }, + { + "epoch": 2.36, + "learning_rate": 3.827177445192e-05, + "loss": 1.9986, + "step": 51750 + }, + { + "epoch": 2.36, + "learning_rate": 3.8269486017666714e-05, + "loss": 1.8926, + "step": 51760 + }, + { + "epoch": 2.36, + "learning_rate": 3.826719758341343e-05, + "loss": 1.9845, + "step": 51770 + }, + { + "epoch": 2.36, + "learning_rate": 3.826490914916014e-05, + "loss": 2.1111, + "step": 51780 + }, + { + "epoch": 2.36, + "learning_rate": 3.826262071490686e-05, + "loss": 2.1354, + "step": 51790 + }, + { + "epoch": 2.37, + "learning_rate": 3.826033228065358e-05, + "loss": 2.1618, + "step": 51800 + }, + { + "epoch": 2.37, + "learning_rate": 3.825804384640029e-05, + "loss": 2.1757, + "step": 51810 + }, + { + "epoch": 2.37, + "learning_rate": 3.825575541214701e-05, + "loss": 2.0079, + "step": 51820 + }, + { + "epoch": 2.37, + "learning_rate": 3.8253466977893726e-05, + "loss": 2.0823, + "step": 51830 + }, + { + "epoch": 2.37, + "learning_rate": 3.825117854364044e-05, + "loss": 2.0434, + "step": 51840 + }, + { + "epoch": 2.37, + "learning_rate": 3.824889010938716e-05, + "loss": 2.0382, + "step": 51850 + }, + { + "epoch": 2.37, + "learning_rate": 3.8246601675133875e-05, + "loss": 2.0787, + "step": 51860 + }, + { + "epoch": 2.37, + "learning_rate": 3.824431324088059e-05, + "loss": 2.0355, + "step": 51870 + }, + { + "epoch": 2.37, + "learning_rate": 3.824202480662731e-05, + "loss": 2.076, + "step": 51880 + }, + { + "epoch": 2.37, + "learning_rate": 3.8239736372374025e-05, + "loss": 2.124, + "step": 51890 + }, + { + "epoch": 2.37, + "learning_rate": 3.823744793812074e-05, + "loss": 2.0418, + "step": 51900 + }, + { + "epoch": 2.37, + "learning_rate": 3.823515950386746e-05, + "loss": 2.0612, + "step": 51910 + }, + { + "epoch": 2.37, + "learning_rate": 3.8232871069614174e-05, + "loss": 2.0437, + "step": 51920 + }, + { + "epoch": 2.37, + "learning_rate": 3.823058263536089e-05, + "loss": 2.0563, + "step": 51930 + }, + { + "epoch": 2.37, + "learning_rate": 3.822829420110761e-05, + "loss": 2.0505, + "step": 51940 + }, + { + "epoch": 2.37, + "learning_rate": 3.822600576685432e-05, + "loss": 2.1083, + "step": 51950 + }, + { + "epoch": 2.37, + "learning_rate": 3.822371733260104e-05, + "loss": 2.0186, + "step": 51960 + }, + { + "epoch": 2.37, + "learning_rate": 3.822142889834775e-05, + "loss": 2.0287, + "step": 51970 + }, + { + "epoch": 2.37, + "learning_rate": 3.8219140464094465e-05, + "loss": 2.0659, + "step": 51980 + }, + { + "epoch": 2.37, + "learning_rate": 3.821685202984118e-05, + "loss": 2.0006, + "step": 51990 + }, + { + "epoch": 2.37, + "learning_rate": 3.82145635955879e-05, + "loss": 2.0655, + "step": 52000 + }, + { + "epoch": 2.37, + "learning_rate": 3.8212275161334614e-05, + "loss": 2.0427, + "step": 52010 + }, + { + "epoch": 2.38, + "learning_rate": 3.820998672708133e-05, + "loss": 2.0517, + "step": 52020 + }, + { + "epoch": 2.38, + "learning_rate": 3.820769829282805e-05, + "loss": 2.2595, + "step": 52030 + }, + { + "epoch": 2.38, + "learning_rate": 3.8205409858574764e-05, + "loss": 2.2415, + "step": 52040 + }, + { + "epoch": 2.38, + "learning_rate": 3.820312142432148e-05, + "loss": 2.0681, + "step": 52050 + }, + { + "epoch": 2.38, + "learning_rate": 3.82008329900682e-05, + "loss": 2.1711, + "step": 52060 + }, + { + "epoch": 2.38, + "learning_rate": 3.819854455581491e-05, + "loss": 2.0362, + "step": 52070 + }, + { + "epoch": 2.38, + "learning_rate": 3.819625612156163e-05, + "loss": 2.0014, + "step": 52080 + }, + { + "epoch": 2.38, + "learning_rate": 3.819396768730835e-05, + "loss": 2.1327, + "step": 52090 + }, + { + "epoch": 2.38, + "learning_rate": 3.819167925305506e-05, + "loss": 1.9867, + "step": 52100 + }, + { + "epoch": 2.38, + "learning_rate": 3.8189390818801776e-05, + "loss": 2.1712, + "step": 52110 + }, + { + "epoch": 2.38, + "learning_rate": 3.81871023845485e-05, + "loss": 2.0778, + "step": 52120 + }, + { + "epoch": 2.38, + "learning_rate": 3.818481395029521e-05, + "loss": 2.1349, + "step": 52130 + }, + { + "epoch": 2.38, + "learning_rate": 3.8182525516041925e-05, + "loss": 2.0145, + "step": 52140 + }, + { + "epoch": 2.38, + "learning_rate": 3.8180237081788646e-05, + "loss": 2.0326, + "step": 52150 + }, + { + "epoch": 2.38, + "learning_rate": 3.8177948647535354e-05, + "loss": 2.2197, + "step": 52160 + }, + { + "epoch": 2.38, + "learning_rate": 3.8175660213282074e-05, + "loss": 2.1302, + "step": 52170 + }, + { + "epoch": 2.38, + "learning_rate": 3.817337177902879e-05, + "loss": 2.1389, + "step": 52180 + }, + { + "epoch": 2.38, + "learning_rate": 3.81710833447755e-05, + "loss": 2.1834, + "step": 52190 + }, + { + "epoch": 2.38, + "learning_rate": 3.8168794910522224e-05, + "loss": 2.2194, + "step": 52200 + }, + { + "epoch": 2.38, + "learning_rate": 3.816650647626894e-05, + "loss": 2.0031, + "step": 52210 + }, + { + "epoch": 2.38, + "learning_rate": 3.816421804201565e-05, + "loss": 2.0231, + "step": 52220 + }, + { + "epoch": 2.38, + "learning_rate": 3.816192960776237e-05, + "loss": 2.0943, + "step": 52230 + }, + { + "epoch": 2.39, + "learning_rate": 3.815964117350909e-05, + "loss": 2.0155, + "step": 52240 + }, + { + "epoch": 2.39, + "learning_rate": 3.81573527392558e-05, + "loss": 2.1102, + "step": 52250 + }, + { + "epoch": 2.39, + "learning_rate": 3.815506430500252e-05, + "loss": 1.9898, + "step": 52260 + }, + { + "epoch": 2.39, + "learning_rate": 3.8152775870749236e-05, + "loss": 2.129, + "step": 52270 + }, + { + "epoch": 2.39, + "learning_rate": 3.815048743649595e-05, + "loss": 2.1677, + "step": 52280 + }, + { + "epoch": 2.39, + "learning_rate": 3.814819900224267e-05, + "loss": 2.1734, + "step": 52290 + }, + { + "epoch": 2.39, + "learning_rate": 3.8145910567989385e-05, + "loss": 2.0654, + "step": 52300 + }, + { + "epoch": 2.39, + "learning_rate": 3.81436221337361e-05, + "loss": 2.0678, + "step": 52310 + }, + { + "epoch": 2.39, + "learning_rate": 3.814133369948282e-05, + "loss": 2.0278, + "step": 52320 + }, + { + "epoch": 2.39, + "learning_rate": 3.8139045265229535e-05, + "loss": 1.8912, + "step": 52330 + }, + { + "epoch": 2.39, + "learning_rate": 3.813675683097625e-05, + "loss": 2.1537, + "step": 52340 + }, + { + "epoch": 2.39, + "learning_rate": 3.813446839672297e-05, + "loss": 2.0677, + "step": 52350 + }, + { + "epoch": 2.39, + "learning_rate": 3.813217996246968e-05, + "loss": 2.1631, + "step": 52360 + }, + { + "epoch": 2.39, + "learning_rate": 3.812989152821639e-05, + "loss": 2.1152, + "step": 52370 + }, + { + "epoch": 2.39, + "learning_rate": 3.812760309396311e-05, + "loss": 1.9674, + "step": 52380 + }, + { + "epoch": 2.39, + "learning_rate": 3.8125314659709826e-05, + "loss": 2.0749, + "step": 52390 + }, + { + "epoch": 2.39, + "learning_rate": 3.812302622545654e-05, + "loss": 2.0694, + "step": 52400 + }, + { + "epoch": 2.39, + "learning_rate": 3.812073779120326e-05, + "loss": 2.2792, + "step": 52410 + }, + { + "epoch": 2.39, + "learning_rate": 3.8118449356949975e-05, + "loss": 1.9582, + "step": 52420 + }, + { + "epoch": 2.39, + "learning_rate": 3.811616092269669e-05, + "loss": 2.0275, + "step": 52430 + }, + { + "epoch": 2.39, + "learning_rate": 3.811387248844341e-05, + "loss": 2.2523, + "step": 52440 + }, + { + "epoch": 2.4, + "learning_rate": 3.8111584054190124e-05, + "loss": 2.1268, + "step": 52450 + }, + { + "epoch": 2.4, + "learning_rate": 3.810929561993684e-05, + "loss": 2.1986, + "step": 52460 + }, + { + "epoch": 2.4, + "learning_rate": 3.810700718568356e-05, + "loss": 2.1972, + "step": 52470 + }, + { + "epoch": 2.4, + "learning_rate": 3.8104718751430274e-05, + "loss": 2.1031, + "step": 52480 + }, + { + "epoch": 2.4, + "learning_rate": 3.810243031717699e-05, + "loss": 2.1115, + "step": 52490 + }, + { + "epoch": 2.4, + "learning_rate": 3.810014188292371e-05, + "loss": 2.155, + "step": 52500 + }, + { + "epoch": 2.4, + "learning_rate": 3.809785344867042e-05, + "loss": 2.2326, + "step": 52510 + }, + { + "epoch": 2.4, + "learning_rate": 3.809556501441714e-05, + "loss": 2.0367, + "step": 52520 + }, + { + "epoch": 2.4, + "learning_rate": 3.809327658016386e-05, + "loss": 2.1638, + "step": 52530 + }, + { + "epoch": 2.4, + "learning_rate": 3.809098814591057e-05, + "loss": 2.0417, + "step": 52540 + }, + { + "epoch": 2.4, + "learning_rate": 3.8088699711657286e-05, + "loss": 2.0686, + "step": 52550 + }, + { + "epoch": 2.4, + "learning_rate": 3.8086411277404e-05, + "loss": 2.0457, + "step": 52560 + }, + { + "epoch": 2.4, + "learning_rate": 3.8084122843150714e-05, + "loss": 2.175, + "step": 52570 + }, + { + "epoch": 2.4, + "learning_rate": 3.8081834408897435e-05, + "loss": 2.1104, + "step": 52580 + }, + { + "epoch": 2.4, + "learning_rate": 3.807954597464415e-05, + "loss": 2.0676, + "step": 52590 + }, + { + "epoch": 2.4, + "learning_rate": 3.8077257540390864e-05, + "loss": 2.0664, + "step": 52600 + }, + { + "epoch": 2.4, + "learning_rate": 3.8074969106137584e-05, + "loss": 2.0509, + "step": 52610 + }, + { + "epoch": 2.4, + "learning_rate": 3.80726806718843e-05, + "loss": 2.0892, + "step": 52620 + }, + { + "epoch": 2.4, + "learning_rate": 3.807039223763101e-05, + "loss": 2.1214, + "step": 52630 + }, + { + "epoch": 2.4, + "learning_rate": 3.8068103803377734e-05, + "loss": 2.0521, + "step": 52640 + }, + { + "epoch": 2.4, + "learning_rate": 3.806581536912445e-05, + "loss": 2.0869, + "step": 52650 + }, + { + "epoch": 2.4, + "learning_rate": 3.806352693487116e-05, + "loss": 2.232, + "step": 52660 + }, + { + "epoch": 2.41, + "learning_rate": 3.806123850061788e-05, + "loss": 2.0205, + "step": 52670 + }, + { + "epoch": 2.41, + "learning_rate": 3.80589500663646e-05, + "loss": 2.221, + "step": 52680 + }, + { + "epoch": 2.41, + "learning_rate": 3.805666163211131e-05, + "loss": 2.1396, + "step": 52690 + }, + { + "epoch": 2.41, + "learning_rate": 3.805437319785803e-05, + "loss": 2.2324, + "step": 52700 + }, + { + "epoch": 2.41, + "learning_rate": 3.8052084763604746e-05, + "loss": 2.1654, + "step": 52710 + }, + { + "epoch": 2.41, + "learning_rate": 3.804979632935146e-05, + "loss": 2.1377, + "step": 52720 + }, + { + "epoch": 2.41, + "learning_rate": 3.804750789509818e-05, + "loss": 2.0661, + "step": 52730 + }, + { + "epoch": 2.41, + "learning_rate": 3.8045219460844895e-05, + "loss": 2.1598, + "step": 52740 + }, + { + "epoch": 2.41, + "learning_rate": 3.804293102659161e-05, + "loss": 2.1578, + "step": 52750 + }, + { + "epoch": 2.41, + "learning_rate": 3.8040642592338324e-05, + "loss": 2.1061, + "step": 52760 + }, + { + "epoch": 2.41, + "learning_rate": 3.803835415808504e-05, + "loss": 1.9702, + "step": 52770 + }, + { + "epoch": 2.41, + "learning_rate": 3.803606572383175e-05, + "loss": 2.1122, + "step": 52780 + }, + { + "epoch": 2.41, + "learning_rate": 3.803377728957847e-05, + "loss": 1.9899, + "step": 52790 + }, + { + "epoch": 2.41, + "learning_rate": 3.803148885532519e-05, + "loss": 2.1356, + "step": 52800 + }, + { + "epoch": 2.41, + "learning_rate": 3.80292004210719e-05, + "loss": 2.1551, + "step": 52810 + }, + { + "epoch": 2.41, + "learning_rate": 3.802691198681862e-05, + "loss": 2.0418, + "step": 52820 + }, + { + "epoch": 2.41, + "learning_rate": 3.8024623552565336e-05, + "loss": 2.2507, + "step": 52830 + }, + { + "epoch": 2.41, + "learning_rate": 3.802233511831205e-05, + "loss": 2.1581, + "step": 52840 + }, + { + "epoch": 2.41, + "learning_rate": 3.802004668405877e-05, + "loss": 2.0773, + "step": 52850 + }, + { + "epoch": 2.41, + "learning_rate": 3.8017758249805485e-05, + "loss": 2.1546, + "step": 52860 + }, + { + "epoch": 2.41, + "learning_rate": 3.80154698155522e-05, + "loss": 1.9898, + "step": 52870 + }, + { + "epoch": 2.41, + "learning_rate": 3.801318138129892e-05, + "loss": 2.1816, + "step": 52880 + }, + { + "epoch": 2.42, + "learning_rate": 3.8010892947045634e-05, + "loss": 2.1261, + "step": 52890 + }, + { + "epoch": 2.42, + "learning_rate": 3.800860451279235e-05, + "loss": 1.9197, + "step": 52900 + }, + { + "epoch": 2.42, + "learning_rate": 3.800631607853907e-05, + "loss": 2.1006, + "step": 52910 + }, + { + "epoch": 2.42, + "learning_rate": 3.8004027644285784e-05, + "loss": 2.0812, + "step": 52920 + }, + { + "epoch": 2.42, + "learning_rate": 3.80017392100325e-05, + "loss": 2.2013, + "step": 52930 + }, + { + "epoch": 2.42, + "learning_rate": 3.799945077577922e-05, + "loss": 1.9934, + "step": 52940 + }, + { + "epoch": 2.42, + "learning_rate": 3.7997162341525926e-05, + "loss": 2.0336, + "step": 52950 + }, + { + "epoch": 2.42, + "learning_rate": 3.799487390727264e-05, + "loss": 2.2171, + "step": 52960 + }, + { + "epoch": 2.42, + "learning_rate": 3.799258547301936e-05, + "loss": 2.065, + "step": 52970 + }, + { + "epoch": 2.42, + "learning_rate": 3.7990297038766075e-05, + "loss": 2.0223, + "step": 52980 + }, + { + "epoch": 2.42, + "learning_rate": 3.798800860451279e-05, + "loss": 1.9035, + "step": 52990 + }, + { + "epoch": 2.42, + "learning_rate": 3.798572017025951e-05, + "loss": 2.0178, + "step": 53000 + }, + { + "epoch": 2.42, + "learning_rate": 3.7983431736006224e-05, + "loss": 2.2346, + "step": 53010 + }, + { + "epoch": 2.42, + "learning_rate": 3.798114330175294e-05, + "loss": 2.0685, + "step": 53020 + }, + { + "epoch": 2.42, + "learning_rate": 3.797885486749966e-05, + "loss": 2.1521, + "step": 53030 + }, + { + "epoch": 2.42, + "learning_rate": 3.7976566433246374e-05, + "loss": 2.0657, + "step": 53040 + }, + { + "epoch": 2.42, + "learning_rate": 3.797427799899309e-05, + "loss": 2.0312, + "step": 53050 + }, + { + "epoch": 2.42, + "learning_rate": 3.797198956473981e-05, + "loss": 2.2251, + "step": 53060 + }, + { + "epoch": 2.42, + "learning_rate": 3.796970113048652e-05, + "loss": 2.0927, + "step": 53070 + }, + { + "epoch": 2.42, + "learning_rate": 3.796741269623324e-05, + "loss": 1.9918, + "step": 53080 + }, + { + "epoch": 2.42, + "learning_rate": 3.796512426197996e-05, + "loss": 2.23, + "step": 53090 + }, + { + "epoch": 2.42, + "learning_rate": 3.796283582772667e-05, + "loss": 2.0668, + "step": 53100 + }, + { + "epoch": 2.43, + "learning_rate": 3.7960547393473386e-05, + "loss": 2.1497, + "step": 53110 + }, + { + "epoch": 2.43, + "learning_rate": 3.795825895922011e-05, + "loss": 2.1952, + "step": 53120 + }, + { + "epoch": 2.43, + "learning_rate": 3.795597052496682e-05, + "loss": 2.0981, + "step": 53130 + }, + { + "epoch": 2.43, + "learning_rate": 3.7953682090713535e-05, + "loss": 2.1518, + "step": 53140 + }, + { + "epoch": 2.43, + "learning_rate": 3.795139365646025e-05, + "loss": 2.1024, + "step": 53150 + }, + { + "epoch": 2.43, + "learning_rate": 3.7949105222206963e-05, + "loss": 2.3129, + "step": 53160 + }, + { + "epoch": 2.43, + "learning_rate": 3.7946816787953684e-05, + "loss": 2.1279, + "step": 53170 + }, + { + "epoch": 2.43, + "learning_rate": 3.79445283537004e-05, + "loss": 2.0288, + "step": 53180 + }, + { + "epoch": 2.43, + "learning_rate": 3.794223991944711e-05, + "loss": 1.966, + "step": 53190 + }, + { + "epoch": 2.43, + "learning_rate": 3.7939951485193834e-05, + "loss": 2.1596, + "step": 53200 + }, + { + "epoch": 2.43, + "learning_rate": 3.793766305094055e-05, + "loss": 2.1315, + "step": 53210 + }, + { + "epoch": 2.43, + "learning_rate": 3.793537461668726e-05, + "loss": 2.1474, + "step": 53220 + }, + { + "epoch": 2.43, + "learning_rate": 3.793308618243398e-05, + "loss": 2.1883, + "step": 53230 + }, + { + "epoch": 2.43, + "learning_rate": 3.79307977481807e-05, + "loss": 2.1089, + "step": 53240 + }, + { + "epoch": 2.43, + "learning_rate": 3.792850931392741e-05, + "loss": 2.1806, + "step": 53250 + }, + { + "epoch": 2.43, + "learning_rate": 3.792622087967413e-05, + "loss": 2.1233, + "step": 53260 + }, + { + "epoch": 2.43, + "learning_rate": 3.7923932445420846e-05, + "loss": 2.156, + "step": 53270 + }, + { + "epoch": 2.43, + "learning_rate": 3.792164401116756e-05, + "loss": 2.1202, + "step": 53280 + }, + { + "epoch": 2.43, + "learning_rate": 3.791935557691428e-05, + "loss": 2.1214, + "step": 53290 + }, + { + "epoch": 2.43, + "learning_rate": 3.7917067142660995e-05, + "loss": 2.0905, + "step": 53300 + }, + { + "epoch": 2.43, + "learning_rate": 3.791477870840771e-05, + "loss": 2.1636, + "step": 53310 + }, + { + "epoch": 2.43, + "learning_rate": 3.791249027415443e-05, + "loss": 2.047, + "step": 53320 + }, + { + "epoch": 2.44, + "learning_rate": 3.7910201839901144e-05, + "loss": 2.1588, + "step": 53330 + }, + { + "epoch": 2.44, + "learning_rate": 3.790791340564786e-05, + "loss": 2.0122, + "step": 53340 + }, + { + "epoch": 2.44, + "learning_rate": 3.790562497139457e-05, + "loss": 1.8912, + "step": 53350 + }, + { + "epoch": 2.44, + "learning_rate": 3.790333653714129e-05, + "loss": 2.0633, + "step": 53360 + }, + { + "epoch": 2.44, + "learning_rate": 3.7901048102888e-05, + "loss": 2.0528, + "step": 53370 + }, + { + "epoch": 2.44, + "learning_rate": 3.789875966863472e-05, + "loss": 1.9656, + "step": 53380 + }, + { + "epoch": 2.44, + "learning_rate": 3.7896471234381436e-05, + "loss": 2.2616, + "step": 53390 + }, + { + "epoch": 2.44, + "learning_rate": 3.789418280012815e-05, + "loss": 2.1235, + "step": 53400 + }, + { + "epoch": 2.44, + "learning_rate": 3.789189436587487e-05, + "loss": 2.1273, + "step": 53410 + }, + { + "epoch": 2.44, + "learning_rate": 3.7889605931621585e-05, + "loss": 2.0519, + "step": 53420 + }, + { + "epoch": 2.44, + "learning_rate": 3.78873174973683e-05, + "loss": 2.2067, + "step": 53430 + }, + { + "epoch": 2.44, + "learning_rate": 3.788502906311502e-05, + "loss": 2.0375, + "step": 53440 + }, + { + "epoch": 2.44, + "learning_rate": 3.7882740628861734e-05, + "loss": 2.1687, + "step": 53450 + }, + { + "epoch": 2.44, + "learning_rate": 3.788045219460845e-05, + "loss": 2.0649, + "step": 53460 + }, + { + "epoch": 2.44, + "learning_rate": 3.787816376035517e-05, + "loss": 2.115, + "step": 53470 + }, + { + "epoch": 2.44, + "learning_rate": 3.7875875326101883e-05, + "loss": 2.0573, + "step": 53480 + }, + { + "epoch": 2.44, + "learning_rate": 3.78735868918486e-05, + "loss": 1.995, + "step": 53490 + }, + { + "epoch": 2.44, + "learning_rate": 3.787129845759532e-05, + "loss": 2.0309, + "step": 53500 + }, + { + "epoch": 2.44, + "learning_rate": 3.786901002334203e-05, + "loss": 2.3506, + "step": 53510 + }, + { + "epoch": 2.44, + "learning_rate": 3.786672158908875e-05, + "loss": 2.0166, + "step": 53520 + }, + { + "epoch": 2.44, + "learning_rate": 3.786443315483547e-05, + "loss": 2.2474, + "step": 53530 + }, + { + "epoch": 2.44, + "learning_rate": 3.786214472058218e-05, + "loss": 2.0506, + "step": 53540 + }, + { + "epoch": 2.45, + "learning_rate": 3.7859856286328896e-05, + "loss": 1.9023, + "step": 53550 + }, + { + "epoch": 2.45, + "learning_rate": 3.785756785207561e-05, + "loss": 2.1758, + "step": 53560 + }, + { + "epoch": 2.45, + "learning_rate": 3.7855279417822324e-05, + "loss": 2.2241, + "step": 53570 + }, + { + "epoch": 2.45, + "learning_rate": 3.7852990983569045e-05, + "loss": 2.0969, + "step": 53580 + }, + { + "epoch": 2.45, + "learning_rate": 3.785070254931576e-05, + "loss": 2.0847, + "step": 53590 + }, + { + "epoch": 2.45, + "learning_rate": 3.7848414115062473e-05, + "loss": 1.9857, + "step": 53600 + }, + { + "epoch": 2.45, + "learning_rate": 3.7846125680809194e-05, + "loss": 2.0399, + "step": 53610 + }, + { + "epoch": 2.45, + "learning_rate": 3.784383724655591e-05, + "loss": 2.0017, + "step": 53620 + }, + { + "epoch": 2.45, + "learning_rate": 3.784154881230262e-05, + "loss": 2.1251, + "step": 53630 + }, + { + "epoch": 2.45, + "learning_rate": 3.7839260378049344e-05, + "loss": 2.2302, + "step": 53640 + }, + { + "epoch": 2.45, + "learning_rate": 3.783697194379606e-05, + "loss": 2.142, + "step": 53650 + }, + { + "epoch": 2.45, + "learning_rate": 3.783468350954277e-05, + "loss": 2.1778, + "step": 53660 + }, + { + "epoch": 2.45, + "learning_rate": 3.783239507528949e-05, + "loss": 2.0409, + "step": 53670 + }, + { + "epoch": 2.45, + "learning_rate": 3.783010664103621e-05, + "loss": 2.2633, + "step": 53680 + }, + { + "epoch": 2.45, + "learning_rate": 3.782781820678292e-05, + "loss": 2.1282, + "step": 53690 + }, + { + "epoch": 2.45, + "learning_rate": 3.782552977252964e-05, + "loss": 2.0592, + "step": 53700 + }, + { + "epoch": 2.45, + "learning_rate": 3.7823241338276356e-05, + "loss": 2.0712, + "step": 53710 + }, + { + "epoch": 2.45, + "learning_rate": 3.782095290402307e-05, + "loss": 2.2526, + "step": 53720 + }, + { + "epoch": 2.45, + "learning_rate": 3.781866446976979e-05, + "loss": 2.0818, + "step": 53730 + }, + { + "epoch": 2.45, + "learning_rate": 3.78163760355165e-05, + "loss": 1.9357, + "step": 53740 + }, + { + "epoch": 2.45, + "learning_rate": 3.781408760126321e-05, + "loss": 1.9911, + "step": 53750 + }, + { + "epoch": 2.45, + "learning_rate": 3.7811799167009933e-05, + "loss": 2.0886, + "step": 53760 + }, + { + "epoch": 2.46, + "learning_rate": 3.780951073275665e-05, + "loss": 2.1458, + "step": 53770 + }, + { + "epoch": 2.46, + "learning_rate": 3.780722229850336e-05, + "loss": 1.9885, + "step": 53780 + }, + { + "epoch": 2.46, + "learning_rate": 3.780493386425008e-05, + "loss": 2.131, + "step": 53790 + }, + { + "epoch": 2.46, + "learning_rate": 3.78026454299968e-05, + "loss": 2.0626, + "step": 53800 + }, + { + "epoch": 2.46, + "learning_rate": 3.780035699574351e-05, + "loss": 2.1246, + "step": 53810 + }, + { + "epoch": 2.46, + "learning_rate": 3.779806856149023e-05, + "loss": 2.0163, + "step": 53820 + }, + { + "epoch": 2.46, + "learning_rate": 3.7795780127236946e-05, + "loss": 2.2968, + "step": 53830 + }, + { + "epoch": 2.46, + "learning_rate": 3.779349169298366e-05, + "loss": 2.0274, + "step": 53840 + }, + { + "epoch": 2.46, + "learning_rate": 3.779120325873038e-05, + "loss": 2.1396, + "step": 53850 + }, + { + "epoch": 2.46, + "learning_rate": 3.7788914824477095e-05, + "loss": 2.1114, + "step": 53860 + }, + { + "epoch": 2.46, + "learning_rate": 3.778662639022381e-05, + "loss": 2.1231, + "step": 53870 + }, + { + "epoch": 2.46, + "learning_rate": 3.778433795597053e-05, + "loss": 2.2463, + "step": 53880 + }, + { + "epoch": 2.46, + "learning_rate": 3.7782049521717244e-05, + "loss": 1.9275, + "step": 53890 + }, + { + "epoch": 2.46, + "learning_rate": 3.777976108746396e-05, + "loss": 2.1139, + "step": 53900 + }, + { + "epoch": 2.46, + "learning_rate": 3.777747265321068e-05, + "loss": 2.1483, + "step": 53910 + }, + { + "epoch": 2.46, + "learning_rate": 3.7775184218957393e-05, + "loss": 2.1499, + "step": 53920 + }, + { + "epoch": 2.46, + "learning_rate": 3.777289578470411e-05, + "loss": 2.1123, + "step": 53930 + }, + { + "epoch": 2.46, + "learning_rate": 3.777060735045082e-05, + "loss": 2.0921, + "step": 53940 + }, + { + "epoch": 2.46, + "learning_rate": 3.7768318916197536e-05, + "loss": 2.0257, + "step": 53950 + }, + { + "epoch": 2.46, + "learning_rate": 3.776603048194426e-05, + "loss": 2.1954, + "step": 53960 + }, + { + "epoch": 2.46, + "learning_rate": 3.776374204769097e-05, + "loss": 1.8288, + "step": 53970 + }, + { + "epoch": 2.46, + "learning_rate": 3.7761453613437685e-05, + "loss": 2.132, + "step": 53980 + }, + { + "epoch": 2.47, + "learning_rate": 3.77591651791844e-05, + "loss": 2.1294, + "step": 53990 + }, + { + "epoch": 2.47, + "learning_rate": 3.775687674493112e-05, + "loss": 2.1889, + "step": 54000 + }, + { + "epoch": 2.47, + "learning_rate": 3.7754588310677834e-05, + "loss": 1.9717, + "step": 54010 + }, + { + "epoch": 2.47, + "learning_rate": 3.775229987642455e-05, + "loss": 2.2544, + "step": 54020 + }, + { + "epoch": 2.47, + "learning_rate": 3.775001144217127e-05, + "loss": 1.971, + "step": 54030 + }, + { + "epoch": 2.47, + "learning_rate": 3.7747723007917983e-05, + "loss": 2.0851, + "step": 54040 + }, + { + "epoch": 2.47, + "learning_rate": 3.77454345736647e-05, + "loss": 2.1712, + "step": 54050 + }, + { + "epoch": 2.47, + "learning_rate": 3.774314613941142e-05, + "loss": 2.1703, + "step": 54060 + }, + { + "epoch": 2.47, + "learning_rate": 3.774085770515813e-05, + "loss": 2.0685, + "step": 54070 + }, + { + "epoch": 2.47, + "learning_rate": 3.773856927090485e-05, + "loss": 2.0789, + "step": 54080 + }, + { + "epoch": 2.47, + "learning_rate": 3.773628083665157e-05, + "loss": 2.1583, + "step": 54090 + }, + { + "epoch": 2.47, + "learning_rate": 3.773399240239828e-05, + "loss": 2.0998, + "step": 54100 + }, + { + "epoch": 2.47, + "learning_rate": 3.7731703968144996e-05, + "loss": 2.0634, + "step": 54110 + }, + { + "epoch": 2.47, + "learning_rate": 3.772941553389172e-05, + "loss": 2.0597, + "step": 54120 + }, + { + "epoch": 2.47, + "learning_rate": 3.772712709963843e-05, + "loss": 2.2742, + "step": 54130 + }, + { + "epoch": 2.47, + "learning_rate": 3.7724838665385145e-05, + "loss": 2.1188, + "step": 54140 + }, + { + "epoch": 2.47, + "learning_rate": 3.772255023113186e-05, + "loss": 2.1912, + "step": 54150 + }, + { + "epoch": 2.47, + "learning_rate": 3.772026179687857e-05, + "loss": 2.0167, + "step": 54160 + }, + { + "epoch": 2.47, + "learning_rate": 3.7717973362625294e-05, + "loss": 2.1042, + "step": 54170 + }, + { + "epoch": 2.47, + "learning_rate": 3.771568492837201e-05, + "loss": 2.1339, + "step": 54180 + }, + { + "epoch": 2.47, + "learning_rate": 3.771339649411872e-05, + "loss": 2.061, + "step": 54190 + }, + { + "epoch": 2.47, + "learning_rate": 3.7711108059865443e-05, + "loss": 1.9906, + "step": 54200 + }, + { + "epoch": 2.48, + "learning_rate": 3.770881962561216e-05, + "loss": 2.1004, + "step": 54210 + }, + { + "epoch": 2.48, + "learning_rate": 3.770653119135887e-05, + "loss": 2.0601, + "step": 54220 + }, + { + "epoch": 2.48, + "learning_rate": 3.770424275710559e-05, + "loss": 2.1492, + "step": 54230 + }, + { + "epoch": 2.48, + "learning_rate": 3.770195432285231e-05, + "loss": 2.059, + "step": 54240 + }, + { + "epoch": 2.48, + "learning_rate": 3.769966588859902e-05, + "loss": 2.1444, + "step": 54250 + }, + { + "epoch": 2.48, + "learning_rate": 3.769737745434574e-05, + "loss": 2.2549, + "step": 54260 + }, + { + "epoch": 2.48, + "learning_rate": 3.7695089020092456e-05, + "loss": 2.1385, + "step": 54270 + }, + { + "epoch": 2.48, + "learning_rate": 3.769280058583917e-05, + "loss": 2.217, + "step": 54280 + }, + { + "epoch": 2.48, + "learning_rate": 3.769051215158589e-05, + "loss": 2.107, + "step": 54290 + }, + { + "epoch": 2.48, + "learning_rate": 3.7688223717332605e-05, + "loss": 2.0783, + "step": 54300 + }, + { + "epoch": 2.48, + "learning_rate": 3.768593528307932e-05, + "loss": 2.0685, + "step": 54310 + }, + { + "epoch": 2.48, + "learning_rate": 3.768364684882604e-05, + "loss": 2.0873, + "step": 54320 + }, + { + "epoch": 2.48, + "learning_rate": 3.7681358414572754e-05, + "loss": 2.2005, + "step": 54330 + }, + { + "epoch": 2.48, + "learning_rate": 3.767906998031946e-05, + "loss": 2.1855, + "step": 54340 + }, + { + "epoch": 2.48, + "learning_rate": 3.767678154606618e-05, + "loss": 2.0212, + "step": 54350 + }, + { + "epoch": 2.48, + "learning_rate": 3.76744931118129e-05, + "loss": 2.0476, + "step": 54360 + }, + { + "epoch": 2.48, + "learning_rate": 3.767220467755961e-05, + "loss": 2.1564, + "step": 54370 + }, + { + "epoch": 2.48, + "learning_rate": 3.766991624330633e-05, + "loss": 2.0888, + "step": 54380 + }, + { + "epoch": 2.48, + "learning_rate": 3.7667627809053046e-05, + "loss": 2.0253, + "step": 54390 + }, + { + "epoch": 2.48, + "learning_rate": 3.766533937479976e-05, + "loss": 2.0812, + "step": 54400 + }, + { + "epoch": 2.48, + "learning_rate": 3.766305094054648e-05, + "loss": 2.1892, + "step": 54410 + }, + { + "epoch": 2.48, + "learning_rate": 3.7660762506293195e-05, + "loss": 2.0332, + "step": 54420 + }, + { + "epoch": 2.49, + "learning_rate": 3.765847407203991e-05, + "loss": 2.0473, + "step": 54430 + }, + { + "epoch": 2.49, + "learning_rate": 3.765618563778663e-05, + "loss": 2.024, + "step": 54440 + }, + { + "epoch": 2.49, + "learning_rate": 3.7653897203533344e-05, + "loss": 1.9344, + "step": 54450 + }, + { + "epoch": 2.49, + "learning_rate": 3.765160876928006e-05, + "loss": 1.9635, + "step": 54460 + }, + { + "epoch": 2.49, + "learning_rate": 3.764932033502678e-05, + "loss": 2.0122, + "step": 54470 + }, + { + "epoch": 2.49, + "learning_rate": 3.764703190077349e-05, + "loss": 2.0122, + "step": 54480 + }, + { + "epoch": 2.49, + "learning_rate": 3.764474346652021e-05, + "loss": 2.0958, + "step": 54490 + }, + { + "epoch": 2.49, + "learning_rate": 3.764245503226693e-05, + "loss": 2.2532, + "step": 54500 + }, + { + "epoch": 2.49, + "learning_rate": 3.764016659801364e-05, + "loss": 2.0298, + "step": 54510 + }, + { + "epoch": 2.49, + "learning_rate": 3.763787816376036e-05, + "loss": 2.1321, + "step": 54520 + }, + { + "epoch": 2.49, + "learning_rate": 3.763558972950707e-05, + "loss": 2.0334, + "step": 54530 + }, + { + "epoch": 2.49, + "learning_rate": 3.7633301295253785e-05, + "loss": 2.0792, + "step": 54540 + }, + { + "epoch": 2.49, + "learning_rate": 3.7631012861000506e-05, + "loss": 2.1192, + "step": 54550 + }, + { + "epoch": 2.49, + "learning_rate": 3.762872442674722e-05, + "loss": 2.044, + "step": 54560 + }, + { + "epoch": 2.49, + "learning_rate": 3.7626435992493934e-05, + "loss": 2.1887, + "step": 54570 + }, + { + "epoch": 2.49, + "learning_rate": 3.7624147558240655e-05, + "loss": 2.0137, + "step": 54580 + }, + { + "epoch": 2.49, + "learning_rate": 3.762185912398737e-05, + "loss": 2.2364, + "step": 54590 + }, + { + "epoch": 2.49, + "learning_rate": 3.761957068973408e-05, + "loss": 2.0129, + "step": 54600 + }, + { + "epoch": 2.49, + "learning_rate": 3.7617282255480804e-05, + "loss": 1.9886, + "step": 54610 + }, + { + "epoch": 2.49, + "learning_rate": 3.761499382122752e-05, + "loss": 2.1535, + "step": 54620 + }, + { + "epoch": 2.49, + "learning_rate": 3.761270538697423e-05, + "loss": 2.1311, + "step": 54630 + }, + { + "epoch": 2.5, + "learning_rate": 3.761041695272095e-05, + "loss": 2.0224, + "step": 54640 + }, + { + "epoch": 2.5, + "learning_rate": 3.760812851846767e-05, + "loss": 2.023, + "step": 54650 + }, + { + "epoch": 2.5, + "learning_rate": 3.760584008421438e-05, + "loss": 2.0654, + "step": 54660 + }, + { + "epoch": 2.5, + "learning_rate": 3.76035516499611e-05, + "loss": 2.1174, + "step": 54670 + }, + { + "epoch": 2.5, + "learning_rate": 3.760126321570782e-05, + "loss": 1.9262, + "step": 54680 + }, + { + "epoch": 2.5, + "learning_rate": 3.759897478145453e-05, + "loss": 2.0423, + "step": 54690 + }, + { + "epoch": 2.5, + "learning_rate": 3.759668634720125e-05, + "loss": 2.1999, + "step": 54700 + }, + { + "epoch": 2.5, + "learning_rate": 3.7594397912947966e-05, + "loss": 2.0796, + "step": 54710 + }, + { + "epoch": 2.5, + "learning_rate": 3.759210947869468e-05, + "loss": 2.0349, + "step": 54720 + }, + { + "epoch": 2.5, + "learning_rate": 3.7589821044441394e-05, + "loss": 2.0472, + "step": 54730 + }, + { + "epoch": 2.5, + "learning_rate": 3.758753261018811e-05, + "loss": 2.0753, + "step": 54740 + }, + { + "epoch": 2.5, + "learning_rate": 3.758524417593482e-05, + "loss": 2.0556, + "step": 54750 + }, + { + "epoch": 2.5, + "learning_rate": 3.758295574168154e-05, + "loss": 2.061, + "step": 54760 + }, + { + "epoch": 2.5, + "learning_rate": 3.758066730742826e-05, + "loss": 2.0391, + "step": 54770 + }, + { + "epoch": 2.5, + "learning_rate": 3.757837887317497e-05, + "loss": 1.9065, + "step": 54780 + }, + { + "epoch": 2.5, + "learning_rate": 3.757609043892169e-05, + "loss": 1.9818, + "step": 54790 + }, + { + "epoch": 2.5, + "learning_rate": 3.757380200466841e-05, + "loss": 2.2574, + "step": 54800 + }, + { + "epoch": 2.5, + "learning_rate": 3.757151357041512e-05, + "loss": 2.1606, + "step": 54810 + }, + { + "epoch": 2.5, + "learning_rate": 3.756922513616184e-05, + "loss": 2.198, + "step": 54820 + }, + { + "epoch": 2.5, + "learning_rate": 3.7566936701908556e-05, + "loss": 2.1161, + "step": 54830 + }, + { + "epoch": 2.5, + "learning_rate": 3.756464826765527e-05, + "loss": 2.192, + "step": 54840 + }, + { + "epoch": 2.5, + "learning_rate": 3.756235983340199e-05, + "loss": 2.0151, + "step": 54850 + }, + { + "epoch": 2.51, + "learning_rate": 3.7560071399148705e-05, + "loss": 2.0372, + "step": 54860 + }, + { + "epoch": 2.51, + "learning_rate": 3.755778296489542e-05, + "loss": 2.0069, + "step": 54870 + }, + { + "epoch": 2.51, + "learning_rate": 3.755549453064214e-05, + "loss": 2.0536, + "step": 54880 + }, + { + "epoch": 2.51, + "learning_rate": 3.7553206096388854e-05, + "loss": 2.0625, + "step": 54890 + }, + { + "epoch": 2.51, + "learning_rate": 3.755091766213557e-05, + "loss": 2.1191, + "step": 54900 + }, + { + "epoch": 2.51, + "learning_rate": 3.754862922788229e-05, + "loss": 2.0426, + "step": 54910 + }, + { + "epoch": 2.51, + "learning_rate": 3.7546340793629e-05, + "loss": 2.0312, + "step": 54920 + }, + { + "epoch": 2.51, + "learning_rate": 3.754405235937572e-05, + "loss": 2.1156, + "step": 54930 + }, + { + "epoch": 2.51, + "learning_rate": 3.754176392512243e-05, + "loss": 2.1274, + "step": 54940 + }, + { + "epoch": 2.51, + "learning_rate": 3.7539475490869146e-05, + "loss": 2.0928, + "step": 54950 + }, + { + "epoch": 2.51, + "learning_rate": 3.753718705661587e-05, + "loss": 2.0822, + "step": 54960 + }, + { + "epoch": 2.51, + "learning_rate": 3.753489862236258e-05, + "loss": 2.0791, + "step": 54970 + }, + { + "epoch": 2.51, + "learning_rate": 3.7532610188109295e-05, + "loss": 2.0343, + "step": 54980 + }, + { + "epoch": 2.51, + "learning_rate": 3.7530321753856016e-05, + "loss": 1.9317, + "step": 54990 + }, + { + "epoch": 2.51, + "learning_rate": 3.752803331960273e-05, + "loss": 2.0684, + "step": 55000 + }, + { + "epoch": 2.51, + "learning_rate": 3.7525744885349444e-05, + "loss": 2.1137, + "step": 55010 + }, + { + "epoch": 2.51, + "learning_rate": 3.7523456451096165e-05, + "loss": 2.1406, + "step": 55020 + }, + { + "epoch": 2.51, + "learning_rate": 3.752116801684288e-05, + "loss": 2.1325, + "step": 55030 + }, + { + "epoch": 2.51, + "learning_rate": 3.751887958258959e-05, + "loss": 1.9589, + "step": 55040 + }, + { + "epoch": 2.51, + "learning_rate": 3.7516591148336314e-05, + "loss": 2.2374, + "step": 55050 + }, + { + "epoch": 2.51, + "learning_rate": 3.751430271408303e-05, + "loss": 2.1404, + "step": 55060 + }, + { + "epoch": 2.51, + "learning_rate": 3.751201427982974e-05, + "loss": 2.12, + "step": 55070 + }, + { + "epoch": 2.52, + "learning_rate": 3.750972584557646e-05, + "loss": 2.1285, + "step": 55080 + }, + { + "epoch": 2.52, + "learning_rate": 3.750743741132318e-05, + "loss": 2.0181, + "step": 55090 + }, + { + "epoch": 2.52, + "learning_rate": 3.750514897706989e-05, + "loss": 2.1276, + "step": 55100 + }, + { + "epoch": 2.52, + "learning_rate": 3.7502860542816606e-05, + "loss": 2.258, + "step": 55110 + }, + { + "epoch": 2.52, + "learning_rate": 3.750057210856333e-05, + "loss": 2.0513, + "step": 55120 + }, + { + "epoch": 2.52, + "learning_rate": 3.7498283674310034e-05, + "loss": 2.144, + "step": 55130 + }, + { + "epoch": 2.52, + "learning_rate": 3.7495995240056755e-05, + "loss": 2.0709, + "step": 55140 + }, + { + "epoch": 2.52, + "learning_rate": 3.749370680580347e-05, + "loss": 1.9705, + "step": 55150 + }, + { + "epoch": 2.52, + "learning_rate": 3.749141837155018e-05, + "loss": 2.1022, + "step": 55160 + }, + { + "epoch": 2.52, + "learning_rate": 3.7489129937296904e-05, + "loss": 2.2126, + "step": 55170 + }, + { + "epoch": 2.52, + "learning_rate": 3.748684150304362e-05, + "loss": 2.0057, + "step": 55180 + }, + { + "epoch": 2.52, + "learning_rate": 3.748455306879033e-05, + "loss": 2.0227, + "step": 55190 + }, + { + "epoch": 2.52, + "learning_rate": 3.748226463453705e-05, + "loss": 1.9519, + "step": 55200 + }, + { + "epoch": 2.52, + "learning_rate": 3.747997620028377e-05, + "loss": 2.1013, + "step": 55210 + }, + { + "epoch": 2.52, + "learning_rate": 3.747768776603048e-05, + "loss": 2.0136, + "step": 55220 + }, + { + "epoch": 2.52, + "learning_rate": 3.74753993317772e-05, + "loss": 1.9668, + "step": 55230 + }, + { + "epoch": 2.52, + "learning_rate": 3.7473110897523917e-05, + "loss": 2.1216, + "step": 55240 + }, + { + "epoch": 2.52, + "learning_rate": 3.747082246327063e-05, + "loss": 2.0082, + "step": 55250 + }, + { + "epoch": 2.52, + "learning_rate": 3.746853402901735e-05, + "loss": 2.0234, + "step": 55260 + }, + { + "epoch": 2.52, + "learning_rate": 3.7466245594764066e-05, + "loss": 2.1334, + "step": 55270 + }, + { + "epoch": 2.52, + "learning_rate": 3.746395716051078e-05, + "loss": 2.117, + "step": 55280 + }, + { + "epoch": 2.52, + "learning_rate": 3.74616687262575e-05, + "loss": 2.0839, + "step": 55290 + }, + { + "epoch": 2.53, + "learning_rate": 3.7459380292004215e-05, + "loss": 2.2928, + "step": 55300 + }, + { + "epoch": 2.53, + "learning_rate": 3.745709185775093e-05, + "loss": 2.0782, + "step": 55310 + }, + { + "epoch": 2.53, + "learning_rate": 3.745480342349765e-05, + "loss": 2.0692, + "step": 55320 + }, + { + "epoch": 2.53, + "learning_rate": 3.745251498924436e-05, + "loss": 2.1831, + "step": 55330 + }, + { + "epoch": 2.53, + "learning_rate": 3.745022655499107e-05, + "loss": 2.1831, + "step": 55340 + }, + { + "epoch": 2.53, + "learning_rate": 3.744793812073779e-05, + "loss": 2.0251, + "step": 55350 + }, + { + "epoch": 2.53, + "learning_rate": 3.7445649686484507e-05, + "loss": 2.229, + "step": 55360 + }, + { + "epoch": 2.53, + "learning_rate": 3.744336125223122e-05, + "loss": 2.0796, + "step": 55370 + }, + { + "epoch": 2.53, + "learning_rate": 3.744107281797794e-05, + "loss": 1.9666, + "step": 55380 + }, + { + "epoch": 2.53, + "learning_rate": 3.7438784383724656e-05, + "loss": 1.9538, + "step": 55390 + }, + { + "epoch": 2.53, + "learning_rate": 3.743649594947137e-05, + "loss": 2.2513, + "step": 55400 + }, + { + "epoch": 2.53, + "learning_rate": 3.743420751521809e-05, + "loss": 2.0895, + "step": 55410 + }, + { + "epoch": 2.53, + "learning_rate": 3.7431919080964805e-05, + "loss": 2.1402, + "step": 55420 + }, + { + "epoch": 2.53, + "learning_rate": 3.742963064671152e-05, + "loss": 1.9313, + "step": 55430 + }, + { + "epoch": 2.53, + "learning_rate": 3.742734221245824e-05, + "loss": 2.0794, + "step": 55440 + }, + { + "epoch": 2.53, + "learning_rate": 3.7425053778204954e-05, + "loss": 2.0605, + "step": 55450 + }, + { + "epoch": 2.53, + "learning_rate": 3.742276534395167e-05, + "loss": 2.0145, + "step": 55460 + }, + { + "epoch": 2.53, + "learning_rate": 3.742047690969839e-05, + "loss": 2.082, + "step": 55470 + }, + { + "epoch": 2.53, + "learning_rate": 3.74181884754451e-05, + "loss": 1.9713, + "step": 55480 + }, + { + "epoch": 2.53, + "learning_rate": 3.741590004119182e-05, + "loss": 1.9474, + "step": 55490 + }, + { + "epoch": 2.53, + "learning_rate": 3.741361160693854e-05, + "loss": 2.1112, + "step": 55500 + }, + { + "epoch": 2.53, + "learning_rate": 3.741132317268525e-05, + "loss": 2.1003, + "step": 55510 + }, + { + "epoch": 2.54, + "learning_rate": 3.7409034738431967e-05, + "loss": 1.9048, + "step": 55520 + }, + { + "epoch": 2.54, + "learning_rate": 3.740674630417868e-05, + "loss": 2.0815, + "step": 55530 + }, + { + "epoch": 2.54, + "learning_rate": 3.7404457869925395e-05, + "loss": 2.1491, + "step": 55540 + }, + { + "epoch": 2.54, + "learning_rate": 3.7402169435672116e-05, + "loss": 2.0447, + "step": 55550 + }, + { + "epoch": 2.54, + "learning_rate": 3.739988100141883e-05, + "loss": 2.133, + "step": 55560 + }, + { + "epoch": 2.54, + "learning_rate": 3.7397592567165544e-05, + "loss": 2.1885, + "step": 55570 + }, + { + "epoch": 2.54, + "learning_rate": 3.7395304132912265e-05, + "loss": 2.0825, + "step": 55580 + }, + { + "epoch": 2.54, + "learning_rate": 3.739301569865898e-05, + "loss": 2.062, + "step": 55590 + }, + { + "epoch": 2.54, + "learning_rate": 3.739072726440569e-05, + "loss": 2.1198, + "step": 55600 + }, + { + "epoch": 2.54, + "learning_rate": 3.7388438830152414e-05, + "loss": 2.0793, + "step": 55610 + }, + { + "epoch": 2.54, + "learning_rate": 3.738615039589913e-05, + "loss": 1.923, + "step": 55620 + }, + { + "epoch": 2.54, + "learning_rate": 3.738386196164584e-05, + "loss": 2.0132, + "step": 55630 + }, + { + "epoch": 2.54, + "learning_rate": 3.738157352739256e-05, + "loss": 2.1455, + "step": 55640 + }, + { + "epoch": 2.54, + "learning_rate": 3.737928509313928e-05, + "loss": 2.1385, + "step": 55650 + }, + { + "epoch": 2.54, + "learning_rate": 3.737699665888599e-05, + "loss": 2.0693, + "step": 55660 + }, + { + "epoch": 2.54, + "learning_rate": 3.737470822463271e-05, + "loss": 2.2256, + "step": 55670 + }, + { + "epoch": 2.54, + "learning_rate": 3.7372419790379427e-05, + "loss": 2.0247, + "step": 55680 + }, + { + "epoch": 2.54, + "learning_rate": 3.737013135612614e-05, + "loss": 2.1928, + "step": 55690 + }, + { + "epoch": 2.54, + "learning_rate": 3.736784292187286e-05, + "loss": 2.0533, + "step": 55700 + }, + { + "epoch": 2.54, + "learning_rate": 3.7365554487619576e-05, + "loss": 2.0897, + "step": 55710 + }, + { + "epoch": 2.54, + "learning_rate": 3.736326605336628e-05, + "loss": 2.2051, + "step": 55720 + }, + { + "epoch": 2.54, + "learning_rate": 3.7360977619113004e-05, + "loss": 2.0997, + "step": 55730 + }, + { + "epoch": 2.55, + "learning_rate": 3.735868918485972e-05, + "loss": 1.9786, + "step": 55740 + }, + { + "epoch": 2.55, + "learning_rate": 3.735640075060643e-05, + "loss": 2.0799, + "step": 55750 + }, + { + "epoch": 2.55, + "learning_rate": 3.735411231635315e-05, + "loss": 2.1259, + "step": 55760 + }, + { + "epoch": 2.55, + "learning_rate": 3.735182388209987e-05, + "loss": 2.07, + "step": 55770 + }, + { + "epoch": 2.55, + "learning_rate": 3.734953544784658e-05, + "loss": 2.0415, + "step": 55780 + }, + { + "epoch": 2.55, + "learning_rate": 3.73472470135933e-05, + "loss": 2.0219, + "step": 55790 + }, + { + "epoch": 2.55, + "learning_rate": 3.7344958579340016e-05, + "loss": 2.0663, + "step": 55800 + }, + { + "epoch": 2.55, + "learning_rate": 3.734267014508673e-05, + "loss": 1.9442, + "step": 55810 + }, + { + "epoch": 2.55, + "learning_rate": 3.734038171083345e-05, + "loss": 2.0578, + "step": 55820 + }, + { + "epoch": 2.55, + "learning_rate": 3.7338093276580166e-05, + "loss": 1.8445, + "step": 55830 + }, + { + "epoch": 2.55, + "learning_rate": 3.733580484232688e-05, + "loss": 2.0965, + "step": 55840 + }, + { + "epoch": 2.55, + "learning_rate": 3.73335164080736e-05, + "loss": 2.059, + "step": 55850 + }, + { + "epoch": 2.55, + "learning_rate": 3.7331227973820315e-05, + "loss": 2.116, + "step": 55860 + }, + { + "epoch": 2.55, + "learning_rate": 3.732893953956703e-05, + "loss": 2.1164, + "step": 55870 + }, + { + "epoch": 2.55, + "learning_rate": 3.732665110531375e-05, + "loss": 2.1189, + "step": 55880 + }, + { + "epoch": 2.55, + "learning_rate": 3.7324362671060464e-05, + "loss": 2.0734, + "step": 55890 + }, + { + "epoch": 2.55, + "learning_rate": 3.732207423680718e-05, + "loss": 1.9508, + "step": 55900 + }, + { + "epoch": 2.55, + "learning_rate": 3.73197858025539e-05, + "loss": 2.3485, + "step": 55910 + }, + { + "epoch": 2.55, + "learning_rate": 3.7317497368300606e-05, + "loss": 2.0454, + "step": 55920 + }, + { + "epoch": 2.55, + "learning_rate": 3.731520893404733e-05, + "loss": 2.1684, + "step": 55930 + }, + { + "epoch": 2.55, + "learning_rate": 3.731292049979404e-05, + "loss": 2.1762, + "step": 55940 + }, + { + "epoch": 2.55, + "learning_rate": 3.7310632065540756e-05, + "loss": 2.1283, + "step": 55950 + }, + { + "epoch": 2.56, + "learning_rate": 3.7308343631287477e-05, + "loss": 2.1153, + "step": 55960 + }, + { + "epoch": 2.56, + "learning_rate": 3.730605519703419e-05, + "loss": 1.9631, + "step": 55970 + }, + { + "epoch": 2.56, + "learning_rate": 3.7303766762780905e-05, + "loss": 2.1562, + "step": 55980 + }, + { + "epoch": 2.56, + "learning_rate": 3.7301478328527626e-05, + "loss": 2.1698, + "step": 55990 + }, + { + "epoch": 2.56, + "learning_rate": 3.729918989427434e-05, + "loss": 2.0108, + "step": 56000 + }, + { + "epoch": 2.56, + "learning_rate": 3.7296901460021054e-05, + "loss": 2.0552, + "step": 56010 + }, + { + "epoch": 2.56, + "learning_rate": 3.7294613025767775e-05, + "loss": 2.06, + "step": 56020 + }, + { + "epoch": 2.56, + "learning_rate": 3.729232459151449e-05, + "loss": 1.9603, + "step": 56030 + }, + { + "epoch": 2.56, + "learning_rate": 3.72900361572612e-05, + "loss": 2.3348, + "step": 56040 + }, + { + "epoch": 2.56, + "learning_rate": 3.7287747723007924e-05, + "loss": 2.0553, + "step": 56050 + }, + { + "epoch": 2.56, + "learning_rate": 3.728545928875464e-05, + "loss": 2.0116, + "step": 56060 + }, + { + "epoch": 2.56, + "learning_rate": 3.728317085450135e-05, + "loss": 2.1814, + "step": 56070 + }, + { + "epoch": 2.56, + "learning_rate": 3.728088242024807e-05, + "loss": 2.1096, + "step": 56080 + }, + { + "epoch": 2.56, + "learning_rate": 3.727859398599479e-05, + "loss": 2.2713, + "step": 56090 + }, + { + "epoch": 2.56, + "learning_rate": 3.72763055517415e-05, + "loss": 2.1631, + "step": 56100 + }, + { + "epoch": 2.56, + "learning_rate": 3.727401711748822e-05, + "loss": 2.0889, + "step": 56110 + }, + { + "epoch": 2.56, + "learning_rate": 3.727172868323493e-05, + "loss": 2.0393, + "step": 56120 + }, + { + "epoch": 2.56, + "learning_rate": 3.7269440248981644e-05, + "loss": 2.0818, + "step": 56130 + }, + { + "epoch": 2.56, + "learning_rate": 3.7267151814728365e-05, + "loss": 2.0089, + "step": 56140 + }, + { + "epoch": 2.56, + "learning_rate": 3.726486338047508e-05, + "loss": 2.1336, + "step": 56150 + }, + { + "epoch": 2.56, + "learning_rate": 3.726257494622179e-05, + "loss": 1.9101, + "step": 56160 + }, + { + "epoch": 2.56, + "learning_rate": 3.7260286511968514e-05, + "loss": 2.0744, + "step": 56170 + }, + { + "epoch": 2.57, + "learning_rate": 3.725799807771523e-05, + "loss": 2.0823, + "step": 56180 + }, + { + "epoch": 2.57, + "learning_rate": 3.725570964346194e-05, + "loss": 2.0133, + "step": 56190 + }, + { + "epoch": 2.57, + "learning_rate": 3.725342120920866e-05, + "loss": 2.1695, + "step": 56200 + }, + { + "epoch": 2.57, + "learning_rate": 3.725113277495538e-05, + "loss": 2.0733, + "step": 56210 + }, + { + "epoch": 2.57, + "learning_rate": 3.724884434070209e-05, + "loss": 2.1198, + "step": 56220 + }, + { + "epoch": 2.57, + "learning_rate": 3.724655590644881e-05, + "loss": 2.1105, + "step": 56230 + }, + { + "epoch": 2.57, + "learning_rate": 3.7244267472195526e-05, + "loss": 1.9211, + "step": 56240 + }, + { + "epoch": 2.57, + "learning_rate": 3.724197903794224e-05, + "loss": 2.0019, + "step": 56250 + }, + { + "epoch": 2.57, + "learning_rate": 3.723969060368896e-05, + "loss": 2.102, + "step": 56260 + }, + { + "epoch": 2.57, + "learning_rate": 3.7237402169435676e-05, + "loss": 2.1646, + "step": 56270 + }, + { + "epoch": 2.57, + "learning_rate": 3.723511373518239e-05, + "loss": 2.0723, + "step": 56280 + }, + { + "epoch": 2.57, + "learning_rate": 3.723282530092911e-05, + "loss": 2.1022, + "step": 56290 + }, + { + "epoch": 2.57, + "learning_rate": 3.7230536866675825e-05, + "loss": 1.9632, + "step": 56300 + }, + { + "epoch": 2.57, + "learning_rate": 3.722824843242254e-05, + "loss": 2.004, + "step": 56310 + }, + { + "epoch": 2.57, + "learning_rate": 3.722595999816925e-05, + "loss": 1.9706, + "step": 56320 + }, + { + "epoch": 2.57, + "learning_rate": 3.722367156391597e-05, + "loss": 1.9945, + "step": 56330 + }, + { + "epoch": 2.57, + "learning_rate": 3.722138312966268e-05, + "loss": 2.0379, + "step": 56340 + }, + { + "epoch": 2.57, + "learning_rate": 3.72190946954094e-05, + "loss": 2.082, + "step": 56350 + }, + { + "epoch": 2.57, + "learning_rate": 3.7216806261156116e-05, + "loss": 2.0053, + "step": 56360 + }, + { + "epoch": 2.57, + "learning_rate": 3.721451782690283e-05, + "loss": 2.2256, + "step": 56370 + }, + { + "epoch": 2.57, + "learning_rate": 3.721222939264955e-05, + "loss": 2.1377, + "step": 56380 + }, + { + "epoch": 2.57, + "learning_rate": 3.7209940958396266e-05, + "loss": 2.1441, + "step": 56390 + }, + { + "epoch": 2.58, + "learning_rate": 3.720765252414298e-05, + "loss": 2.097, + "step": 56400 + }, + { + "epoch": 2.58, + "learning_rate": 3.72053640898897e-05, + "loss": 2.0194, + "step": 56410 + }, + { + "epoch": 2.58, + "learning_rate": 3.7203075655636415e-05, + "loss": 1.9461, + "step": 56420 + }, + { + "epoch": 2.58, + "learning_rate": 3.720078722138313e-05, + "loss": 2.1546, + "step": 56430 + }, + { + "epoch": 2.58, + "learning_rate": 3.719849878712985e-05, + "loss": 1.9252, + "step": 56440 + }, + { + "epoch": 2.58, + "learning_rate": 3.7196210352876564e-05, + "loss": 2.1071, + "step": 56450 + }, + { + "epoch": 2.58, + "learning_rate": 3.719392191862328e-05, + "loss": 2.1992, + "step": 56460 + }, + { + "epoch": 2.58, + "learning_rate": 3.719163348437e-05, + "loss": 2.0437, + "step": 56470 + }, + { + "epoch": 2.58, + "learning_rate": 3.718934505011671e-05, + "loss": 2.0317, + "step": 56480 + }, + { + "epoch": 2.58, + "learning_rate": 3.718705661586343e-05, + "loss": 2.1424, + "step": 56490 + }, + { + "epoch": 2.58, + "learning_rate": 3.718476818161015e-05, + "loss": 1.916, + "step": 56500 + }, + { + "epoch": 2.58, + "learning_rate": 3.7182479747356855e-05, + "loss": 2.004, + "step": 56510 + }, + { + "epoch": 2.58, + "learning_rate": 3.7180191313103576e-05, + "loss": 2.029, + "step": 56520 + }, + { + "epoch": 2.58, + "learning_rate": 3.717790287885029e-05, + "loss": 1.9793, + "step": 56530 + }, + { + "epoch": 2.58, + "learning_rate": 3.7175614444597005e-05, + "loss": 1.9213, + "step": 56540 + }, + { + "epoch": 2.58, + "learning_rate": 3.7173326010343726e-05, + "loss": 2.0126, + "step": 56550 + }, + { + "epoch": 2.58, + "learning_rate": 3.717103757609044e-05, + "loss": 1.9991, + "step": 56560 + }, + { + "epoch": 2.58, + "learning_rate": 3.7168749141837154e-05, + "loss": 2.2415, + "step": 56570 + }, + { + "epoch": 2.58, + "learning_rate": 3.7166460707583875e-05, + "loss": 1.9796, + "step": 56580 + }, + { + "epoch": 2.58, + "learning_rate": 3.716417227333059e-05, + "loss": 2.1871, + "step": 56590 + }, + { + "epoch": 2.58, + "learning_rate": 3.71618838390773e-05, + "loss": 2.0648, + "step": 56600 + }, + { + "epoch": 2.58, + "learning_rate": 3.7159595404824024e-05, + "loss": 2.0426, + "step": 56610 + }, + { + "epoch": 2.59, + "learning_rate": 3.715730697057074e-05, + "loss": 2.1107, + "step": 56620 + }, + { + "epoch": 2.59, + "learning_rate": 3.715501853631745e-05, + "loss": 2.0771, + "step": 56630 + }, + { + "epoch": 2.59, + "learning_rate": 3.715273010206417e-05, + "loss": 2.0962, + "step": 56640 + }, + { + "epoch": 2.59, + "learning_rate": 3.715044166781089e-05, + "loss": 2.0463, + "step": 56650 + }, + { + "epoch": 2.59, + "learning_rate": 3.71481532335576e-05, + "loss": 2.1098, + "step": 56660 + }, + { + "epoch": 2.59, + "learning_rate": 3.714586479930432e-05, + "loss": 2.0142, + "step": 56670 + }, + { + "epoch": 2.59, + "learning_rate": 3.7143576365051036e-05, + "loss": 1.8776, + "step": 56680 + }, + { + "epoch": 2.59, + "learning_rate": 3.714128793079775e-05, + "loss": 1.9612, + "step": 56690 + }, + { + "epoch": 2.59, + "learning_rate": 3.713899949654447e-05, + "loss": 2.1125, + "step": 56700 + }, + { + "epoch": 2.59, + "learning_rate": 3.713671106229118e-05, + "loss": 2.0839, + "step": 56710 + }, + { + "epoch": 2.59, + "learning_rate": 3.713442262803789e-05, + "loss": 2.1675, + "step": 56720 + }, + { + "epoch": 2.59, + "learning_rate": 3.7132134193784614e-05, + "loss": 2.1217, + "step": 56730 + }, + { + "epoch": 2.59, + "learning_rate": 3.712984575953133e-05, + "loss": 2.0722, + "step": 56740 + }, + { + "epoch": 2.59, + "learning_rate": 3.712755732527804e-05, + "loss": 2.0234, + "step": 56750 + }, + { + "epoch": 2.59, + "learning_rate": 3.712526889102476e-05, + "loss": 2.1482, + "step": 56760 + }, + { + "epoch": 2.59, + "learning_rate": 3.712298045677148e-05, + "loss": 2.0112, + "step": 56770 + }, + { + "epoch": 2.59, + "learning_rate": 3.712069202251819e-05, + "loss": 1.98, + "step": 56780 + }, + { + "epoch": 2.59, + "learning_rate": 3.711840358826491e-05, + "loss": 2.0728, + "step": 56790 + }, + { + "epoch": 2.59, + "learning_rate": 3.7116115154011626e-05, + "loss": 2.1524, + "step": 56800 + }, + { + "epoch": 2.59, + "learning_rate": 3.711382671975834e-05, + "loss": 2.1106, + "step": 56810 + }, + { + "epoch": 2.59, + "learning_rate": 3.711153828550506e-05, + "loss": 1.9692, + "step": 56820 + }, + { + "epoch": 2.6, + "learning_rate": 3.7109249851251776e-05, + "loss": 2.092, + "step": 56830 + }, + { + "epoch": 2.6, + "learning_rate": 3.710696141699849e-05, + "loss": 2.1573, + "step": 56840 + }, + { + "epoch": 2.6, + "learning_rate": 3.710467298274521e-05, + "loss": 2.0802, + "step": 56850 + }, + { + "epoch": 2.6, + "learning_rate": 3.7102384548491925e-05, + "loss": 2.1592, + "step": 56860 + }, + { + "epoch": 2.6, + "learning_rate": 3.710009611423864e-05, + "loss": 2.0554, + "step": 56870 + }, + { + "epoch": 2.6, + "learning_rate": 3.709780767998536e-05, + "loss": 2.0852, + "step": 56880 + }, + { + "epoch": 2.6, + "learning_rate": 3.7095519245732074e-05, + "loss": 2.062, + "step": 56890 + }, + { + "epoch": 2.6, + "learning_rate": 3.709323081147879e-05, + "loss": 2.0247, + "step": 56900 + }, + { + "epoch": 2.6, + "learning_rate": 3.70909423772255e-05, + "loss": 2.155, + "step": 56910 + }, + { + "epoch": 2.6, + "learning_rate": 3.7088653942972216e-05, + "loss": 2.001, + "step": 56920 + }, + { + "epoch": 2.6, + "learning_rate": 3.708636550871894e-05, + "loss": 1.944, + "step": 56930 + }, + { + "epoch": 2.6, + "learning_rate": 3.708407707446565e-05, + "loss": 1.9979, + "step": 56940 + }, + { + "epoch": 2.6, + "learning_rate": 3.7081788640212365e-05, + "loss": 2.1044, + "step": 56950 + }, + { + "epoch": 2.6, + "learning_rate": 3.7079500205959086e-05, + "loss": 2.2294, + "step": 56960 + }, + { + "epoch": 2.6, + "learning_rate": 3.70772117717058e-05, + "loss": 1.9362, + "step": 56970 + }, + { + "epoch": 2.6, + "learning_rate": 3.7074923337452515e-05, + "loss": 2.0825, + "step": 56980 + }, + { + "epoch": 2.6, + "learning_rate": 3.7072634903199236e-05, + "loss": 2.1283, + "step": 56990 + }, + { + "epoch": 2.6, + "learning_rate": 3.707034646894595e-05, + "loss": 2.0789, + "step": 57000 + }, + { + "epoch": 2.6, + "learning_rate": 3.7068058034692664e-05, + "loss": 2.0858, + "step": 57010 + }, + { + "epoch": 2.6, + "learning_rate": 3.7065769600439385e-05, + "loss": 1.9174, + "step": 57020 + }, + { + "epoch": 2.6, + "learning_rate": 3.70634811661861e-05, + "loss": 1.9471, + "step": 57030 + }, + { + "epoch": 2.6, + "learning_rate": 3.706119273193281e-05, + "loss": 2.159, + "step": 57040 + }, + { + "epoch": 2.61, + "learning_rate": 3.7058904297679534e-05, + "loss": 2.0339, + "step": 57050 + }, + { + "epoch": 2.61, + "learning_rate": 3.705661586342625e-05, + "loss": 2.1899, + "step": 57060 + }, + { + "epoch": 2.61, + "learning_rate": 3.705432742917296e-05, + "loss": 2.0512, + "step": 57070 + }, + { + "epoch": 2.61, + "learning_rate": 3.705203899491968e-05, + "loss": 2.1914, + "step": 57080 + }, + { + "epoch": 2.61, + "learning_rate": 3.70497505606664e-05, + "loss": 1.8672, + "step": 57090 + }, + { + "epoch": 2.61, + "learning_rate": 3.7047462126413105e-05, + "loss": 2.1735, + "step": 57100 + }, + { + "epoch": 2.61, + "learning_rate": 3.7045173692159825e-05, + "loss": 2.0316, + "step": 57110 + }, + { + "epoch": 2.61, + "learning_rate": 3.704288525790654e-05, + "loss": 2.1796, + "step": 57120 + }, + { + "epoch": 2.61, + "learning_rate": 3.7040596823653254e-05, + "loss": 2.1074, + "step": 57130 + }, + { + "epoch": 2.61, + "learning_rate": 3.7038308389399975e-05, + "loss": 1.9527, + "step": 57140 + }, + { + "epoch": 2.61, + "learning_rate": 3.703601995514669e-05, + "loss": 2.19, + "step": 57150 + }, + { + "epoch": 2.61, + "learning_rate": 3.70337315208934e-05, + "loss": 2.1559, + "step": 57160 + }, + { + "epoch": 2.61, + "learning_rate": 3.7031443086640124e-05, + "loss": 1.9654, + "step": 57170 + }, + { + "epoch": 2.61, + "learning_rate": 3.702915465238684e-05, + "loss": 2.1255, + "step": 57180 + }, + { + "epoch": 2.61, + "learning_rate": 3.702686621813355e-05, + "loss": 1.9261, + "step": 57190 + }, + { + "epoch": 2.61, + "learning_rate": 3.702457778388027e-05, + "loss": 2.0065, + "step": 57200 + }, + { + "epoch": 2.61, + "learning_rate": 3.702228934962699e-05, + "loss": 2.0228, + "step": 57210 + }, + { + "epoch": 2.61, + "learning_rate": 3.70200009153737e-05, + "loss": 2.2086, + "step": 57220 + }, + { + "epoch": 2.61, + "learning_rate": 3.701771248112042e-05, + "loss": 2.1557, + "step": 57230 + }, + { + "epoch": 2.61, + "learning_rate": 3.7015424046867136e-05, + "loss": 2.1885, + "step": 57240 + }, + { + "epoch": 2.61, + "learning_rate": 3.701313561261385e-05, + "loss": 1.9917, + "step": 57250 + }, + { + "epoch": 2.61, + "learning_rate": 3.701084717836057e-05, + "loss": 2.1469, + "step": 57260 + }, + { + "epoch": 2.62, + "learning_rate": 3.7008558744107285e-05, + "loss": 2.1008, + "step": 57270 + }, + { + "epoch": 2.62, + "learning_rate": 3.7006270309854e-05, + "loss": 2.1187, + "step": 57280 + }, + { + "epoch": 2.62, + "learning_rate": 3.700398187560072e-05, + "loss": 2.2132, + "step": 57290 + }, + { + "epoch": 2.62, + "learning_rate": 3.700169344134743e-05, + "loss": 2.0774, + "step": 57300 + }, + { + "epoch": 2.62, + "learning_rate": 3.699940500709415e-05, + "loss": 1.8832, + "step": 57310 + }, + { + "epoch": 2.62, + "learning_rate": 3.699711657284086e-05, + "loss": 1.9734, + "step": 57320 + }, + { + "epoch": 2.62, + "learning_rate": 3.699482813858758e-05, + "loss": 2.102, + "step": 57330 + }, + { + "epoch": 2.62, + "learning_rate": 3.69925397043343e-05, + "loss": 2.0303, + "step": 57340 + }, + { + "epoch": 2.62, + "learning_rate": 3.699025127008101e-05, + "loss": 2.0072, + "step": 57350 + }, + { + "epoch": 2.62, + "learning_rate": 3.6987962835827726e-05, + "loss": 2.013, + "step": 57360 + }, + { + "epoch": 2.62, + "learning_rate": 3.698567440157445e-05, + "loss": 2.0624, + "step": 57370 + }, + { + "epoch": 2.62, + "learning_rate": 3.698338596732116e-05, + "loss": 2.1109, + "step": 57380 + }, + { + "epoch": 2.62, + "learning_rate": 3.6981097533067875e-05, + "loss": 2.18, + "step": 57390 + }, + { + "epoch": 2.62, + "learning_rate": 3.6978809098814596e-05, + "loss": 2.096, + "step": 57400 + }, + { + "epoch": 2.62, + "learning_rate": 3.697652066456131e-05, + "loss": 2.3225, + "step": 57410 + }, + { + "epoch": 2.62, + "learning_rate": 3.6974232230308025e-05, + "loss": 1.9349, + "step": 57420 + }, + { + "epoch": 2.62, + "learning_rate": 3.697194379605474e-05, + "loss": 2.1478, + "step": 57430 + }, + { + "epoch": 2.62, + "learning_rate": 3.696965536180146e-05, + "loss": 2.1915, + "step": 57440 + }, + { + "epoch": 2.62, + "learning_rate": 3.6967366927548174e-05, + "loss": 2.0166, + "step": 57450 + }, + { + "epoch": 2.62, + "learning_rate": 3.696507849329489e-05, + "loss": 1.8968, + "step": 57460 + }, + { + "epoch": 2.62, + "learning_rate": 3.696279005904161e-05, + "loss": 2.0278, + "step": 57470 + }, + { + "epoch": 2.62, + "learning_rate": 3.696050162478832e-05, + "loss": 2.0988, + "step": 57480 + }, + { + "epoch": 2.63, + "learning_rate": 3.695821319053504e-05, + "loss": 1.942, + "step": 57490 + }, + { + "epoch": 2.63, + "learning_rate": 3.695592475628175e-05, + "loss": 2.0734, + "step": 57500 + }, + { + "epoch": 2.63, + "learning_rate": 3.6953636322028465e-05, + "loss": 2.0561, + "step": 57510 + }, + { + "epoch": 2.63, + "learning_rate": 3.6951347887775186e-05, + "loss": 2.1839, + "step": 57520 + }, + { + "epoch": 2.63, + "learning_rate": 3.69490594535219e-05, + "loss": 2.0426, + "step": 57530 + }, + { + "epoch": 2.63, + "learning_rate": 3.6946771019268615e-05, + "loss": 1.938, + "step": 57540 + }, + { + "epoch": 2.63, + "learning_rate": 3.6944482585015335e-05, + "loss": 2.0839, + "step": 57550 + }, + { + "epoch": 2.63, + "learning_rate": 3.694219415076205e-05, + "loss": 2.1951, + "step": 57560 + }, + { + "epoch": 2.63, + "learning_rate": 3.6939905716508764e-05, + "loss": 1.9209, + "step": 57570 + }, + { + "epoch": 2.63, + "learning_rate": 3.6937617282255485e-05, + "loss": 1.9029, + "step": 57580 + }, + { + "epoch": 2.63, + "learning_rate": 3.69353288480022e-05, + "loss": 2.073, + "step": 57590 + }, + { + "epoch": 2.63, + "learning_rate": 3.693304041374891e-05, + "loss": 2.1439, + "step": 57600 + }, + { + "epoch": 2.63, + "learning_rate": 3.6930751979495634e-05, + "loss": 1.9993, + "step": 57610 + }, + { + "epoch": 2.63, + "learning_rate": 3.692846354524235e-05, + "loss": 2.1121, + "step": 57620 + }, + { + "epoch": 2.63, + "learning_rate": 3.692617511098906e-05, + "loss": 1.8303, + "step": 57630 + }, + { + "epoch": 2.63, + "learning_rate": 3.692388667673578e-05, + "loss": 2.0225, + "step": 57640 + }, + { + "epoch": 2.63, + "learning_rate": 3.69215982424825e-05, + "loss": 2.0697, + "step": 57650 + }, + { + "epoch": 2.63, + "learning_rate": 3.691930980822921e-05, + "loss": 1.9887, + "step": 57660 + }, + { + "epoch": 2.63, + "learning_rate": 3.691702137397593e-05, + "loss": 2.1407, + "step": 57670 + }, + { + "epoch": 2.63, + "learning_rate": 3.6914732939722646e-05, + "loss": 2.116, + "step": 57680 + }, + { + "epoch": 2.63, + "learning_rate": 3.691244450546936e-05, + "loss": 2.1433, + "step": 57690 + }, + { + "epoch": 2.63, + "learning_rate": 3.6910156071216075e-05, + "loss": 2.1158, + "step": 57700 + }, + { + "epoch": 2.64, + "learning_rate": 3.690786763696279e-05, + "loss": 2.0599, + "step": 57710 + }, + { + "epoch": 2.64, + "learning_rate": 3.69055792027095e-05, + "loss": 2.0965, + "step": 57720 + }, + { + "epoch": 2.64, + "learning_rate": 3.6903290768456224e-05, + "loss": 2.0685, + "step": 57730 + }, + { + "epoch": 2.64, + "learning_rate": 3.690100233420294e-05, + "loss": 2.1214, + "step": 57740 + }, + { + "epoch": 2.64, + "learning_rate": 3.689871389994965e-05, + "loss": 2.1619, + "step": 57750 + }, + { + "epoch": 2.64, + "learning_rate": 3.689642546569637e-05, + "loss": 2.1479, + "step": 57760 + }, + { + "epoch": 2.64, + "learning_rate": 3.689413703144309e-05, + "loss": 2.0556, + "step": 57770 + }, + { + "epoch": 2.64, + "learning_rate": 3.68918485971898e-05, + "loss": 2.0552, + "step": 57780 + }, + { + "epoch": 2.64, + "learning_rate": 3.688956016293652e-05, + "loss": 2.0345, + "step": 57790 + }, + { + "epoch": 2.64, + "learning_rate": 3.6887271728683236e-05, + "loss": 2.1808, + "step": 57800 + }, + { + "epoch": 2.64, + "learning_rate": 3.688498329442995e-05, + "loss": 2.0419, + "step": 57810 + }, + { + "epoch": 2.64, + "learning_rate": 3.688269486017667e-05, + "loss": 2.0243, + "step": 57820 + }, + { + "epoch": 2.64, + "learning_rate": 3.6880406425923385e-05, + "loss": 2.0117, + "step": 57830 + }, + { + "epoch": 2.64, + "learning_rate": 3.68781179916701e-05, + "loss": 2.1394, + "step": 57840 + }, + { + "epoch": 2.64, + "learning_rate": 3.687582955741682e-05, + "loss": 2.0451, + "step": 57850 + }, + { + "epoch": 2.64, + "learning_rate": 3.6873541123163535e-05, + "loss": 1.9846, + "step": 57860 + }, + { + "epoch": 2.64, + "learning_rate": 3.687125268891025e-05, + "loss": 2.0152, + "step": 57870 + }, + { + "epoch": 2.64, + "learning_rate": 3.686896425465697e-05, + "loss": 2.0128, + "step": 57880 + }, + { + "epoch": 2.64, + "learning_rate": 3.686667582040368e-05, + "loss": 1.9987, + "step": 57890 + }, + { + "epoch": 2.64, + "learning_rate": 3.68643873861504e-05, + "loss": 2.0913, + "step": 57900 + }, + { + "epoch": 2.64, + "learning_rate": 3.686209895189711e-05, + "loss": 2.0131, + "step": 57910 + }, + { + "epoch": 2.64, + "learning_rate": 3.6859810517643826e-05, + "loss": 2.1856, + "step": 57920 + }, + { + "epoch": 2.65, + "learning_rate": 3.685752208339055e-05, + "loss": 1.9502, + "step": 57930 + }, + { + "epoch": 2.65, + "learning_rate": 3.685523364913726e-05, + "loss": 2.0931, + "step": 57940 + }, + { + "epoch": 2.65, + "learning_rate": 3.6852945214883975e-05, + "loss": 2.1285, + "step": 57950 + }, + { + "epoch": 2.65, + "learning_rate": 3.6850656780630696e-05, + "loss": 2.0627, + "step": 57960 + }, + { + "epoch": 2.65, + "learning_rate": 3.684836834637741e-05, + "loss": 2.1386, + "step": 57970 + }, + { + "epoch": 2.65, + "learning_rate": 3.6846079912124124e-05, + "loss": 2.0847, + "step": 57980 + }, + { + "epoch": 2.65, + "learning_rate": 3.6843791477870845e-05, + "loss": 2.1057, + "step": 57990 + }, + { + "epoch": 2.65, + "learning_rate": 3.684150304361756e-05, + "loss": 1.9698, + "step": 58000 + }, + { + "epoch": 2.65, + "learning_rate": 3.6839214609364274e-05, + "loss": 1.9382, + "step": 58010 + }, + { + "epoch": 2.65, + "learning_rate": 3.6836926175110995e-05, + "loss": 2.1756, + "step": 58020 + }, + { + "epoch": 2.65, + "learning_rate": 3.683463774085771e-05, + "loss": 1.9959, + "step": 58030 + }, + { + "epoch": 2.65, + "learning_rate": 3.683234930660442e-05, + "loss": 2.0233, + "step": 58040 + }, + { + "epoch": 2.65, + "learning_rate": 3.6830060872351144e-05, + "loss": 2.0524, + "step": 58050 + }, + { + "epoch": 2.65, + "learning_rate": 3.682777243809786e-05, + "loss": 1.9634, + "step": 58060 + }, + { + "epoch": 2.65, + "learning_rate": 3.682548400384457e-05, + "loss": 2.0909, + "step": 58070 + }, + { + "epoch": 2.65, + "learning_rate": 3.682319556959129e-05, + "loss": 2.0212, + "step": 58080 + }, + { + "epoch": 2.65, + "learning_rate": 3.6820907135338e-05, + "loss": 2.0383, + "step": 58090 + }, + { + "epoch": 2.65, + "learning_rate": 3.6818618701084714e-05, + "loss": 2.1022, + "step": 58100 + }, + { + "epoch": 2.65, + "learning_rate": 3.6816330266831435e-05, + "loss": 2.07, + "step": 58110 + }, + { + "epoch": 2.65, + "learning_rate": 3.681404183257815e-05, + "loss": 2.0416, + "step": 58120 + }, + { + "epoch": 2.65, + "learning_rate": 3.6811753398324864e-05, + "loss": 2.2765, + "step": 58130 + }, + { + "epoch": 2.65, + "learning_rate": 3.6809464964071585e-05, + "loss": 1.9543, + "step": 58140 + }, + { + "epoch": 2.66, + "learning_rate": 3.68071765298183e-05, + "loss": 1.9225, + "step": 58150 + }, + { + "epoch": 2.66, + "learning_rate": 3.680488809556501e-05, + "loss": 2.0151, + "step": 58160 + }, + { + "epoch": 2.66, + "learning_rate": 3.6802599661311734e-05, + "loss": 2.066, + "step": 58170 + }, + { + "epoch": 2.66, + "learning_rate": 3.680031122705845e-05, + "loss": 2.0155, + "step": 58180 + }, + { + "epoch": 2.66, + "learning_rate": 3.679802279280516e-05, + "loss": 2.054, + "step": 58190 + }, + { + "epoch": 2.66, + "learning_rate": 3.679573435855188e-05, + "loss": 2.0897, + "step": 58200 + }, + { + "epoch": 2.66, + "learning_rate": 3.67934459242986e-05, + "loss": 2.007, + "step": 58210 + }, + { + "epoch": 2.66, + "learning_rate": 3.679115749004531e-05, + "loss": 1.9506, + "step": 58220 + }, + { + "epoch": 2.66, + "learning_rate": 3.678886905579203e-05, + "loss": 2.1325, + "step": 58230 + }, + { + "epoch": 2.66, + "learning_rate": 3.6786580621538746e-05, + "loss": 2.0709, + "step": 58240 + }, + { + "epoch": 2.66, + "learning_rate": 3.678429218728546e-05, + "loss": 2.0106, + "step": 58250 + }, + { + "epoch": 2.66, + "learning_rate": 3.678200375303218e-05, + "loss": 2.0859, + "step": 58260 + }, + { + "epoch": 2.66, + "learning_rate": 3.6779715318778895e-05, + "loss": 1.9996, + "step": 58270 + }, + { + "epoch": 2.66, + "learning_rate": 3.677742688452561e-05, + "loss": 1.9101, + "step": 58280 + }, + { + "epoch": 2.66, + "learning_rate": 3.6775138450272324e-05, + "loss": 1.9499, + "step": 58290 + }, + { + "epoch": 2.66, + "learning_rate": 3.677285001601904e-05, + "loss": 2.1478, + "step": 58300 + }, + { + "epoch": 2.66, + "learning_rate": 3.677056158176576e-05, + "loss": 1.9986, + "step": 58310 + }, + { + "epoch": 2.66, + "learning_rate": 3.676827314751247e-05, + "loss": 2.0749, + "step": 58320 + }, + { + "epoch": 2.66, + "learning_rate": 3.676598471325919e-05, + "loss": 1.9843, + "step": 58330 + }, + { + "epoch": 2.66, + "learning_rate": 3.676369627900591e-05, + "loss": 1.9746, + "step": 58340 + }, + { + "epoch": 2.66, + "learning_rate": 3.676140784475262e-05, + "loss": 2.0123, + "step": 58350 + }, + { + "epoch": 2.66, + "learning_rate": 3.6759119410499336e-05, + "loss": 2.0119, + "step": 58360 + }, + { + "epoch": 2.67, + "learning_rate": 3.675683097624606e-05, + "loss": 2.1912, + "step": 58370 + }, + { + "epoch": 2.67, + "learning_rate": 3.675454254199277e-05, + "loss": 1.9338, + "step": 58380 + }, + { + "epoch": 2.67, + "learning_rate": 3.6752254107739485e-05, + "loss": 2.0327, + "step": 58390 + }, + { + "epoch": 2.67, + "learning_rate": 3.6749965673486206e-05, + "loss": 2.0245, + "step": 58400 + }, + { + "epoch": 2.67, + "learning_rate": 3.674767723923292e-05, + "loss": 2.0826, + "step": 58410 + }, + { + "epoch": 2.67, + "learning_rate": 3.6745388804979634e-05, + "loss": 1.9785, + "step": 58420 + }, + { + "epoch": 2.67, + "learning_rate": 3.6743100370726355e-05, + "loss": 1.9895, + "step": 58430 + }, + { + "epoch": 2.67, + "learning_rate": 3.674081193647307e-05, + "loss": 1.9089, + "step": 58440 + }, + { + "epoch": 2.67, + "learning_rate": 3.6738523502219784e-05, + "loss": 1.9278, + "step": 58450 + }, + { + "epoch": 2.67, + "learning_rate": 3.6736235067966505e-05, + "loss": 1.9345, + "step": 58460 + }, + { + "epoch": 2.67, + "learning_rate": 3.673394663371322e-05, + "loss": 1.9415, + "step": 58470 + }, + { + "epoch": 2.67, + "learning_rate": 3.673165819945993e-05, + "loss": 2.0436, + "step": 58480 + }, + { + "epoch": 2.67, + "learning_rate": 3.672936976520665e-05, + "loss": 2.0148, + "step": 58490 + }, + { + "epoch": 2.67, + "learning_rate": 3.672708133095336e-05, + "loss": 1.9926, + "step": 58500 + }, + { + "epoch": 2.67, + "learning_rate": 3.6724792896700075e-05, + "loss": 1.9266, + "step": 58510 + }, + { + "epoch": 2.67, + "learning_rate": 3.6722504462446796e-05, + "loss": 2.046, + "step": 58520 + }, + { + "epoch": 2.67, + "learning_rate": 3.672021602819351e-05, + "loss": 2.0374, + "step": 58530 + }, + { + "epoch": 2.67, + "learning_rate": 3.6717927593940224e-05, + "loss": 2.0402, + "step": 58540 + }, + { + "epoch": 2.67, + "learning_rate": 3.6715639159686945e-05, + "loss": 2.0703, + "step": 58550 + }, + { + "epoch": 2.67, + "learning_rate": 3.671335072543366e-05, + "loss": 1.9894, + "step": 58560 + }, + { + "epoch": 2.67, + "learning_rate": 3.6711062291180374e-05, + "loss": 2.0043, + "step": 58570 + }, + { + "epoch": 2.67, + "learning_rate": 3.6708773856927094e-05, + "loss": 2.0521, + "step": 58580 + }, + { + "epoch": 2.68, + "learning_rate": 3.670648542267381e-05, + "loss": 2.0144, + "step": 58590 + }, + { + "epoch": 2.68, + "learning_rate": 3.670419698842052e-05, + "loss": 1.9865, + "step": 58600 + }, + { + "epoch": 2.68, + "learning_rate": 3.6701908554167244e-05, + "loss": 1.9444, + "step": 58610 + }, + { + "epoch": 2.68, + "learning_rate": 3.669962011991396e-05, + "loss": 2.091, + "step": 58620 + }, + { + "epoch": 2.68, + "learning_rate": 3.669733168566067e-05, + "loss": 2.1615, + "step": 58630 + }, + { + "epoch": 2.68, + "learning_rate": 3.669504325140739e-05, + "loss": 2.1239, + "step": 58640 + }, + { + "epoch": 2.68, + "learning_rate": 3.669275481715411e-05, + "loss": 2.1091, + "step": 58650 + }, + { + "epoch": 2.68, + "learning_rate": 3.669046638290082e-05, + "loss": 1.9792, + "step": 58660 + }, + { + "epoch": 2.68, + "learning_rate": 3.668817794864754e-05, + "loss": 2.0113, + "step": 58670 + }, + { + "epoch": 2.68, + "learning_rate": 3.668588951439425e-05, + "loss": 1.9905, + "step": 58680 + }, + { + "epoch": 2.68, + "learning_rate": 3.6683601080140964e-05, + "loss": 2.0136, + "step": 58690 + }, + { + "epoch": 2.68, + "learning_rate": 3.6681312645887684e-05, + "loss": 2.0869, + "step": 58700 + }, + { + "epoch": 2.68, + "learning_rate": 3.66790242116344e-05, + "loss": 2.1484, + "step": 58710 + }, + { + "epoch": 2.68, + "learning_rate": 3.667673577738111e-05, + "loss": 2.0519, + "step": 58720 + }, + { + "epoch": 2.68, + "learning_rate": 3.6674447343127834e-05, + "loss": 2.0373, + "step": 58730 + }, + { + "epoch": 2.68, + "learning_rate": 3.667215890887455e-05, + "loss": 2.0117, + "step": 58740 + }, + { + "epoch": 2.68, + "learning_rate": 3.666987047462126e-05, + "loss": 2.0485, + "step": 58750 + }, + { + "epoch": 2.68, + "learning_rate": 3.666758204036798e-05, + "loss": 2.1632, + "step": 58760 + }, + { + "epoch": 2.68, + "learning_rate": 3.66652936061147e-05, + "loss": 2.0138, + "step": 58770 + }, + { + "epoch": 2.68, + "learning_rate": 3.666300517186141e-05, + "loss": 1.9481, + "step": 58780 + }, + { + "epoch": 2.68, + "learning_rate": 3.666071673760813e-05, + "loss": 2.1327, + "step": 58790 + }, + { + "epoch": 2.68, + "learning_rate": 3.6658428303354846e-05, + "loss": 1.9908, + "step": 58800 + }, + { + "epoch": 2.69, + "learning_rate": 3.665613986910156e-05, + "loss": 1.9587, + "step": 58810 + }, + { + "epoch": 2.69, + "learning_rate": 3.665385143484828e-05, + "loss": 2.0894, + "step": 58820 + }, + { + "epoch": 2.69, + "learning_rate": 3.6651563000594995e-05, + "loss": 2.0452, + "step": 58830 + }, + { + "epoch": 2.69, + "learning_rate": 3.664927456634171e-05, + "loss": 1.9466, + "step": 58840 + }, + { + "epoch": 2.69, + "learning_rate": 3.664698613208843e-05, + "loss": 2.1746, + "step": 58850 + }, + { + "epoch": 2.69, + "learning_rate": 3.6644697697835144e-05, + "loss": 2.0489, + "step": 58860 + }, + { + "epoch": 2.69, + "learning_rate": 3.664240926358186e-05, + "loss": 2.1759, + "step": 58870 + }, + { + "epoch": 2.69, + "learning_rate": 3.664012082932857e-05, + "loss": 2.0698, + "step": 58880 + }, + { + "epoch": 2.69, + "learning_rate": 3.663783239507529e-05, + "loss": 2.1073, + "step": 58890 + }, + { + "epoch": 2.69, + "learning_rate": 3.663554396082201e-05, + "loss": 1.953, + "step": 58900 + }, + { + "epoch": 2.69, + "learning_rate": 3.663325552656872e-05, + "loss": 2.1363, + "step": 58910 + }, + { + "epoch": 2.69, + "learning_rate": 3.6630967092315436e-05, + "loss": 2.0793, + "step": 58920 + }, + { + "epoch": 2.69, + "learning_rate": 3.662867865806216e-05, + "loss": 2.0648, + "step": 58930 + }, + { + "epoch": 2.69, + "learning_rate": 3.662639022380887e-05, + "loss": 2.1383, + "step": 58940 + }, + { + "epoch": 2.69, + "learning_rate": 3.6624101789555585e-05, + "loss": 2.0385, + "step": 58950 + }, + { + "epoch": 2.69, + "learning_rate": 3.6621813355302306e-05, + "loss": 2.0749, + "step": 58960 + }, + { + "epoch": 2.69, + "learning_rate": 3.661952492104902e-05, + "loss": 2.0044, + "step": 58970 + }, + { + "epoch": 2.69, + "learning_rate": 3.6617236486795734e-05, + "loss": 1.9786, + "step": 58980 + }, + { + "epoch": 2.69, + "learning_rate": 3.6614948052542455e-05, + "loss": 2.0958, + "step": 58990 + }, + { + "epoch": 2.69, + "learning_rate": 3.661265961828917e-05, + "loss": 2.0193, + "step": 59000 + }, + { + "epoch": 2.69, + "learning_rate": 3.6610371184035884e-05, + "loss": 1.9841, + "step": 59010 + }, + { + "epoch": 2.7, + "learning_rate": 3.6608082749782604e-05, + "loss": 2.1085, + "step": 59020 + }, + { + "epoch": 2.7, + "learning_rate": 3.660579431552932e-05, + "loss": 2.1187, + "step": 59030 + }, + { + "epoch": 2.7, + "learning_rate": 3.660350588127603e-05, + "loss": 2.1762, + "step": 59040 + }, + { + "epoch": 2.7, + "learning_rate": 3.6601217447022754e-05, + "loss": 1.9308, + "step": 59050 + }, + { + "epoch": 2.7, + "learning_rate": 3.659892901276947e-05, + "loss": 1.9871, + "step": 59060 + }, + { + "epoch": 2.7, + "learning_rate": 3.659664057851618e-05, + "loss": 1.8631, + "step": 59070 + }, + { + "epoch": 2.7, + "learning_rate": 3.6594352144262896e-05, + "loss": 1.9796, + "step": 59080 + }, + { + "epoch": 2.7, + "learning_rate": 3.659206371000961e-05, + "loss": 2.0868, + "step": 59090 + }, + { + "epoch": 2.7, + "learning_rate": 3.6589775275756324e-05, + "loss": 2.0347, + "step": 59100 + }, + { + "epoch": 2.7, + "learning_rate": 3.6587486841503045e-05, + "loss": 1.8025, + "step": 59110 + }, + { + "epoch": 2.7, + "learning_rate": 3.658519840724976e-05, + "loss": 1.9625, + "step": 59120 + }, + { + "epoch": 2.7, + "learning_rate": 3.6582909972996473e-05, + "loss": 2.1019, + "step": 59130 + }, + { + "epoch": 2.7, + "learning_rate": 3.6580621538743194e-05, + "loss": 1.9888, + "step": 59140 + }, + { + "epoch": 2.7, + "learning_rate": 3.657833310448991e-05, + "loss": 2.0713, + "step": 59150 + }, + { + "epoch": 2.7, + "learning_rate": 3.657604467023662e-05, + "loss": 2.051, + "step": 59160 + }, + { + "epoch": 2.7, + "learning_rate": 3.6573756235983344e-05, + "loss": 1.9585, + "step": 59170 + }, + { + "epoch": 2.7, + "learning_rate": 3.657146780173006e-05, + "loss": 2.1061, + "step": 59180 + }, + { + "epoch": 2.7, + "learning_rate": 3.656917936747677e-05, + "loss": 2.0879, + "step": 59190 + }, + { + "epoch": 2.7, + "learning_rate": 3.656689093322349e-05, + "loss": 1.9454, + "step": 59200 + }, + { + "epoch": 2.7, + "learning_rate": 3.656460249897021e-05, + "loss": 2.132, + "step": 59210 + }, + { + "epoch": 2.7, + "learning_rate": 3.656231406471692e-05, + "loss": 1.9449, + "step": 59220 + }, + { + "epoch": 2.7, + "learning_rate": 3.656002563046364e-05, + "loss": 1.9215, + "step": 59230 + }, + { + "epoch": 2.71, + "learning_rate": 3.6557737196210356e-05, + "loss": 2.073, + "step": 59240 + }, + { + "epoch": 2.71, + "learning_rate": 3.655544876195707e-05, + "loss": 1.9028, + "step": 59250 + }, + { + "epoch": 2.71, + "learning_rate": 3.655316032770379e-05, + "loss": 2.0405, + "step": 59260 + }, + { + "epoch": 2.71, + "learning_rate": 3.6550871893450505e-05, + "loss": 1.997, + "step": 59270 + }, + { + "epoch": 2.71, + "learning_rate": 3.654858345919722e-05, + "loss": 1.9598, + "step": 59280 + }, + { + "epoch": 2.71, + "learning_rate": 3.6546295024943933e-05, + "loss": 1.866, + "step": 59290 + }, + { + "epoch": 2.71, + "learning_rate": 3.654400659069065e-05, + "loss": 2.0209, + "step": 59300 + }, + { + "epoch": 2.71, + "learning_rate": 3.654171815643737e-05, + "loss": 2.1247, + "step": 59310 + }, + { + "epoch": 2.71, + "learning_rate": 3.653942972218408e-05, + "loss": 2.1026, + "step": 59320 + }, + { + "epoch": 2.71, + "learning_rate": 3.65371412879308e-05, + "loss": 1.9478, + "step": 59330 + }, + { + "epoch": 2.71, + "learning_rate": 3.653485285367752e-05, + "loss": 2.0552, + "step": 59340 + }, + { + "epoch": 2.71, + "learning_rate": 3.653256441942423e-05, + "loss": 2.0881, + "step": 59350 + }, + { + "epoch": 2.71, + "learning_rate": 3.6530275985170946e-05, + "loss": 2.0333, + "step": 59360 + }, + { + "epoch": 2.71, + "learning_rate": 3.652798755091767e-05, + "loss": 2.0921, + "step": 59370 + }, + { + "epoch": 2.71, + "learning_rate": 3.652569911666438e-05, + "loss": 2.1748, + "step": 59380 + }, + { + "epoch": 2.71, + "learning_rate": 3.6523410682411095e-05, + "loss": 1.9012, + "step": 59390 + }, + { + "epoch": 2.71, + "learning_rate": 3.6521122248157816e-05, + "loss": 1.9632, + "step": 59400 + }, + { + "epoch": 2.71, + "learning_rate": 3.651883381390453e-05, + "loss": 2.0397, + "step": 59410 + }, + { + "epoch": 2.71, + "learning_rate": 3.6516545379651244e-05, + "loss": 1.987, + "step": 59420 + }, + { + "epoch": 2.71, + "learning_rate": 3.6514256945397965e-05, + "loss": 2.0182, + "step": 59430 + }, + { + "epoch": 2.71, + "learning_rate": 3.651196851114468e-05, + "loss": 2.098, + "step": 59440 + }, + { + "epoch": 2.71, + "learning_rate": 3.6509680076891394e-05, + "loss": 1.9469, + "step": 59450 + }, + { + "epoch": 2.72, + "learning_rate": 3.6507391642638114e-05, + "loss": 1.9248, + "step": 59460 + }, + { + "epoch": 2.72, + "learning_rate": 3.650510320838482e-05, + "loss": 1.9711, + "step": 59470 + }, + { + "epoch": 2.72, + "learning_rate": 3.6502814774131536e-05, + "loss": 1.909, + "step": 59480 + }, + { + "epoch": 2.72, + "learning_rate": 3.650052633987826e-05, + "loss": 1.998, + "step": 59490 + }, + { + "epoch": 2.72, + "learning_rate": 3.649823790562497e-05, + "loss": 1.9103, + "step": 59500 + }, + { + "epoch": 2.72, + "learning_rate": 3.6495949471371685e-05, + "loss": 2.1806, + "step": 59510 + }, + { + "epoch": 2.72, + "learning_rate": 3.6493661037118406e-05, + "loss": 2.0162, + "step": 59520 + }, + { + "epoch": 2.72, + "learning_rate": 3.649137260286512e-05, + "loss": 1.9769, + "step": 59530 + }, + { + "epoch": 2.72, + "learning_rate": 3.6489084168611834e-05, + "loss": 1.8979, + "step": 59540 + }, + { + "epoch": 2.72, + "learning_rate": 3.6486795734358555e-05, + "loss": 1.8916, + "step": 59550 + }, + { + "epoch": 2.72, + "learning_rate": 3.648450730010527e-05, + "loss": 2.0353, + "step": 59560 + }, + { + "epoch": 2.72, + "learning_rate": 3.6482218865851983e-05, + "loss": 2.0973, + "step": 59570 + }, + { + "epoch": 2.72, + "learning_rate": 3.6479930431598704e-05, + "loss": 2.0024, + "step": 59580 + }, + { + "epoch": 2.72, + "learning_rate": 3.647764199734542e-05, + "loss": 2.1468, + "step": 59590 + }, + { + "epoch": 2.72, + "learning_rate": 3.647535356309213e-05, + "loss": 2.1231, + "step": 59600 + }, + { + "epoch": 2.72, + "learning_rate": 3.6473065128838854e-05, + "loss": 1.9381, + "step": 59610 + }, + { + "epoch": 2.72, + "learning_rate": 3.647077669458557e-05, + "loss": 1.9635, + "step": 59620 + }, + { + "epoch": 2.72, + "learning_rate": 3.646848826033228e-05, + "loss": 2.0019, + "step": 59630 + }, + { + "epoch": 2.72, + "learning_rate": 3.6466199826079e-05, + "loss": 1.9443, + "step": 59640 + }, + { + "epoch": 2.72, + "learning_rate": 3.646391139182572e-05, + "loss": 1.9899, + "step": 59650 + }, + { + "epoch": 2.72, + "learning_rate": 3.646162295757243e-05, + "loss": 1.9079, + "step": 59660 + }, + { + "epoch": 2.72, + "learning_rate": 3.6459334523319145e-05, + "loss": 2.0221, + "step": 59670 + }, + { + "epoch": 2.73, + "learning_rate": 3.645704608906586e-05, + "loss": 2.0524, + "step": 59680 + }, + { + "epoch": 2.73, + "learning_rate": 3.645475765481258e-05, + "loss": 1.9912, + "step": 59690 + }, + { + "epoch": 2.73, + "learning_rate": 3.6452469220559294e-05, + "loss": 2.0082, + "step": 59700 + }, + { + "epoch": 2.73, + "learning_rate": 3.645018078630601e-05, + "loss": 1.9967, + "step": 59710 + }, + { + "epoch": 2.73, + "learning_rate": 3.644789235205273e-05, + "loss": 2.1516, + "step": 59720 + }, + { + "epoch": 2.73, + "learning_rate": 3.6445603917799443e-05, + "loss": 1.9955, + "step": 59730 + }, + { + "epoch": 2.73, + "learning_rate": 3.644331548354616e-05, + "loss": 2.223, + "step": 59740 + }, + { + "epoch": 2.73, + "learning_rate": 3.644102704929288e-05, + "loss": 1.8971, + "step": 59750 + }, + { + "epoch": 2.73, + "learning_rate": 3.643873861503959e-05, + "loss": 2.0708, + "step": 59760 + }, + { + "epoch": 2.73, + "learning_rate": 3.643645018078631e-05, + "loss": 2.0318, + "step": 59770 + }, + { + "epoch": 2.73, + "learning_rate": 3.643416174653302e-05, + "loss": 1.939, + "step": 59780 + }, + { + "epoch": 2.73, + "learning_rate": 3.643187331227974e-05, + "loss": 1.9177, + "step": 59790 + }, + { + "epoch": 2.73, + "learning_rate": 3.6429584878026456e-05, + "loss": 1.978, + "step": 59800 + }, + { + "epoch": 2.73, + "learning_rate": 3.642729644377317e-05, + "loss": 2.1113, + "step": 59810 + }, + { + "epoch": 2.73, + "learning_rate": 3.642500800951989e-05, + "loss": 1.9359, + "step": 59820 + }, + { + "epoch": 2.73, + "learning_rate": 3.6422719575266605e-05, + "loss": 2.0547, + "step": 59830 + }, + { + "epoch": 2.73, + "learning_rate": 3.642043114101332e-05, + "loss": 1.9669, + "step": 59840 + }, + { + "epoch": 2.73, + "learning_rate": 3.641814270676004e-05, + "loss": 2.021, + "step": 59850 + }, + { + "epoch": 2.73, + "learning_rate": 3.6415854272506754e-05, + "loss": 2.049, + "step": 59860 + }, + { + "epoch": 2.73, + "learning_rate": 3.641356583825347e-05, + "loss": 2.0174, + "step": 59870 + }, + { + "epoch": 2.73, + "learning_rate": 3.641127740400018e-05, + "loss": 1.9955, + "step": 59880 + }, + { + "epoch": 2.73, + "learning_rate": 3.64089889697469e-05, + "loss": 1.9543, + "step": 59890 + }, + { + "epoch": 2.74, + "learning_rate": 3.640670053549362e-05, + "loss": 2.0832, + "step": 59900 + }, + { + "epoch": 2.74, + "learning_rate": 3.640441210124033e-05, + "loss": 1.9544, + "step": 59910 + }, + { + "epoch": 2.74, + "learning_rate": 3.6402123666987046e-05, + "loss": 2.0689, + "step": 59920 + }, + { + "epoch": 2.74, + "learning_rate": 3.639983523273377e-05, + "loss": 2.1265, + "step": 59930 + }, + { + "epoch": 2.74, + "learning_rate": 3.639754679848048e-05, + "loss": 1.8493, + "step": 59940 + }, + { + "epoch": 2.74, + "learning_rate": 3.6395258364227195e-05, + "loss": 2.0218, + "step": 59950 + }, + { + "epoch": 2.74, + "learning_rate": 3.6392969929973916e-05, + "loss": 1.929, + "step": 59960 + }, + { + "epoch": 2.74, + "learning_rate": 3.639068149572063e-05, + "loss": 2.0547, + "step": 59970 + }, + { + "epoch": 2.74, + "learning_rate": 3.6388393061467344e-05, + "loss": 1.9919, + "step": 59980 + }, + { + "epoch": 2.74, + "learning_rate": 3.6386104627214065e-05, + "loss": 2.0245, + "step": 59990 + }, + { + "epoch": 2.74, + "learning_rate": 3.638381619296078e-05, + "loss": 1.9682, + "step": 60000 + }, + { + "epoch": 2.74, + "learning_rate": 3.6381527758707493e-05, + "loss": 1.9584, + "step": 60010 + }, + { + "epoch": 2.74, + "learning_rate": 3.6379239324454214e-05, + "loss": 2.0457, + "step": 60020 + }, + { + "epoch": 2.74, + "learning_rate": 3.637695089020093e-05, + "loss": 2.0441, + "step": 60030 + }, + { + "epoch": 2.74, + "learning_rate": 3.637466245594764e-05, + "loss": 2.0462, + "step": 60040 + }, + { + "epoch": 2.74, + "learning_rate": 3.6372374021694363e-05, + "loss": 2.0593, + "step": 60050 + }, + { + "epoch": 2.74, + "learning_rate": 3.637008558744108e-05, + "loss": 1.9978, + "step": 60060 + }, + { + "epoch": 2.74, + "learning_rate": 3.6367797153187785e-05, + "loss": 2.0878, + "step": 60070 + }, + { + "epoch": 2.74, + "learning_rate": 3.6365508718934506e-05, + "loss": 1.9447, + "step": 60080 + }, + { + "epoch": 2.74, + "learning_rate": 3.636322028468122e-05, + "loss": 2.0362, + "step": 60090 + }, + { + "epoch": 2.74, + "learning_rate": 3.6360931850427934e-05, + "loss": 2.0925, + "step": 60100 + }, + { + "epoch": 2.74, + "learning_rate": 3.6358643416174655e-05, + "loss": 1.9102, + "step": 60110 + }, + { + "epoch": 2.75, + "learning_rate": 3.635635498192137e-05, + "loss": 1.9583, + "step": 60120 + }, + { + "epoch": 2.75, + "learning_rate": 3.635406654766808e-05, + "loss": 1.8651, + "step": 60130 + }, + { + "epoch": 2.75, + "learning_rate": 3.6351778113414804e-05, + "loss": 1.9928, + "step": 60140 + }, + { + "epoch": 2.75, + "learning_rate": 3.634948967916152e-05, + "loss": 2.0876, + "step": 60150 + }, + { + "epoch": 2.75, + "learning_rate": 3.634720124490823e-05, + "loss": 1.8945, + "step": 60160 + }, + { + "epoch": 2.75, + "learning_rate": 3.6344912810654953e-05, + "loss": 1.7745, + "step": 60170 + }, + { + "epoch": 2.75, + "learning_rate": 3.634262437640167e-05, + "loss": 1.9067, + "step": 60180 + }, + { + "epoch": 2.75, + "learning_rate": 3.634033594214838e-05, + "loss": 1.9254, + "step": 60190 + }, + { + "epoch": 2.75, + "learning_rate": 3.63380475078951e-05, + "loss": 1.8875, + "step": 60200 + }, + { + "epoch": 2.75, + "learning_rate": 3.633575907364182e-05, + "loss": 2.0375, + "step": 60210 + }, + { + "epoch": 2.75, + "learning_rate": 3.633347063938853e-05, + "loss": 1.7968, + "step": 60220 + }, + { + "epoch": 2.75, + "learning_rate": 3.633118220513525e-05, + "loss": 2.1611, + "step": 60230 + }, + { + "epoch": 2.75, + "learning_rate": 3.6328893770881966e-05, + "loss": 2.1146, + "step": 60240 + }, + { + "epoch": 2.75, + "learning_rate": 3.632660533662868e-05, + "loss": 1.8987, + "step": 60250 + }, + { + "epoch": 2.75, + "learning_rate": 3.6324316902375394e-05, + "loss": 1.8894, + "step": 60260 + }, + { + "epoch": 2.75, + "learning_rate": 3.632202846812211e-05, + "loss": 2.0244, + "step": 60270 + }, + { + "epoch": 2.75, + "learning_rate": 3.631974003386883e-05, + "loss": 2.0659, + "step": 60280 + }, + { + "epoch": 2.75, + "learning_rate": 3.631745159961554e-05, + "loss": 2.0322, + "step": 60290 + }, + { + "epoch": 2.75, + "learning_rate": 3.631516316536226e-05, + "loss": 1.881, + "step": 60300 + }, + { + "epoch": 2.75, + "learning_rate": 3.631287473110898e-05, + "loss": 2.0464, + "step": 60310 + }, + { + "epoch": 2.75, + "learning_rate": 3.631058629685569e-05, + "loss": 1.9628, + "step": 60320 + }, + { + "epoch": 2.75, + "learning_rate": 3.630829786260241e-05, + "loss": 1.8972, + "step": 60330 + }, + { + "epoch": 2.76, + "learning_rate": 3.630600942834913e-05, + "loss": 2.088, + "step": 60340 + }, + { + "epoch": 2.76, + "learning_rate": 3.630372099409584e-05, + "loss": 2.0569, + "step": 60350 + }, + { + "epoch": 2.76, + "learning_rate": 3.6301432559842556e-05, + "loss": 1.9792, + "step": 60360 + }, + { + "epoch": 2.76, + "learning_rate": 3.629914412558928e-05, + "loss": 1.949, + "step": 60370 + }, + { + "epoch": 2.76, + "learning_rate": 3.629685569133599e-05, + "loss": 1.9344, + "step": 60380 + }, + { + "epoch": 2.76, + "learning_rate": 3.6294567257082705e-05, + "loss": 2.1063, + "step": 60390 + }, + { + "epoch": 2.76, + "learning_rate": 3.6292278822829426e-05, + "loss": 1.9026, + "step": 60400 + }, + { + "epoch": 2.76, + "learning_rate": 3.628999038857614e-05, + "loss": 2.0503, + "step": 60410 + }, + { + "epoch": 2.76, + "learning_rate": 3.6287701954322854e-05, + "loss": 2.1715, + "step": 60420 + }, + { + "epoch": 2.76, + "learning_rate": 3.6285413520069575e-05, + "loss": 2.0045, + "step": 60430 + }, + { + "epoch": 2.76, + "learning_rate": 3.628312508581629e-05, + "loss": 2.0622, + "step": 60440 + }, + { + "epoch": 2.76, + "learning_rate": 3.6280836651563e-05, + "loss": 1.934, + "step": 60450 + }, + { + "epoch": 2.76, + "learning_rate": 3.627854821730972e-05, + "loss": 1.8569, + "step": 60460 + }, + { + "epoch": 2.76, + "learning_rate": 3.627625978305643e-05, + "loss": 1.9467, + "step": 60470 + }, + { + "epoch": 2.76, + "learning_rate": 3.6273971348803146e-05, + "loss": 2.0438, + "step": 60480 + }, + { + "epoch": 2.76, + "learning_rate": 3.627168291454987e-05, + "loss": 2.0008, + "step": 60490 + }, + { + "epoch": 2.76, + "learning_rate": 3.626939448029658e-05, + "loss": 1.985, + "step": 60500 + }, + { + "epoch": 2.76, + "learning_rate": 3.6267106046043295e-05, + "loss": 2.0893, + "step": 60510 + }, + { + "epoch": 2.76, + "learning_rate": 3.6264817611790016e-05, + "loss": 1.9721, + "step": 60520 + }, + { + "epoch": 2.76, + "learning_rate": 3.626252917753673e-05, + "loss": 1.9079, + "step": 60530 + }, + { + "epoch": 2.76, + "learning_rate": 3.6260240743283444e-05, + "loss": 1.9903, + "step": 60540 + }, + { + "epoch": 2.76, + "learning_rate": 3.6257952309030165e-05, + "loss": 2.163, + "step": 60550 + }, + { + "epoch": 2.77, + "learning_rate": 3.625566387477688e-05, + "loss": 2.0605, + "step": 60560 + }, + { + "epoch": 2.77, + "learning_rate": 3.625337544052359e-05, + "loss": 1.8531, + "step": 60570 + }, + { + "epoch": 2.77, + "learning_rate": 3.6251087006270314e-05, + "loss": 2.0791, + "step": 60580 + }, + { + "epoch": 2.77, + "learning_rate": 3.624879857201703e-05, + "loss": 2.0383, + "step": 60590 + }, + { + "epoch": 2.77, + "learning_rate": 3.624651013776374e-05, + "loss": 1.9614, + "step": 60600 + }, + { + "epoch": 2.77, + "learning_rate": 3.6244221703510463e-05, + "loss": 2.0337, + "step": 60610 + }, + { + "epoch": 2.77, + "learning_rate": 3.624193326925718e-05, + "loss": 1.9638, + "step": 60620 + }, + { + "epoch": 2.77, + "learning_rate": 3.623964483500389e-05, + "loss": 1.9294, + "step": 60630 + }, + { + "epoch": 2.77, + "learning_rate": 3.623735640075061e-05, + "loss": 2.0662, + "step": 60640 + }, + { + "epoch": 2.77, + "learning_rate": 3.623506796649733e-05, + "loss": 1.9583, + "step": 60650 + }, + { + "epoch": 2.77, + "learning_rate": 3.623277953224404e-05, + "loss": 1.91, + "step": 60660 + }, + { + "epoch": 2.77, + "learning_rate": 3.6230491097990755e-05, + "loss": 1.8304, + "step": 60670 + }, + { + "epoch": 2.77, + "learning_rate": 3.622820266373747e-05, + "loss": 2.011, + "step": 60680 + }, + { + "epoch": 2.77, + "learning_rate": 3.622591422948419e-05, + "loss": 1.9931, + "step": 60690 + }, + { + "epoch": 2.77, + "learning_rate": 3.6223625795230904e-05, + "loss": 2.157, + "step": 60700 + }, + { + "epoch": 2.77, + "learning_rate": 3.622133736097762e-05, + "loss": 2.0211, + "step": 60710 + }, + { + "epoch": 2.77, + "learning_rate": 3.621904892672434e-05, + "loss": 1.9471, + "step": 60720 + }, + { + "epoch": 2.77, + "learning_rate": 3.621676049247105e-05, + "loss": 1.857, + "step": 60730 + }, + { + "epoch": 2.77, + "learning_rate": 3.621447205821777e-05, + "loss": 1.9129, + "step": 60740 + }, + { + "epoch": 2.77, + "learning_rate": 3.621218362396449e-05, + "loss": 1.8573, + "step": 60750 + }, + { + "epoch": 2.77, + "learning_rate": 3.62098951897112e-05, + "loss": 1.9954, + "step": 60760 + }, + { + "epoch": 2.77, + "learning_rate": 3.620760675545792e-05, + "loss": 1.9688, + "step": 60770 + }, + { + "epoch": 2.78, + "learning_rate": 3.620531832120464e-05, + "loss": 2.0694, + "step": 60780 + }, + { + "epoch": 2.78, + "learning_rate": 3.620302988695135e-05, + "loss": 1.9621, + "step": 60790 + }, + { + "epoch": 2.78, + "learning_rate": 3.6200741452698066e-05, + "loss": 2.0583, + "step": 60800 + }, + { + "epoch": 2.78, + "learning_rate": 3.619845301844479e-05, + "loss": 1.9476, + "step": 60810 + }, + { + "epoch": 2.78, + "learning_rate": 3.61961645841915e-05, + "loss": 1.9973, + "step": 60820 + }, + { + "epoch": 2.78, + "learning_rate": 3.6193876149938215e-05, + "loss": 1.9639, + "step": 60830 + }, + { + "epoch": 2.78, + "learning_rate": 3.6191587715684936e-05, + "loss": 2.0196, + "step": 60840 + }, + { + "epoch": 2.78, + "learning_rate": 3.618929928143165e-05, + "loss": 2.1245, + "step": 60850 + }, + { + "epoch": 2.78, + "learning_rate": 3.618701084717836e-05, + "loss": 2.0029, + "step": 60860 + }, + { + "epoch": 2.78, + "learning_rate": 3.618472241292508e-05, + "loss": 1.9786, + "step": 60870 + }, + { + "epoch": 2.78, + "learning_rate": 3.618243397867179e-05, + "loss": 2.0416, + "step": 60880 + }, + { + "epoch": 2.78, + "learning_rate": 3.6180145544418507e-05, + "loss": 1.945, + "step": 60890 + }, + { + "epoch": 2.78, + "learning_rate": 3.617785711016523e-05, + "loss": 2.0444, + "step": 60900 + }, + { + "epoch": 2.78, + "learning_rate": 3.617556867591194e-05, + "loss": 2.0061, + "step": 60910 + }, + { + "epoch": 2.78, + "learning_rate": 3.6173280241658656e-05, + "loss": 1.9984, + "step": 60920 + }, + { + "epoch": 2.78, + "learning_rate": 3.617099180740538e-05, + "loss": 1.8808, + "step": 60930 + }, + { + "epoch": 2.78, + "learning_rate": 3.616870337315209e-05, + "loss": 1.974, + "step": 60940 + }, + { + "epoch": 2.78, + "learning_rate": 3.6166414938898805e-05, + "loss": 2.0278, + "step": 60950 + }, + { + "epoch": 2.78, + "learning_rate": 3.6164126504645526e-05, + "loss": 1.8631, + "step": 60960 + }, + { + "epoch": 2.78, + "learning_rate": 3.616183807039224e-05, + "loss": 1.924, + "step": 60970 + }, + { + "epoch": 2.78, + "learning_rate": 3.6159549636138954e-05, + "loss": 2.006, + "step": 60980 + }, + { + "epoch": 2.78, + "learning_rate": 3.6157261201885675e-05, + "loss": 1.955, + "step": 60990 + }, + { + "epoch": 2.79, + "learning_rate": 3.615497276763239e-05, + "loss": 1.9427, + "step": 61000 + }, + { + "epoch": 2.79, + "learning_rate": 3.61526843333791e-05, + "loss": 1.9409, + "step": 61010 + }, + { + "epoch": 2.79, + "learning_rate": 3.6150395899125824e-05, + "loss": 1.9506, + "step": 61020 + }, + { + "epoch": 2.79, + "learning_rate": 3.614810746487254e-05, + "loss": 1.8466, + "step": 61030 + }, + { + "epoch": 2.79, + "learning_rate": 3.614581903061925e-05, + "loss": 1.943, + "step": 61040 + }, + { + "epoch": 2.79, + "learning_rate": 3.6143530596365967e-05, + "loss": 1.9642, + "step": 61050 + }, + { + "epoch": 2.79, + "learning_rate": 3.614124216211268e-05, + "loss": 2.1687, + "step": 61060 + }, + { + "epoch": 2.79, + "learning_rate": 3.6138953727859395e-05, + "loss": 1.925, + "step": 61070 + }, + { + "epoch": 2.79, + "learning_rate": 3.6136665293606116e-05, + "loss": 2.0432, + "step": 61080 + }, + { + "epoch": 2.79, + "learning_rate": 3.613437685935283e-05, + "loss": 2.1505, + "step": 61090 + }, + { + "epoch": 2.79, + "learning_rate": 3.6132088425099544e-05, + "loss": 1.9926, + "step": 61100 + }, + { + "epoch": 2.79, + "learning_rate": 3.6129799990846265e-05, + "loss": 2.1669, + "step": 61110 + }, + { + "epoch": 2.79, + "learning_rate": 3.612751155659298e-05, + "loss": 1.9846, + "step": 61120 + }, + { + "epoch": 2.79, + "learning_rate": 3.612522312233969e-05, + "loss": 1.9489, + "step": 61130 + }, + { + "epoch": 2.79, + "learning_rate": 3.6122934688086414e-05, + "loss": 2.0555, + "step": 61140 + }, + { + "epoch": 2.79, + "learning_rate": 3.612064625383313e-05, + "loss": 2.0091, + "step": 61150 + }, + { + "epoch": 2.79, + "learning_rate": 3.611835781957984e-05, + "loss": 1.9568, + "step": 61160 + }, + { + "epoch": 2.79, + "learning_rate": 3.611606938532656e-05, + "loss": 1.9226, + "step": 61170 + }, + { + "epoch": 2.79, + "learning_rate": 3.611378095107328e-05, + "loss": 2.1924, + "step": 61180 + }, + { + "epoch": 2.79, + "learning_rate": 3.611149251681999e-05, + "loss": 1.8953, + "step": 61190 + }, + { + "epoch": 2.79, + "learning_rate": 3.610920408256671e-05, + "loss": 2.0938, + "step": 61200 + }, + { + "epoch": 2.8, + "learning_rate": 3.6106915648313427e-05, + "loss": 2.0289, + "step": 61210 + }, + { + "epoch": 2.8, + "learning_rate": 3.610462721406014e-05, + "loss": 2.0128, + "step": 61220 + }, + { + "epoch": 2.8, + "learning_rate": 3.610233877980686e-05, + "loss": 2.1055, + "step": 61230 + }, + { + "epoch": 2.8, + "learning_rate": 3.6100050345553576e-05, + "loss": 1.8905, + "step": 61240 + }, + { + "epoch": 2.8, + "learning_rate": 3.609776191130029e-05, + "loss": 2.014, + "step": 61250 + }, + { + "epoch": 2.8, + "learning_rate": 3.6095473477047004e-05, + "loss": 2.1216, + "step": 61260 + }, + { + "epoch": 2.8, + "learning_rate": 3.609318504279372e-05, + "loss": 1.9205, + "step": 61270 + }, + { + "epoch": 2.8, + "learning_rate": 3.609089660854044e-05, + "loss": 1.954, + "step": 61280 + }, + { + "epoch": 2.8, + "learning_rate": 3.608860817428715e-05, + "loss": 1.9841, + "step": 61290 + }, + { + "epoch": 2.8, + "learning_rate": 3.608631974003387e-05, + "loss": 1.9075, + "step": 61300 + }, + { + "epoch": 2.8, + "learning_rate": 3.608403130578059e-05, + "loss": 1.9314, + "step": 61310 + }, + { + "epoch": 2.8, + "learning_rate": 3.60817428715273e-05, + "loss": 2.0757, + "step": 61320 + }, + { + "epoch": 2.8, + "learning_rate": 3.6079454437274017e-05, + "loss": 2.0854, + "step": 61330 + }, + { + "epoch": 2.8, + "learning_rate": 3.607716600302074e-05, + "loss": 2.0061, + "step": 61340 + }, + { + "epoch": 2.8, + "learning_rate": 3.607487756876745e-05, + "loss": 1.8823, + "step": 61350 + }, + { + "epoch": 2.8, + "learning_rate": 3.6072589134514166e-05, + "loss": 1.9387, + "step": 61360 + }, + { + "epoch": 2.8, + "learning_rate": 3.607030070026089e-05, + "loss": 2.0842, + "step": 61370 + }, + { + "epoch": 2.8, + "learning_rate": 3.60680122660076e-05, + "loss": 1.9555, + "step": 61380 + }, + { + "epoch": 2.8, + "learning_rate": 3.6065723831754315e-05, + "loss": 2.0182, + "step": 61390 + }, + { + "epoch": 2.8, + "learning_rate": 3.6063435397501036e-05, + "loss": 2.1223, + "step": 61400 + }, + { + "epoch": 2.8, + "learning_rate": 3.606114696324775e-05, + "loss": 2.0476, + "step": 61410 + }, + { + "epoch": 2.8, + "learning_rate": 3.6058858528994464e-05, + "loss": 2.0908, + "step": 61420 + }, + { + "epoch": 2.81, + "learning_rate": 3.6056570094741185e-05, + "loss": 1.9769, + "step": 61430 + }, + { + "epoch": 2.81, + "learning_rate": 3.60542816604879e-05, + "loss": 2.0315, + "step": 61440 + }, + { + "epoch": 2.81, + "learning_rate": 3.6051993226234606e-05, + "loss": 2.0142, + "step": 61450 + }, + { + "epoch": 2.81, + "learning_rate": 3.604970479198133e-05, + "loss": 1.8936, + "step": 61460 + }, + { + "epoch": 2.81, + "learning_rate": 3.604741635772804e-05, + "loss": 2.0686, + "step": 61470 + }, + { + "epoch": 2.81, + "learning_rate": 3.6045127923474756e-05, + "loss": 2.04, + "step": 61480 + }, + { + "epoch": 2.81, + "learning_rate": 3.6042839489221477e-05, + "loss": 2.0724, + "step": 61490 + }, + { + "epoch": 2.81, + "learning_rate": 3.604055105496819e-05, + "loss": 2.013, + "step": 61500 + }, + { + "epoch": 2.81, + "learning_rate": 3.6038262620714905e-05, + "loss": 1.9822, + "step": 61510 + }, + { + "epoch": 2.81, + "learning_rate": 3.6035974186461626e-05, + "loss": 2.0616, + "step": 61520 + }, + { + "epoch": 2.81, + "learning_rate": 3.603368575220834e-05, + "loss": 2.1496, + "step": 61530 + }, + { + "epoch": 2.81, + "learning_rate": 3.6031397317955054e-05, + "loss": 1.9661, + "step": 61540 + }, + { + "epoch": 2.81, + "learning_rate": 3.6029108883701775e-05, + "loss": 1.8144, + "step": 61550 + }, + { + "epoch": 2.81, + "learning_rate": 3.602682044944849e-05, + "loss": 1.8169, + "step": 61560 + }, + { + "epoch": 2.81, + "learning_rate": 3.60245320151952e-05, + "loss": 1.9258, + "step": 61570 + }, + { + "epoch": 2.81, + "learning_rate": 3.6022243580941924e-05, + "loss": 2.1131, + "step": 61580 + }, + { + "epoch": 2.81, + "learning_rate": 3.601995514668864e-05, + "loss": 1.8251, + "step": 61590 + }, + { + "epoch": 2.81, + "learning_rate": 3.601766671243535e-05, + "loss": 1.8831, + "step": 61600 + }, + { + "epoch": 2.81, + "learning_rate": 3.601537827818207e-05, + "loss": 2.0656, + "step": 61610 + }, + { + "epoch": 2.81, + "learning_rate": 3.601308984392879e-05, + "loss": 1.99, + "step": 61620 + }, + { + "epoch": 2.81, + "learning_rate": 3.60108014096755e-05, + "loss": 1.9731, + "step": 61630 + }, + { + "epoch": 2.81, + "learning_rate": 3.600851297542222e-05, + "loss": 1.8577, + "step": 61640 + }, + { + "epoch": 2.82, + "learning_rate": 3.600622454116893e-05, + "loss": 1.8428, + "step": 61650 + }, + { + "epoch": 2.82, + "learning_rate": 3.600393610691565e-05, + "loss": 2.0219, + "step": 61660 + }, + { + "epoch": 2.82, + "learning_rate": 3.6001647672662365e-05, + "loss": 1.9814, + "step": 61670 + }, + { + "epoch": 2.82, + "learning_rate": 3.599935923840908e-05, + "loss": 2.0118, + "step": 61680 + }, + { + "epoch": 2.82, + "learning_rate": 3.59970708041558e-05, + "loss": 2.0391, + "step": 61690 + }, + { + "epoch": 2.82, + "learning_rate": 3.5994782369902514e-05, + "loss": 1.9902, + "step": 61700 + }, + { + "epoch": 2.82, + "learning_rate": 3.599249393564923e-05, + "loss": 1.8667, + "step": 61710 + }, + { + "epoch": 2.82, + "learning_rate": 3.599020550139595e-05, + "loss": 2.0817, + "step": 61720 + }, + { + "epoch": 2.82, + "learning_rate": 3.598791706714266e-05, + "loss": 1.9757, + "step": 61730 + }, + { + "epoch": 2.82, + "learning_rate": 3.598562863288938e-05, + "loss": 1.9032, + "step": 61740 + }, + { + "epoch": 2.82, + "learning_rate": 3.59833401986361e-05, + "loss": 1.9409, + "step": 61750 + }, + { + "epoch": 2.82, + "learning_rate": 3.598105176438281e-05, + "loss": 1.9412, + "step": 61760 + }, + { + "epoch": 2.82, + "learning_rate": 3.5978763330129527e-05, + "loss": 2.0815, + "step": 61770 + }, + { + "epoch": 2.82, + "learning_rate": 3.597647489587625e-05, + "loss": 1.9708, + "step": 61780 + }, + { + "epoch": 2.82, + "learning_rate": 3.597418646162296e-05, + "loss": 1.9773, + "step": 61790 + }, + { + "epoch": 2.82, + "learning_rate": 3.5971898027369676e-05, + "loss": 1.9472, + "step": 61800 + }, + { + "epoch": 2.82, + "learning_rate": 3.5969609593116397e-05, + "loss": 1.953, + "step": 61810 + }, + { + "epoch": 2.82, + "learning_rate": 3.596732115886311e-05, + "loss": 1.9493, + "step": 61820 + }, + { + "epoch": 2.82, + "learning_rate": 3.5965032724609825e-05, + "loss": 1.9448, + "step": 61830 + }, + { + "epoch": 2.82, + "learning_rate": 3.596274429035654e-05, + "loss": 2.0161, + "step": 61840 + }, + { + "epoch": 2.82, + "learning_rate": 3.596045585610325e-05, + "loss": 1.8877, + "step": 61850 + }, + { + "epoch": 2.82, + "learning_rate": 3.595816742184997e-05, + "loss": 1.9639, + "step": 61860 + }, + { + "epoch": 2.83, + "learning_rate": 3.595587898759669e-05, + "loss": 1.9716, + "step": 61870 + }, + { + "epoch": 2.83, + "learning_rate": 3.59535905533434e-05, + "loss": 1.896, + "step": 61880 + }, + { + "epoch": 2.83, + "learning_rate": 3.5951302119090116e-05, + "loss": 2.0705, + "step": 61890 + }, + { + "epoch": 2.83, + "learning_rate": 3.594901368483684e-05, + "loss": 1.9875, + "step": 61900 + }, + { + "epoch": 2.83, + "learning_rate": 3.594672525058355e-05, + "loss": 1.9054, + "step": 61910 + }, + { + "epoch": 2.83, + "learning_rate": 3.5944436816330266e-05, + "loss": 1.934, + "step": 61920 + }, + { + "epoch": 2.83, + "learning_rate": 3.5942148382076987e-05, + "loss": 1.8307, + "step": 61930 + }, + { + "epoch": 2.83, + "learning_rate": 3.59398599478237e-05, + "loss": 1.897, + "step": 61940 + }, + { + "epoch": 2.83, + "learning_rate": 3.5937571513570415e-05, + "loss": 1.949, + "step": 61950 + }, + { + "epoch": 2.83, + "learning_rate": 3.5935283079317136e-05, + "loss": 1.9394, + "step": 61960 + }, + { + "epoch": 2.83, + "learning_rate": 3.593299464506385e-05, + "loss": 2.0158, + "step": 61970 + }, + { + "epoch": 2.83, + "learning_rate": 3.5930706210810564e-05, + "loss": 1.9855, + "step": 61980 + }, + { + "epoch": 2.83, + "learning_rate": 3.5928417776557285e-05, + "loss": 1.8759, + "step": 61990 + }, + { + "epoch": 2.83, + "learning_rate": 3.5926129342304e-05, + "loss": 1.991, + "step": 62000 + }, + { + "epoch": 2.83, + "learning_rate": 3.592384090805071e-05, + "loss": 2.0799, + "step": 62010 + }, + { + "epoch": 2.83, + "learning_rate": 3.5921552473797434e-05, + "loss": 2.1465, + "step": 62020 + }, + { + "epoch": 2.83, + "learning_rate": 3.591926403954415e-05, + "loss": 2.0323, + "step": 62030 + }, + { + "epoch": 2.83, + "learning_rate": 3.591697560529086e-05, + "loss": 1.8978, + "step": 62040 + }, + { + "epoch": 2.83, + "learning_rate": 3.5914687171037576e-05, + "loss": 1.8778, + "step": 62050 + }, + { + "epoch": 2.83, + "learning_rate": 3.591239873678429e-05, + "loss": 1.9978, + "step": 62060 + }, + { + "epoch": 2.83, + "learning_rate": 3.591011030253101e-05, + "loss": 1.9031, + "step": 62070 + }, + { + "epoch": 2.83, + "learning_rate": 3.5907821868277726e-05, + "loss": 1.8336, + "step": 62080 + }, + { + "epoch": 2.84, + "learning_rate": 3.590553343402444e-05, + "loss": 1.9167, + "step": 62090 + }, + { + "epoch": 2.84, + "learning_rate": 3.590324499977116e-05, + "loss": 1.9665, + "step": 62100 + }, + { + "epoch": 2.84, + "learning_rate": 3.5900956565517875e-05, + "loss": 1.8544, + "step": 62110 + }, + { + "epoch": 2.84, + "learning_rate": 3.589866813126459e-05, + "loss": 1.9748, + "step": 62120 + }, + { + "epoch": 2.84, + "learning_rate": 3.58963796970113e-05, + "loss": 1.9473, + "step": 62130 + }, + { + "epoch": 2.84, + "learning_rate": 3.5894091262758024e-05, + "loss": 2.0598, + "step": 62140 + }, + { + "epoch": 2.84, + "learning_rate": 3.589180282850474e-05, + "loss": 1.8982, + "step": 62150 + }, + { + "epoch": 2.84, + "learning_rate": 3.588951439425145e-05, + "loss": 1.9716, + "step": 62160 + }, + { + "epoch": 2.84, + "learning_rate": 3.588722595999817e-05, + "loss": 1.9182, + "step": 62170 + }, + { + "epoch": 2.84, + "learning_rate": 3.588493752574489e-05, + "loss": 1.9881, + "step": 62180 + }, + { + "epoch": 2.84, + "learning_rate": 3.58826490914916e-05, + "loss": 1.947, + "step": 62190 + }, + { + "epoch": 2.84, + "learning_rate": 3.588036065723832e-05, + "loss": 1.9551, + "step": 62200 + }, + { + "epoch": 2.84, + "learning_rate": 3.5878072222985036e-05, + "loss": 1.9064, + "step": 62210 + }, + { + "epoch": 2.84, + "learning_rate": 3.587578378873175e-05, + "loss": 1.973, + "step": 62220 + }, + { + "epoch": 2.84, + "learning_rate": 3.587349535447847e-05, + "loss": 1.9656, + "step": 62230 + }, + { + "epoch": 2.84, + "learning_rate": 3.587120692022518e-05, + "loss": 1.8818, + "step": 62240 + }, + { + "epoch": 2.84, + "learning_rate": 3.58689184859719e-05, + "loss": 2.0272, + "step": 62250 + }, + { + "epoch": 2.84, + "learning_rate": 3.5866630051718614e-05, + "loss": 1.9223, + "step": 62260 + }, + { + "epoch": 2.84, + "learning_rate": 3.586434161746533e-05, + "loss": 2.0453, + "step": 62270 + }, + { + "epoch": 2.84, + "learning_rate": 3.586205318321205e-05, + "loss": 1.8061, + "step": 62280 + }, + { + "epoch": 2.84, + "learning_rate": 3.585976474895876e-05, + "loss": 2.005, + "step": 62290 + }, + { + "epoch": 2.84, + "learning_rate": 3.585747631470548e-05, + "loss": 1.7388, + "step": 62300 + }, + { + "epoch": 2.85, + "learning_rate": 3.58551878804522e-05, + "loss": 1.8706, + "step": 62310 + }, + { + "epoch": 2.85, + "learning_rate": 3.585289944619891e-05, + "loss": 1.9684, + "step": 62320 + }, + { + "epoch": 2.85, + "learning_rate": 3.5850611011945626e-05, + "loss": 1.942, + "step": 62330 + }, + { + "epoch": 2.85, + "learning_rate": 3.584832257769235e-05, + "loss": 1.8568, + "step": 62340 + }, + { + "epoch": 2.85, + "learning_rate": 3.584603414343906e-05, + "loss": 1.9895, + "step": 62350 + }, + { + "epoch": 2.85, + "learning_rate": 3.5843745709185776e-05, + "loss": 1.9855, + "step": 62360 + }, + { + "epoch": 2.85, + "learning_rate": 3.5841457274932496e-05, + "loss": 1.883, + "step": 62370 + }, + { + "epoch": 2.85, + "learning_rate": 3.583916884067921e-05, + "loss": 1.9529, + "step": 62380 + }, + { + "epoch": 2.85, + "learning_rate": 3.5836880406425925e-05, + "loss": 1.7964, + "step": 62390 + }, + { + "epoch": 2.85, + "learning_rate": 3.5834591972172646e-05, + "loss": 1.9558, + "step": 62400 + }, + { + "epoch": 2.85, + "learning_rate": 3.583230353791936e-05, + "loss": 2.1253, + "step": 62410 + }, + { + "epoch": 2.85, + "learning_rate": 3.5830015103666074e-05, + "loss": 1.9071, + "step": 62420 + }, + { + "epoch": 2.85, + "learning_rate": 3.5827726669412795e-05, + "loss": 1.8805, + "step": 62430 + }, + { + "epoch": 2.85, + "learning_rate": 3.58254382351595e-05, + "loss": 1.8454, + "step": 62440 + }, + { + "epoch": 2.85, + "learning_rate": 3.5823149800906216e-05, + "loss": 2.0848, + "step": 62450 + }, + { + "epoch": 2.85, + "learning_rate": 3.582086136665294e-05, + "loss": 1.8935, + "step": 62460 + }, + { + "epoch": 2.85, + "learning_rate": 3.581857293239965e-05, + "loss": 2.0187, + "step": 62470 + }, + { + "epoch": 2.85, + "learning_rate": 3.5816284498146366e-05, + "loss": 1.7974, + "step": 62480 + }, + { + "epoch": 2.85, + "learning_rate": 3.5813996063893086e-05, + "loss": 2.0467, + "step": 62490 + }, + { + "epoch": 2.85, + "learning_rate": 3.58117076296398e-05, + "loss": 1.9387, + "step": 62500 + }, + { + "epoch": 2.85, + "learning_rate": 3.5809419195386515e-05, + "loss": 1.7828, + "step": 62510 + }, + { + "epoch": 2.85, + "learning_rate": 3.5807130761133236e-05, + "loss": 1.9787, + "step": 62520 + }, + { + "epoch": 2.86, + "learning_rate": 3.580484232687995e-05, + "loss": 1.9266, + "step": 62530 + }, + { + "epoch": 2.86, + "learning_rate": 3.5802553892626664e-05, + "loss": 1.9047, + "step": 62540 + }, + { + "epoch": 2.86, + "learning_rate": 3.5800265458373385e-05, + "loss": 1.8257, + "step": 62550 + }, + { + "epoch": 2.86, + "learning_rate": 3.57979770241201e-05, + "loss": 2.0659, + "step": 62560 + }, + { + "epoch": 2.86, + "learning_rate": 3.579568858986681e-05, + "loss": 1.9568, + "step": 62570 + }, + { + "epoch": 2.86, + "learning_rate": 3.5793400155613534e-05, + "loss": 1.9103, + "step": 62580 + }, + { + "epoch": 2.86, + "learning_rate": 3.579111172136025e-05, + "loss": 1.7248, + "step": 62590 + }, + { + "epoch": 2.86, + "learning_rate": 3.578882328710696e-05, + "loss": 1.7813, + "step": 62600 + }, + { + "epoch": 2.86, + "learning_rate": 3.578653485285368e-05, + "loss": 1.883, + "step": 62610 + }, + { + "epoch": 2.86, + "learning_rate": 3.57842464186004e-05, + "loss": 2.0126, + "step": 62620 + }, + { + "epoch": 2.86, + "learning_rate": 3.578195798434711e-05, + "loss": 2.0162, + "step": 62630 + }, + { + "epoch": 2.86, + "learning_rate": 3.5779669550093826e-05, + "loss": 1.9926, + "step": 62640 + }, + { + "epoch": 2.86, + "learning_rate": 3.577738111584054e-05, + "loss": 2.0862, + "step": 62650 + }, + { + "epoch": 2.86, + "learning_rate": 3.577509268158726e-05, + "loss": 1.961, + "step": 62660 + }, + { + "epoch": 2.86, + "learning_rate": 3.5772804247333975e-05, + "loss": 1.9156, + "step": 62670 + }, + { + "epoch": 2.86, + "learning_rate": 3.577051581308069e-05, + "loss": 1.9592, + "step": 62680 + }, + { + "epoch": 2.86, + "learning_rate": 3.576822737882741e-05, + "loss": 1.9383, + "step": 62690 + }, + { + "epoch": 2.86, + "learning_rate": 3.5765938944574124e-05, + "loss": 2.0883, + "step": 62700 + }, + { + "epoch": 2.86, + "learning_rate": 3.576365051032084e-05, + "loss": 2.0203, + "step": 62710 + }, + { + "epoch": 2.86, + "learning_rate": 3.576136207606756e-05, + "loss": 1.926, + "step": 62720 + }, + { + "epoch": 2.86, + "learning_rate": 3.575907364181427e-05, + "loss": 2.1409, + "step": 62730 + }, + { + "epoch": 2.86, + "learning_rate": 3.575678520756099e-05, + "loss": 1.9232, + "step": 62740 + }, + { + "epoch": 2.87, + "learning_rate": 3.575449677330771e-05, + "loss": 1.8628, + "step": 62750 + }, + { + "epoch": 2.87, + "learning_rate": 3.575220833905442e-05, + "loss": 2.0279, + "step": 62760 + }, + { + "epoch": 2.87, + "learning_rate": 3.5749919904801136e-05, + "loss": 1.9012, + "step": 62770 + }, + { + "epoch": 2.87, + "learning_rate": 3.574763147054786e-05, + "loss": 2.0143, + "step": 62780 + }, + { + "epoch": 2.87, + "learning_rate": 3.574534303629457e-05, + "loss": 1.8241, + "step": 62790 + }, + { + "epoch": 2.87, + "learning_rate": 3.5743054602041286e-05, + "loss": 1.9759, + "step": 62800 + }, + { + "epoch": 2.87, + "learning_rate": 3.5740766167788006e-05, + "loss": 1.7345, + "step": 62810 + }, + { + "epoch": 2.87, + "learning_rate": 3.573847773353472e-05, + "loss": 1.9398, + "step": 62820 + }, + { + "epoch": 2.87, + "learning_rate": 3.573618929928143e-05, + "loss": 2.0881, + "step": 62830 + }, + { + "epoch": 2.87, + "learning_rate": 3.573390086502815e-05, + "loss": 1.9504, + "step": 62840 + }, + { + "epoch": 2.87, + "learning_rate": 3.573161243077486e-05, + "loss": 1.9129, + "step": 62850 + }, + { + "epoch": 2.87, + "learning_rate": 3.572932399652158e-05, + "loss": 1.797, + "step": 62860 + }, + { + "epoch": 2.87, + "learning_rate": 3.57270355622683e-05, + "loss": 1.9459, + "step": 62870 + }, + { + "epoch": 2.87, + "learning_rate": 3.572474712801501e-05, + "loss": 1.9564, + "step": 62880 + }, + { + "epoch": 2.87, + "learning_rate": 3.5722458693761726e-05, + "loss": 1.8502, + "step": 62890 + }, + { + "epoch": 2.87, + "learning_rate": 3.572017025950845e-05, + "loss": 1.9123, + "step": 62900 + }, + { + "epoch": 2.87, + "learning_rate": 3.571788182525516e-05, + "loss": 2.0073, + "step": 62910 + }, + { + "epoch": 2.87, + "learning_rate": 3.5715593391001875e-05, + "loss": 1.9719, + "step": 62920 + }, + { + "epoch": 2.87, + "learning_rate": 3.5713304956748596e-05, + "loss": 1.9162, + "step": 62930 + }, + { + "epoch": 2.87, + "learning_rate": 3.571101652249531e-05, + "loss": 1.9261, + "step": 62940 + }, + { + "epoch": 2.87, + "learning_rate": 3.5708728088242025e-05, + "loss": 1.9868, + "step": 62950 + }, + { + "epoch": 2.87, + "learning_rate": 3.5706439653988746e-05, + "loss": 1.762, + "step": 62960 + }, + { + "epoch": 2.88, + "learning_rate": 3.570415121973546e-05, + "loss": 2.0028, + "step": 62970 + }, + { + "epoch": 2.88, + "learning_rate": 3.5701862785482174e-05, + "loss": 1.9391, + "step": 62980 + }, + { + "epoch": 2.88, + "learning_rate": 3.5699574351228895e-05, + "loss": 1.9854, + "step": 62990 + }, + { + "epoch": 2.88, + "learning_rate": 3.569728591697561e-05, + "loss": 1.8794, + "step": 63000 + }, + { + "epoch": 2.88, + "learning_rate": 3.569499748272232e-05, + "loss": 2.0347, + "step": 63010 + }, + { + "epoch": 2.88, + "learning_rate": 3.5692709048469044e-05, + "loss": 1.9397, + "step": 63020 + }, + { + "epoch": 2.88, + "learning_rate": 3.569042061421575e-05, + "loss": 1.9203, + "step": 63030 + }, + { + "epoch": 2.88, + "learning_rate": 3.568813217996247e-05, + "loss": 1.8024, + "step": 63040 + }, + { + "epoch": 2.88, + "learning_rate": 3.5685843745709186e-05, + "loss": 2.0544, + "step": 63050 + }, + { + "epoch": 2.88, + "learning_rate": 3.56835553114559e-05, + "loss": 1.8847, + "step": 63060 + }, + { + "epoch": 2.88, + "learning_rate": 3.568126687720262e-05, + "loss": 1.9498, + "step": 63070 + }, + { + "epoch": 2.88, + "learning_rate": 3.5678978442949335e-05, + "loss": 1.7492, + "step": 63080 + }, + { + "epoch": 2.88, + "learning_rate": 3.567669000869605e-05, + "loss": 1.8658, + "step": 63090 + }, + { + "epoch": 2.88, + "learning_rate": 3.567440157444277e-05, + "loss": 1.8613, + "step": 63100 + }, + { + "epoch": 2.88, + "learning_rate": 3.5672113140189485e-05, + "loss": 1.7266, + "step": 63110 + }, + { + "epoch": 2.88, + "learning_rate": 3.56698247059362e-05, + "loss": 1.8717, + "step": 63120 + }, + { + "epoch": 2.88, + "learning_rate": 3.566753627168292e-05, + "loss": 2.0591, + "step": 63130 + }, + { + "epoch": 2.88, + "learning_rate": 3.5665247837429634e-05, + "loss": 1.8792, + "step": 63140 + }, + { + "epoch": 2.88, + "learning_rate": 3.566295940317635e-05, + "loss": 2.0118, + "step": 63150 + }, + { + "epoch": 2.88, + "learning_rate": 3.566067096892307e-05, + "loss": 1.9258, + "step": 63160 + }, + { + "epoch": 2.88, + "learning_rate": 3.565838253466978e-05, + "loss": 1.8229, + "step": 63170 + }, + { + "epoch": 2.88, + "learning_rate": 3.56560941004165e-05, + "loss": 1.8949, + "step": 63180 + }, + { + "epoch": 2.89, + "learning_rate": 3.565380566616322e-05, + "loss": 1.8369, + "step": 63190 + }, + { + "epoch": 2.89, + "learning_rate": 3.565151723190993e-05, + "loss": 1.9096, + "step": 63200 + }, + { + "epoch": 2.89, + "learning_rate": 3.5649228797656646e-05, + "loss": 2.0014, + "step": 63210 + }, + { + "epoch": 2.89, + "learning_rate": 3.564694036340336e-05, + "loss": 1.9616, + "step": 63220 + }, + { + "epoch": 2.89, + "learning_rate": 3.5644651929150075e-05, + "loss": 2.0339, + "step": 63230 + }, + { + "epoch": 2.89, + "learning_rate": 3.564236349489679e-05, + "loss": 1.8768, + "step": 63240 + }, + { + "epoch": 2.89, + "learning_rate": 3.564007506064351e-05, + "loss": 2.0305, + "step": 63250 + }, + { + "epoch": 2.89, + "learning_rate": 3.5637786626390224e-05, + "loss": 1.805, + "step": 63260 + }, + { + "epoch": 2.89, + "learning_rate": 3.563549819213694e-05, + "loss": 2.1142, + "step": 63270 + }, + { + "epoch": 2.89, + "learning_rate": 3.563320975788366e-05, + "loss": 1.8948, + "step": 63280 + }, + { + "epoch": 2.89, + "learning_rate": 3.563092132363037e-05, + "loss": 1.8392, + "step": 63290 + }, + { + "epoch": 2.89, + "learning_rate": 3.562863288937709e-05, + "loss": 1.8652, + "step": 63300 + }, + { + "epoch": 2.89, + "learning_rate": 3.562634445512381e-05, + "loss": 1.7448, + "step": 63310 + }, + { + "epoch": 2.89, + "learning_rate": 3.562405602087052e-05, + "loss": 1.9783, + "step": 63320 + }, + { + "epoch": 2.89, + "learning_rate": 3.5621767586617236e-05, + "loss": 2.072, + "step": 63330 + }, + { + "epoch": 2.89, + "learning_rate": 3.561947915236396e-05, + "loss": 2.1366, + "step": 63340 + }, + { + "epoch": 2.89, + "learning_rate": 3.561719071811067e-05, + "loss": 1.8177, + "step": 63350 + }, + { + "epoch": 2.89, + "learning_rate": 3.5614902283857385e-05, + "loss": 1.7811, + "step": 63360 + }, + { + "epoch": 2.89, + "learning_rate": 3.5612613849604106e-05, + "loss": 1.8929, + "step": 63370 + }, + { + "epoch": 2.89, + "learning_rate": 3.561032541535082e-05, + "loss": 2.0066, + "step": 63380 + }, + { + "epoch": 2.89, + "learning_rate": 3.5608036981097535e-05, + "loss": 1.9763, + "step": 63390 + }, + { + "epoch": 2.9, + "learning_rate": 3.5605748546844256e-05, + "loss": 1.7912, + "step": 63400 + }, + { + "epoch": 2.9, + "learning_rate": 3.560346011259097e-05, + "loss": 1.9465, + "step": 63410 + }, + { + "epoch": 2.9, + "learning_rate": 3.560117167833768e-05, + "loss": 1.8102, + "step": 63420 + }, + { + "epoch": 2.9, + "learning_rate": 3.55988832440844e-05, + "loss": 1.9451, + "step": 63430 + }, + { + "epoch": 2.9, + "learning_rate": 3.559659480983111e-05, + "loss": 1.8815, + "step": 63440 + }, + { + "epoch": 2.9, + "learning_rate": 3.5594306375577826e-05, + "loss": 1.9561, + "step": 63450 + }, + { + "epoch": 2.9, + "learning_rate": 3.559201794132455e-05, + "loss": 1.8999, + "step": 63460 + }, + { + "epoch": 2.9, + "learning_rate": 3.558972950707126e-05, + "loss": 1.9295, + "step": 63470 + }, + { + "epoch": 2.9, + "learning_rate": 3.5587441072817975e-05, + "loss": 1.9766, + "step": 63480 + }, + { + "epoch": 2.9, + "learning_rate": 3.5585152638564696e-05, + "loss": 1.9353, + "step": 63490 + }, + { + "epoch": 2.9, + "learning_rate": 3.558286420431141e-05, + "loss": 1.8066, + "step": 63500 + }, + { + "epoch": 2.9, + "learning_rate": 3.5580575770058125e-05, + "loss": 2.0879, + "step": 63510 + }, + { + "epoch": 2.9, + "learning_rate": 3.5578287335804845e-05, + "loss": 1.8655, + "step": 63520 + }, + { + "epoch": 2.9, + "learning_rate": 3.557599890155156e-05, + "loss": 1.8894, + "step": 63530 + }, + { + "epoch": 2.9, + "learning_rate": 3.5573710467298274e-05, + "loss": 2.0968, + "step": 63540 + }, + { + "epoch": 2.9, + "learning_rate": 3.5571422033044995e-05, + "loss": 1.9822, + "step": 63550 + }, + { + "epoch": 2.9, + "learning_rate": 3.556913359879171e-05, + "loss": 1.8557, + "step": 63560 + }, + { + "epoch": 2.9, + "learning_rate": 3.556684516453842e-05, + "loss": 1.9945, + "step": 63570 + }, + { + "epoch": 2.9, + "learning_rate": 3.5564556730285144e-05, + "loss": 1.9027, + "step": 63580 + }, + { + "epoch": 2.9, + "learning_rate": 3.556226829603186e-05, + "loss": 1.9573, + "step": 63590 + }, + { + "epoch": 2.9, + "learning_rate": 3.555997986177857e-05, + "loss": 1.9271, + "step": 63600 + }, + { + "epoch": 2.9, + "learning_rate": 3.555769142752529e-05, + "loss": 2.0693, + "step": 63610 + }, + { + "epoch": 2.91, + "learning_rate": 3.5555402993272e-05, + "loss": 1.9814, + "step": 63620 + }, + { + "epoch": 2.91, + "learning_rate": 3.555311455901872e-05, + "loss": 2.0334, + "step": 63630 + }, + { + "epoch": 2.91, + "learning_rate": 3.5550826124765435e-05, + "loss": 1.8852, + "step": 63640 + }, + { + "epoch": 2.91, + "learning_rate": 3.554853769051215e-05, + "loss": 1.5322, + "step": 63650 + }, + { + "epoch": 2.91, + "learning_rate": 3.554624925625887e-05, + "loss": 1.8944, + "step": 63660 + }, + { + "epoch": 2.91, + "learning_rate": 3.5543960822005585e-05, + "loss": 2.0169, + "step": 63670 + }, + { + "epoch": 2.91, + "learning_rate": 3.55416723877523e-05, + "loss": 2.0145, + "step": 63680 + }, + { + "epoch": 2.91, + "learning_rate": 3.553938395349902e-05, + "loss": 1.9479, + "step": 63690 + }, + { + "epoch": 2.91, + "learning_rate": 3.5537095519245734e-05, + "loss": 1.9101, + "step": 63700 + }, + { + "epoch": 2.91, + "learning_rate": 3.553480708499245e-05, + "loss": 1.7812, + "step": 63710 + }, + { + "epoch": 2.91, + "learning_rate": 3.553251865073917e-05, + "loss": 1.7926, + "step": 63720 + }, + { + "epoch": 2.91, + "learning_rate": 3.553023021648588e-05, + "loss": 1.9802, + "step": 63730 + }, + { + "epoch": 2.91, + "learning_rate": 3.55279417822326e-05, + "loss": 1.9092, + "step": 63740 + }, + { + "epoch": 2.91, + "learning_rate": 3.552565334797932e-05, + "loss": 1.942, + "step": 63750 + }, + { + "epoch": 2.91, + "learning_rate": 3.552336491372603e-05, + "loss": 1.7755, + "step": 63760 + }, + { + "epoch": 2.91, + "learning_rate": 3.5521076479472746e-05, + "loss": 1.9491, + "step": 63770 + }, + { + "epoch": 2.91, + "learning_rate": 3.551878804521947e-05, + "loss": 1.9567, + "step": 63780 + }, + { + "epoch": 2.91, + "learning_rate": 3.551649961096618e-05, + "loss": 1.9921, + "step": 63790 + }, + { + "epoch": 2.91, + "learning_rate": 3.5514211176712895e-05, + "loss": 2.0343, + "step": 63800 + }, + { + "epoch": 2.91, + "learning_rate": 3.5511922742459616e-05, + "loss": 1.9224, + "step": 63810 + }, + { + "epoch": 2.91, + "learning_rate": 3.5509634308206324e-05, + "loss": 1.8223, + "step": 63820 + }, + { + "epoch": 2.91, + "learning_rate": 3.550734587395304e-05, + "loss": 1.969, + "step": 63830 + }, + { + "epoch": 2.92, + "learning_rate": 3.550505743969976e-05, + "loss": 2.0363, + "step": 63840 + }, + { + "epoch": 2.92, + "learning_rate": 3.550276900544647e-05, + "loss": 1.886, + "step": 63850 + }, + { + "epoch": 2.92, + "learning_rate": 3.550048057119319e-05, + "loss": 1.9836, + "step": 63860 + }, + { + "epoch": 2.92, + "learning_rate": 3.549819213693991e-05, + "loss": 1.7232, + "step": 63870 + }, + { + "epoch": 2.92, + "learning_rate": 3.549590370268662e-05, + "loss": 1.878, + "step": 63880 + }, + { + "epoch": 2.92, + "learning_rate": 3.5493615268433336e-05, + "loss": 1.9552, + "step": 63890 + }, + { + "epoch": 2.92, + "learning_rate": 3.549132683418006e-05, + "loss": 1.7719, + "step": 63900 + }, + { + "epoch": 2.92, + "learning_rate": 3.548903839992677e-05, + "loss": 1.7916, + "step": 63910 + }, + { + "epoch": 2.92, + "learning_rate": 3.5486749965673485e-05, + "loss": 2.0048, + "step": 63920 + }, + { + "epoch": 2.92, + "learning_rate": 3.5484461531420206e-05, + "loss": 2.0018, + "step": 63930 + }, + { + "epoch": 2.92, + "learning_rate": 3.548217309716692e-05, + "loss": 1.9818, + "step": 63940 + }, + { + "epoch": 2.92, + "learning_rate": 3.5479884662913635e-05, + "loss": 1.9747, + "step": 63950 + }, + { + "epoch": 2.92, + "learning_rate": 3.5477596228660355e-05, + "loss": 1.9482, + "step": 63960 + }, + { + "epoch": 2.92, + "learning_rate": 3.547530779440707e-05, + "loss": 2.0887, + "step": 63970 + }, + { + "epoch": 2.92, + "learning_rate": 3.5473019360153784e-05, + "loss": 1.9064, + "step": 63980 + }, + { + "epoch": 2.92, + "learning_rate": 3.5470730925900505e-05, + "loss": 1.8339, + "step": 63990 + }, + { + "epoch": 2.92, + "learning_rate": 3.546844249164722e-05, + "loss": 1.8523, + "step": 64000 + }, + { + "epoch": 2.92, + "learning_rate": 3.546615405739393e-05, + "loss": 1.7713, + "step": 64010 + }, + { + "epoch": 2.92, + "learning_rate": 3.546386562314065e-05, + "loss": 1.8322, + "step": 64020 + }, + { + "epoch": 2.92, + "learning_rate": 3.546157718888736e-05, + "loss": 2.1607, + "step": 64030 + }, + { + "epoch": 2.92, + "learning_rate": 3.545928875463408e-05, + "loss": 1.9986, + "step": 64040 + }, + { + "epoch": 2.92, + "learning_rate": 3.5457000320380796e-05, + "loss": 2.0054, + "step": 64050 + }, + { + "epoch": 2.93, + "learning_rate": 3.545471188612751e-05, + "loss": 2.0537, + "step": 64060 + }, + { + "epoch": 2.93, + "learning_rate": 3.545242345187423e-05, + "loss": 1.9387, + "step": 64070 + }, + { + "epoch": 2.93, + "learning_rate": 3.5450135017620945e-05, + "loss": 1.8831, + "step": 64080 + }, + { + "epoch": 2.93, + "learning_rate": 3.544784658336766e-05, + "loss": 1.9297, + "step": 64090 + }, + { + "epoch": 2.93, + "learning_rate": 3.544555814911438e-05, + "loss": 1.8759, + "step": 64100 + }, + { + "epoch": 2.93, + "learning_rate": 3.5443269714861095e-05, + "loss": 2.0205, + "step": 64110 + }, + { + "epoch": 2.93, + "learning_rate": 3.544098128060781e-05, + "loss": 1.8869, + "step": 64120 + }, + { + "epoch": 2.93, + "learning_rate": 3.543869284635453e-05, + "loss": 1.9156, + "step": 64130 + }, + { + "epoch": 2.93, + "learning_rate": 3.5436404412101244e-05, + "loss": 1.8772, + "step": 64140 + }, + { + "epoch": 2.93, + "learning_rate": 3.543411597784796e-05, + "loss": 1.9062, + "step": 64150 + }, + { + "epoch": 2.93, + "learning_rate": 3.543182754359468e-05, + "loss": 2.0051, + "step": 64160 + }, + { + "epoch": 2.93, + "learning_rate": 3.542953910934139e-05, + "loss": 2.1032, + "step": 64170 + }, + { + "epoch": 2.93, + "learning_rate": 3.542725067508811e-05, + "loss": 1.8948, + "step": 64180 + }, + { + "epoch": 2.93, + "learning_rate": 3.542496224083483e-05, + "loss": 1.9235, + "step": 64190 + }, + { + "epoch": 2.93, + "learning_rate": 3.542267380658154e-05, + "loss": 1.8876, + "step": 64200 + }, + { + "epoch": 2.93, + "learning_rate": 3.542038537232825e-05, + "loss": 1.6432, + "step": 64210 + }, + { + "epoch": 2.93, + "learning_rate": 3.541809693807497e-05, + "loss": 1.8401, + "step": 64220 + }, + { + "epoch": 2.93, + "learning_rate": 3.5415808503821684e-05, + "loss": 1.8995, + "step": 64230 + }, + { + "epoch": 2.93, + "learning_rate": 3.54135200695684e-05, + "loss": 1.7003, + "step": 64240 + }, + { + "epoch": 2.93, + "learning_rate": 3.541123163531512e-05, + "loss": 1.8123, + "step": 64250 + }, + { + "epoch": 2.93, + "learning_rate": 3.5408943201061834e-05, + "loss": 1.8025, + "step": 64260 + }, + { + "epoch": 2.93, + "learning_rate": 3.540665476680855e-05, + "loss": 1.9615, + "step": 64270 + }, + { + "epoch": 2.94, + "learning_rate": 3.540436633255527e-05, + "loss": 2.1013, + "step": 64280 + }, + { + "epoch": 2.94, + "learning_rate": 3.540207789830198e-05, + "loss": 1.949, + "step": 64290 + }, + { + "epoch": 2.94, + "learning_rate": 3.53997894640487e-05, + "loss": 1.9882, + "step": 64300 + }, + { + "epoch": 2.94, + "learning_rate": 3.539750102979542e-05, + "loss": 1.8805, + "step": 64310 + }, + { + "epoch": 2.94, + "learning_rate": 3.539521259554213e-05, + "loss": 1.8193, + "step": 64320 + }, + { + "epoch": 2.94, + "learning_rate": 3.5392924161288846e-05, + "loss": 1.8311, + "step": 64330 + }, + { + "epoch": 2.94, + "learning_rate": 3.539063572703557e-05, + "loss": 1.7831, + "step": 64340 + }, + { + "epoch": 2.94, + "learning_rate": 3.538834729278228e-05, + "loss": 1.9034, + "step": 64350 + }, + { + "epoch": 2.94, + "learning_rate": 3.5386058858528995e-05, + "loss": 1.8098, + "step": 64360 + }, + { + "epoch": 2.94, + "learning_rate": 3.5383770424275716e-05, + "loss": 1.9074, + "step": 64370 + }, + { + "epoch": 2.94, + "learning_rate": 3.538148199002243e-05, + "loss": 2.1487, + "step": 64380 + }, + { + "epoch": 2.94, + "learning_rate": 3.5379193555769144e-05, + "loss": 1.9166, + "step": 64390 + }, + { + "epoch": 2.94, + "learning_rate": 3.5376905121515865e-05, + "loss": 1.8936, + "step": 64400 + }, + { + "epoch": 2.94, + "learning_rate": 3.537461668726257e-05, + "loss": 1.8111, + "step": 64410 + }, + { + "epoch": 2.94, + "learning_rate": 3.5372328253009294e-05, + "loss": 2.1563, + "step": 64420 + }, + { + "epoch": 2.94, + "learning_rate": 3.537003981875601e-05, + "loss": 2.0387, + "step": 64430 + }, + { + "epoch": 2.94, + "learning_rate": 3.536775138450272e-05, + "loss": 1.9323, + "step": 64440 + }, + { + "epoch": 2.94, + "learning_rate": 3.5365462950249436e-05, + "loss": 1.9477, + "step": 64450 + }, + { + "epoch": 2.94, + "learning_rate": 3.536317451599616e-05, + "loss": 1.9427, + "step": 64460 + }, + { + "epoch": 2.94, + "learning_rate": 3.536088608174287e-05, + "loss": 1.7879, + "step": 64470 + }, + { + "epoch": 2.94, + "learning_rate": 3.5358597647489585e-05, + "loss": 1.876, + "step": 64480 + }, + { + "epoch": 2.94, + "learning_rate": 3.5356309213236306e-05, + "loss": 2.0496, + "step": 64490 + }, + { + "epoch": 2.95, + "learning_rate": 3.535402077898302e-05, + "loss": 1.893, + "step": 64500 + }, + { + "epoch": 2.95, + "learning_rate": 3.5351732344729734e-05, + "loss": 1.9023, + "step": 64510 + }, + { + "epoch": 2.95, + "learning_rate": 3.5349443910476455e-05, + "loss": 2.0654, + "step": 64520 + }, + { + "epoch": 2.95, + "learning_rate": 3.534715547622317e-05, + "loss": 2.1372, + "step": 64530 + }, + { + "epoch": 2.95, + "learning_rate": 3.5344867041969884e-05, + "loss": 2.0347, + "step": 64540 + }, + { + "epoch": 2.95, + "learning_rate": 3.5342578607716605e-05, + "loss": 1.9693, + "step": 64550 + }, + { + "epoch": 2.95, + "learning_rate": 3.534029017346332e-05, + "loss": 1.7586, + "step": 64560 + }, + { + "epoch": 2.95, + "learning_rate": 3.533800173921003e-05, + "loss": 1.8207, + "step": 64570 + }, + { + "epoch": 2.95, + "learning_rate": 3.5335713304956754e-05, + "loss": 1.9588, + "step": 64580 + }, + { + "epoch": 2.95, + "learning_rate": 3.533342487070347e-05, + "loss": 1.6623, + "step": 64590 + }, + { + "epoch": 2.95, + "learning_rate": 3.533113643645018e-05, + "loss": 1.9133, + "step": 64600 + }, + { + "epoch": 2.95, + "learning_rate": 3.5328848002196896e-05, + "loss": 1.9299, + "step": 64610 + }, + { + "epoch": 2.95, + "learning_rate": 3.532655956794361e-05, + "loss": 2.0485, + "step": 64620 + }, + { + "epoch": 2.95, + "learning_rate": 3.532427113369033e-05, + "loss": 1.9936, + "step": 64630 + }, + { + "epoch": 2.95, + "learning_rate": 3.5321982699437045e-05, + "loss": 1.735, + "step": 64640 + }, + { + "epoch": 2.95, + "learning_rate": 3.531969426518376e-05, + "loss": 1.8677, + "step": 64650 + }, + { + "epoch": 2.95, + "learning_rate": 3.531740583093048e-05, + "loss": 1.9971, + "step": 64660 + }, + { + "epoch": 2.95, + "learning_rate": 3.5315117396677194e-05, + "loss": 1.8924, + "step": 64670 + }, + { + "epoch": 2.95, + "learning_rate": 3.531282896242391e-05, + "loss": 1.9155, + "step": 64680 + }, + { + "epoch": 2.95, + "learning_rate": 3.531054052817063e-05, + "loss": 1.9737, + "step": 64690 + }, + { + "epoch": 2.95, + "learning_rate": 3.5308252093917344e-05, + "loss": 2.0032, + "step": 64700 + }, + { + "epoch": 2.95, + "learning_rate": 3.530596365966406e-05, + "loss": 2.058, + "step": 64710 + }, + { + "epoch": 2.96, + "learning_rate": 3.530367522541078e-05, + "loss": 1.8235, + "step": 64720 + }, + { + "epoch": 2.96, + "learning_rate": 3.530138679115749e-05, + "loss": 1.8421, + "step": 64730 + }, + { + "epoch": 2.96, + "learning_rate": 3.529909835690421e-05, + "loss": 2.0105, + "step": 64740 + }, + { + "epoch": 2.96, + "learning_rate": 3.529680992265093e-05, + "loss": 1.8812, + "step": 64750 + }, + { + "epoch": 2.96, + "learning_rate": 3.529452148839764e-05, + "loss": 2.0635, + "step": 64760 + }, + { + "epoch": 2.96, + "learning_rate": 3.5292233054144356e-05, + "loss": 1.7655, + "step": 64770 + }, + { + "epoch": 2.96, + "learning_rate": 3.528994461989108e-05, + "loss": 1.995, + "step": 64780 + }, + { + "epoch": 2.96, + "learning_rate": 3.528765618563779e-05, + "loss": 1.7921, + "step": 64790 + }, + { + "epoch": 2.96, + "learning_rate": 3.5285367751384505e-05, + "loss": 1.8284, + "step": 64800 + }, + { + "epoch": 2.96, + "learning_rate": 3.528307931713122e-05, + "loss": 1.6364, + "step": 64810 + }, + { + "epoch": 2.96, + "learning_rate": 3.5280790882877934e-05, + "loss": 1.784, + "step": 64820 + }, + { + "epoch": 2.96, + "learning_rate": 3.527850244862465e-05, + "loss": 1.9723, + "step": 64830 + }, + { + "epoch": 2.96, + "learning_rate": 3.527621401437137e-05, + "loss": 2.0987, + "step": 64840 + }, + { + "epoch": 2.96, + "learning_rate": 3.527392558011808e-05, + "loss": 1.7028, + "step": 64850 + }, + { + "epoch": 2.96, + "learning_rate": 3.52716371458648e-05, + "loss": 1.9571, + "step": 64860 + }, + { + "epoch": 2.96, + "learning_rate": 3.526934871161152e-05, + "loss": 1.8759, + "step": 64870 + }, + { + "epoch": 2.96, + "learning_rate": 3.526706027735823e-05, + "loss": 1.6979, + "step": 64880 + }, + { + "epoch": 2.96, + "learning_rate": 3.5264771843104946e-05, + "loss": 1.8347, + "step": 64890 + }, + { + "epoch": 2.96, + "learning_rate": 3.526248340885167e-05, + "loss": 1.7995, + "step": 64900 + }, + { + "epoch": 2.96, + "learning_rate": 3.526019497459838e-05, + "loss": 1.822, + "step": 64910 + }, + { + "epoch": 2.96, + "learning_rate": 3.5257906540345095e-05, + "loss": 1.9235, + "step": 64920 + }, + { + "epoch": 2.96, + "learning_rate": 3.5255618106091816e-05, + "loss": 1.8268, + "step": 64930 + }, + { + "epoch": 2.97, + "learning_rate": 3.525332967183853e-05, + "loss": 1.8438, + "step": 64940 + }, + { + "epoch": 2.97, + "learning_rate": 3.5251041237585244e-05, + "loss": 2.0091, + "step": 64950 + }, + { + "epoch": 2.97, + "learning_rate": 3.5248752803331965e-05, + "loss": 1.9503, + "step": 64960 + }, + { + "epoch": 2.97, + "learning_rate": 3.524646436907868e-05, + "loss": 1.7614, + "step": 64970 + }, + { + "epoch": 2.97, + "learning_rate": 3.5244175934825394e-05, + "loss": 1.7522, + "step": 64980 + }, + { + "epoch": 2.97, + "learning_rate": 3.5241887500572114e-05, + "loss": 1.7531, + "step": 64990 + }, + { + "epoch": 2.97, + "learning_rate": 3.523959906631882e-05, + "loss": 1.8558, + "step": 65000 + }, + { + "epoch": 2.97, + "learning_rate": 3.523731063206554e-05, + "loss": 1.791, + "step": 65010 + }, + { + "epoch": 2.97, + "learning_rate": 3.523502219781226e-05, + "loss": 1.827, + "step": 65020 + }, + { + "epoch": 2.97, + "learning_rate": 3.523273376355897e-05, + "loss": 1.7803, + "step": 65030 + }, + { + "epoch": 2.97, + "learning_rate": 3.523044532930569e-05, + "loss": 1.8605, + "step": 65040 + }, + { + "epoch": 2.97, + "learning_rate": 3.5228156895052406e-05, + "loss": 1.8823, + "step": 65050 + }, + { + "epoch": 2.97, + "learning_rate": 3.522586846079912e-05, + "loss": 1.8415, + "step": 65060 + }, + { + "epoch": 2.97, + "learning_rate": 3.522358002654584e-05, + "loss": 2.046, + "step": 65070 + }, + { + "epoch": 2.97, + "learning_rate": 3.5221291592292555e-05, + "loss": 1.7473, + "step": 65080 + }, + { + "epoch": 2.97, + "learning_rate": 3.521900315803927e-05, + "loss": 1.8824, + "step": 65090 + }, + { + "epoch": 2.97, + "learning_rate": 3.521671472378599e-05, + "loss": 1.8442, + "step": 65100 + }, + { + "epoch": 2.97, + "learning_rate": 3.5214426289532704e-05, + "loss": 1.9977, + "step": 65110 + }, + { + "epoch": 2.97, + "learning_rate": 3.521213785527942e-05, + "loss": 1.9237, + "step": 65120 + }, + { + "epoch": 2.97, + "learning_rate": 3.520984942102614e-05, + "loss": 1.7669, + "step": 65130 + }, + { + "epoch": 2.97, + "learning_rate": 3.5207560986772854e-05, + "loss": 1.9044, + "step": 65140 + }, + { + "epoch": 2.97, + "learning_rate": 3.520527255251957e-05, + "loss": 2.0043, + "step": 65150 + }, + { + "epoch": 2.98, + "learning_rate": 3.520298411826629e-05, + "loss": 1.8799, + "step": 65160 + }, + { + "epoch": 2.98, + "learning_rate": 3.5200695684013e-05, + "loss": 1.6773, + "step": 65170 + }, + { + "epoch": 2.98, + "learning_rate": 3.519840724975972e-05, + "loss": 1.7597, + "step": 65180 + }, + { + "epoch": 2.98, + "learning_rate": 3.519611881550644e-05, + "loss": 1.9074, + "step": 65190 + }, + { + "epoch": 2.98, + "learning_rate": 3.5193830381253145e-05, + "loss": 2.0024, + "step": 65200 + }, + { + "epoch": 2.98, + "learning_rate": 3.519154194699986e-05, + "loss": 1.8697, + "step": 65210 + }, + { + "epoch": 2.98, + "learning_rate": 3.518925351274658e-05, + "loss": 1.7033, + "step": 65220 + }, + { + "epoch": 2.98, + "learning_rate": 3.5186965078493294e-05, + "loss": 1.8035, + "step": 65230 + }, + { + "epoch": 2.98, + "learning_rate": 3.518467664424001e-05, + "loss": 1.8199, + "step": 65240 + }, + { + "epoch": 2.98, + "learning_rate": 3.518238820998673e-05, + "loss": 1.7095, + "step": 65250 + }, + { + "epoch": 2.98, + "learning_rate": 3.5180099775733444e-05, + "loss": 1.8901, + "step": 65260 + }, + { + "epoch": 2.98, + "learning_rate": 3.517781134148016e-05, + "loss": 1.8147, + "step": 65270 + }, + { + "epoch": 2.98, + "learning_rate": 3.517552290722688e-05, + "loss": 1.7665, + "step": 65280 + }, + { + "epoch": 2.98, + "learning_rate": 3.517323447297359e-05, + "loss": 1.7666, + "step": 65290 + }, + { + "epoch": 2.98, + "learning_rate": 3.517094603872031e-05, + "loss": 1.7612, + "step": 65300 + }, + { + "epoch": 2.98, + "learning_rate": 3.516865760446703e-05, + "loss": 1.8007, + "step": 65310 + }, + { + "epoch": 2.98, + "learning_rate": 3.516636917021374e-05, + "loss": 1.7832, + "step": 65320 + }, + { + "epoch": 2.98, + "learning_rate": 3.5164080735960456e-05, + "loss": 1.9922, + "step": 65330 + }, + { + "epoch": 2.98, + "learning_rate": 3.516179230170718e-05, + "loss": 1.7989, + "step": 65340 + }, + { + "epoch": 2.98, + "learning_rate": 3.515950386745389e-05, + "loss": 1.8394, + "step": 65350 + }, + { + "epoch": 2.98, + "learning_rate": 3.5157215433200605e-05, + "loss": 1.8232, + "step": 65360 + }, + { + "epoch": 2.98, + "learning_rate": 3.5154926998947326e-05, + "loss": 1.8581, + "step": 65370 + }, + { + "epoch": 2.99, + "learning_rate": 3.515263856469404e-05, + "loss": 1.8339, + "step": 65380 + }, + { + "epoch": 2.99, + "learning_rate": 3.5150350130440754e-05, + "loss": 2.0136, + "step": 65390 + }, + { + "epoch": 2.99, + "learning_rate": 3.514806169618747e-05, + "loss": 1.943, + "step": 65400 + }, + { + "epoch": 2.99, + "learning_rate": 3.514577326193418e-05, + "loss": 1.9084, + "step": 65410 + }, + { + "epoch": 2.99, + "learning_rate": 3.5143484827680904e-05, + "loss": 1.9565, + "step": 65420 + }, + { + "epoch": 2.99, + "learning_rate": 3.514119639342762e-05, + "loss": 1.8018, + "step": 65430 + }, + { + "epoch": 2.99, + "learning_rate": 3.513890795917433e-05, + "loss": 1.8721, + "step": 65440 + }, + { + "epoch": 2.99, + "learning_rate": 3.513661952492105e-05, + "loss": 1.7478, + "step": 65450 + }, + { + "epoch": 2.99, + "learning_rate": 3.513433109066777e-05, + "loss": 1.9598, + "step": 65460 + }, + { + "epoch": 2.99, + "learning_rate": 3.513204265641448e-05, + "loss": 1.7203, + "step": 65470 + }, + { + "epoch": 2.99, + "learning_rate": 3.51297542221612e-05, + "loss": 1.6623, + "step": 65480 + }, + { + "epoch": 2.99, + "learning_rate": 3.5127465787907916e-05, + "loss": 1.8727, + "step": 65490 + }, + { + "epoch": 2.99, + "learning_rate": 3.512517735365463e-05, + "loss": 1.7237, + "step": 65500 + }, + { + "epoch": 2.99, + "learning_rate": 3.512288891940135e-05, + "loss": 1.9115, + "step": 65510 + }, + { + "epoch": 2.99, + "learning_rate": 3.5120600485148065e-05, + "loss": 1.7929, + "step": 65520 + }, + { + "epoch": 2.99, + "learning_rate": 3.511831205089478e-05, + "loss": 1.8861, + "step": 65530 + }, + { + "epoch": 2.99, + "learning_rate": 3.51160236166415e-05, + "loss": 2.0061, + "step": 65540 + }, + { + "epoch": 2.99, + "learning_rate": 3.5113735182388214e-05, + "loss": 1.8188, + "step": 65550 + }, + { + "epoch": 2.99, + "learning_rate": 3.511144674813493e-05, + "loss": 1.8446, + "step": 65560 + }, + { + "epoch": 2.99, + "learning_rate": 3.510915831388164e-05, + "loss": 1.9129, + "step": 65570 + }, + { + "epoch": 2.99, + "learning_rate": 3.5106869879628364e-05, + "loss": 1.9336, + "step": 65580 + }, + { + "epoch": 3.0, + "learning_rate": 3.510458144537508e-05, + "loss": 1.8172, + "step": 65590 + }, + { + "epoch": 3.0, + "learning_rate": 3.510229301112179e-05, + "loss": 1.9895, + "step": 65600 + }, + { + "epoch": 3.0, + "learning_rate": 3.5100004576868506e-05, + "loss": 1.7691, + "step": 65610 + }, + { + "epoch": 3.0, + "learning_rate": 3.509771614261522e-05, + "loss": 1.9527, + "step": 65620 + }, + { + "epoch": 3.0, + "learning_rate": 3.509542770836194e-05, + "loss": 1.8455, + "step": 65630 + }, + { + "epoch": 3.0, + "learning_rate": 3.5093139274108655e-05, + "loss": 1.7767, + "step": 65640 + }, + { + "epoch": 3.0, + "learning_rate": 3.509085083985537e-05, + "loss": 1.8234, + "step": 65650 + }, + { + "epoch": 3.0, + "learning_rate": 3.508856240560209e-05, + "loss": 1.9327, + "step": 65660 + }, + { + "epoch": 3.0, + "learning_rate": 3.5086273971348804e-05, + "loss": 1.9408, + "step": 65670 + }, + { + "epoch": 3.0, + "learning_rate": 3.508398553709552e-05, + "loss": 1.9963, + "step": 65680 + }, + { + "epoch": 3.0, + "learning_rate": 3.508169710284224e-05, + "loss": 1.8424, + "step": 65690 + }, + { + "epoch": 3.0, + "eval_cer": 0.6810235444197945, + "eval_em": 0.00728476821192053, + "eval_f1": 0.00728476821192053, + "eval_loss": 1.7074658870697021, + "eval_runtime": 2583.2839, + "eval_samples_per_second": 4.092, + "eval_steps_per_second": 2.046, + "eval_wer": 0.9927152317880795, + "step": 65699 + }, + { + "epoch": 3.0, + "learning_rate": 3.5079408668588953e-05, + "loss": 2.0082, + "step": 65700 + }, + { + "epoch": 3.0, + "learning_rate": 3.507712023433567e-05, + "loss": 1.9236, + "step": 65710 + }, + { + "epoch": 3.0, + "learning_rate": 3.507483180008239e-05, + "loss": 1.7465, + "step": 65720 + }, + { + "epoch": 3.0, + "learning_rate": 3.50725433658291e-05, + "loss": 1.9264, + "step": 65730 + }, + { + "epoch": 3.0, + "learning_rate": 3.507025493157582e-05, + "loss": 1.779, + "step": 65740 + }, + { + "epoch": 3.0, + "learning_rate": 3.506796649732254e-05, + "loss": 1.7644, + "step": 65750 + }, + { + "epoch": 3.0, + "learning_rate": 3.506567806306925e-05, + "loss": 1.7723, + "step": 65760 + }, + { + "epoch": 3.0, + "learning_rate": 3.5063389628815966e-05, + "loss": 1.8406, + "step": 65770 + }, + { + "epoch": 3.0, + "learning_rate": 3.506110119456269e-05, + "loss": 1.7262, + "step": 65780 + }, + { + "epoch": 3.0, + "learning_rate": 3.50588127603094e-05, + "loss": 2.0243, + "step": 65790 + }, + { + "epoch": 3.0, + "learning_rate": 3.505652432605611e-05, + "loss": 1.8153, + "step": 65800 + }, + { + "epoch": 3.01, + "learning_rate": 3.505423589180283e-05, + "loss": 1.7175, + "step": 65810 + }, + { + "epoch": 3.01, + "learning_rate": 3.5051947457549543e-05, + "loss": 1.8107, + "step": 65820 + }, + { + "epoch": 3.01, + "learning_rate": 3.504965902329626e-05, + "loss": 1.8826, + "step": 65830 + }, + { + "epoch": 3.01, + "learning_rate": 3.504737058904298e-05, + "loss": 1.8448, + "step": 65840 + }, + { + "epoch": 3.01, + "learning_rate": 3.504508215478969e-05, + "loss": 1.7892, + "step": 65850 + }, + { + "epoch": 3.01, + "learning_rate": 3.504279372053641e-05, + "loss": 1.9239, + "step": 65860 + }, + { + "epoch": 3.01, + "learning_rate": 3.504050528628313e-05, + "loss": 1.8223, + "step": 65870 + }, + { + "epoch": 3.01, + "learning_rate": 3.503821685202984e-05, + "loss": 1.8341, + "step": 65880 + }, + { + "epoch": 3.01, + "learning_rate": 3.5035928417776556e-05, + "loss": 1.8385, + "step": 65890 + }, + { + "epoch": 3.01, + "learning_rate": 3.503363998352328e-05, + "loss": 1.8875, + "step": 65900 + }, + { + "epoch": 3.01, + "learning_rate": 3.503135154926999e-05, + "loss": 1.7562, + "step": 65910 + }, + { + "epoch": 3.01, + "learning_rate": 3.5029063115016705e-05, + "loss": 1.7359, + "step": 65920 + }, + { + "epoch": 3.01, + "learning_rate": 3.5026774680763426e-05, + "loss": 1.6034, + "step": 65930 + }, + { + "epoch": 3.01, + "learning_rate": 3.502448624651014e-05, + "loss": 1.6806, + "step": 65940 + }, + { + "epoch": 3.01, + "learning_rate": 3.5022197812256854e-05, + "loss": 1.9916, + "step": 65950 + }, + { + "epoch": 3.01, + "learning_rate": 3.5019909378003575e-05, + "loss": 1.6789, + "step": 65960 + }, + { + "epoch": 3.01, + "learning_rate": 3.501762094375029e-05, + "loss": 1.8911, + "step": 65970 + }, + { + "epoch": 3.01, + "learning_rate": 3.5015332509497003e-05, + "loss": 1.8196, + "step": 65980 + }, + { + "epoch": 3.01, + "learning_rate": 3.501304407524372e-05, + "loss": 1.7601, + "step": 65990 + }, + { + "epoch": 3.01, + "learning_rate": 3.501075564099043e-05, + "loss": 1.8497, + "step": 66000 + }, + { + "epoch": 3.01, + "learning_rate": 3.500846720673715e-05, + "loss": 1.7737, + "step": 66010 + }, + { + "epoch": 3.01, + "learning_rate": 3.500617877248387e-05, + "loss": 1.7045, + "step": 66020 + }, + { + "epoch": 3.02, + "learning_rate": 3.500389033823058e-05, + "loss": 1.8618, + "step": 66030 + }, + { + "epoch": 3.02, + "learning_rate": 3.50016019039773e-05, + "loss": 1.9597, + "step": 66040 + }, + { + "epoch": 3.02, + "learning_rate": 3.4999313469724016e-05, + "loss": 1.8738, + "step": 66050 + }, + { + "epoch": 3.02, + "learning_rate": 3.499702503547073e-05, + "loss": 1.6934, + "step": 66060 + }, + { + "epoch": 3.02, + "learning_rate": 3.499473660121745e-05, + "loss": 1.924, + "step": 66070 + }, + { + "epoch": 3.02, + "learning_rate": 3.4992448166964165e-05, + "loss": 1.937, + "step": 66080 + }, + { + "epoch": 3.02, + "learning_rate": 3.499015973271088e-05, + "loss": 1.7885, + "step": 66090 + }, + { + "epoch": 3.02, + "learning_rate": 3.49878712984576e-05, + "loss": 1.7927, + "step": 66100 + }, + { + "epoch": 3.02, + "learning_rate": 3.4985582864204314e-05, + "loss": 1.7998, + "step": 66110 + }, + { + "epoch": 3.02, + "learning_rate": 3.498329442995103e-05, + "loss": 1.8242, + "step": 66120 + }, + { + "epoch": 3.02, + "learning_rate": 3.498100599569775e-05, + "loss": 1.6451, + "step": 66130 + }, + { + "epoch": 3.02, + "learning_rate": 3.4978717561444463e-05, + "loss": 1.8553, + "step": 66140 + }, + { + "epoch": 3.02, + "learning_rate": 3.497642912719118e-05, + "loss": 1.585, + "step": 66150 + }, + { + "epoch": 3.02, + "learning_rate": 3.49741406929379e-05, + "loss": 1.8591, + "step": 66160 + }, + { + "epoch": 3.02, + "learning_rate": 3.497185225868461e-05, + "loss": 1.9516, + "step": 66170 + }, + { + "epoch": 3.02, + "learning_rate": 3.496956382443133e-05, + "loss": 1.7638, + "step": 66180 + }, + { + "epoch": 3.02, + "learning_rate": 3.496727539017804e-05, + "loss": 1.9891, + "step": 66190 + }, + { + "epoch": 3.02, + "learning_rate": 3.4964986955924755e-05, + "loss": 1.9083, + "step": 66200 + }, + { + "epoch": 3.02, + "learning_rate": 3.496269852167147e-05, + "loss": 1.8725, + "step": 66210 + }, + { + "epoch": 3.02, + "learning_rate": 3.496041008741819e-05, + "loss": 1.7639, + "step": 66220 + }, + { + "epoch": 3.02, + "learning_rate": 3.4958121653164904e-05, + "loss": 1.8529, + "step": 66230 + }, + { + "epoch": 3.02, + "learning_rate": 3.495583321891162e-05, + "loss": 1.972, + "step": 66240 + }, + { + "epoch": 3.03, + "learning_rate": 3.495354478465834e-05, + "loss": 1.8658, + "step": 66250 + }, + { + "epoch": 3.03, + "learning_rate": 3.495125635040505e-05, + "loss": 1.8149, + "step": 66260 + }, + { + "epoch": 3.03, + "learning_rate": 3.494896791615177e-05, + "loss": 1.9893, + "step": 66270 + }, + { + "epoch": 3.03, + "learning_rate": 3.494667948189849e-05, + "loss": 1.9856, + "step": 66280 + }, + { + "epoch": 3.03, + "learning_rate": 3.49443910476452e-05, + "loss": 1.8833, + "step": 66290 + }, + { + "epoch": 3.03, + "learning_rate": 3.494210261339192e-05, + "loss": 1.8682, + "step": 66300 + }, + { + "epoch": 3.03, + "learning_rate": 3.493981417913864e-05, + "loss": 1.7846, + "step": 66310 + }, + { + "epoch": 3.03, + "learning_rate": 3.493752574488535e-05, + "loss": 1.8052, + "step": 66320 + }, + { + "epoch": 3.03, + "learning_rate": 3.4935237310632066e-05, + "loss": 1.9652, + "step": 66330 + }, + { + "epoch": 3.03, + "learning_rate": 3.493294887637879e-05, + "loss": 2.1232, + "step": 66340 + }, + { + "epoch": 3.03, + "learning_rate": 3.49306604421255e-05, + "loss": 1.735, + "step": 66350 + }, + { + "epoch": 3.03, + "learning_rate": 3.4928372007872215e-05, + "loss": 1.86, + "step": 66360 + }, + { + "epoch": 3.03, + "learning_rate": 3.4926083573618936e-05, + "loss": 1.5874, + "step": 66370 + }, + { + "epoch": 3.03, + "learning_rate": 3.492379513936565e-05, + "loss": 1.8253, + "step": 66380 + }, + { + "epoch": 3.03, + "learning_rate": 3.4921506705112364e-05, + "loss": 1.7985, + "step": 66390 + }, + { + "epoch": 3.03, + "learning_rate": 3.491921827085908e-05, + "loss": 1.7732, + "step": 66400 + }, + { + "epoch": 3.03, + "learning_rate": 3.491692983660579e-05, + "loss": 1.842, + "step": 66410 + }, + { + "epoch": 3.03, + "learning_rate": 3.4914641402352513e-05, + "loss": 1.8042, + "step": 66420 + }, + { + "epoch": 3.03, + "learning_rate": 3.491235296809923e-05, + "loss": 1.8681, + "step": 66430 + }, + { + "epoch": 3.03, + "learning_rate": 3.491006453384594e-05, + "loss": 1.729, + "step": 66440 + }, + { + "epoch": 3.03, + "learning_rate": 3.490777609959266e-05, + "loss": 1.781, + "step": 66450 + }, + { + "epoch": 3.03, + "learning_rate": 3.490548766533938e-05, + "loss": 1.9866, + "step": 66460 + }, + { + "epoch": 3.04, + "learning_rate": 3.490319923108609e-05, + "loss": 1.7896, + "step": 66470 + }, + { + "epoch": 3.04, + "learning_rate": 3.490091079683281e-05, + "loss": 1.8367, + "step": 66480 + }, + { + "epoch": 3.04, + "learning_rate": 3.4898622362579526e-05, + "loss": 1.8227, + "step": 66490 + }, + { + "epoch": 3.04, + "learning_rate": 3.489633392832624e-05, + "loss": 1.7682, + "step": 66500 + }, + { + "epoch": 3.04, + "learning_rate": 3.489404549407296e-05, + "loss": 1.8766, + "step": 66510 + }, + { + "epoch": 3.04, + "learning_rate": 3.4891757059819675e-05, + "loss": 1.9236, + "step": 66520 + }, + { + "epoch": 3.04, + "learning_rate": 3.488946862556639e-05, + "loss": 1.6977, + "step": 66530 + }, + { + "epoch": 3.04, + "learning_rate": 3.488718019131311e-05, + "loss": 1.9414, + "step": 66540 + }, + { + "epoch": 3.04, + "learning_rate": 3.4884891757059824e-05, + "loss": 1.7656, + "step": 66550 + }, + { + "epoch": 3.04, + "learning_rate": 3.488260332280654e-05, + "loss": 1.9116, + "step": 66560 + }, + { + "epoch": 3.04, + "learning_rate": 3.488031488855326e-05, + "loss": 1.7284, + "step": 66570 + }, + { + "epoch": 3.04, + "learning_rate": 3.4878026454299973e-05, + "loss": 1.8351, + "step": 66580 + }, + { + "epoch": 3.04, + "learning_rate": 3.487573802004668e-05, + "loss": 1.8359, + "step": 66590 + }, + { + "epoch": 3.04, + "learning_rate": 3.48734495857934e-05, + "loss": 1.8834, + "step": 66600 + }, + { + "epoch": 3.04, + "learning_rate": 3.4871161151540116e-05, + "loss": 1.9112, + "step": 66610 + }, + { + "epoch": 3.04, + "learning_rate": 3.486887271728683e-05, + "loss": 1.8849, + "step": 66620 + }, + { + "epoch": 3.04, + "learning_rate": 3.486658428303355e-05, + "loss": 1.8758, + "step": 66630 + }, + { + "epoch": 3.04, + "learning_rate": 3.4864295848780265e-05, + "loss": 1.7935, + "step": 66640 + }, + { + "epoch": 3.04, + "learning_rate": 3.486200741452698e-05, + "loss": 1.9099, + "step": 66650 + }, + { + "epoch": 3.04, + "learning_rate": 3.48597189802737e-05, + "loss": 1.7949, + "step": 66660 + }, + { + "epoch": 3.04, + "learning_rate": 3.4857430546020414e-05, + "loss": 1.6891, + "step": 66670 + }, + { + "epoch": 3.04, + "learning_rate": 3.485514211176713e-05, + "loss": 1.8326, + "step": 66680 + }, + { + "epoch": 3.05, + "learning_rate": 3.485285367751385e-05, + "loss": 1.8512, + "step": 66690 + }, + { + "epoch": 3.05, + "learning_rate": 3.485056524326056e-05, + "loss": 1.6338, + "step": 66700 + }, + { + "epoch": 3.05, + "learning_rate": 3.484827680900728e-05, + "loss": 1.7524, + "step": 66710 + }, + { + "epoch": 3.05, + "learning_rate": 3.4845988374754e-05, + "loss": 1.6919, + "step": 66720 + }, + { + "epoch": 3.05, + "learning_rate": 3.484369994050071e-05, + "loss": 1.8179, + "step": 66730 + }, + { + "epoch": 3.05, + "learning_rate": 3.484141150624743e-05, + "loss": 1.7312, + "step": 66740 + }, + { + "epoch": 3.05, + "learning_rate": 3.483912307199415e-05, + "loss": 1.6406, + "step": 66750 + }, + { + "epoch": 3.05, + "learning_rate": 3.483683463774086e-05, + "loss": 1.7079, + "step": 66760 + }, + { + "epoch": 3.05, + "learning_rate": 3.4834546203487576e-05, + "loss": 1.8985, + "step": 66770 + }, + { + "epoch": 3.05, + "learning_rate": 3.483225776923429e-05, + "loss": 1.8188, + "step": 66780 + }, + { + "epoch": 3.05, + "learning_rate": 3.4829969334981004e-05, + "loss": 1.8982, + "step": 66790 + }, + { + "epoch": 3.05, + "learning_rate": 3.482768090072772e-05, + "loss": 1.9518, + "step": 66800 + }, + { + "epoch": 3.05, + "learning_rate": 3.482539246647444e-05, + "loss": 1.7618, + "step": 66810 + }, + { + "epoch": 3.05, + "learning_rate": 3.482310403222115e-05, + "loss": 2.0259, + "step": 66820 + }, + { + "epoch": 3.05, + "learning_rate": 3.482081559796787e-05, + "loss": 1.9405, + "step": 66830 + }, + { + "epoch": 3.05, + "learning_rate": 3.481852716371459e-05, + "loss": 1.8822, + "step": 66840 + }, + { + "epoch": 3.05, + "learning_rate": 3.48162387294613e-05, + "loss": 1.9203, + "step": 66850 + }, + { + "epoch": 3.05, + "learning_rate": 3.4813950295208017e-05, + "loss": 1.8765, + "step": 66860 + }, + { + "epoch": 3.05, + "learning_rate": 3.481166186095474e-05, + "loss": 1.5456, + "step": 66870 + }, + { + "epoch": 3.05, + "learning_rate": 3.480937342670145e-05, + "loss": 1.8903, + "step": 66880 + }, + { + "epoch": 3.05, + "learning_rate": 3.4807084992448166e-05, + "loss": 1.7233, + "step": 66890 + }, + { + "epoch": 3.05, + "learning_rate": 3.480479655819489e-05, + "loss": 1.9166, + "step": 66900 + }, + { + "epoch": 3.06, + "learning_rate": 3.48025081239416e-05, + "loss": 1.8762, + "step": 66910 + }, + { + "epoch": 3.06, + "learning_rate": 3.4800219689688315e-05, + "loss": 1.7883, + "step": 66920 + }, + { + "epoch": 3.06, + "learning_rate": 3.4797931255435036e-05, + "loss": 1.9336, + "step": 66930 + }, + { + "epoch": 3.06, + "learning_rate": 3.479564282118175e-05, + "loss": 1.8858, + "step": 66940 + }, + { + "epoch": 3.06, + "learning_rate": 3.4793354386928464e-05, + "loss": 1.7053, + "step": 66950 + }, + { + "epoch": 3.06, + "learning_rate": 3.4791065952675185e-05, + "loss": 1.8817, + "step": 66960 + }, + { + "epoch": 3.06, + "learning_rate": 3.47887775184219e-05, + "loss": 1.8366, + "step": 66970 + }, + { + "epoch": 3.06, + "learning_rate": 3.478648908416861e-05, + "loss": 1.5362, + "step": 66980 + }, + { + "epoch": 3.06, + "learning_rate": 3.478420064991533e-05, + "loss": 1.785, + "step": 66990 + }, + { + "epoch": 3.06, + "learning_rate": 3.478191221566204e-05, + "loss": 1.8607, + "step": 67000 + }, + { + "epoch": 3.06, + "learning_rate": 3.477962378140876e-05, + "loss": 1.8532, + "step": 67010 + }, + { + "epoch": 3.06, + "learning_rate": 3.4777335347155477e-05, + "loss": 1.6712, + "step": 67020 + }, + { + "epoch": 3.06, + "learning_rate": 3.477504691290219e-05, + "loss": 1.9312, + "step": 67030 + }, + { + "epoch": 3.06, + "learning_rate": 3.477275847864891e-05, + "loss": 1.8828, + "step": 67040 + }, + { + "epoch": 3.06, + "learning_rate": 3.4770470044395626e-05, + "loss": 1.7811, + "step": 67050 + }, + { + "epoch": 3.06, + "learning_rate": 3.476818161014234e-05, + "loss": 1.8132, + "step": 67060 + }, + { + "epoch": 3.06, + "learning_rate": 3.476589317588906e-05, + "loss": 1.6969, + "step": 67070 + }, + { + "epoch": 3.06, + "learning_rate": 3.4763604741635775e-05, + "loss": 1.8673, + "step": 67080 + }, + { + "epoch": 3.06, + "learning_rate": 3.476131630738249e-05, + "loss": 1.912, + "step": 67090 + }, + { + "epoch": 3.06, + "learning_rate": 3.475902787312921e-05, + "loss": 1.8126, + "step": 67100 + }, + { + "epoch": 3.06, + "learning_rate": 3.4756739438875924e-05, + "loss": 1.8136, + "step": 67110 + }, + { + "epoch": 3.06, + "learning_rate": 3.475445100462264e-05, + "loss": 1.7597, + "step": 67120 + }, + { + "epoch": 3.07, + "learning_rate": 3.475216257036936e-05, + "loss": 1.8651, + "step": 67130 + }, + { + "epoch": 3.07, + "learning_rate": 3.474987413611607e-05, + "loss": 1.7527, + "step": 67140 + }, + { + "epoch": 3.07, + "learning_rate": 3.474758570186279e-05, + "loss": 1.7671, + "step": 67150 + }, + { + "epoch": 3.07, + "learning_rate": 3.474529726760951e-05, + "loss": 1.8312, + "step": 67160 + }, + { + "epoch": 3.07, + "learning_rate": 3.474300883335622e-05, + "loss": 1.7418, + "step": 67170 + }, + { + "epoch": 3.07, + "learning_rate": 3.474072039910293e-05, + "loss": 1.8404, + "step": 67180 + }, + { + "epoch": 3.07, + "learning_rate": 3.473843196484965e-05, + "loss": 1.8354, + "step": 67190 + }, + { + "epoch": 3.07, + "learning_rate": 3.4736143530596365e-05, + "loss": 1.7296, + "step": 67200 + }, + { + "epoch": 3.07, + "learning_rate": 3.473385509634308e-05, + "loss": 1.7168, + "step": 67210 + }, + { + "epoch": 3.07, + "learning_rate": 3.47315666620898e-05, + "loss": 1.8603, + "step": 67220 + }, + { + "epoch": 3.07, + "learning_rate": 3.4729278227836514e-05, + "loss": 1.6753, + "step": 67230 + }, + { + "epoch": 3.07, + "learning_rate": 3.472698979358323e-05, + "loss": 1.7897, + "step": 67240 + }, + { + "epoch": 3.07, + "learning_rate": 3.472470135932995e-05, + "loss": 1.8106, + "step": 67250 + }, + { + "epoch": 3.07, + "learning_rate": 3.472241292507666e-05, + "loss": 1.9712, + "step": 67260 + }, + { + "epoch": 3.07, + "learning_rate": 3.472012449082338e-05, + "loss": 1.885, + "step": 67270 + }, + { + "epoch": 3.07, + "learning_rate": 3.47178360565701e-05, + "loss": 1.9207, + "step": 67280 + }, + { + "epoch": 3.07, + "learning_rate": 3.471554762231681e-05, + "loss": 1.8367, + "step": 67290 + }, + { + "epoch": 3.07, + "learning_rate": 3.4713259188063527e-05, + "loss": 1.8879, + "step": 67300 + }, + { + "epoch": 3.07, + "learning_rate": 3.471097075381025e-05, + "loss": 1.7377, + "step": 67310 + }, + { + "epoch": 3.07, + "learning_rate": 3.470868231955696e-05, + "loss": 1.8643, + "step": 67320 + }, + { + "epoch": 3.07, + "learning_rate": 3.4706393885303676e-05, + "loss": 1.6409, + "step": 67330 + }, + { + "epoch": 3.07, + "learning_rate": 3.47041054510504e-05, + "loss": 1.8309, + "step": 67340 + }, + { + "epoch": 3.08, + "learning_rate": 3.470181701679711e-05, + "loss": 1.8875, + "step": 67350 + }, + { + "epoch": 3.08, + "learning_rate": 3.4699528582543825e-05, + "loss": 1.6818, + "step": 67360 + }, + { + "epoch": 3.08, + "learning_rate": 3.4697240148290546e-05, + "loss": 1.8811, + "step": 67370 + }, + { + "epoch": 3.08, + "learning_rate": 3.469495171403725e-05, + "loss": 1.7838, + "step": 67380 + }, + { + "epoch": 3.08, + "learning_rate": 3.4692663279783974e-05, + "loss": 1.7223, + "step": 67390 + }, + { + "epoch": 3.08, + "learning_rate": 3.469037484553069e-05, + "loss": 1.7655, + "step": 67400 + }, + { + "epoch": 3.08, + "learning_rate": 3.46880864112774e-05, + "loss": 1.746, + "step": 67410 + }, + { + "epoch": 3.08, + "learning_rate": 3.468579797702412e-05, + "loss": 1.7604, + "step": 67420 + }, + { + "epoch": 3.08, + "learning_rate": 3.468350954277084e-05, + "loss": 1.8263, + "step": 67430 + }, + { + "epoch": 3.08, + "learning_rate": 3.468122110851755e-05, + "loss": 1.8382, + "step": 67440 + }, + { + "epoch": 3.08, + "learning_rate": 3.467893267426427e-05, + "loss": 1.7845, + "step": 67450 + }, + { + "epoch": 3.08, + "learning_rate": 3.4676644240010987e-05, + "loss": 1.7753, + "step": 67460 + }, + { + "epoch": 3.08, + "learning_rate": 3.46743558057577e-05, + "loss": 1.7773, + "step": 67470 + }, + { + "epoch": 3.08, + "learning_rate": 3.467206737150442e-05, + "loss": 1.9175, + "step": 67480 + }, + { + "epoch": 3.08, + "learning_rate": 3.4669778937251136e-05, + "loss": 1.8634, + "step": 67490 + }, + { + "epoch": 3.08, + "learning_rate": 3.466749050299785e-05, + "loss": 1.8878, + "step": 67500 + }, + { + "epoch": 3.08, + "learning_rate": 3.466520206874457e-05, + "loss": 1.7003, + "step": 67510 + }, + { + "epoch": 3.08, + "learning_rate": 3.4662913634491285e-05, + "loss": 1.8268, + "step": 67520 + }, + { + "epoch": 3.08, + "learning_rate": 3.4660625200238e-05, + "loss": 1.8576, + "step": 67530 + }, + { + "epoch": 3.08, + "learning_rate": 3.465833676598472e-05, + "loss": 1.8893, + "step": 67540 + }, + { + "epoch": 3.08, + "learning_rate": 3.4656048331731434e-05, + "loss": 1.8668, + "step": 67550 + }, + { + "epoch": 3.08, + "learning_rate": 3.465375989747815e-05, + "loss": 1.8605, + "step": 67560 + }, + { + "epoch": 3.09, + "learning_rate": 3.465147146322486e-05, + "loss": 1.821, + "step": 67570 + }, + { + "epoch": 3.09, + "learning_rate": 3.4649183028971577e-05, + "loss": 1.6062, + "step": 67580 + }, + { + "epoch": 3.09, + "learning_rate": 3.464689459471829e-05, + "loss": 1.8769, + "step": 67590 + }, + { + "epoch": 3.09, + "learning_rate": 3.464460616046501e-05, + "loss": 1.945, + "step": 67600 + }, + { + "epoch": 3.09, + "learning_rate": 3.4642317726211726e-05, + "loss": 1.82, + "step": 67610 + }, + { + "epoch": 3.09, + "learning_rate": 3.464002929195844e-05, + "loss": 1.7248, + "step": 67620 + }, + { + "epoch": 3.09, + "learning_rate": 3.463774085770516e-05, + "loss": 1.8834, + "step": 67630 + }, + { + "epoch": 3.09, + "learning_rate": 3.4635452423451875e-05, + "loss": 1.7171, + "step": 67640 + }, + { + "epoch": 3.09, + "learning_rate": 3.463316398919859e-05, + "loss": 1.776, + "step": 67650 + }, + { + "epoch": 3.09, + "learning_rate": 3.463087555494531e-05, + "loss": 1.7368, + "step": 67660 + }, + { + "epoch": 3.09, + "learning_rate": 3.4628587120692024e-05, + "loss": 1.6574, + "step": 67670 + }, + { + "epoch": 3.09, + "learning_rate": 3.462629868643874e-05, + "loss": 1.6888, + "step": 67680 + }, + { + "epoch": 3.09, + "learning_rate": 3.462401025218546e-05, + "loss": 1.6128, + "step": 67690 + }, + { + "epoch": 3.09, + "learning_rate": 3.462172181793217e-05, + "loss": 1.7671, + "step": 67700 + }, + { + "epoch": 3.09, + "learning_rate": 3.461943338367889e-05, + "loss": 1.7726, + "step": 67710 + }, + { + "epoch": 3.09, + "learning_rate": 3.461714494942561e-05, + "loss": 1.6521, + "step": 67720 + }, + { + "epoch": 3.09, + "learning_rate": 3.461485651517232e-05, + "loss": 1.6447, + "step": 67730 + }, + { + "epoch": 3.09, + "learning_rate": 3.4612568080919037e-05, + "loss": 1.8365, + "step": 67740 + }, + { + "epoch": 3.09, + "learning_rate": 3.461027964666576e-05, + "loss": 1.7209, + "step": 67750 + }, + { + "epoch": 3.09, + "learning_rate": 3.460799121241247e-05, + "loss": 1.8324, + "step": 67760 + }, + { + "epoch": 3.09, + "learning_rate": 3.4605702778159186e-05, + "loss": 1.6994, + "step": 67770 + }, + { + "epoch": 3.1, + "learning_rate": 3.46034143439059e-05, + "loss": 1.7332, + "step": 67780 + }, + { + "epoch": 3.1, + "learning_rate": 3.4601125909652614e-05, + "loss": 1.9242, + "step": 67790 + }, + { + "epoch": 3.1, + "learning_rate": 3.4598837475399335e-05, + "loss": 1.843, + "step": 67800 + }, + { + "epoch": 3.1, + "learning_rate": 3.459654904114605e-05, + "loss": 1.5988, + "step": 67810 + }, + { + "epoch": 3.1, + "learning_rate": 3.459426060689276e-05, + "loss": 1.8774, + "step": 67820 + }, + { + "epoch": 3.1, + "learning_rate": 3.4591972172639484e-05, + "loss": 1.8034, + "step": 67830 + }, + { + "epoch": 3.1, + "learning_rate": 3.45896837383862e-05, + "loss": 1.8662, + "step": 67840 + }, + { + "epoch": 3.1, + "learning_rate": 3.458739530413291e-05, + "loss": 1.7466, + "step": 67850 + }, + { + "epoch": 3.1, + "learning_rate": 3.458510686987963e-05, + "loss": 1.8537, + "step": 67860 + }, + { + "epoch": 3.1, + "learning_rate": 3.458281843562635e-05, + "loss": 1.8736, + "step": 67870 + }, + { + "epoch": 3.1, + "learning_rate": 3.458053000137306e-05, + "loss": 1.7332, + "step": 67880 + }, + { + "epoch": 3.1, + "learning_rate": 3.457824156711978e-05, + "loss": 1.618, + "step": 67890 + }, + { + "epoch": 3.1, + "learning_rate": 3.4575953132866497e-05, + "loss": 1.7266, + "step": 67900 + }, + { + "epoch": 3.1, + "learning_rate": 3.457366469861321e-05, + "loss": 1.9774, + "step": 67910 + }, + { + "epoch": 3.1, + "learning_rate": 3.4571376264359925e-05, + "loss": 1.8151, + "step": 67920 + }, + { + "epoch": 3.1, + "learning_rate": 3.4569087830106646e-05, + "loss": 1.9302, + "step": 67930 + }, + { + "epoch": 3.1, + "learning_rate": 3.456679939585336e-05, + "loss": 1.6073, + "step": 67940 + }, + { + "epoch": 3.1, + "learning_rate": 3.4564510961600074e-05, + "loss": 1.7988, + "step": 67950 + }, + { + "epoch": 3.1, + "learning_rate": 3.4562222527346795e-05, + "loss": 1.7984, + "step": 67960 + }, + { + "epoch": 3.1, + "learning_rate": 3.45599340930935e-05, + "loss": 1.7269, + "step": 67970 + }, + { + "epoch": 3.1, + "learning_rate": 3.455764565884022e-05, + "loss": 1.7001, + "step": 67980 + }, + { + "epoch": 3.1, + "learning_rate": 3.455535722458694e-05, + "loss": 1.6695, + "step": 67990 + }, + { + "epoch": 3.11, + "learning_rate": 3.455306879033365e-05, + "loss": 1.8122, + "step": 68000 + }, + { + "epoch": 3.11, + "learning_rate": 3.455078035608037e-05, + "loss": 1.714, + "step": 68010 + }, + { + "epoch": 3.11, + "learning_rate": 3.4548491921827086e-05, + "loss": 1.6645, + "step": 68020 + }, + { + "epoch": 3.11, + "learning_rate": 3.45462034875738e-05, + "loss": 1.78, + "step": 68030 + }, + { + "epoch": 3.11, + "learning_rate": 3.454391505332052e-05, + "loss": 1.9496, + "step": 68040 + }, + { + "epoch": 3.11, + "learning_rate": 3.4541626619067236e-05, + "loss": 2.0216, + "step": 68050 + }, + { + "epoch": 3.11, + "learning_rate": 3.453933818481395e-05, + "loss": 1.84, + "step": 68060 + }, + { + "epoch": 3.11, + "learning_rate": 3.453704975056067e-05, + "loss": 1.5849, + "step": 68070 + }, + { + "epoch": 3.11, + "learning_rate": 3.4534761316307385e-05, + "loss": 1.8691, + "step": 68080 + }, + { + "epoch": 3.11, + "learning_rate": 3.45324728820541e-05, + "loss": 1.8493, + "step": 68090 + }, + { + "epoch": 3.11, + "learning_rate": 3.453018444780082e-05, + "loss": 1.8456, + "step": 68100 + }, + { + "epoch": 3.11, + "learning_rate": 3.4527896013547534e-05, + "loss": 1.6818, + "step": 68110 + }, + { + "epoch": 3.11, + "learning_rate": 3.452560757929425e-05, + "loss": 1.8739, + "step": 68120 + }, + { + "epoch": 3.11, + "learning_rate": 3.452331914504097e-05, + "loss": 1.7854, + "step": 68130 + }, + { + "epoch": 3.11, + "learning_rate": 3.452103071078768e-05, + "loss": 1.7761, + "step": 68140 + }, + { + "epoch": 3.11, + "learning_rate": 3.45187422765344e-05, + "loss": 1.8528, + "step": 68150 + }, + { + "epoch": 3.11, + "learning_rate": 3.451645384228112e-05, + "loss": 1.7587, + "step": 68160 + }, + { + "epoch": 3.11, + "learning_rate": 3.4514165408027826e-05, + "loss": 1.7646, + "step": 68170 + }, + { + "epoch": 3.11, + "learning_rate": 3.451187697377454e-05, + "loss": 1.973, + "step": 68180 + }, + { + "epoch": 3.11, + "learning_rate": 3.450958853952126e-05, + "loss": 1.851, + "step": 68190 + }, + { + "epoch": 3.11, + "learning_rate": 3.4507300105267975e-05, + "loss": 1.903, + "step": 68200 + }, + { + "epoch": 3.11, + "learning_rate": 3.450501167101469e-05, + "loss": 1.7386, + "step": 68210 + }, + { + "epoch": 3.12, + "learning_rate": 3.450272323676141e-05, + "loss": 1.7277, + "step": 68220 + }, + { + "epoch": 3.12, + "learning_rate": 3.4500434802508124e-05, + "loss": 1.7496, + "step": 68230 + }, + { + "epoch": 3.12, + "learning_rate": 3.449814636825484e-05, + "loss": 1.7091, + "step": 68240 + }, + { + "epoch": 3.12, + "learning_rate": 3.449585793400156e-05, + "loss": 1.8363, + "step": 68250 + }, + { + "epoch": 3.12, + "learning_rate": 3.449356949974827e-05, + "loss": 1.7107, + "step": 68260 + }, + { + "epoch": 3.12, + "learning_rate": 3.449128106549499e-05, + "loss": 1.8497, + "step": 68270 + }, + { + "epoch": 3.12, + "learning_rate": 3.448899263124171e-05, + "loss": 1.8232, + "step": 68280 + }, + { + "epoch": 3.12, + "learning_rate": 3.448670419698842e-05, + "loss": 1.7486, + "step": 68290 + }, + { + "epoch": 3.12, + "learning_rate": 3.4484415762735136e-05, + "loss": 1.7461, + "step": 68300 + }, + { + "epoch": 3.12, + "learning_rate": 3.448212732848186e-05, + "loss": 1.845, + "step": 68310 + }, + { + "epoch": 3.12, + "learning_rate": 3.447983889422857e-05, + "loss": 1.9282, + "step": 68320 + }, + { + "epoch": 3.12, + "learning_rate": 3.4477550459975286e-05, + "loss": 1.7978, + "step": 68330 + }, + { + "epoch": 3.12, + "learning_rate": 3.4475262025722007e-05, + "loss": 1.7619, + "step": 68340 + }, + { + "epoch": 3.12, + "learning_rate": 3.447297359146872e-05, + "loss": 1.7569, + "step": 68350 + }, + { + "epoch": 3.12, + "learning_rate": 3.4470685157215435e-05, + "loss": 1.7746, + "step": 68360 + }, + { + "epoch": 3.12, + "learning_rate": 3.446839672296215e-05, + "loss": 1.8847, + "step": 68370 + }, + { + "epoch": 3.12, + "learning_rate": 3.446610828870886e-05, + "loss": 1.7351, + "step": 68380 + }, + { + "epoch": 3.12, + "learning_rate": 3.4463819854455584e-05, + "loss": 1.5907, + "step": 68390 + }, + { + "epoch": 3.12, + "learning_rate": 3.44615314202023e-05, + "loss": 1.638, + "step": 68400 + }, + { + "epoch": 3.12, + "learning_rate": 3.445924298594901e-05, + "loss": 1.6442, + "step": 68410 + }, + { + "epoch": 3.12, + "learning_rate": 3.445695455169573e-05, + "loss": 1.7777, + "step": 68420 + }, + { + "epoch": 3.12, + "learning_rate": 3.445466611744245e-05, + "loss": 1.9267, + "step": 68430 + }, + { + "epoch": 3.13, + "learning_rate": 3.445237768318916e-05, + "loss": 1.7699, + "step": 68440 + }, + { + "epoch": 3.13, + "learning_rate": 3.445008924893588e-05, + "loss": 1.8349, + "step": 68450 + }, + { + "epoch": 3.13, + "learning_rate": 3.4447800814682596e-05, + "loss": 1.7343, + "step": 68460 + }, + { + "epoch": 3.13, + "learning_rate": 3.444551238042931e-05, + "loss": 1.9381, + "step": 68470 + }, + { + "epoch": 3.13, + "learning_rate": 3.444322394617603e-05, + "loss": 1.7458, + "step": 68480 + }, + { + "epoch": 3.13, + "learning_rate": 3.4440935511922746e-05, + "loss": 1.713, + "step": 68490 + }, + { + "epoch": 3.13, + "learning_rate": 3.443864707766946e-05, + "loss": 1.8782, + "step": 68500 + }, + { + "epoch": 3.13, + "learning_rate": 3.443635864341618e-05, + "loss": 1.8424, + "step": 68510 + }, + { + "epoch": 3.13, + "learning_rate": 3.4434070209162895e-05, + "loss": 1.7294, + "step": 68520 + }, + { + "epoch": 3.13, + "learning_rate": 3.443178177490961e-05, + "loss": 1.7328, + "step": 68530 + }, + { + "epoch": 3.13, + "learning_rate": 3.442949334065633e-05, + "loss": 1.7296, + "step": 68540 + }, + { + "epoch": 3.13, + "learning_rate": 3.4427204906403044e-05, + "loss": 1.7063, + "step": 68550 + }, + { + "epoch": 3.13, + "learning_rate": 3.442491647214975e-05, + "loss": 1.9927, + "step": 68560 + }, + { + "epoch": 3.13, + "learning_rate": 3.442262803789647e-05, + "loss": 1.9218, + "step": 68570 + }, + { + "epoch": 3.13, + "learning_rate": 3.4420339603643186e-05, + "loss": 1.7455, + "step": 68580 + }, + { + "epoch": 3.13, + "learning_rate": 3.44180511693899e-05, + "loss": 1.5765, + "step": 68590 + }, + { + "epoch": 3.13, + "learning_rate": 3.441576273513662e-05, + "loss": 1.6608, + "step": 68600 + }, + { + "epoch": 3.13, + "learning_rate": 3.4413474300883336e-05, + "loss": 1.5993, + "step": 68610 + }, + { + "epoch": 3.13, + "learning_rate": 3.441118586663005e-05, + "loss": 1.8218, + "step": 68620 + }, + { + "epoch": 3.13, + "learning_rate": 3.440889743237677e-05, + "loss": 1.7492, + "step": 68630 + }, + { + "epoch": 3.13, + "learning_rate": 3.4406608998123485e-05, + "loss": 1.7772, + "step": 68640 + }, + { + "epoch": 3.13, + "learning_rate": 3.44043205638702e-05, + "loss": 1.9091, + "step": 68650 + }, + { + "epoch": 3.14, + "learning_rate": 3.440203212961692e-05, + "loss": 1.738, + "step": 68660 + }, + { + "epoch": 3.14, + "learning_rate": 3.4399743695363634e-05, + "loss": 1.7664, + "step": 68670 + }, + { + "epoch": 3.14, + "learning_rate": 3.439745526111035e-05, + "loss": 1.805, + "step": 68680 + }, + { + "epoch": 3.14, + "learning_rate": 3.439516682685707e-05, + "loss": 1.4438, + "step": 68690 + }, + { + "epoch": 3.14, + "learning_rate": 3.439287839260378e-05, + "loss": 1.8538, + "step": 68700 + }, + { + "epoch": 3.14, + "learning_rate": 3.43905899583505e-05, + "loss": 1.7678, + "step": 68710 + }, + { + "epoch": 3.14, + "learning_rate": 3.438830152409722e-05, + "loss": 1.7042, + "step": 68720 + }, + { + "epoch": 3.14, + "learning_rate": 3.438601308984393e-05, + "loss": 1.8081, + "step": 68730 + }, + { + "epoch": 3.14, + "learning_rate": 3.4383724655590646e-05, + "loss": 1.75, + "step": 68740 + }, + { + "epoch": 3.14, + "learning_rate": 3.438143622133737e-05, + "loss": 1.823, + "step": 68750 + }, + { + "epoch": 3.14, + "learning_rate": 3.4379147787084075e-05, + "loss": 1.6547, + "step": 68760 + }, + { + "epoch": 3.14, + "learning_rate": 3.4376859352830796e-05, + "loss": 1.8295, + "step": 68770 + }, + { + "epoch": 3.14, + "learning_rate": 3.437457091857751e-05, + "loss": 1.8547, + "step": 68780 + }, + { + "epoch": 3.14, + "learning_rate": 3.4372282484324224e-05, + "loss": 1.7879, + "step": 68790 + }, + { + "epoch": 3.14, + "learning_rate": 3.4369994050070945e-05, + "loss": 1.654, + "step": 68800 + }, + { + "epoch": 3.14, + "learning_rate": 3.436770561581766e-05, + "loss": 1.7949, + "step": 68810 + }, + { + "epoch": 3.14, + "learning_rate": 3.436541718156437e-05, + "loss": 1.6431, + "step": 68820 + }, + { + "epoch": 3.14, + "learning_rate": 3.4363128747311094e-05, + "loss": 1.7206, + "step": 68830 + }, + { + "epoch": 3.14, + "learning_rate": 3.436084031305781e-05, + "loss": 1.6454, + "step": 68840 + }, + { + "epoch": 3.14, + "learning_rate": 3.435855187880452e-05, + "loss": 1.7674, + "step": 68850 + }, + { + "epoch": 3.14, + "learning_rate": 3.435626344455124e-05, + "loss": 1.8448, + "step": 68860 + }, + { + "epoch": 3.14, + "learning_rate": 3.435397501029796e-05, + "loss": 1.6755, + "step": 68870 + }, + { + "epoch": 3.15, + "learning_rate": 3.435168657604467e-05, + "loss": 1.5661, + "step": 68880 + }, + { + "epoch": 3.15, + "learning_rate": 3.434939814179139e-05, + "loss": 1.6262, + "step": 68890 + }, + { + "epoch": 3.15, + "learning_rate": 3.4347109707538106e-05, + "loss": 1.6667, + "step": 68900 + }, + { + "epoch": 3.15, + "learning_rate": 3.434482127328482e-05, + "loss": 1.8109, + "step": 68910 + }, + { + "epoch": 3.15, + "learning_rate": 3.434253283903154e-05, + "loss": 1.6388, + "step": 68920 + }, + { + "epoch": 3.15, + "learning_rate": 3.4340244404778256e-05, + "loss": 1.7376, + "step": 68930 + }, + { + "epoch": 3.15, + "learning_rate": 3.433795597052497e-05, + "loss": 1.8557, + "step": 68940 + }, + { + "epoch": 3.15, + "learning_rate": 3.433566753627169e-05, + "loss": 1.7879, + "step": 68950 + }, + { + "epoch": 3.15, + "learning_rate": 3.43333791020184e-05, + "loss": 1.6956, + "step": 68960 + }, + { + "epoch": 3.15, + "learning_rate": 3.433109066776511e-05, + "loss": 1.7786, + "step": 68970 + }, + { + "epoch": 3.15, + "learning_rate": 3.432880223351183e-05, + "loss": 1.7331, + "step": 68980 + }, + { + "epoch": 3.15, + "learning_rate": 3.432651379925855e-05, + "loss": 2.0673, + "step": 68990 + }, + { + "epoch": 3.15, + "learning_rate": 3.432422536500526e-05, + "loss": 1.7565, + "step": 69000 + }, + { + "epoch": 3.15, + "learning_rate": 3.432193693075198e-05, + "loss": 1.7224, + "step": 69010 + }, + { + "epoch": 3.15, + "learning_rate": 3.4319648496498696e-05, + "loss": 1.6618, + "step": 69020 + }, + { + "epoch": 3.15, + "learning_rate": 3.431736006224541e-05, + "loss": 1.8148, + "step": 69030 + }, + { + "epoch": 3.15, + "learning_rate": 3.431507162799213e-05, + "loss": 1.8164, + "step": 69040 + }, + { + "epoch": 3.15, + "learning_rate": 3.4312783193738846e-05, + "loss": 1.7092, + "step": 69050 + }, + { + "epoch": 3.15, + "learning_rate": 3.431049475948556e-05, + "loss": 1.9989, + "step": 69060 + }, + { + "epoch": 3.15, + "learning_rate": 3.430820632523228e-05, + "loss": 1.8269, + "step": 69070 + }, + { + "epoch": 3.15, + "learning_rate": 3.4305917890978995e-05, + "loss": 1.7818, + "step": 69080 + }, + { + "epoch": 3.15, + "learning_rate": 3.430362945672571e-05, + "loss": 1.7345, + "step": 69090 + }, + { + "epoch": 3.16, + "learning_rate": 3.430134102247243e-05, + "loss": 1.7732, + "step": 69100 + }, + { + "epoch": 3.16, + "learning_rate": 3.4299052588219144e-05, + "loss": 1.9845, + "step": 69110 + }, + { + "epoch": 3.16, + "learning_rate": 3.429676415396586e-05, + "loss": 1.8979, + "step": 69120 + }, + { + "epoch": 3.16, + "learning_rate": 3.429447571971258e-05, + "loss": 1.5787, + "step": 69130 + }, + { + "epoch": 3.16, + "learning_rate": 3.429218728545929e-05, + "loss": 1.7492, + "step": 69140 + }, + { + "epoch": 3.16, + "learning_rate": 3.4289898851206e-05, + "loss": 1.7293, + "step": 69150 + }, + { + "epoch": 3.16, + "learning_rate": 3.428761041695272e-05, + "loss": 1.8577, + "step": 69160 + }, + { + "epoch": 3.16, + "learning_rate": 3.4285321982699435e-05, + "loss": 1.7158, + "step": 69170 + }, + { + "epoch": 3.16, + "learning_rate": 3.428303354844615e-05, + "loss": 1.6124, + "step": 69180 + }, + { + "epoch": 3.16, + "learning_rate": 3.428074511419287e-05, + "loss": 1.6171, + "step": 69190 + }, + { + "epoch": 3.16, + "learning_rate": 3.4278456679939585e-05, + "loss": 1.7737, + "step": 69200 + }, + { + "epoch": 3.16, + "learning_rate": 3.42761682456863e-05, + "loss": 1.8593, + "step": 69210 + }, + { + "epoch": 3.16, + "learning_rate": 3.427387981143302e-05, + "loss": 1.7622, + "step": 69220 + }, + { + "epoch": 3.16, + "learning_rate": 3.4271591377179734e-05, + "loss": 1.7793, + "step": 69230 + }, + { + "epoch": 3.16, + "learning_rate": 3.426930294292645e-05, + "loss": 1.7763, + "step": 69240 + }, + { + "epoch": 3.16, + "learning_rate": 3.426701450867317e-05, + "loss": 1.6456, + "step": 69250 + }, + { + "epoch": 3.16, + "learning_rate": 3.426472607441988e-05, + "loss": 1.8166, + "step": 69260 + }, + { + "epoch": 3.16, + "learning_rate": 3.42624376401666e-05, + "loss": 1.7364, + "step": 69270 + }, + { + "epoch": 3.16, + "learning_rate": 3.426014920591332e-05, + "loss": 1.6167, + "step": 69280 + }, + { + "epoch": 3.16, + "learning_rate": 3.425786077166003e-05, + "loss": 1.6206, + "step": 69290 + }, + { + "epoch": 3.16, + "learning_rate": 3.4255572337406746e-05, + "loss": 1.9497, + "step": 69300 + }, + { + "epoch": 3.16, + "learning_rate": 3.425328390315347e-05, + "loss": 1.6494, + "step": 69310 + }, + { + "epoch": 3.17, + "learning_rate": 3.425099546890018e-05, + "loss": 2.0014, + "step": 69320 + }, + { + "epoch": 3.17, + "learning_rate": 3.4248707034646895e-05, + "loss": 1.7228, + "step": 69330 + }, + { + "epoch": 3.17, + "learning_rate": 3.4246418600393616e-05, + "loss": 1.6583, + "step": 69340 + }, + { + "epoch": 3.17, + "learning_rate": 3.4244130166140324e-05, + "loss": 1.6359, + "step": 69350 + }, + { + "epoch": 3.17, + "learning_rate": 3.4241841731887045e-05, + "loss": 1.8471, + "step": 69360 + }, + { + "epoch": 3.17, + "learning_rate": 3.423955329763376e-05, + "loss": 1.803, + "step": 69370 + }, + { + "epoch": 3.17, + "learning_rate": 3.423726486338047e-05, + "loss": 1.697, + "step": 69380 + }, + { + "epoch": 3.17, + "learning_rate": 3.4234976429127194e-05, + "loss": 1.9739, + "step": 69390 + }, + { + "epoch": 3.17, + "learning_rate": 3.423268799487391e-05, + "loss": 1.6234, + "step": 69400 + }, + { + "epoch": 3.17, + "learning_rate": 3.423039956062062e-05, + "loss": 1.7986, + "step": 69410 + }, + { + "epoch": 3.17, + "learning_rate": 3.422811112636734e-05, + "loss": 1.7443, + "step": 69420 + }, + { + "epoch": 3.17, + "learning_rate": 3.422582269211406e-05, + "loss": 1.8075, + "step": 69430 + }, + { + "epoch": 3.17, + "learning_rate": 3.422353425786077e-05, + "loss": 1.7675, + "step": 69440 + }, + { + "epoch": 3.17, + "learning_rate": 3.422124582360749e-05, + "loss": 1.699, + "step": 69450 + }, + { + "epoch": 3.17, + "learning_rate": 3.4218957389354206e-05, + "loss": 1.6489, + "step": 69460 + }, + { + "epoch": 3.17, + "learning_rate": 3.421666895510092e-05, + "loss": 1.7365, + "step": 69470 + }, + { + "epoch": 3.17, + "learning_rate": 3.421438052084764e-05, + "loss": 1.8384, + "step": 69480 + }, + { + "epoch": 3.17, + "learning_rate": 3.4212092086594355e-05, + "loss": 1.7131, + "step": 69490 + }, + { + "epoch": 3.17, + "learning_rate": 3.420980365234107e-05, + "loss": 1.6463, + "step": 69500 + }, + { + "epoch": 3.17, + "learning_rate": 3.420751521808779e-05, + "loss": 1.8019, + "step": 69510 + }, + { + "epoch": 3.17, + "learning_rate": 3.4205226783834505e-05, + "loss": 1.7793, + "step": 69520 + }, + { + "epoch": 3.17, + "learning_rate": 3.420293834958122e-05, + "loss": 1.6761, + "step": 69530 + }, + { + "epoch": 3.18, + "learning_rate": 3.420064991532794e-05, + "loss": 1.7003, + "step": 69540 + }, + { + "epoch": 3.18, + "learning_rate": 3.419836148107465e-05, + "loss": 1.7302, + "step": 69550 + }, + { + "epoch": 3.18, + "learning_rate": 3.419607304682136e-05, + "loss": 1.7575, + "step": 69560 + }, + { + "epoch": 3.18, + "learning_rate": 3.419378461256808e-05, + "loss": 1.6915, + "step": 69570 + }, + { + "epoch": 3.18, + "learning_rate": 3.4191496178314796e-05, + "loss": 1.7202, + "step": 69580 + }, + { + "epoch": 3.18, + "learning_rate": 3.418920774406151e-05, + "loss": 1.8012, + "step": 69590 + }, + { + "epoch": 3.18, + "learning_rate": 3.418691930980823e-05, + "loss": 1.6556, + "step": 69600 + }, + { + "epoch": 3.18, + "learning_rate": 3.4184630875554945e-05, + "loss": 1.7071, + "step": 69610 + }, + { + "epoch": 3.18, + "learning_rate": 3.418234244130166e-05, + "loss": 1.637, + "step": 69620 + }, + { + "epoch": 3.18, + "learning_rate": 3.418005400704838e-05, + "loss": 1.5121, + "step": 69630 + }, + { + "epoch": 3.18, + "learning_rate": 3.4177765572795095e-05, + "loss": 1.6649, + "step": 69640 + }, + { + "epoch": 3.18, + "learning_rate": 3.417547713854181e-05, + "loss": 1.7736, + "step": 69650 + }, + { + "epoch": 3.18, + "learning_rate": 3.417318870428853e-05, + "loss": 1.5797, + "step": 69660 + }, + { + "epoch": 3.18, + "learning_rate": 3.4170900270035244e-05, + "loss": 1.9296, + "step": 69670 + }, + { + "epoch": 3.18, + "learning_rate": 3.416861183578196e-05, + "loss": 1.839, + "step": 69680 + }, + { + "epoch": 3.18, + "learning_rate": 3.416632340152868e-05, + "loss": 1.655, + "step": 69690 + }, + { + "epoch": 3.18, + "learning_rate": 3.416403496727539e-05, + "loss": 1.8006, + "step": 69700 + }, + { + "epoch": 3.18, + "learning_rate": 3.416174653302211e-05, + "loss": 1.6733, + "step": 69710 + }, + { + "epoch": 3.18, + "learning_rate": 3.415945809876883e-05, + "loss": 1.685, + "step": 69720 + }, + { + "epoch": 3.18, + "learning_rate": 3.415716966451554e-05, + "loss": 1.8863, + "step": 69730 + }, + { + "epoch": 3.18, + "learning_rate": 3.4154881230262256e-05, + "loss": 1.6108, + "step": 69740 + }, + { + "epoch": 3.18, + "learning_rate": 3.415259279600897e-05, + "loss": 1.5868, + "step": 69750 + }, + { + "epoch": 3.19, + "learning_rate": 3.4150304361755685e-05, + "loss": 1.4357, + "step": 69760 + }, + { + "epoch": 3.19, + "learning_rate": 3.4148015927502405e-05, + "loss": 1.7466, + "step": 69770 + }, + { + "epoch": 3.19, + "learning_rate": 3.414572749324912e-05, + "loss": 1.5788, + "step": 69780 + }, + { + "epoch": 3.19, + "learning_rate": 3.4143439058995834e-05, + "loss": 1.7572, + "step": 69790 + }, + { + "epoch": 3.19, + "learning_rate": 3.4141150624742555e-05, + "loss": 1.8272, + "step": 69800 + }, + { + "epoch": 3.19, + "learning_rate": 3.413886219048927e-05, + "loss": 1.788, + "step": 69810 + }, + { + "epoch": 3.19, + "learning_rate": 3.413657375623598e-05, + "loss": 1.9066, + "step": 69820 + }, + { + "epoch": 3.19, + "learning_rate": 3.4134285321982704e-05, + "loss": 1.7627, + "step": 69830 + }, + { + "epoch": 3.19, + "learning_rate": 3.413199688772942e-05, + "loss": 1.635, + "step": 69840 + }, + { + "epoch": 3.19, + "learning_rate": 3.412970845347613e-05, + "loss": 1.8862, + "step": 69850 + }, + { + "epoch": 3.19, + "learning_rate": 3.412742001922285e-05, + "loss": 1.9245, + "step": 69860 + }, + { + "epoch": 3.19, + "learning_rate": 3.412513158496957e-05, + "loss": 1.6997, + "step": 69870 + }, + { + "epoch": 3.19, + "learning_rate": 3.412284315071628e-05, + "loss": 1.8563, + "step": 69880 + }, + { + "epoch": 3.19, + "learning_rate": 3.4120554716463e-05, + "loss": 1.675, + "step": 69890 + }, + { + "epoch": 3.19, + "learning_rate": 3.4118266282209716e-05, + "loss": 1.7596, + "step": 69900 + }, + { + "epoch": 3.19, + "learning_rate": 3.411597784795643e-05, + "loss": 1.6975, + "step": 69910 + }, + { + "epoch": 3.19, + "learning_rate": 3.411368941370315e-05, + "loss": 1.7418, + "step": 69920 + }, + { + "epoch": 3.19, + "learning_rate": 3.4111400979449865e-05, + "loss": 1.9548, + "step": 69930 + }, + { + "epoch": 3.19, + "learning_rate": 3.410911254519657e-05, + "loss": 1.6185, + "step": 69940 + }, + { + "epoch": 3.19, + "learning_rate": 3.4106824110943294e-05, + "loss": 1.7948, + "step": 69950 + }, + { + "epoch": 3.19, + "learning_rate": 3.410453567669001e-05, + "loss": 1.6046, + "step": 69960 + }, + { + "epoch": 3.2, + "learning_rate": 3.410224724243672e-05, + "loss": 1.7939, + "step": 69970 + }, + { + "epoch": 3.2, + "learning_rate": 3.409995880818344e-05, + "loss": 1.6951, + "step": 69980 + }, + { + "epoch": 3.2, + "learning_rate": 3.409767037393016e-05, + "loss": 1.634, + "step": 69990 + }, + { + "epoch": 3.2, + "learning_rate": 3.409538193967687e-05, + "loss": 1.7374, + "step": 70000 + }, + { + "epoch": 3.2, + "learning_rate": 3.409309350542359e-05, + "loss": 1.671, + "step": 70010 + }, + { + "epoch": 3.2, + "learning_rate": 3.4090805071170306e-05, + "loss": 1.5343, + "step": 70020 + }, + { + "epoch": 3.2, + "learning_rate": 3.408851663691702e-05, + "loss": 1.9255, + "step": 70030 + }, + { + "epoch": 3.2, + "learning_rate": 3.408622820266374e-05, + "loss": 1.7532, + "step": 70040 + }, + { + "epoch": 3.2, + "learning_rate": 3.4083939768410455e-05, + "loss": 1.6215, + "step": 70050 + }, + { + "epoch": 3.2, + "learning_rate": 3.408165133415717e-05, + "loss": 1.7772, + "step": 70060 + }, + { + "epoch": 3.2, + "learning_rate": 3.407936289990389e-05, + "loss": 1.8844, + "step": 70070 + }, + { + "epoch": 3.2, + "learning_rate": 3.4077074465650605e-05, + "loss": 1.7865, + "step": 70080 + }, + { + "epoch": 3.2, + "learning_rate": 3.407478603139732e-05, + "loss": 1.7616, + "step": 70090 + }, + { + "epoch": 3.2, + "learning_rate": 3.407249759714404e-05, + "loss": 1.7968, + "step": 70100 + }, + { + "epoch": 3.2, + "learning_rate": 3.4070209162890754e-05, + "loss": 1.8039, + "step": 70110 + }, + { + "epoch": 3.2, + "learning_rate": 3.406792072863747e-05, + "loss": 1.7293, + "step": 70120 + }, + { + "epoch": 3.2, + "learning_rate": 3.406563229438419e-05, + "loss": 1.9113, + "step": 70130 + }, + { + "epoch": 3.2, + "learning_rate": 3.4063343860130896e-05, + "loss": 1.6556, + "step": 70140 + }, + { + "epoch": 3.2, + "learning_rate": 3.406105542587762e-05, + "loss": 1.6795, + "step": 70150 + }, + { + "epoch": 3.2, + "learning_rate": 3.405876699162433e-05, + "loss": 1.5458, + "step": 70160 + }, + { + "epoch": 3.2, + "learning_rate": 3.4056478557371045e-05, + "loss": 1.8006, + "step": 70170 + }, + { + "epoch": 3.2, + "learning_rate": 3.4054190123117766e-05, + "loss": 1.6203, + "step": 70180 + }, + { + "epoch": 3.21, + "learning_rate": 3.405190168886448e-05, + "loss": 1.6752, + "step": 70190 + }, + { + "epoch": 3.21, + "learning_rate": 3.4049613254611194e-05, + "loss": 1.7049, + "step": 70200 + }, + { + "epoch": 3.21, + "learning_rate": 3.4047324820357915e-05, + "loss": 1.7893, + "step": 70210 + }, + { + "epoch": 3.21, + "learning_rate": 3.404503638610463e-05, + "loss": 1.6008, + "step": 70220 + }, + { + "epoch": 3.21, + "learning_rate": 3.4042747951851344e-05, + "loss": 1.7931, + "step": 70230 + }, + { + "epoch": 3.21, + "learning_rate": 3.404045951759806e-05, + "loss": 1.7247, + "step": 70240 + }, + { + "epoch": 3.21, + "learning_rate": 3.403817108334478e-05, + "loss": 1.6238, + "step": 70250 + }, + { + "epoch": 3.21, + "learning_rate": 3.403588264909149e-05, + "loss": 1.83, + "step": 70260 + }, + { + "epoch": 3.21, + "learning_rate": 3.403359421483821e-05, + "loss": 1.7096, + "step": 70270 + }, + { + "epoch": 3.21, + "learning_rate": 3.403130578058493e-05, + "loss": 1.6918, + "step": 70280 + }, + { + "epoch": 3.21, + "learning_rate": 3.402901734633164e-05, + "loss": 1.7864, + "step": 70290 + }, + { + "epoch": 3.21, + "learning_rate": 3.4026728912078356e-05, + "loss": 1.8004, + "step": 70300 + }, + { + "epoch": 3.21, + "learning_rate": 3.402444047782508e-05, + "loss": 1.9507, + "step": 70310 + }, + { + "epoch": 3.21, + "learning_rate": 3.402215204357179e-05, + "loss": 1.6949, + "step": 70320 + }, + { + "epoch": 3.21, + "learning_rate": 3.4019863609318505e-05, + "loss": 1.716, + "step": 70330 + }, + { + "epoch": 3.21, + "learning_rate": 3.401757517506522e-05, + "loss": 1.8053, + "step": 70340 + }, + { + "epoch": 3.21, + "learning_rate": 3.4015286740811934e-05, + "loss": 1.8545, + "step": 70350 + }, + { + "epoch": 3.21, + "learning_rate": 3.4012998306558655e-05, + "loss": 1.9178, + "step": 70360 + }, + { + "epoch": 3.21, + "learning_rate": 3.401070987230537e-05, + "loss": 1.8124, + "step": 70370 + }, + { + "epoch": 3.21, + "learning_rate": 3.400842143805208e-05, + "loss": 1.4884, + "step": 70380 + }, + { + "epoch": 3.21, + "learning_rate": 3.4006133003798804e-05, + "loss": 1.7417, + "step": 70390 + }, + { + "epoch": 3.21, + "learning_rate": 3.400384456954552e-05, + "loss": 1.6662, + "step": 70400 + }, + { + "epoch": 3.22, + "learning_rate": 3.400155613529223e-05, + "loss": 1.7288, + "step": 70410 + }, + { + "epoch": 3.22, + "learning_rate": 3.399926770103895e-05, + "loss": 1.743, + "step": 70420 + }, + { + "epoch": 3.22, + "learning_rate": 3.399697926678567e-05, + "loss": 1.9328, + "step": 70430 + }, + { + "epoch": 3.22, + "learning_rate": 3.399469083253238e-05, + "loss": 1.7185, + "step": 70440 + }, + { + "epoch": 3.22, + "learning_rate": 3.39924023982791e-05, + "loss": 1.8222, + "step": 70450 + }, + { + "epoch": 3.22, + "learning_rate": 3.3990113964025816e-05, + "loss": 1.6273, + "step": 70460 + }, + { + "epoch": 3.22, + "learning_rate": 3.398782552977253e-05, + "loss": 1.762, + "step": 70470 + }, + { + "epoch": 3.22, + "learning_rate": 3.398553709551925e-05, + "loss": 1.8013, + "step": 70480 + }, + { + "epoch": 3.22, + "learning_rate": 3.3983248661265965e-05, + "loss": 1.6543, + "step": 70490 + }, + { + "epoch": 3.22, + "learning_rate": 3.398096022701268e-05, + "loss": 1.7236, + "step": 70500 + }, + { + "epoch": 3.22, + "learning_rate": 3.39786717927594e-05, + "loss": 1.8145, + "step": 70510 + }, + { + "epoch": 3.22, + "learning_rate": 3.3976383358506115e-05, + "loss": 1.8038, + "step": 70520 + }, + { + "epoch": 3.22, + "learning_rate": 3.397409492425283e-05, + "loss": 1.8, + "step": 70530 + }, + { + "epoch": 3.22, + "learning_rate": 3.397180648999954e-05, + "loss": 1.8272, + "step": 70540 + }, + { + "epoch": 3.22, + "learning_rate": 3.396951805574626e-05, + "loss": 1.6069, + "step": 70550 + }, + { + "epoch": 3.22, + "learning_rate": 3.396722962149297e-05, + "loss": 1.6247, + "step": 70560 + }, + { + "epoch": 3.22, + "learning_rate": 3.396494118723969e-05, + "loss": 1.6849, + "step": 70570 + }, + { + "epoch": 3.22, + "learning_rate": 3.3962652752986406e-05, + "loss": 1.5509, + "step": 70580 + }, + { + "epoch": 3.22, + "learning_rate": 3.396036431873312e-05, + "loss": 1.7239, + "step": 70590 + }, + { + "epoch": 3.22, + "learning_rate": 3.395807588447984e-05, + "loss": 1.6991, + "step": 70600 + }, + { + "epoch": 3.22, + "learning_rate": 3.3955787450226555e-05, + "loss": 1.8279, + "step": 70610 + }, + { + "epoch": 3.22, + "learning_rate": 3.395349901597327e-05, + "loss": 1.5866, + "step": 70620 + }, + { + "epoch": 3.23, + "learning_rate": 3.395121058171999e-05, + "loss": 1.5912, + "step": 70630 + }, + { + "epoch": 3.23, + "learning_rate": 3.3948922147466704e-05, + "loss": 1.8636, + "step": 70640 + }, + { + "epoch": 3.23, + "learning_rate": 3.394663371321342e-05, + "loss": 1.7962, + "step": 70650 + }, + { + "epoch": 3.23, + "learning_rate": 3.394434527896014e-05, + "loss": 1.7646, + "step": 70660 + }, + { + "epoch": 3.23, + "learning_rate": 3.3942056844706854e-05, + "loss": 1.7311, + "step": 70670 + }, + { + "epoch": 3.23, + "learning_rate": 3.393976841045357e-05, + "loss": 1.7984, + "step": 70680 + }, + { + "epoch": 3.23, + "learning_rate": 3.393747997620029e-05, + "loss": 1.7737, + "step": 70690 + }, + { + "epoch": 3.23, + "learning_rate": 3.3935191541947e-05, + "loss": 1.7682, + "step": 70700 + }, + { + "epoch": 3.23, + "learning_rate": 3.393290310769372e-05, + "loss": 1.6015, + "step": 70710 + }, + { + "epoch": 3.23, + "learning_rate": 3.393061467344044e-05, + "loss": 1.8704, + "step": 70720 + }, + { + "epoch": 3.23, + "learning_rate": 3.3928326239187145e-05, + "loss": 1.738, + "step": 70730 + }, + { + "epoch": 3.23, + "learning_rate": 3.3926037804933866e-05, + "loss": 1.786, + "step": 70740 + }, + { + "epoch": 3.23, + "learning_rate": 3.392374937068058e-05, + "loss": 1.9052, + "step": 70750 + }, + { + "epoch": 3.23, + "learning_rate": 3.3921460936427294e-05, + "loss": 1.878, + "step": 70760 + }, + { + "epoch": 3.23, + "learning_rate": 3.3919172502174015e-05, + "loss": 1.7626, + "step": 70770 + }, + { + "epoch": 3.23, + "learning_rate": 3.391688406792073e-05, + "loss": 1.6155, + "step": 70780 + }, + { + "epoch": 3.23, + "learning_rate": 3.3914595633667444e-05, + "loss": 1.6156, + "step": 70790 + }, + { + "epoch": 3.23, + "learning_rate": 3.3912307199414164e-05, + "loss": 1.6679, + "step": 70800 + }, + { + "epoch": 3.23, + "learning_rate": 3.391001876516088e-05, + "loss": 1.7171, + "step": 70810 + }, + { + "epoch": 3.23, + "learning_rate": 3.390773033090759e-05, + "loss": 1.6262, + "step": 70820 + }, + { + "epoch": 3.23, + "learning_rate": 3.3905441896654314e-05, + "loss": 1.8246, + "step": 70830 + }, + { + "epoch": 3.23, + "learning_rate": 3.390315346240103e-05, + "loss": 1.9839, + "step": 70840 + }, + { + "epoch": 3.24, + "learning_rate": 3.390086502814774e-05, + "loss": 1.7657, + "step": 70850 + }, + { + "epoch": 3.24, + "learning_rate": 3.389857659389446e-05, + "loss": 1.7509, + "step": 70860 + }, + { + "epoch": 3.24, + "learning_rate": 3.389628815964118e-05, + "loss": 1.6982, + "step": 70870 + }, + { + "epoch": 3.24, + "learning_rate": 3.389399972538789e-05, + "loss": 1.6829, + "step": 70880 + }, + { + "epoch": 3.24, + "learning_rate": 3.389171129113461e-05, + "loss": 1.6245, + "step": 70890 + }, + { + "epoch": 3.24, + "learning_rate": 3.3889422856881326e-05, + "loss": 1.629, + "step": 70900 + }, + { + "epoch": 3.24, + "learning_rate": 3.388713442262804e-05, + "loss": 1.8104, + "step": 70910 + }, + { + "epoch": 3.24, + "learning_rate": 3.388484598837476e-05, + "loss": 1.7058, + "step": 70920 + }, + { + "epoch": 3.24, + "learning_rate": 3.388255755412147e-05, + "loss": 1.861, + "step": 70930 + }, + { + "epoch": 3.24, + "learning_rate": 3.388026911986818e-05, + "loss": 1.6086, + "step": 70940 + }, + { + "epoch": 3.24, + "learning_rate": 3.3877980685614904e-05, + "loss": 1.6816, + "step": 70950 + }, + { + "epoch": 3.24, + "learning_rate": 3.387569225136162e-05, + "loss": 1.5911, + "step": 70960 + }, + { + "epoch": 3.24, + "learning_rate": 3.387340381710833e-05, + "loss": 1.8934, + "step": 70970 + }, + { + "epoch": 3.24, + "learning_rate": 3.387111538285505e-05, + "loss": 1.6982, + "step": 70980 + }, + { + "epoch": 3.24, + "learning_rate": 3.386882694860177e-05, + "loss": 1.8171, + "step": 70990 + }, + { + "epoch": 3.24, + "learning_rate": 3.386653851434848e-05, + "loss": 1.6876, + "step": 71000 + }, + { + "epoch": 3.24, + "learning_rate": 3.38642500800952e-05, + "loss": 1.8643, + "step": 71010 + }, + { + "epoch": 3.24, + "learning_rate": 3.3861961645841916e-05, + "loss": 1.8149, + "step": 71020 + }, + { + "epoch": 3.24, + "learning_rate": 3.385967321158863e-05, + "loss": 1.5721, + "step": 71030 + }, + { + "epoch": 3.24, + "learning_rate": 3.385738477733535e-05, + "loss": 1.632, + "step": 71040 + }, + { + "epoch": 3.24, + "learning_rate": 3.3855096343082065e-05, + "loss": 1.9215, + "step": 71050 + }, + { + "epoch": 3.24, + "learning_rate": 3.385280790882878e-05, + "loss": 1.8987, + "step": 71060 + }, + { + "epoch": 3.25, + "learning_rate": 3.38505194745755e-05, + "loss": 1.7088, + "step": 71070 + }, + { + "epoch": 3.25, + "learning_rate": 3.3848231040322214e-05, + "loss": 1.8151, + "step": 71080 + }, + { + "epoch": 3.25, + "learning_rate": 3.384594260606893e-05, + "loss": 1.8259, + "step": 71090 + }, + { + "epoch": 3.25, + "learning_rate": 3.384365417181565e-05, + "loss": 1.7353, + "step": 71100 + }, + { + "epoch": 3.25, + "learning_rate": 3.3841365737562364e-05, + "loss": 1.6368, + "step": 71110 + }, + { + "epoch": 3.25, + "learning_rate": 3.383907730330908e-05, + "loss": 1.8798, + "step": 71120 + }, + { + "epoch": 3.25, + "learning_rate": 3.383678886905579e-05, + "loss": 1.7317, + "step": 71130 + }, + { + "epoch": 3.25, + "learning_rate": 3.3834500434802506e-05, + "loss": 1.6765, + "step": 71140 + }, + { + "epoch": 3.25, + "learning_rate": 3.383221200054923e-05, + "loss": 1.8557, + "step": 71150 + }, + { + "epoch": 3.25, + "learning_rate": 3.382992356629594e-05, + "loss": 1.9942, + "step": 71160 + }, + { + "epoch": 3.25, + "learning_rate": 3.3827635132042655e-05, + "loss": 1.8569, + "step": 71170 + }, + { + "epoch": 3.25, + "learning_rate": 3.3825346697789376e-05, + "loss": 1.7149, + "step": 71180 + }, + { + "epoch": 3.25, + "learning_rate": 3.382305826353609e-05, + "loss": 1.7091, + "step": 71190 + }, + { + "epoch": 3.25, + "learning_rate": 3.3820769829282804e-05, + "loss": 1.7081, + "step": 71200 + }, + { + "epoch": 3.25, + "learning_rate": 3.3818481395029525e-05, + "loss": 1.5639, + "step": 71210 + }, + { + "epoch": 3.25, + "learning_rate": 3.381619296077624e-05, + "loss": 2.0248, + "step": 71220 + }, + { + "epoch": 3.25, + "learning_rate": 3.3813904526522954e-05, + "loss": 1.6295, + "step": 71230 + }, + { + "epoch": 3.25, + "learning_rate": 3.3811616092269674e-05, + "loss": 1.5829, + "step": 71240 + }, + { + "epoch": 3.25, + "learning_rate": 3.380932765801639e-05, + "loss": 1.7021, + "step": 71250 + }, + { + "epoch": 3.25, + "learning_rate": 3.38070392237631e-05, + "loss": 1.7825, + "step": 71260 + }, + { + "epoch": 3.25, + "learning_rate": 3.3804750789509824e-05, + "loss": 1.7578, + "step": 71270 + }, + { + "epoch": 3.25, + "learning_rate": 3.380246235525654e-05, + "loss": 1.78, + "step": 71280 + }, + { + "epoch": 3.26, + "learning_rate": 3.380017392100325e-05, + "loss": 1.5419, + "step": 71290 + }, + { + "epoch": 3.26, + "learning_rate": 3.379788548674997e-05, + "loss": 1.6828, + "step": 71300 + }, + { + "epoch": 3.26, + "learning_rate": 3.379559705249669e-05, + "loss": 1.775, + "step": 71310 + }, + { + "epoch": 3.26, + "learning_rate": 3.37933086182434e-05, + "loss": 1.7309, + "step": 71320 + }, + { + "epoch": 3.26, + "learning_rate": 3.3791020183990115e-05, + "loss": 1.8254, + "step": 71330 + }, + { + "epoch": 3.26, + "learning_rate": 3.378873174973683e-05, + "loss": 1.9168, + "step": 71340 + }, + { + "epoch": 3.26, + "learning_rate": 3.3786443315483543e-05, + "loss": 1.843, + "step": 71350 + }, + { + "epoch": 3.26, + "learning_rate": 3.3784154881230264e-05, + "loss": 1.7607, + "step": 71360 + }, + { + "epoch": 3.26, + "learning_rate": 3.378186644697698e-05, + "loss": 1.7073, + "step": 71370 + }, + { + "epoch": 3.26, + "learning_rate": 3.377957801272369e-05, + "loss": 1.7599, + "step": 71380 + }, + { + "epoch": 3.26, + "learning_rate": 3.3777289578470414e-05, + "loss": 1.7449, + "step": 71390 + }, + { + "epoch": 3.26, + "learning_rate": 3.377500114421713e-05, + "loss": 1.6075, + "step": 71400 + }, + { + "epoch": 3.26, + "learning_rate": 3.377271270996384e-05, + "loss": 1.8447, + "step": 71410 + }, + { + "epoch": 3.26, + "learning_rate": 3.377042427571056e-05, + "loss": 1.7503, + "step": 71420 + }, + { + "epoch": 3.26, + "learning_rate": 3.376813584145728e-05, + "loss": 1.7352, + "step": 71430 + }, + { + "epoch": 3.26, + "learning_rate": 3.376584740720399e-05, + "loss": 1.7258, + "step": 71440 + }, + { + "epoch": 3.26, + "learning_rate": 3.376355897295071e-05, + "loss": 1.8187, + "step": 71450 + }, + { + "epoch": 3.26, + "learning_rate": 3.3761270538697426e-05, + "loss": 1.7211, + "step": 71460 + }, + { + "epoch": 3.26, + "learning_rate": 3.375898210444414e-05, + "loss": 1.6795, + "step": 71470 + }, + { + "epoch": 3.26, + "learning_rate": 3.375669367019086e-05, + "loss": 1.7956, + "step": 71480 + }, + { + "epoch": 3.26, + "learning_rate": 3.3754405235937575e-05, + "loss": 1.7201, + "step": 71490 + }, + { + "epoch": 3.26, + "learning_rate": 3.375211680168429e-05, + "loss": 1.7036, + "step": 71500 + }, + { + "epoch": 3.27, + "learning_rate": 3.374982836743101e-05, + "loss": 1.4896, + "step": 71510 + }, + { + "epoch": 3.27, + "learning_rate": 3.374753993317772e-05, + "loss": 1.6889, + "step": 71520 + }, + { + "epoch": 3.27, + "learning_rate": 3.374525149892443e-05, + "loss": 1.6816, + "step": 71530 + }, + { + "epoch": 3.27, + "learning_rate": 3.374296306467115e-05, + "loss": 1.8322, + "step": 71540 + }, + { + "epoch": 3.27, + "learning_rate": 3.374067463041787e-05, + "loss": 1.7881, + "step": 71550 + }, + { + "epoch": 3.27, + "learning_rate": 3.373838619616458e-05, + "loss": 1.7111, + "step": 71560 + }, + { + "epoch": 3.27, + "learning_rate": 3.37360977619113e-05, + "loss": 1.7394, + "step": 71570 + }, + { + "epoch": 3.27, + "learning_rate": 3.3733809327658016e-05, + "loss": 1.8023, + "step": 71580 + }, + { + "epoch": 3.27, + "learning_rate": 3.373152089340473e-05, + "loss": 1.733, + "step": 71590 + }, + { + "epoch": 3.27, + "learning_rate": 3.372923245915145e-05, + "loss": 1.6427, + "step": 71600 + }, + { + "epoch": 3.27, + "learning_rate": 3.3726944024898165e-05, + "loss": 1.8419, + "step": 71610 + }, + { + "epoch": 3.27, + "learning_rate": 3.372465559064488e-05, + "loss": 1.6791, + "step": 71620 + }, + { + "epoch": 3.27, + "learning_rate": 3.37223671563916e-05, + "loss": 1.7931, + "step": 71630 + }, + { + "epoch": 3.27, + "learning_rate": 3.3720078722138314e-05, + "loss": 1.7717, + "step": 71640 + }, + { + "epoch": 3.27, + "learning_rate": 3.371779028788503e-05, + "loss": 1.7735, + "step": 71650 + }, + { + "epoch": 3.27, + "learning_rate": 3.371550185363175e-05, + "loss": 1.6324, + "step": 71660 + }, + { + "epoch": 3.27, + "learning_rate": 3.3713213419378464e-05, + "loss": 1.8313, + "step": 71670 + }, + { + "epoch": 3.27, + "learning_rate": 3.371092498512518e-05, + "loss": 1.9635, + "step": 71680 + }, + { + "epoch": 3.27, + "learning_rate": 3.37086365508719e-05, + "loss": 1.6416, + "step": 71690 + }, + { + "epoch": 3.27, + "learning_rate": 3.370634811661861e-05, + "loss": 1.7442, + "step": 71700 + }, + { + "epoch": 3.27, + "learning_rate": 3.370405968236533e-05, + "loss": 1.7754, + "step": 71710 + }, + { + "epoch": 3.27, + "learning_rate": 3.370177124811204e-05, + "loss": 1.6126, + "step": 71720 + }, + { + "epoch": 3.28, + "learning_rate": 3.3699482813858755e-05, + "loss": 1.5657, + "step": 71730 + }, + { + "epoch": 3.28, + "learning_rate": 3.3697194379605476e-05, + "loss": 1.5619, + "step": 71740 + }, + { + "epoch": 3.28, + "learning_rate": 3.369490594535219e-05, + "loss": 1.801, + "step": 71750 + }, + { + "epoch": 3.28, + "learning_rate": 3.3692617511098904e-05, + "loss": 1.7297, + "step": 71760 + }, + { + "epoch": 3.28, + "learning_rate": 3.3690329076845625e-05, + "loss": 1.5865, + "step": 71770 + }, + { + "epoch": 3.28, + "learning_rate": 3.368804064259234e-05, + "loss": 1.6314, + "step": 71780 + }, + { + "epoch": 3.28, + "learning_rate": 3.3685752208339053e-05, + "loss": 1.9618, + "step": 71790 + }, + { + "epoch": 3.28, + "learning_rate": 3.3683463774085774e-05, + "loss": 1.6084, + "step": 71800 + }, + { + "epoch": 3.28, + "learning_rate": 3.368117533983249e-05, + "loss": 1.8306, + "step": 71810 + }, + { + "epoch": 3.28, + "learning_rate": 3.36788869055792e-05, + "loss": 1.6027, + "step": 71820 + }, + { + "epoch": 3.28, + "learning_rate": 3.3676598471325924e-05, + "loss": 1.6296, + "step": 71830 + }, + { + "epoch": 3.28, + "learning_rate": 3.367431003707264e-05, + "loss": 1.9229, + "step": 71840 + }, + { + "epoch": 3.28, + "learning_rate": 3.367202160281935e-05, + "loss": 1.7909, + "step": 71850 + }, + { + "epoch": 3.28, + "learning_rate": 3.366973316856607e-05, + "loss": 1.6445, + "step": 71860 + }, + { + "epoch": 3.28, + "learning_rate": 3.366744473431279e-05, + "loss": 1.5888, + "step": 71870 + }, + { + "epoch": 3.28, + "learning_rate": 3.36651563000595e-05, + "loss": 1.5867, + "step": 71880 + }, + { + "epoch": 3.28, + "learning_rate": 3.366286786580622e-05, + "loss": 1.4593, + "step": 71890 + }, + { + "epoch": 3.28, + "learning_rate": 3.3660579431552936e-05, + "loss": 1.6665, + "step": 71900 + }, + { + "epoch": 3.28, + "learning_rate": 3.365829099729965e-05, + "loss": 1.6219, + "step": 71910 + }, + { + "epoch": 3.28, + "learning_rate": 3.3656002563046364e-05, + "loss": 1.692, + "step": 71920 + }, + { + "epoch": 3.28, + "learning_rate": 3.365371412879308e-05, + "loss": 1.5343, + "step": 71930 + }, + { + "epoch": 3.28, + "learning_rate": 3.365142569453979e-05, + "loss": 1.8459, + "step": 71940 + }, + { + "epoch": 3.29, + "learning_rate": 3.3649137260286513e-05, + "loss": 1.9061, + "step": 71950 + }, + { + "epoch": 3.29, + "learning_rate": 3.364684882603323e-05, + "loss": 1.5404, + "step": 71960 + }, + { + "epoch": 3.29, + "learning_rate": 3.364456039177994e-05, + "loss": 1.639, + "step": 71970 + }, + { + "epoch": 3.29, + "learning_rate": 3.364227195752666e-05, + "loss": 1.5827, + "step": 71980 + }, + { + "epoch": 3.29, + "learning_rate": 3.363998352327338e-05, + "loss": 1.7827, + "step": 71990 + }, + { + "epoch": 3.29, + "learning_rate": 3.363769508902009e-05, + "loss": 1.4901, + "step": 72000 + }, + { + "epoch": 3.29, + "learning_rate": 3.363540665476681e-05, + "loss": 1.7251, + "step": 72010 + }, + { + "epoch": 3.29, + "learning_rate": 3.3633118220513526e-05, + "loss": 1.6614, + "step": 72020 + }, + { + "epoch": 3.29, + "learning_rate": 3.363082978626024e-05, + "loss": 1.5981, + "step": 72030 + }, + { + "epoch": 3.29, + "learning_rate": 3.362854135200696e-05, + "loss": 1.8068, + "step": 72040 + }, + { + "epoch": 3.29, + "learning_rate": 3.3626252917753675e-05, + "loss": 1.7848, + "step": 72050 + }, + { + "epoch": 3.29, + "learning_rate": 3.362396448350039e-05, + "loss": 1.7673, + "step": 72060 + }, + { + "epoch": 3.29, + "learning_rate": 3.362167604924711e-05, + "loss": 1.7002, + "step": 72070 + }, + { + "epoch": 3.29, + "learning_rate": 3.3619387614993824e-05, + "loss": 1.8128, + "step": 72080 + }, + { + "epoch": 3.29, + "learning_rate": 3.361709918074054e-05, + "loss": 1.6612, + "step": 72090 + }, + { + "epoch": 3.29, + "learning_rate": 3.361481074648726e-05, + "loss": 1.7565, + "step": 72100 + }, + { + "epoch": 3.29, + "learning_rate": 3.3612522312233973e-05, + "loss": 1.5967, + "step": 72110 + }, + { + "epoch": 3.29, + "learning_rate": 3.361023387798069e-05, + "loss": 1.5583, + "step": 72120 + }, + { + "epoch": 3.29, + "learning_rate": 3.36079454437274e-05, + "loss": 1.7509, + "step": 72130 + }, + { + "epoch": 3.29, + "learning_rate": 3.3605657009474116e-05, + "loss": 1.7735, + "step": 72140 + }, + { + "epoch": 3.29, + "learning_rate": 3.360336857522084e-05, + "loss": 1.691, + "step": 72150 + }, + { + "epoch": 3.3, + "learning_rate": 3.360108014096755e-05, + "loss": 1.7482, + "step": 72160 + }, + { + "epoch": 3.3, + "learning_rate": 3.3598791706714265e-05, + "loss": 1.8768, + "step": 72170 + }, + { + "epoch": 3.3, + "learning_rate": 3.3596503272460986e-05, + "loss": 1.7349, + "step": 72180 + }, + { + "epoch": 3.3, + "learning_rate": 3.35942148382077e-05, + "loss": 1.6129, + "step": 72190 + }, + { + "epoch": 3.3, + "learning_rate": 3.3591926403954414e-05, + "loss": 1.7478, + "step": 72200 + }, + { + "epoch": 3.3, + "learning_rate": 3.3589637969701135e-05, + "loss": 1.8931, + "step": 72210 + }, + { + "epoch": 3.3, + "learning_rate": 3.358734953544785e-05, + "loss": 1.6801, + "step": 72220 + }, + { + "epoch": 3.3, + "learning_rate": 3.3585061101194563e-05, + "loss": 1.9876, + "step": 72230 + }, + { + "epoch": 3.3, + "learning_rate": 3.3582772666941284e-05, + "loss": 1.655, + "step": 72240 + }, + { + "epoch": 3.3, + "learning_rate": 3.3580484232688e-05, + "loss": 1.6467, + "step": 72250 + }, + { + "epoch": 3.3, + "learning_rate": 3.357819579843471e-05, + "loss": 1.7025, + "step": 72260 + }, + { + "epoch": 3.3, + "learning_rate": 3.3575907364181433e-05, + "loss": 1.7961, + "step": 72270 + }, + { + "epoch": 3.3, + "learning_rate": 3.357361892992815e-05, + "loss": 1.8771, + "step": 72280 + }, + { + "epoch": 3.3, + "learning_rate": 3.357133049567486e-05, + "loss": 1.7425, + "step": 72290 + }, + { + "epoch": 3.3, + "learning_rate": 3.356904206142158e-05, + "loss": 1.7531, + "step": 72300 + }, + { + "epoch": 3.3, + "learning_rate": 3.356675362716829e-05, + "loss": 1.6957, + "step": 72310 + }, + { + "epoch": 3.3, + "learning_rate": 3.3564465192915004e-05, + "loss": 1.5032, + "step": 72320 + }, + { + "epoch": 3.3, + "learning_rate": 3.3562176758661725e-05, + "loss": 1.5315, + "step": 72330 + }, + { + "epoch": 3.3, + "learning_rate": 3.355988832440844e-05, + "loss": 1.7406, + "step": 72340 + }, + { + "epoch": 3.3, + "learning_rate": 3.355759989015515e-05, + "loss": 1.5926, + "step": 72350 + }, + { + "epoch": 3.3, + "learning_rate": 3.3555311455901874e-05, + "loss": 1.72, + "step": 72360 + }, + { + "epoch": 3.3, + "learning_rate": 3.355302302164859e-05, + "loss": 1.6877, + "step": 72370 + }, + { + "epoch": 3.31, + "learning_rate": 3.35507345873953e-05, + "loss": 1.6804, + "step": 72380 + }, + { + "epoch": 3.31, + "learning_rate": 3.3548446153142023e-05, + "loss": 1.8473, + "step": 72390 + }, + { + "epoch": 3.31, + "learning_rate": 3.354615771888874e-05, + "loss": 1.5904, + "step": 72400 + }, + { + "epoch": 3.31, + "learning_rate": 3.354386928463545e-05, + "loss": 1.788, + "step": 72410 + }, + { + "epoch": 3.31, + "learning_rate": 3.354158085038217e-05, + "loss": 1.6255, + "step": 72420 + }, + { + "epoch": 3.31, + "learning_rate": 3.353929241612889e-05, + "loss": 1.6949, + "step": 72430 + }, + { + "epoch": 3.31, + "learning_rate": 3.35370039818756e-05, + "loss": 1.7558, + "step": 72440 + }, + { + "epoch": 3.31, + "learning_rate": 3.353471554762232e-05, + "loss": 1.7421, + "step": 72450 + }, + { + "epoch": 3.31, + "learning_rate": 3.3532427113369036e-05, + "loss": 1.8235, + "step": 72460 + }, + { + "epoch": 3.31, + "learning_rate": 3.353013867911575e-05, + "loss": 1.7697, + "step": 72470 + }, + { + "epoch": 3.31, + "learning_rate": 3.352785024486247e-05, + "loss": 1.7298, + "step": 72480 + }, + { + "epoch": 3.31, + "learning_rate": 3.3525561810609185e-05, + "loss": 1.5889, + "step": 72490 + }, + { + "epoch": 3.31, + "learning_rate": 3.35232733763559e-05, + "loss": 1.6287, + "step": 72500 + }, + { + "epoch": 3.31, + "learning_rate": 3.352098494210261e-05, + "loss": 1.5948, + "step": 72510 + }, + { + "epoch": 3.31, + "learning_rate": 3.351869650784933e-05, + "loss": 1.7892, + "step": 72520 + }, + { + "epoch": 3.31, + "learning_rate": 3.351640807359605e-05, + "loss": 1.8146, + "step": 72530 + }, + { + "epoch": 3.31, + "learning_rate": 3.351411963934276e-05, + "loss": 1.7375, + "step": 72540 + }, + { + "epoch": 3.31, + "learning_rate": 3.351183120508948e-05, + "loss": 1.5989, + "step": 72550 + }, + { + "epoch": 3.31, + "learning_rate": 3.35095427708362e-05, + "loss": 1.7414, + "step": 72560 + }, + { + "epoch": 3.31, + "learning_rate": 3.350725433658291e-05, + "loss": 1.5913, + "step": 72570 + }, + { + "epoch": 3.31, + "learning_rate": 3.3504965902329626e-05, + "loss": 1.8189, + "step": 72580 + }, + { + "epoch": 3.31, + "learning_rate": 3.350267746807634e-05, + "loss": 1.6982, + "step": 72590 + }, + { + "epoch": 3.32, + "learning_rate": 3.350038903382306e-05, + "loss": 1.8096, + "step": 72600 + }, + { + "epoch": 3.32, + "learning_rate": 3.3498100599569775e-05, + "loss": 1.7897, + "step": 72610 + }, + { + "epoch": 3.32, + "learning_rate": 3.349581216531649e-05, + "loss": 1.6815, + "step": 72620 + }, + { + "epoch": 3.32, + "learning_rate": 3.349352373106321e-05, + "loss": 1.7916, + "step": 72630 + }, + { + "epoch": 3.32, + "learning_rate": 3.3491235296809924e-05, + "loss": 1.7736, + "step": 72640 + }, + { + "epoch": 3.32, + "learning_rate": 3.348894686255664e-05, + "loss": 1.623, + "step": 72650 + }, + { + "epoch": 3.32, + "learning_rate": 3.348665842830336e-05, + "loss": 1.6617, + "step": 72660 + }, + { + "epoch": 3.32, + "learning_rate": 3.348436999405007e-05, + "loss": 1.5431, + "step": 72670 + }, + { + "epoch": 3.32, + "learning_rate": 3.348208155979679e-05, + "loss": 1.5446, + "step": 72680 + }, + { + "epoch": 3.32, + "learning_rate": 3.347979312554351e-05, + "loss": 1.663, + "step": 72690 + }, + { + "epoch": 3.32, + "learning_rate": 3.347750469129022e-05, + "loss": 1.628, + "step": 72700 + }, + { + "epoch": 3.32, + "learning_rate": 3.347521625703694e-05, + "loss": 1.6873, + "step": 72710 + }, + { + "epoch": 3.32, + "learning_rate": 3.347292782278365e-05, + "loss": 1.6843, + "step": 72720 + }, + { + "epoch": 3.32, + "learning_rate": 3.3470639388530365e-05, + "loss": 1.718, + "step": 72730 + }, + { + "epoch": 3.32, + "learning_rate": 3.3468350954277086e-05, + "loss": 1.5531, + "step": 72740 + }, + { + "epoch": 3.32, + "learning_rate": 3.34660625200238e-05, + "loss": 1.7296, + "step": 72750 + }, + { + "epoch": 3.32, + "learning_rate": 3.3463774085770514e-05, + "loss": 1.7352, + "step": 72760 + }, + { + "epoch": 3.32, + "learning_rate": 3.3461485651517235e-05, + "loss": 1.7843, + "step": 72770 + }, + { + "epoch": 3.32, + "learning_rate": 3.345919721726395e-05, + "loss": 1.7135, + "step": 72780 + }, + { + "epoch": 3.32, + "learning_rate": 3.345690878301066e-05, + "loss": 1.7062, + "step": 72790 + }, + { + "epoch": 3.32, + "learning_rate": 3.3454620348757384e-05, + "loss": 1.7414, + "step": 72800 + }, + { + "epoch": 3.32, + "learning_rate": 3.34523319145041e-05, + "loss": 1.7094, + "step": 72810 + }, + { + "epoch": 3.33, + "learning_rate": 3.345004348025081e-05, + "loss": 1.8046, + "step": 72820 + }, + { + "epoch": 3.33, + "learning_rate": 3.344775504599753e-05, + "loss": 1.5912, + "step": 72830 + }, + { + "epoch": 3.33, + "learning_rate": 3.344546661174425e-05, + "loss": 1.748, + "step": 72840 + }, + { + "epoch": 3.33, + "learning_rate": 3.344317817749096e-05, + "loss": 1.6945, + "step": 72850 + }, + { + "epoch": 3.33, + "learning_rate": 3.344088974323768e-05, + "loss": 1.6895, + "step": 72860 + }, + { + "epoch": 3.33, + "learning_rate": 3.34386013089844e-05, + "loss": 1.7009, + "step": 72870 + }, + { + "epoch": 3.33, + "learning_rate": 3.343631287473111e-05, + "loss": 1.772, + "step": 72880 + }, + { + "epoch": 3.33, + "learning_rate": 3.343402444047783e-05, + "loss": 1.652, + "step": 72890 + }, + { + "epoch": 3.33, + "learning_rate": 3.3431736006224546e-05, + "loss": 1.5643, + "step": 72900 + }, + { + "epoch": 3.33, + "learning_rate": 3.342944757197125e-05, + "loss": 1.8231, + "step": 72910 + }, + { + "epoch": 3.33, + "learning_rate": 3.3427159137717974e-05, + "loss": 1.6068, + "step": 72920 + }, + { + "epoch": 3.33, + "learning_rate": 3.342487070346469e-05, + "loss": 1.6606, + "step": 72930 + }, + { + "epoch": 3.33, + "learning_rate": 3.34225822692114e-05, + "loss": 1.8327, + "step": 72940 + }, + { + "epoch": 3.33, + "learning_rate": 3.342029383495812e-05, + "loss": 1.6429, + "step": 72950 + }, + { + "epoch": 3.33, + "learning_rate": 3.341800540070484e-05, + "loss": 1.7126, + "step": 72960 + }, + { + "epoch": 3.33, + "learning_rate": 3.341571696645155e-05, + "loss": 1.7168, + "step": 72970 + }, + { + "epoch": 3.33, + "learning_rate": 3.341342853219827e-05, + "loss": 1.6211, + "step": 72980 + }, + { + "epoch": 3.33, + "learning_rate": 3.341114009794499e-05, + "loss": 1.7007, + "step": 72990 + }, + { + "epoch": 3.33, + "learning_rate": 3.34088516636917e-05, + "loss": 1.6622, + "step": 73000 + }, + { + "epoch": 3.33, + "learning_rate": 3.340656322943842e-05, + "loss": 1.7673, + "step": 73010 + }, + { + "epoch": 3.33, + "learning_rate": 3.3404274795185136e-05, + "loss": 1.775, + "step": 73020 + }, + { + "epoch": 3.33, + "learning_rate": 3.340198636093185e-05, + "loss": 1.5305, + "step": 73030 + }, + { + "epoch": 3.34, + "learning_rate": 3.339969792667857e-05, + "loss": 1.7028, + "step": 73040 + }, + { + "epoch": 3.34, + "learning_rate": 3.3397409492425285e-05, + "loss": 1.6083, + "step": 73050 + }, + { + "epoch": 3.34, + "learning_rate": 3.3395121058172e-05, + "loss": 1.83, + "step": 73060 + }, + { + "epoch": 3.34, + "learning_rate": 3.339283262391872e-05, + "loss": 1.7735, + "step": 73070 + }, + { + "epoch": 3.34, + "learning_rate": 3.3390544189665434e-05, + "loss": 1.7527, + "step": 73080 + }, + { + "epoch": 3.34, + "learning_rate": 3.338825575541215e-05, + "loss": 1.7747, + "step": 73090 + }, + { + "epoch": 3.34, + "learning_rate": 3.338596732115886e-05, + "loss": 1.7574, + "step": 73100 + }, + { + "epoch": 3.34, + "learning_rate": 3.3383678886905577e-05, + "loss": 1.7566, + "step": 73110 + }, + { + "epoch": 3.34, + "learning_rate": 3.33813904526523e-05, + "loss": 1.6563, + "step": 73120 + }, + { + "epoch": 3.34, + "learning_rate": 3.337910201839901e-05, + "loss": 1.6414, + "step": 73130 + }, + { + "epoch": 3.34, + "learning_rate": 3.3376813584145726e-05, + "loss": 1.6734, + "step": 73140 + }, + { + "epoch": 3.34, + "learning_rate": 3.337452514989245e-05, + "loss": 1.6317, + "step": 73150 + }, + { + "epoch": 3.34, + "learning_rate": 3.337223671563916e-05, + "loss": 1.8106, + "step": 73160 + }, + { + "epoch": 3.34, + "learning_rate": 3.3369948281385875e-05, + "loss": 1.7077, + "step": 73170 + }, + { + "epoch": 3.34, + "learning_rate": 3.3367659847132596e-05, + "loss": 1.6348, + "step": 73180 + }, + { + "epoch": 3.34, + "learning_rate": 3.336537141287931e-05, + "loss": 1.7752, + "step": 73190 + }, + { + "epoch": 3.34, + "learning_rate": 3.3363082978626024e-05, + "loss": 1.7839, + "step": 73200 + }, + { + "epoch": 3.34, + "learning_rate": 3.3360794544372745e-05, + "loss": 1.6691, + "step": 73210 + }, + { + "epoch": 3.34, + "learning_rate": 3.335850611011946e-05, + "loss": 1.7464, + "step": 73220 + }, + { + "epoch": 3.34, + "learning_rate": 3.335621767586617e-05, + "loss": 1.7459, + "step": 73230 + }, + { + "epoch": 3.34, + "learning_rate": 3.3353929241612894e-05, + "loss": 1.7142, + "step": 73240 + }, + { + "epoch": 3.34, + "learning_rate": 3.335164080735961e-05, + "loss": 1.7486, + "step": 73250 + }, + { + "epoch": 3.35, + "learning_rate": 3.334935237310632e-05, + "loss": 1.6691, + "step": 73260 + }, + { + "epoch": 3.35, + "learning_rate": 3.334706393885304e-05, + "loss": 1.7346, + "step": 73270 + }, + { + "epoch": 3.35, + "learning_rate": 3.334477550459976e-05, + "loss": 1.6266, + "step": 73280 + }, + { + "epoch": 3.35, + "learning_rate": 3.334248707034647e-05, + "loss": 1.7474, + "step": 73290 + }, + { + "epoch": 3.35, + "learning_rate": 3.3340198636093186e-05, + "loss": 1.536, + "step": 73300 + }, + { + "epoch": 3.35, + "learning_rate": 3.33379102018399e-05, + "loss": 1.733, + "step": 73310 + }, + { + "epoch": 3.35, + "learning_rate": 3.3335621767586614e-05, + "loss": 1.742, + "step": 73320 + }, + { + "epoch": 3.35, + "learning_rate": 3.3333333333333335e-05, + "loss": 1.8243, + "step": 73330 + }, + { + "epoch": 3.35, + "learning_rate": 3.333104489908005e-05, + "loss": 1.737, + "step": 73340 + }, + { + "epoch": 3.35, + "learning_rate": 3.332875646482676e-05, + "loss": 1.9515, + "step": 73350 + }, + { + "epoch": 3.35, + "learning_rate": 3.3326468030573484e-05, + "loss": 1.6221, + "step": 73360 + }, + { + "epoch": 3.35, + "learning_rate": 3.33241795963202e-05, + "loss": 1.7299, + "step": 73370 + }, + { + "epoch": 3.35, + "learning_rate": 3.332189116206691e-05, + "loss": 1.6029, + "step": 73380 + }, + { + "epoch": 3.35, + "learning_rate": 3.331960272781363e-05, + "loss": 1.6997, + "step": 73390 + }, + { + "epoch": 3.35, + "learning_rate": 3.331731429356035e-05, + "loss": 1.7373, + "step": 73400 + }, + { + "epoch": 3.35, + "learning_rate": 3.331502585930706e-05, + "loss": 1.7565, + "step": 73410 + }, + { + "epoch": 3.35, + "learning_rate": 3.331273742505378e-05, + "loss": 1.701, + "step": 73420 + }, + { + "epoch": 3.35, + "learning_rate": 3.3310448990800497e-05, + "loss": 1.8505, + "step": 73430 + }, + { + "epoch": 3.35, + "learning_rate": 3.330816055654721e-05, + "loss": 1.6318, + "step": 73440 + }, + { + "epoch": 3.35, + "learning_rate": 3.330587212229393e-05, + "loss": 1.6245, + "step": 73450 + }, + { + "epoch": 3.35, + "learning_rate": 3.3303583688040646e-05, + "loss": 1.7686, + "step": 73460 + }, + { + "epoch": 3.35, + "learning_rate": 3.330129525378736e-05, + "loss": 1.8403, + "step": 73470 + }, + { + "epoch": 3.36, + "learning_rate": 3.329900681953408e-05, + "loss": 1.6763, + "step": 73480 + }, + { + "epoch": 3.36, + "learning_rate": 3.3296718385280795e-05, + "loss": 1.6101, + "step": 73490 + }, + { + "epoch": 3.36, + "learning_rate": 3.329442995102751e-05, + "loss": 1.7168, + "step": 73500 + }, + { + "epoch": 3.36, + "learning_rate": 3.329214151677422e-05, + "loss": 1.8389, + "step": 73510 + }, + { + "epoch": 3.36, + "learning_rate": 3.328985308252094e-05, + "loss": 1.7571, + "step": 73520 + }, + { + "epoch": 3.36, + "learning_rate": 3.328756464826766e-05, + "loss": 1.7558, + "step": 73530 + }, + { + "epoch": 3.36, + "learning_rate": 3.328527621401437e-05, + "loss": 1.578, + "step": 73540 + }, + { + "epoch": 3.36, + "learning_rate": 3.3282987779761087e-05, + "loss": 1.7713, + "step": 73550 + }, + { + "epoch": 3.36, + "learning_rate": 3.328069934550781e-05, + "loss": 1.592, + "step": 73560 + }, + { + "epoch": 3.36, + "learning_rate": 3.327841091125452e-05, + "loss": 1.6054, + "step": 73570 + }, + { + "epoch": 3.36, + "learning_rate": 3.3276122477001236e-05, + "loss": 1.6922, + "step": 73580 + }, + { + "epoch": 3.36, + "learning_rate": 3.327383404274796e-05, + "loss": 1.75, + "step": 73590 + }, + { + "epoch": 3.36, + "learning_rate": 3.327154560849467e-05, + "loss": 1.7739, + "step": 73600 + }, + { + "epoch": 3.36, + "learning_rate": 3.3269257174241385e-05, + "loss": 1.5421, + "step": 73610 + }, + { + "epoch": 3.36, + "learning_rate": 3.3266968739988106e-05, + "loss": 1.9391, + "step": 73620 + }, + { + "epoch": 3.36, + "learning_rate": 3.326468030573482e-05, + "loss": 1.8903, + "step": 73630 + }, + { + "epoch": 3.36, + "learning_rate": 3.3262391871481534e-05, + "loss": 1.5589, + "step": 73640 + }, + { + "epoch": 3.36, + "learning_rate": 3.3260103437228255e-05, + "loss": 1.5633, + "step": 73650 + }, + { + "epoch": 3.36, + "learning_rate": 3.325781500297497e-05, + "loss": 1.7194, + "step": 73660 + }, + { + "epoch": 3.36, + "learning_rate": 3.325552656872168e-05, + "loss": 1.7366, + "step": 73670 + }, + { + "epoch": 3.36, + "learning_rate": 3.32532381344684e-05, + "loss": 1.822, + "step": 73680 + }, + { + "epoch": 3.36, + "learning_rate": 3.325094970021512e-05, + "loss": 1.8145, + "step": 73690 + }, + { + "epoch": 3.37, + "learning_rate": 3.3248661265961826e-05, + "loss": 1.8021, + "step": 73700 + }, + { + "epoch": 3.37, + "learning_rate": 3.3246372831708547e-05, + "loss": 1.7816, + "step": 73710 + }, + { + "epoch": 3.37, + "learning_rate": 3.324408439745526e-05, + "loss": 1.5793, + "step": 73720 + }, + { + "epoch": 3.37, + "learning_rate": 3.3241795963201975e-05, + "loss": 1.6637, + "step": 73730 + }, + { + "epoch": 3.37, + "learning_rate": 3.3239507528948696e-05, + "loss": 1.8112, + "step": 73740 + }, + { + "epoch": 3.37, + "learning_rate": 3.323721909469541e-05, + "loss": 1.6573, + "step": 73750 + }, + { + "epoch": 3.37, + "learning_rate": 3.3234930660442124e-05, + "loss": 1.7347, + "step": 73760 + }, + { + "epoch": 3.37, + "learning_rate": 3.3232642226188845e-05, + "loss": 1.7813, + "step": 73770 + }, + { + "epoch": 3.37, + "learning_rate": 3.323035379193556e-05, + "loss": 1.6598, + "step": 73780 + }, + { + "epoch": 3.37, + "learning_rate": 3.322806535768227e-05, + "loss": 1.6269, + "step": 73790 + }, + { + "epoch": 3.37, + "learning_rate": 3.3225776923428994e-05, + "loss": 1.7914, + "step": 73800 + }, + { + "epoch": 3.37, + "learning_rate": 3.322348848917571e-05, + "loss": 1.7203, + "step": 73810 + }, + { + "epoch": 3.37, + "learning_rate": 3.322120005492242e-05, + "loss": 1.7126, + "step": 73820 + }, + { + "epoch": 3.37, + "learning_rate": 3.321891162066914e-05, + "loss": 1.6242, + "step": 73830 + }, + { + "epoch": 3.37, + "learning_rate": 3.321662318641586e-05, + "loss": 1.6119, + "step": 73840 + }, + { + "epoch": 3.37, + "learning_rate": 3.321433475216257e-05, + "loss": 1.6824, + "step": 73850 + }, + { + "epoch": 3.37, + "learning_rate": 3.321204631790929e-05, + "loss": 1.719, + "step": 73860 + }, + { + "epoch": 3.37, + "learning_rate": 3.3209757883656007e-05, + "loss": 1.7358, + "step": 73870 + }, + { + "epoch": 3.37, + "learning_rate": 3.320746944940272e-05, + "loss": 1.53, + "step": 73880 + }, + { + "epoch": 3.37, + "learning_rate": 3.3205181015149435e-05, + "loss": 1.6768, + "step": 73890 + }, + { + "epoch": 3.37, + "learning_rate": 3.320289258089615e-05, + "loss": 1.6963, + "step": 73900 + }, + { + "epoch": 3.37, + "learning_rate": 3.320060414664286e-05, + "loss": 1.7954, + "step": 73910 + }, + { + "epoch": 3.38, + "learning_rate": 3.3198315712389584e-05, + "loss": 1.8435, + "step": 73920 + }, + { + "epoch": 3.38, + "learning_rate": 3.31960272781363e-05, + "loss": 1.7507, + "step": 73930 + }, + { + "epoch": 3.38, + "learning_rate": 3.319373884388301e-05, + "loss": 1.8189, + "step": 73940 + }, + { + "epoch": 3.38, + "learning_rate": 3.319145040962973e-05, + "loss": 1.5948, + "step": 73950 + }, + { + "epoch": 3.38, + "learning_rate": 3.318916197537645e-05, + "loss": 1.5239, + "step": 73960 + }, + { + "epoch": 3.38, + "learning_rate": 3.318687354112316e-05, + "loss": 1.6711, + "step": 73970 + }, + { + "epoch": 3.38, + "learning_rate": 3.318458510686988e-05, + "loss": 1.8153, + "step": 73980 + }, + { + "epoch": 3.38, + "learning_rate": 3.3182296672616596e-05, + "loss": 1.6177, + "step": 73990 + }, + { + "epoch": 3.38, + "learning_rate": 3.318000823836331e-05, + "loss": 1.6931, + "step": 74000 + }, + { + "epoch": 3.38, + "learning_rate": 3.317771980411003e-05, + "loss": 1.6481, + "step": 74010 + }, + { + "epoch": 3.38, + "learning_rate": 3.3175431369856746e-05, + "loss": 1.7604, + "step": 74020 + }, + { + "epoch": 3.38, + "learning_rate": 3.317314293560346e-05, + "loss": 1.7801, + "step": 74030 + }, + { + "epoch": 3.38, + "learning_rate": 3.317085450135018e-05, + "loss": 1.6482, + "step": 74040 + }, + { + "epoch": 3.38, + "learning_rate": 3.3168566067096895e-05, + "loss": 1.6151, + "step": 74050 + }, + { + "epoch": 3.38, + "learning_rate": 3.316627763284361e-05, + "loss": 1.7753, + "step": 74060 + }, + { + "epoch": 3.38, + "learning_rate": 3.316398919859033e-05, + "loss": 1.8455, + "step": 74070 + }, + { + "epoch": 3.38, + "learning_rate": 3.3161700764337044e-05, + "loss": 1.813, + "step": 74080 + }, + { + "epoch": 3.38, + "learning_rate": 3.315941233008376e-05, + "loss": 1.7054, + "step": 74090 + }, + { + "epoch": 3.38, + "learning_rate": 3.315712389583047e-05, + "loss": 1.7242, + "step": 74100 + }, + { + "epoch": 3.38, + "learning_rate": 3.3154835461577186e-05, + "loss": 1.7422, + "step": 74110 + }, + { + "epoch": 3.38, + "learning_rate": 3.315254702732391e-05, + "loss": 1.6533, + "step": 74120 + }, + { + "epoch": 3.38, + "learning_rate": 3.315025859307062e-05, + "loss": 1.7513, + "step": 74130 + }, + { + "epoch": 3.39, + "learning_rate": 3.3147970158817336e-05, + "loss": 1.6613, + "step": 74140 + }, + { + "epoch": 3.39, + "learning_rate": 3.3145681724564057e-05, + "loss": 1.5444, + "step": 74150 + }, + { + "epoch": 3.39, + "learning_rate": 3.314339329031077e-05, + "loss": 1.481, + "step": 74160 + }, + { + "epoch": 3.39, + "learning_rate": 3.3141104856057485e-05, + "loss": 1.6732, + "step": 74170 + }, + { + "epoch": 3.39, + "learning_rate": 3.3138816421804206e-05, + "loss": 1.6983, + "step": 74180 + }, + { + "epoch": 3.39, + "learning_rate": 3.313652798755092e-05, + "loss": 1.7625, + "step": 74190 + }, + { + "epoch": 3.39, + "learning_rate": 3.3134239553297634e-05, + "loss": 1.622, + "step": 74200 + }, + { + "epoch": 3.39, + "learning_rate": 3.3131951119044355e-05, + "loss": 1.8223, + "step": 74210 + }, + { + "epoch": 3.39, + "learning_rate": 3.312966268479107e-05, + "loss": 1.7289, + "step": 74220 + }, + { + "epoch": 3.39, + "learning_rate": 3.312737425053778e-05, + "loss": 1.848, + "step": 74230 + }, + { + "epoch": 3.39, + "learning_rate": 3.3125085816284504e-05, + "loss": 1.6344, + "step": 74240 + }, + { + "epoch": 3.39, + "learning_rate": 3.312279738203122e-05, + "loss": 1.7061, + "step": 74250 + }, + { + "epoch": 3.39, + "learning_rate": 3.312050894777793e-05, + "loss": 1.5567, + "step": 74260 + }, + { + "epoch": 3.39, + "learning_rate": 3.311822051352465e-05, + "loss": 1.7639, + "step": 74270 + }, + { + "epoch": 3.39, + "learning_rate": 3.311593207927137e-05, + "loss": 1.6403, + "step": 74280 + }, + { + "epoch": 3.39, + "learning_rate": 3.3113643645018075e-05, + "loss": 1.6674, + "step": 74290 + }, + { + "epoch": 3.39, + "learning_rate": 3.3111355210764796e-05, + "loss": 1.537, + "step": 74300 + }, + { + "epoch": 3.39, + "learning_rate": 3.310906677651151e-05, + "loss": 1.7172, + "step": 74310 + }, + { + "epoch": 3.39, + "learning_rate": 3.3106778342258224e-05, + "loss": 1.5103, + "step": 74320 + }, + { + "epoch": 3.39, + "learning_rate": 3.3104489908004945e-05, + "loss": 1.6533, + "step": 74330 + }, + { + "epoch": 3.39, + "learning_rate": 3.310220147375166e-05, + "loss": 1.7599, + "step": 74340 + }, + { + "epoch": 3.4, + "learning_rate": 3.309991303949837e-05, + "loss": 1.6975, + "step": 74350 + }, + { + "epoch": 3.4, + "learning_rate": 3.3097624605245094e-05, + "loss": 1.7432, + "step": 74360 + }, + { + "epoch": 3.4, + "learning_rate": 3.309533617099181e-05, + "loss": 1.6648, + "step": 74370 + }, + { + "epoch": 3.4, + "learning_rate": 3.309304773673852e-05, + "loss": 1.6104, + "step": 74380 + }, + { + "epoch": 3.4, + "learning_rate": 3.309075930248524e-05, + "loss": 1.4753, + "step": 74390 + }, + { + "epoch": 3.4, + "learning_rate": 3.308847086823196e-05, + "loss": 1.7571, + "step": 74400 + }, + { + "epoch": 3.4, + "learning_rate": 3.308618243397867e-05, + "loss": 1.7483, + "step": 74410 + }, + { + "epoch": 3.4, + "learning_rate": 3.308389399972539e-05, + "loss": 1.6847, + "step": 74420 + }, + { + "epoch": 3.4, + "learning_rate": 3.3081605565472106e-05, + "loss": 1.6939, + "step": 74430 + }, + { + "epoch": 3.4, + "learning_rate": 3.307931713121882e-05, + "loss": 1.7334, + "step": 74440 + }, + { + "epoch": 3.4, + "learning_rate": 3.307702869696554e-05, + "loss": 1.8699, + "step": 74450 + }, + { + "epoch": 3.4, + "learning_rate": 3.3074740262712256e-05, + "loss": 1.9417, + "step": 74460 + }, + { + "epoch": 3.4, + "learning_rate": 3.307245182845897e-05, + "loss": 1.7054, + "step": 74470 + }, + { + "epoch": 3.4, + "learning_rate": 3.307016339420569e-05, + "loss": 1.6967, + "step": 74480 + }, + { + "epoch": 3.4, + "learning_rate": 3.30678749599524e-05, + "loss": 1.8651, + "step": 74490 + }, + { + "epoch": 3.4, + "learning_rate": 3.306558652569912e-05, + "loss": 1.8968, + "step": 74500 + }, + { + "epoch": 3.4, + "learning_rate": 3.306329809144583e-05, + "loss": 1.7462, + "step": 74510 + }, + { + "epoch": 3.4, + "learning_rate": 3.306100965719255e-05, + "loss": 1.6672, + "step": 74520 + }, + { + "epoch": 3.4, + "learning_rate": 3.305872122293927e-05, + "loss": 1.5905, + "step": 74530 + }, + { + "epoch": 3.4, + "learning_rate": 3.305643278868598e-05, + "loss": 1.6638, + "step": 74540 + }, + { + "epoch": 3.4, + "learning_rate": 3.3054144354432696e-05, + "loss": 1.609, + "step": 74550 + }, + { + "epoch": 3.4, + "learning_rate": 3.305185592017942e-05, + "loss": 1.5991, + "step": 74560 + }, + { + "epoch": 3.41, + "learning_rate": 3.304956748592613e-05, + "loss": 1.6324, + "step": 74570 + }, + { + "epoch": 3.41, + "learning_rate": 3.3047279051672846e-05, + "loss": 1.6873, + "step": 74580 + }, + { + "epoch": 3.41, + "learning_rate": 3.3044990617419566e-05, + "loss": 1.8176, + "step": 74590 + }, + { + "epoch": 3.41, + "learning_rate": 3.304270218316628e-05, + "loss": 1.7727, + "step": 74600 + }, + { + "epoch": 3.41, + "learning_rate": 3.3040413748912995e-05, + "loss": 1.8829, + "step": 74610 + }, + { + "epoch": 3.41, + "learning_rate": 3.3038125314659716e-05, + "loss": 1.6417, + "step": 74620 + }, + { + "epoch": 3.41, + "learning_rate": 3.303583688040643e-05, + "loss": 1.6906, + "step": 74630 + }, + { + "epoch": 3.41, + "learning_rate": 3.3033548446153144e-05, + "loss": 1.9113, + "step": 74640 + }, + { + "epoch": 3.41, + "learning_rate": 3.3031260011899865e-05, + "loss": 1.7408, + "step": 74650 + }, + { + "epoch": 3.41, + "learning_rate": 3.302897157764658e-05, + "loss": 1.5889, + "step": 74660 + }, + { + "epoch": 3.41, + "learning_rate": 3.302668314339329e-05, + "loss": 1.8235, + "step": 74670 + }, + { + "epoch": 3.41, + "learning_rate": 3.302439470914001e-05, + "loss": 1.6361, + "step": 74680 + }, + { + "epoch": 3.41, + "learning_rate": 3.302210627488672e-05, + "loss": 1.5696, + "step": 74690 + }, + { + "epoch": 3.41, + "learning_rate": 3.3019817840633436e-05, + "loss": 1.786, + "step": 74700 + }, + { + "epoch": 3.41, + "learning_rate": 3.3017529406380156e-05, + "loss": 1.6528, + "step": 74710 + }, + { + "epoch": 3.41, + "learning_rate": 3.301524097212687e-05, + "loss": 1.7462, + "step": 74720 + }, + { + "epoch": 3.41, + "learning_rate": 3.3012952537873585e-05, + "loss": 1.7443, + "step": 74730 + }, + { + "epoch": 3.41, + "learning_rate": 3.3010664103620306e-05, + "loss": 1.5619, + "step": 74740 + }, + { + "epoch": 3.41, + "learning_rate": 3.300837566936702e-05, + "loss": 1.9888, + "step": 74750 + }, + { + "epoch": 3.41, + "learning_rate": 3.3006087235113734e-05, + "loss": 1.6725, + "step": 74760 + }, + { + "epoch": 3.41, + "learning_rate": 3.3003798800860455e-05, + "loss": 1.6887, + "step": 74770 + }, + { + "epoch": 3.41, + "learning_rate": 3.300151036660717e-05, + "loss": 1.6146, + "step": 74780 + }, + { + "epoch": 3.42, + "learning_rate": 3.299922193235388e-05, + "loss": 1.7002, + "step": 74790 + }, + { + "epoch": 3.42, + "learning_rate": 3.2996933498100604e-05, + "loss": 1.6588, + "step": 74800 + }, + { + "epoch": 3.42, + "learning_rate": 3.299464506384732e-05, + "loss": 1.7134, + "step": 74810 + }, + { + "epoch": 3.42, + "learning_rate": 3.299235662959403e-05, + "loss": 1.6959, + "step": 74820 + }, + { + "epoch": 3.42, + "learning_rate": 3.299006819534075e-05, + "loss": 1.8146, + "step": 74830 + }, + { + "epoch": 3.42, + "learning_rate": 3.298777976108747e-05, + "loss": 1.8, + "step": 74840 + }, + { + "epoch": 3.42, + "learning_rate": 3.298549132683418e-05, + "loss": 1.6556, + "step": 74850 + }, + { + "epoch": 3.42, + "learning_rate": 3.29832028925809e-05, + "loss": 1.8278, + "step": 74860 + }, + { + "epoch": 3.42, + "learning_rate": 3.2980914458327616e-05, + "loss": 1.579, + "step": 74870 + }, + { + "epoch": 3.42, + "learning_rate": 3.297862602407433e-05, + "loss": 1.6853, + "step": 74880 + }, + { + "epoch": 3.42, + "learning_rate": 3.2976337589821045e-05, + "loss": 1.6707, + "step": 74890 + }, + { + "epoch": 3.42, + "learning_rate": 3.297404915556776e-05, + "loss": 1.6188, + "step": 74900 + }, + { + "epoch": 3.42, + "learning_rate": 3.297176072131448e-05, + "loss": 1.5936, + "step": 74910 + }, + { + "epoch": 3.42, + "learning_rate": 3.2969472287061194e-05, + "loss": 1.6282, + "step": 74920 + }, + { + "epoch": 3.42, + "learning_rate": 3.296718385280791e-05, + "loss": 1.7848, + "step": 74930 + }, + { + "epoch": 3.42, + "learning_rate": 3.296489541855462e-05, + "loss": 1.6508, + "step": 74940 + }, + { + "epoch": 3.42, + "learning_rate": 3.296260698430134e-05, + "loss": 1.8478, + "step": 74950 + }, + { + "epoch": 3.42, + "learning_rate": 3.296031855004806e-05, + "loss": 1.6811, + "step": 74960 + }, + { + "epoch": 3.42, + "learning_rate": 3.295803011579477e-05, + "loss": 1.6804, + "step": 74970 + }, + { + "epoch": 3.42, + "learning_rate": 3.295574168154149e-05, + "loss": 1.6858, + "step": 74980 + }, + { + "epoch": 3.42, + "learning_rate": 3.2953453247288206e-05, + "loss": 1.6693, + "step": 74990 + }, + { + "epoch": 3.42, + "learning_rate": 3.295116481303492e-05, + "loss": 1.7578, + "step": 75000 + }, + { + "epoch": 3.43, + "learning_rate": 3.294887637878164e-05, + "loss": 1.6495, + "step": 75010 + }, + { + "epoch": 3.43, + "learning_rate": 3.2946587944528356e-05, + "loss": 1.6666, + "step": 75020 + }, + { + "epoch": 3.43, + "learning_rate": 3.294429951027507e-05, + "loss": 1.5204, + "step": 75030 + }, + { + "epoch": 3.43, + "learning_rate": 3.294201107602179e-05, + "loss": 1.6596, + "step": 75040 + }, + { + "epoch": 3.43, + "learning_rate": 3.2939722641768505e-05, + "loss": 1.5945, + "step": 75050 + }, + { + "epoch": 3.43, + "learning_rate": 3.293743420751522e-05, + "loss": 1.7712, + "step": 75060 + }, + { + "epoch": 3.43, + "learning_rate": 3.293514577326194e-05, + "loss": 1.792, + "step": 75070 + }, + { + "epoch": 3.43, + "learning_rate": 3.293285733900865e-05, + "loss": 1.7408, + "step": 75080 + }, + { + "epoch": 3.43, + "learning_rate": 3.293056890475537e-05, + "loss": 1.7591, + "step": 75090 + }, + { + "epoch": 3.43, + "learning_rate": 3.292828047050208e-05, + "loss": 1.7925, + "step": 75100 + }, + { + "epoch": 3.43, + "learning_rate": 3.2925992036248796e-05, + "loss": 1.5578, + "step": 75110 + }, + { + "epoch": 3.43, + "learning_rate": 3.292370360199552e-05, + "loss": 1.8059, + "step": 75120 + }, + { + "epoch": 3.43, + "learning_rate": 3.292141516774223e-05, + "loss": 1.6076, + "step": 75130 + }, + { + "epoch": 3.43, + "learning_rate": 3.2919126733488945e-05, + "loss": 1.6546, + "step": 75140 + }, + { + "epoch": 3.43, + "learning_rate": 3.2916838299235666e-05, + "loss": 1.7152, + "step": 75150 + }, + { + "epoch": 3.43, + "learning_rate": 3.291454986498238e-05, + "loss": 1.562, + "step": 75160 + }, + { + "epoch": 3.43, + "learning_rate": 3.2912261430729095e-05, + "loss": 1.7394, + "step": 75170 + }, + { + "epoch": 3.43, + "learning_rate": 3.2909972996475816e-05, + "loss": 1.5244, + "step": 75180 + }, + { + "epoch": 3.43, + "learning_rate": 3.290768456222253e-05, + "loss": 1.5927, + "step": 75190 + }, + { + "epoch": 3.43, + "learning_rate": 3.2905396127969244e-05, + "loss": 1.6773, + "step": 75200 + }, + { + "epoch": 3.43, + "learning_rate": 3.2903107693715965e-05, + "loss": 1.6635, + "step": 75210 + }, + { + "epoch": 3.43, + "learning_rate": 3.290081925946268e-05, + "loss": 1.6701, + "step": 75220 + }, + { + "epoch": 3.44, + "learning_rate": 3.289853082520939e-05, + "loss": 1.6883, + "step": 75230 + }, + { + "epoch": 3.44, + "learning_rate": 3.2896242390956114e-05, + "loss": 1.6496, + "step": 75240 + }, + { + "epoch": 3.44, + "learning_rate": 3.289395395670283e-05, + "loss": 1.6142, + "step": 75250 + }, + { + "epoch": 3.44, + "learning_rate": 3.289166552244954e-05, + "loss": 1.7788, + "step": 75260 + }, + { + "epoch": 3.44, + "learning_rate": 3.288937708819626e-05, + "loss": 1.7005, + "step": 75270 + }, + { + "epoch": 3.44, + "learning_rate": 3.288708865394297e-05, + "loss": 1.6255, + "step": 75280 + }, + { + "epoch": 3.44, + "learning_rate": 3.2884800219689685e-05, + "loss": 1.6137, + "step": 75290 + }, + { + "epoch": 3.44, + "learning_rate": 3.2882511785436405e-05, + "loss": 1.6572, + "step": 75300 + }, + { + "epoch": 3.44, + "learning_rate": 3.288022335118312e-05, + "loss": 1.7105, + "step": 75310 + }, + { + "epoch": 3.44, + "learning_rate": 3.2877934916929834e-05, + "loss": 1.7652, + "step": 75320 + }, + { + "epoch": 3.44, + "learning_rate": 3.2875646482676555e-05, + "loss": 1.6192, + "step": 75330 + }, + { + "epoch": 3.44, + "learning_rate": 3.287335804842327e-05, + "loss": 1.6413, + "step": 75340 + }, + { + "epoch": 3.44, + "learning_rate": 3.287106961416998e-05, + "loss": 1.6996, + "step": 75350 + }, + { + "epoch": 3.44, + "learning_rate": 3.2868781179916704e-05, + "loss": 1.6881, + "step": 75360 + }, + { + "epoch": 3.44, + "learning_rate": 3.286649274566342e-05, + "loss": 1.8135, + "step": 75370 + }, + { + "epoch": 3.44, + "learning_rate": 3.286420431141013e-05, + "loss": 1.9046, + "step": 75380 + }, + { + "epoch": 3.44, + "learning_rate": 3.286191587715685e-05, + "loss": 1.5952, + "step": 75390 + }, + { + "epoch": 3.44, + "learning_rate": 3.285962744290357e-05, + "loss": 1.5994, + "step": 75400 + }, + { + "epoch": 3.44, + "learning_rate": 3.285733900865028e-05, + "loss": 1.748, + "step": 75410 + }, + { + "epoch": 3.44, + "learning_rate": 3.2855050574397e-05, + "loss": 1.663, + "step": 75420 + }, + { + "epoch": 3.44, + "learning_rate": 3.2852762140143716e-05, + "loss": 1.8038, + "step": 75430 + }, + { + "epoch": 3.44, + "learning_rate": 3.285047370589043e-05, + "loss": 1.9155, + "step": 75440 + }, + { + "epoch": 3.45, + "learning_rate": 3.284818527163715e-05, + "loss": 1.6643, + "step": 75450 + }, + { + "epoch": 3.45, + "learning_rate": 3.2845896837383866e-05, + "loss": 1.5925, + "step": 75460 + }, + { + "epoch": 3.45, + "learning_rate": 3.284360840313058e-05, + "loss": 1.6411, + "step": 75470 + }, + { + "epoch": 3.45, + "learning_rate": 3.2841319968877294e-05, + "loss": 1.6664, + "step": 75480 + }, + { + "epoch": 3.45, + "learning_rate": 3.283903153462401e-05, + "loss": 1.6516, + "step": 75490 + }, + { + "epoch": 3.45, + "learning_rate": 3.283674310037073e-05, + "loss": 1.6547, + "step": 75500 + }, + { + "epoch": 3.45, + "learning_rate": 3.283445466611744e-05, + "loss": 1.7078, + "step": 75510 + }, + { + "epoch": 3.45, + "learning_rate": 3.283216623186416e-05, + "loss": 1.6215, + "step": 75520 + }, + { + "epoch": 3.45, + "learning_rate": 3.282987779761088e-05, + "loss": 1.7134, + "step": 75530 + }, + { + "epoch": 3.45, + "learning_rate": 3.282758936335759e-05, + "loss": 1.8529, + "step": 75540 + }, + { + "epoch": 3.45, + "learning_rate": 3.2825300929104306e-05, + "loss": 1.572, + "step": 75550 + }, + { + "epoch": 3.45, + "learning_rate": 3.282301249485103e-05, + "loss": 1.9712, + "step": 75560 + }, + { + "epoch": 3.45, + "learning_rate": 3.282072406059774e-05, + "loss": 1.7543, + "step": 75570 + }, + { + "epoch": 3.45, + "learning_rate": 3.2818435626344455e-05, + "loss": 1.5348, + "step": 75580 + }, + { + "epoch": 3.45, + "learning_rate": 3.2816147192091176e-05, + "loss": 1.9183, + "step": 75590 + }, + { + "epoch": 3.45, + "learning_rate": 3.281385875783789e-05, + "loss": 1.7568, + "step": 75600 + }, + { + "epoch": 3.45, + "learning_rate": 3.2811570323584605e-05, + "loss": 1.6799, + "step": 75610 + }, + { + "epoch": 3.45, + "learning_rate": 3.2809281889331326e-05, + "loss": 1.7777, + "step": 75620 + }, + { + "epoch": 3.45, + "learning_rate": 3.280699345507804e-05, + "loss": 1.7359, + "step": 75630 + }, + { + "epoch": 3.45, + "learning_rate": 3.2804705020824754e-05, + "loss": 1.6255, + "step": 75640 + }, + { + "epoch": 3.45, + "learning_rate": 3.2802416586571475e-05, + "loss": 1.5748, + "step": 75650 + }, + { + "epoch": 3.45, + "learning_rate": 3.280012815231819e-05, + "loss": 1.5939, + "step": 75660 + }, + { + "epoch": 3.46, + "learning_rate": 3.2797839718064896e-05, + "loss": 1.6073, + "step": 75670 + }, + { + "epoch": 3.46, + "learning_rate": 3.279555128381162e-05, + "loss": 1.6391, + "step": 75680 + }, + { + "epoch": 3.46, + "learning_rate": 3.279326284955833e-05, + "loss": 1.6295, + "step": 75690 + }, + { + "epoch": 3.46, + "learning_rate": 3.2790974415305045e-05, + "loss": 1.7964, + "step": 75700 + }, + { + "epoch": 3.46, + "learning_rate": 3.2788685981051766e-05, + "loss": 1.8895, + "step": 75710 + }, + { + "epoch": 3.46, + "learning_rate": 3.278639754679848e-05, + "loss": 1.582, + "step": 75720 + }, + { + "epoch": 3.46, + "learning_rate": 3.2784109112545195e-05, + "loss": 1.7056, + "step": 75730 + }, + { + "epoch": 3.46, + "learning_rate": 3.2781820678291915e-05, + "loss": 1.681, + "step": 75740 + }, + { + "epoch": 3.46, + "learning_rate": 3.277953224403863e-05, + "loss": 1.4447, + "step": 75750 + }, + { + "epoch": 3.46, + "learning_rate": 3.2777243809785344e-05, + "loss": 1.65, + "step": 75760 + }, + { + "epoch": 3.46, + "learning_rate": 3.2774955375532065e-05, + "loss": 1.6501, + "step": 75770 + }, + { + "epoch": 3.46, + "learning_rate": 3.277266694127878e-05, + "loss": 1.6505, + "step": 75780 + }, + { + "epoch": 3.46, + "learning_rate": 3.277037850702549e-05, + "loss": 1.7238, + "step": 75790 + }, + { + "epoch": 3.46, + "learning_rate": 3.2768090072772214e-05, + "loss": 1.7853, + "step": 75800 + }, + { + "epoch": 3.46, + "learning_rate": 3.276580163851893e-05, + "loss": 1.597, + "step": 75810 + }, + { + "epoch": 3.46, + "learning_rate": 3.276351320426564e-05, + "loss": 1.7719, + "step": 75820 + }, + { + "epoch": 3.46, + "learning_rate": 3.276122477001236e-05, + "loss": 1.839, + "step": 75830 + }, + { + "epoch": 3.46, + "learning_rate": 3.275893633575908e-05, + "loss": 1.7726, + "step": 75840 + }, + { + "epoch": 3.46, + "learning_rate": 3.275664790150579e-05, + "loss": 1.6963, + "step": 75850 + }, + { + "epoch": 3.46, + "learning_rate": 3.275435946725251e-05, + "loss": 1.6878, + "step": 75860 + }, + { + "epoch": 3.46, + "learning_rate": 3.275207103299922e-05, + "loss": 1.8568, + "step": 75870 + }, + { + "epoch": 3.46, + "learning_rate": 3.274978259874594e-05, + "loss": 1.652, + "step": 75880 + }, + { + "epoch": 3.47, + "learning_rate": 3.2747494164492655e-05, + "loss": 1.4984, + "step": 75890 + }, + { + "epoch": 3.47, + "learning_rate": 3.274520573023937e-05, + "loss": 1.8262, + "step": 75900 + }, + { + "epoch": 3.47, + "learning_rate": 3.274291729598609e-05, + "loss": 1.6528, + "step": 75910 + }, + { + "epoch": 3.47, + "learning_rate": 3.2740628861732804e-05, + "loss": 1.5731, + "step": 75920 + }, + { + "epoch": 3.47, + "learning_rate": 3.273834042747952e-05, + "loss": 1.5589, + "step": 75930 + }, + { + "epoch": 3.47, + "learning_rate": 3.273605199322624e-05, + "loss": 1.6699, + "step": 75940 + }, + { + "epoch": 3.47, + "learning_rate": 3.273376355897295e-05, + "loss": 1.6491, + "step": 75950 + }, + { + "epoch": 3.47, + "learning_rate": 3.273147512471967e-05, + "loss": 1.7113, + "step": 75960 + }, + { + "epoch": 3.47, + "learning_rate": 3.272918669046639e-05, + "loss": 1.5401, + "step": 75970 + }, + { + "epoch": 3.47, + "learning_rate": 3.27268982562131e-05, + "loss": 1.8276, + "step": 75980 + }, + { + "epoch": 3.47, + "learning_rate": 3.2724609821959816e-05, + "loss": 1.7859, + "step": 75990 + }, + { + "epoch": 3.47, + "learning_rate": 3.272232138770654e-05, + "loss": 1.7421, + "step": 76000 + }, + { + "epoch": 3.47, + "learning_rate": 3.272003295345325e-05, + "loss": 1.6643, + "step": 76010 + }, + { + "epoch": 3.47, + "learning_rate": 3.2717744519199965e-05, + "loss": 1.7332, + "step": 76020 + }, + { + "epoch": 3.47, + "learning_rate": 3.271545608494668e-05, + "loss": 1.6165, + "step": 76030 + }, + { + "epoch": 3.47, + "learning_rate": 3.27131676506934e-05, + "loss": 1.7256, + "step": 76040 + }, + { + "epoch": 3.47, + "learning_rate": 3.2710879216440115e-05, + "loss": 1.7982, + "step": 76050 + }, + { + "epoch": 3.47, + "learning_rate": 3.270859078218683e-05, + "loss": 1.4954, + "step": 76060 + }, + { + "epoch": 3.47, + "learning_rate": 3.270630234793354e-05, + "loss": 1.6416, + "step": 76070 + }, + { + "epoch": 3.47, + "learning_rate": 3.270401391368026e-05, + "loss": 1.6924, + "step": 76080 + }, + { + "epoch": 3.47, + "learning_rate": 3.270172547942698e-05, + "loss": 1.7071, + "step": 76090 + }, + { + "epoch": 3.47, + "learning_rate": 3.269943704517369e-05, + "loss": 1.4712, + "step": 76100 + }, + { + "epoch": 3.48, + "learning_rate": 3.2697148610920406e-05, + "loss": 1.6771, + "step": 76110 + }, + { + "epoch": 3.48, + "learning_rate": 3.269486017666713e-05, + "loss": 1.7919, + "step": 76120 + }, + { + "epoch": 3.48, + "learning_rate": 3.269257174241384e-05, + "loss": 1.5675, + "step": 76130 + }, + { + "epoch": 3.48, + "learning_rate": 3.2690283308160555e-05, + "loss": 1.7159, + "step": 76140 + }, + { + "epoch": 3.48, + "learning_rate": 3.2687994873907276e-05, + "loss": 1.7838, + "step": 76150 + }, + { + "epoch": 3.48, + "learning_rate": 3.268570643965399e-05, + "loss": 1.7128, + "step": 76160 + }, + { + "epoch": 3.48, + "learning_rate": 3.2683418005400705e-05, + "loss": 1.6506, + "step": 76170 + }, + { + "epoch": 3.48, + "learning_rate": 3.2681129571147425e-05, + "loss": 1.7172, + "step": 76180 + }, + { + "epoch": 3.48, + "learning_rate": 3.267884113689414e-05, + "loss": 1.6813, + "step": 76190 + }, + { + "epoch": 3.48, + "learning_rate": 3.2676552702640854e-05, + "loss": 1.9065, + "step": 76200 + }, + { + "epoch": 3.48, + "learning_rate": 3.2674264268387575e-05, + "loss": 1.6612, + "step": 76210 + }, + { + "epoch": 3.48, + "learning_rate": 3.267197583413429e-05, + "loss": 1.6596, + "step": 76220 + }, + { + "epoch": 3.48, + "learning_rate": 3.2669687399881e-05, + "loss": 1.7513, + "step": 76230 + }, + { + "epoch": 3.48, + "learning_rate": 3.2667398965627724e-05, + "loss": 1.8113, + "step": 76240 + }, + { + "epoch": 3.48, + "learning_rate": 3.266511053137444e-05, + "loss": 1.7065, + "step": 76250 + }, + { + "epoch": 3.48, + "learning_rate": 3.266282209712115e-05, + "loss": 1.6547, + "step": 76260 + }, + { + "epoch": 3.48, + "learning_rate": 3.2660533662867866e-05, + "loss": 1.6889, + "step": 76270 + }, + { + "epoch": 3.48, + "learning_rate": 3.265824522861458e-05, + "loss": 1.5447, + "step": 76280 + }, + { + "epoch": 3.48, + "learning_rate": 3.2655956794361294e-05, + "loss": 1.6932, + "step": 76290 + }, + { + "epoch": 3.48, + "learning_rate": 3.2653668360108015e-05, + "loss": 1.7708, + "step": 76300 + }, + { + "epoch": 3.48, + "learning_rate": 3.265137992585473e-05, + "loss": 1.6732, + "step": 76310 + }, + { + "epoch": 3.48, + "learning_rate": 3.2649091491601444e-05, + "loss": 1.7099, + "step": 76320 + }, + { + "epoch": 3.49, + "learning_rate": 3.2646803057348165e-05, + "loss": 1.8583, + "step": 76330 + }, + { + "epoch": 3.49, + "learning_rate": 3.264451462309488e-05, + "loss": 1.7078, + "step": 76340 + }, + { + "epoch": 3.49, + "learning_rate": 3.264222618884159e-05, + "loss": 1.5725, + "step": 76350 + }, + { + "epoch": 3.49, + "learning_rate": 3.2639937754588314e-05, + "loss": 1.6581, + "step": 76360 + }, + { + "epoch": 3.49, + "learning_rate": 3.263764932033503e-05, + "loss": 1.6479, + "step": 76370 + }, + { + "epoch": 3.49, + "learning_rate": 3.263536088608174e-05, + "loss": 1.6662, + "step": 76380 + }, + { + "epoch": 3.49, + "learning_rate": 3.263307245182846e-05, + "loss": 1.6826, + "step": 76390 + }, + { + "epoch": 3.49, + "learning_rate": 3.263078401757518e-05, + "loss": 1.7121, + "step": 76400 + }, + { + "epoch": 3.49, + "learning_rate": 3.262849558332189e-05, + "loss": 1.7229, + "step": 76410 + }, + { + "epoch": 3.49, + "learning_rate": 3.262620714906861e-05, + "loss": 1.7043, + "step": 76420 + }, + { + "epoch": 3.49, + "learning_rate": 3.2623918714815326e-05, + "loss": 1.6629, + "step": 76430 + }, + { + "epoch": 3.49, + "learning_rate": 3.262163028056204e-05, + "loss": 1.6082, + "step": 76440 + }, + { + "epoch": 3.49, + "learning_rate": 3.261934184630876e-05, + "loss": 1.7758, + "step": 76450 + }, + { + "epoch": 3.49, + "learning_rate": 3.261705341205547e-05, + "loss": 1.5285, + "step": 76460 + }, + { + "epoch": 3.49, + "learning_rate": 3.261476497780219e-05, + "loss": 1.6832, + "step": 76470 + }, + { + "epoch": 3.49, + "learning_rate": 3.2612476543548904e-05, + "loss": 1.8026, + "step": 76480 + }, + { + "epoch": 3.49, + "learning_rate": 3.261018810929562e-05, + "loss": 1.697, + "step": 76490 + }, + { + "epoch": 3.49, + "learning_rate": 3.260789967504234e-05, + "loss": 1.6911, + "step": 76500 + }, + { + "epoch": 3.49, + "learning_rate": 3.260561124078905e-05, + "loss": 1.8949, + "step": 76510 + }, + { + "epoch": 3.49, + "learning_rate": 3.260332280653577e-05, + "loss": 1.6984, + "step": 76520 + }, + { + "epoch": 3.49, + "learning_rate": 3.260103437228249e-05, + "loss": 1.7765, + "step": 76530 + }, + { + "epoch": 3.5, + "learning_rate": 3.25987459380292e-05, + "loss": 1.5611, + "step": 76540 + }, + { + "epoch": 3.5, + "learning_rate": 3.2596457503775916e-05, + "loss": 1.7047, + "step": 76550 + }, + { + "epoch": 3.5, + "learning_rate": 3.259416906952264e-05, + "loss": 1.7235, + "step": 76560 + }, + { + "epoch": 3.5, + "learning_rate": 3.259188063526935e-05, + "loss": 1.7394, + "step": 76570 + }, + { + "epoch": 3.5, + "learning_rate": 3.2589592201016065e-05, + "loss": 1.6705, + "step": 76580 + }, + { + "epoch": 3.5, + "learning_rate": 3.2587303766762786e-05, + "loss": 1.5228, + "step": 76590 + }, + { + "epoch": 3.5, + "learning_rate": 3.25850153325095e-05, + "loss": 1.8227, + "step": 76600 + }, + { + "epoch": 3.5, + "learning_rate": 3.2582726898256214e-05, + "loss": 1.8006, + "step": 76610 + }, + { + "epoch": 3.5, + "learning_rate": 3.2580438464002935e-05, + "loss": 1.4395, + "step": 76620 + }, + { + "epoch": 3.5, + "learning_rate": 3.257815002974965e-05, + "loss": 1.5796, + "step": 76630 + }, + { + "epoch": 3.5, + "learning_rate": 3.2575861595496364e-05, + "loss": 1.7321, + "step": 76640 + }, + { + "epoch": 3.5, + "learning_rate": 3.2573573161243085e-05, + "loss": 1.6113, + "step": 76650 + }, + { + "epoch": 3.5, + "learning_rate": 3.257128472698979e-05, + "loss": 1.8255, + "step": 76660 + }, + { + "epoch": 3.5, + "learning_rate": 3.2568996292736506e-05, + "loss": 1.8957, + "step": 76670 + }, + { + "epoch": 3.5, + "learning_rate": 3.256670785848323e-05, + "loss": 1.6731, + "step": 76680 + }, + { + "epoch": 3.5, + "learning_rate": 3.256441942422994e-05, + "loss": 1.8003, + "step": 76690 + }, + { + "epoch": 3.5, + "learning_rate": 3.2562130989976655e-05, + "loss": 1.7491, + "step": 76700 + }, + { + "epoch": 3.5, + "learning_rate": 3.2559842555723376e-05, + "loss": 1.7095, + "step": 76710 + }, + { + "epoch": 3.5, + "learning_rate": 3.255755412147009e-05, + "loss": 1.6674, + "step": 76720 + }, + { + "epoch": 3.5, + "learning_rate": 3.2555265687216804e-05, + "loss": 1.9088, + "step": 76730 + }, + { + "epoch": 3.5, + "learning_rate": 3.2552977252963525e-05, + "loss": 1.6795, + "step": 76740 + }, + { + "epoch": 3.5, + "learning_rate": 3.255068881871024e-05, + "loss": 1.6476, + "step": 76750 + }, + { + "epoch": 3.51, + "learning_rate": 3.2548400384456954e-05, + "loss": 1.8876, + "step": 76760 + }, + { + "epoch": 3.51, + "learning_rate": 3.2546111950203674e-05, + "loss": 1.4741, + "step": 76770 + }, + { + "epoch": 3.51, + "learning_rate": 3.254382351595039e-05, + "loss": 1.7159, + "step": 76780 + }, + { + "epoch": 3.51, + "learning_rate": 3.25415350816971e-05, + "loss": 1.687, + "step": 76790 + }, + { + "epoch": 3.51, + "learning_rate": 3.2539246647443824e-05, + "loss": 1.775, + "step": 76800 + }, + { + "epoch": 3.51, + "learning_rate": 3.253695821319054e-05, + "loss": 1.5854, + "step": 76810 + }, + { + "epoch": 3.51, + "learning_rate": 3.253466977893725e-05, + "loss": 1.689, + "step": 76820 + }, + { + "epoch": 3.51, + "learning_rate": 3.253238134468397e-05, + "loss": 1.4563, + "step": 76830 + }, + { + "epoch": 3.51, + "learning_rate": 3.253009291043069e-05, + "loss": 1.7187, + "step": 76840 + }, + { + "epoch": 3.51, + "learning_rate": 3.25278044761774e-05, + "loss": 1.6512, + "step": 76850 + }, + { + "epoch": 3.51, + "learning_rate": 3.2525516041924115e-05, + "loss": 1.6661, + "step": 76860 + }, + { + "epoch": 3.51, + "learning_rate": 3.252322760767083e-05, + "loss": 1.7632, + "step": 76870 + }, + { + "epoch": 3.51, + "learning_rate": 3.252093917341755e-05, + "loss": 1.6853, + "step": 76880 + }, + { + "epoch": 3.51, + "learning_rate": 3.2518650739164264e-05, + "loss": 1.7831, + "step": 76890 + }, + { + "epoch": 3.51, + "learning_rate": 3.251636230491098e-05, + "loss": 1.7112, + "step": 76900 + }, + { + "epoch": 3.51, + "learning_rate": 3.25140738706577e-05, + "loss": 1.6754, + "step": 76910 + }, + { + "epoch": 3.51, + "learning_rate": 3.2511785436404414e-05, + "loss": 1.5235, + "step": 76920 + }, + { + "epoch": 3.51, + "learning_rate": 3.250949700215113e-05, + "loss": 1.8453, + "step": 76930 + }, + { + "epoch": 3.51, + "learning_rate": 3.250720856789785e-05, + "loss": 1.8138, + "step": 76940 + }, + { + "epoch": 3.51, + "learning_rate": 3.250492013364456e-05, + "loss": 1.6592, + "step": 76950 + }, + { + "epoch": 3.51, + "learning_rate": 3.250263169939128e-05, + "loss": 1.6883, + "step": 76960 + }, + { + "epoch": 3.51, + "learning_rate": 3.2500343265138e-05, + "loss": 1.7781, + "step": 76970 + }, + { + "epoch": 3.52, + "learning_rate": 3.249805483088471e-05, + "loss": 1.5321, + "step": 76980 + }, + { + "epoch": 3.52, + "learning_rate": 3.2495766396631426e-05, + "loss": 1.7357, + "step": 76990 + }, + { + "epoch": 3.52, + "learning_rate": 3.249347796237815e-05, + "loss": 1.6853, + "step": 77000 + }, + { + "epoch": 3.52, + "learning_rate": 3.249118952812486e-05, + "loss": 1.6704, + "step": 77010 + }, + { + "epoch": 3.52, + "learning_rate": 3.2488901093871575e-05, + "loss": 1.7481, + "step": 77020 + }, + { + "epoch": 3.52, + "learning_rate": 3.2486612659618296e-05, + "loss": 1.5829, + "step": 77030 + }, + { + "epoch": 3.52, + "learning_rate": 3.248432422536501e-05, + "loss": 1.6948, + "step": 77040 + }, + { + "epoch": 3.52, + "learning_rate": 3.2482035791111724e-05, + "loss": 1.6639, + "step": 77050 + }, + { + "epoch": 3.52, + "learning_rate": 3.247974735685844e-05, + "loss": 1.4829, + "step": 77060 + }, + { + "epoch": 3.52, + "learning_rate": 3.247745892260515e-05, + "loss": 1.5416, + "step": 77070 + }, + { + "epoch": 3.52, + "learning_rate": 3.247517048835187e-05, + "loss": 1.5415, + "step": 77080 + }, + { + "epoch": 3.52, + "learning_rate": 3.247288205409859e-05, + "loss": 1.5349, + "step": 77090 + }, + { + "epoch": 3.52, + "learning_rate": 3.24705936198453e-05, + "loss": 1.6664, + "step": 77100 + }, + { + "epoch": 3.52, + "learning_rate": 3.2468305185592016e-05, + "loss": 1.7662, + "step": 77110 + }, + { + "epoch": 3.52, + "learning_rate": 3.246601675133874e-05, + "loss": 1.645, + "step": 77120 + }, + { + "epoch": 3.52, + "learning_rate": 3.246372831708545e-05, + "loss": 1.7637, + "step": 77130 + }, + { + "epoch": 3.52, + "learning_rate": 3.2461439882832165e-05, + "loss": 1.6318, + "step": 77140 + }, + { + "epoch": 3.52, + "learning_rate": 3.2459151448578886e-05, + "loss": 1.6508, + "step": 77150 + }, + { + "epoch": 3.52, + "learning_rate": 3.24568630143256e-05, + "loss": 1.5847, + "step": 77160 + }, + { + "epoch": 3.52, + "learning_rate": 3.2454574580072314e-05, + "loss": 1.8042, + "step": 77170 + }, + { + "epoch": 3.52, + "learning_rate": 3.2452286145819035e-05, + "loss": 1.7716, + "step": 77180 + }, + { + "epoch": 3.52, + "learning_rate": 3.244999771156575e-05, + "loss": 1.752, + "step": 77190 + }, + { + "epoch": 3.53, + "learning_rate": 3.2447709277312464e-05, + "loss": 1.6295, + "step": 77200 + }, + { + "epoch": 3.53, + "learning_rate": 3.2445420843059184e-05, + "loss": 1.705, + "step": 77210 + }, + { + "epoch": 3.53, + "learning_rate": 3.24431324088059e-05, + "loss": 1.7088, + "step": 77220 + }, + { + "epoch": 3.53, + "learning_rate": 3.244084397455261e-05, + "loss": 1.7573, + "step": 77230 + }, + { + "epoch": 3.53, + "learning_rate": 3.2438555540299334e-05, + "loss": 1.6137, + "step": 77240 + }, + { + "epoch": 3.53, + "learning_rate": 3.243626710604604e-05, + "loss": 1.6023, + "step": 77250 + }, + { + "epoch": 3.53, + "learning_rate": 3.2433978671792755e-05, + "loss": 1.6547, + "step": 77260 + }, + { + "epoch": 3.53, + "learning_rate": 3.2431690237539476e-05, + "loss": 1.6089, + "step": 77270 + }, + { + "epoch": 3.53, + "learning_rate": 3.242940180328619e-05, + "loss": 1.5887, + "step": 77280 + }, + { + "epoch": 3.53, + "learning_rate": 3.2427113369032904e-05, + "loss": 1.7977, + "step": 77290 + }, + { + "epoch": 3.53, + "learning_rate": 3.2424824934779625e-05, + "loss": 1.5108, + "step": 77300 + }, + { + "epoch": 3.53, + "learning_rate": 3.242253650052634e-05, + "loss": 1.4521, + "step": 77310 + }, + { + "epoch": 3.53, + "learning_rate": 3.2420248066273053e-05, + "loss": 1.6904, + "step": 77320 + }, + { + "epoch": 3.53, + "learning_rate": 3.2417959632019774e-05, + "loss": 1.627, + "step": 77330 + }, + { + "epoch": 3.53, + "learning_rate": 3.241567119776649e-05, + "loss": 1.7171, + "step": 77340 + }, + { + "epoch": 3.53, + "learning_rate": 3.24133827635132e-05, + "loss": 1.6028, + "step": 77350 + }, + { + "epoch": 3.53, + "learning_rate": 3.2411094329259924e-05, + "loss": 1.5792, + "step": 77360 + }, + { + "epoch": 3.53, + "learning_rate": 3.240880589500664e-05, + "loss": 1.7689, + "step": 77370 + }, + { + "epoch": 3.53, + "learning_rate": 3.240651746075335e-05, + "loss": 1.652, + "step": 77380 + }, + { + "epoch": 3.53, + "learning_rate": 3.240422902650007e-05, + "loss": 1.7295, + "step": 77390 + }, + { + "epoch": 3.53, + "learning_rate": 3.240194059224679e-05, + "loss": 1.781, + "step": 77400 + }, + { + "epoch": 3.53, + "learning_rate": 3.23996521579935e-05, + "loss": 1.6914, + "step": 77410 + }, + { + "epoch": 3.54, + "learning_rate": 3.239736372374022e-05, + "loss": 1.6737, + "step": 77420 + }, + { + "epoch": 3.54, + "learning_rate": 3.2395075289486936e-05, + "loss": 1.5848, + "step": 77430 + }, + { + "epoch": 3.54, + "learning_rate": 3.239278685523365e-05, + "loss": 1.8382, + "step": 77440 + }, + { + "epoch": 3.54, + "learning_rate": 3.2390498420980364e-05, + "loss": 1.6334, + "step": 77450 + }, + { + "epoch": 3.54, + "learning_rate": 3.238820998672708e-05, + "loss": 1.6681, + "step": 77460 + }, + { + "epoch": 3.54, + "learning_rate": 3.23859215524738e-05, + "loss": 1.5744, + "step": 77470 + }, + { + "epoch": 3.54, + "learning_rate": 3.2383633118220514e-05, + "loss": 1.7585, + "step": 77480 + }, + { + "epoch": 3.54, + "learning_rate": 3.238134468396723e-05, + "loss": 1.7157, + "step": 77490 + }, + { + "epoch": 3.54, + "learning_rate": 3.237905624971395e-05, + "loss": 1.7179, + "step": 77500 + }, + { + "epoch": 3.54, + "learning_rate": 3.237676781546066e-05, + "loss": 1.8158, + "step": 77510 + }, + { + "epoch": 3.54, + "learning_rate": 3.237447938120738e-05, + "loss": 1.5499, + "step": 77520 + }, + { + "epoch": 3.54, + "learning_rate": 3.23721909469541e-05, + "loss": 1.8336, + "step": 77530 + }, + { + "epoch": 3.54, + "learning_rate": 3.236990251270081e-05, + "loss": 1.6128, + "step": 77540 + }, + { + "epoch": 3.54, + "learning_rate": 3.2367614078447526e-05, + "loss": 1.7412, + "step": 77550 + }, + { + "epoch": 3.54, + "learning_rate": 3.236532564419425e-05, + "loss": 1.7701, + "step": 77560 + }, + { + "epoch": 3.54, + "learning_rate": 3.236303720994096e-05, + "loss": 1.6913, + "step": 77570 + }, + { + "epoch": 3.54, + "learning_rate": 3.2360748775687675e-05, + "loss": 1.7692, + "step": 77580 + }, + { + "epoch": 3.54, + "learning_rate": 3.2358460341434396e-05, + "loss": 1.5486, + "step": 77590 + }, + { + "epoch": 3.54, + "learning_rate": 3.235617190718111e-05, + "loss": 1.753, + "step": 77600 + }, + { + "epoch": 3.54, + "learning_rate": 3.2353883472927824e-05, + "loss": 1.6771, + "step": 77610 + }, + { + "epoch": 3.54, + "learning_rate": 3.2351595038674545e-05, + "loss": 1.71, + "step": 77620 + }, + { + "epoch": 3.54, + "learning_rate": 3.234930660442126e-05, + "loss": 1.7746, + "step": 77630 + }, + { + "epoch": 3.55, + "learning_rate": 3.2347018170167974e-05, + "loss": 1.6932, + "step": 77640 + }, + { + "epoch": 3.55, + "learning_rate": 3.234472973591469e-05, + "loss": 1.6793, + "step": 77650 + }, + { + "epoch": 3.55, + "learning_rate": 3.23424413016614e-05, + "loss": 1.7064, + "step": 77660 + }, + { + "epoch": 3.55, + "learning_rate": 3.2340152867408116e-05, + "loss": 1.5605, + "step": 77670 + }, + { + "epoch": 3.55, + "learning_rate": 3.233786443315484e-05, + "loss": 1.8103, + "step": 77680 + }, + { + "epoch": 3.55, + "learning_rate": 3.233557599890155e-05, + "loss": 1.6155, + "step": 77690 + }, + { + "epoch": 3.55, + "learning_rate": 3.2333287564648265e-05, + "loss": 1.6922, + "step": 77700 + }, + { + "epoch": 3.55, + "learning_rate": 3.2330999130394986e-05, + "loss": 1.6491, + "step": 77710 + }, + { + "epoch": 3.55, + "learning_rate": 3.23287106961417e-05, + "loss": 1.5504, + "step": 77720 + }, + { + "epoch": 3.55, + "learning_rate": 3.2326422261888414e-05, + "loss": 1.6911, + "step": 77730 + }, + { + "epoch": 3.55, + "learning_rate": 3.2324133827635135e-05, + "loss": 1.8003, + "step": 77740 + }, + { + "epoch": 3.55, + "learning_rate": 3.232184539338185e-05, + "loss": 1.6766, + "step": 77750 + }, + { + "epoch": 3.55, + "learning_rate": 3.2319556959128563e-05, + "loss": 1.5878, + "step": 77760 + }, + { + "epoch": 3.55, + "learning_rate": 3.2317268524875284e-05, + "loss": 1.6403, + "step": 77770 + }, + { + "epoch": 3.55, + "learning_rate": 3.2314980090622e-05, + "loss": 1.6473, + "step": 77780 + }, + { + "epoch": 3.55, + "learning_rate": 3.231269165636871e-05, + "loss": 1.5951, + "step": 77790 + }, + { + "epoch": 3.55, + "learning_rate": 3.2310403222115434e-05, + "loss": 1.5374, + "step": 77800 + }, + { + "epoch": 3.55, + "learning_rate": 3.230811478786215e-05, + "loss": 1.6006, + "step": 77810 + }, + { + "epoch": 3.55, + "learning_rate": 3.230582635360886e-05, + "loss": 1.5512, + "step": 77820 + }, + { + "epoch": 3.55, + "learning_rate": 3.230353791935558e-05, + "loss": 1.6216, + "step": 77830 + }, + { + "epoch": 3.55, + "learning_rate": 3.23012494851023e-05, + "loss": 1.5199, + "step": 77840 + }, + { + "epoch": 3.55, + "learning_rate": 3.229896105084901e-05, + "loss": 1.712, + "step": 77850 + }, + { + "epoch": 3.56, + "learning_rate": 3.2296672616595725e-05, + "loss": 1.7751, + "step": 77860 + }, + { + "epoch": 3.56, + "learning_rate": 3.229438418234244e-05, + "loss": 1.6454, + "step": 77870 + }, + { + "epoch": 3.56, + "learning_rate": 3.229209574808916e-05, + "loss": 1.5657, + "step": 77880 + }, + { + "epoch": 3.56, + "learning_rate": 3.2289807313835874e-05, + "loss": 1.7216, + "step": 77890 + }, + { + "epoch": 3.56, + "learning_rate": 3.228751887958259e-05, + "loss": 1.6073, + "step": 77900 + }, + { + "epoch": 3.56, + "learning_rate": 3.228523044532931e-05, + "loss": 1.5301, + "step": 77910 + }, + { + "epoch": 3.56, + "learning_rate": 3.2282942011076023e-05, + "loss": 1.6478, + "step": 77920 + }, + { + "epoch": 3.56, + "learning_rate": 3.228065357682274e-05, + "loss": 1.5391, + "step": 77930 + }, + { + "epoch": 3.56, + "learning_rate": 3.227836514256946e-05, + "loss": 1.7804, + "step": 77940 + }, + { + "epoch": 3.56, + "learning_rate": 3.227607670831617e-05, + "loss": 1.7333, + "step": 77950 + }, + { + "epoch": 3.56, + "learning_rate": 3.227378827406289e-05, + "loss": 1.6855, + "step": 77960 + }, + { + "epoch": 3.56, + "learning_rate": 3.227149983980961e-05, + "loss": 1.768, + "step": 77970 + }, + { + "epoch": 3.56, + "learning_rate": 3.226921140555632e-05, + "loss": 1.5113, + "step": 77980 + }, + { + "epoch": 3.56, + "learning_rate": 3.2266922971303036e-05, + "loss": 1.7422, + "step": 77990 + }, + { + "epoch": 3.56, + "learning_rate": 3.226463453704976e-05, + "loss": 1.6449, + "step": 78000 + }, + { + "epoch": 3.56, + "learning_rate": 3.226234610279647e-05, + "loss": 1.5107, + "step": 78010 + }, + { + "epoch": 3.56, + "learning_rate": 3.2260057668543185e-05, + "loss": 1.5874, + "step": 78020 + }, + { + "epoch": 3.56, + "learning_rate": 3.2257769234289906e-05, + "loss": 1.5491, + "step": 78030 + }, + { + "epoch": 3.56, + "learning_rate": 3.2255480800036613e-05, + "loss": 1.5895, + "step": 78040 + }, + { + "epoch": 3.56, + "learning_rate": 3.225319236578333e-05, + "loss": 1.7431, + "step": 78050 + }, + { + "epoch": 3.56, + "learning_rate": 3.225090393153005e-05, + "loss": 1.6326, + "step": 78060 + }, + { + "epoch": 3.56, + "learning_rate": 3.224861549727676e-05, + "loss": 1.7783, + "step": 78070 + }, + { + "epoch": 3.57, + "learning_rate": 3.224632706302348e-05, + "loss": 1.6578, + "step": 78080 + }, + { + "epoch": 3.57, + "learning_rate": 3.22440386287702e-05, + "loss": 1.8406, + "step": 78090 + }, + { + "epoch": 3.57, + "learning_rate": 3.224175019451691e-05, + "loss": 1.8701, + "step": 78100 + }, + { + "epoch": 3.57, + "learning_rate": 3.2239461760263626e-05, + "loss": 1.6868, + "step": 78110 + }, + { + "epoch": 3.57, + "learning_rate": 3.223717332601035e-05, + "loss": 1.5885, + "step": 78120 + }, + { + "epoch": 3.57, + "learning_rate": 3.223488489175706e-05, + "loss": 1.7367, + "step": 78130 + }, + { + "epoch": 3.57, + "learning_rate": 3.2232596457503775e-05, + "loss": 1.5605, + "step": 78140 + }, + { + "epoch": 3.57, + "learning_rate": 3.2230308023250496e-05, + "loss": 1.7429, + "step": 78150 + }, + { + "epoch": 3.57, + "learning_rate": 3.222801958899721e-05, + "loss": 1.8632, + "step": 78160 + }, + { + "epoch": 3.57, + "learning_rate": 3.2225731154743924e-05, + "loss": 1.7679, + "step": 78170 + }, + { + "epoch": 3.57, + "learning_rate": 3.2223442720490645e-05, + "loss": 1.6784, + "step": 78180 + }, + { + "epoch": 3.57, + "learning_rate": 3.222115428623736e-05, + "loss": 1.8321, + "step": 78190 + }, + { + "epoch": 3.57, + "learning_rate": 3.2218865851984073e-05, + "loss": 1.6323, + "step": 78200 + }, + { + "epoch": 3.57, + "learning_rate": 3.2216577417730794e-05, + "loss": 1.6403, + "step": 78210 + }, + { + "epoch": 3.57, + "learning_rate": 3.221428898347751e-05, + "loss": 1.7277, + "step": 78220 + }, + { + "epoch": 3.57, + "learning_rate": 3.221200054922422e-05, + "loss": 1.7748, + "step": 78230 + }, + { + "epoch": 3.57, + "learning_rate": 3.220971211497094e-05, + "loss": 1.6014, + "step": 78240 + }, + { + "epoch": 3.57, + "learning_rate": 3.220742368071765e-05, + "loss": 1.6125, + "step": 78250 + }, + { + "epoch": 3.57, + "learning_rate": 3.220513524646437e-05, + "loss": 1.7018, + "step": 78260 + }, + { + "epoch": 3.57, + "learning_rate": 3.2202846812211086e-05, + "loss": 1.6987, + "step": 78270 + }, + { + "epoch": 3.57, + "learning_rate": 3.22005583779578e-05, + "loss": 1.7886, + "step": 78280 + }, + { + "epoch": 3.57, + "learning_rate": 3.219826994370452e-05, + "loss": 1.5049, + "step": 78290 + }, + { + "epoch": 3.58, + "learning_rate": 3.2195981509451235e-05, + "loss": 1.5657, + "step": 78300 + }, + { + "epoch": 3.58, + "learning_rate": 3.219369307519795e-05, + "loss": 1.669, + "step": 78310 + }, + { + "epoch": 3.58, + "learning_rate": 3.219140464094467e-05, + "loss": 1.6048, + "step": 78320 + }, + { + "epoch": 3.58, + "learning_rate": 3.2189116206691384e-05, + "loss": 1.695, + "step": 78330 + }, + { + "epoch": 3.58, + "learning_rate": 3.21868277724381e-05, + "loss": 1.7106, + "step": 78340 + }, + { + "epoch": 3.58, + "learning_rate": 3.218453933818482e-05, + "loss": 1.6297, + "step": 78350 + }, + { + "epoch": 3.58, + "learning_rate": 3.2182250903931533e-05, + "loss": 1.6915, + "step": 78360 + }, + { + "epoch": 3.58, + "learning_rate": 3.217996246967825e-05, + "loss": 1.5926, + "step": 78370 + }, + { + "epoch": 3.58, + "learning_rate": 3.217767403542496e-05, + "loss": 1.6168, + "step": 78380 + }, + { + "epoch": 3.58, + "learning_rate": 3.217538560117168e-05, + "loss": 1.576, + "step": 78390 + }, + { + "epoch": 3.58, + "learning_rate": 3.21730971669184e-05, + "loss": 1.9817, + "step": 78400 + }, + { + "epoch": 3.58, + "learning_rate": 3.217080873266511e-05, + "loss": 1.5982, + "step": 78410 + }, + { + "epoch": 3.58, + "learning_rate": 3.216852029841183e-05, + "loss": 1.773, + "step": 78420 + }, + { + "epoch": 3.58, + "learning_rate": 3.2166231864158546e-05, + "loss": 1.8565, + "step": 78430 + }, + { + "epoch": 3.58, + "learning_rate": 3.216394342990526e-05, + "loss": 1.624, + "step": 78440 + }, + { + "epoch": 3.58, + "learning_rate": 3.2161654995651974e-05, + "loss": 1.7994, + "step": 78450 + }, + { + "epoch": 3.58, + "learning_rate": 3.215936656139869e-05, + "loss": 1.6861, + "step": 78460 + }, + { + "epoch": 3.58, + "learning_rate": 3.215707812714541e-05, + "loss": 1.6971, + "step": 78470 + }, + { + "epoch": 3.58, + "learning_rate": 3.215478969289212e-05, + "loss": 1.9033, + "step": 78480 + }, + { + "epoch": 3.58, + "learning_rate": 3.215250125863884e-05, + "loss": 1.4987, + "step": 78490 + }, + { + "epoch": 3.58, + "learning_rate": 3.215021282438556e-05, + "loss": 1.5623, + "step": 78500 + }, + { + "epoch": 3.58, + "learning_rate": 3.214792439013227e-05, + "loss": 1.6077, + "step": 78510 + }, + { + "epoch": 3.59, + "learning_rate": 3.214563595587899e-05, + "loss": 1.7211, + "step": 78520 + }, + { + "epoch": 3.59, + "learning_rate": 3.214334752162571e-05, + "loss": 1.6721, + "step": 78530 + }, + { + "epoch": 3.59, + "learning_rate": 3.214105908737242e-05, + "loss": 1.5019, + "step": 78540 + }, + { + "epoch": 3.59, + "learning_rate": 3.2138770653119136e-05, + "loss": 1.5181, + "step": 78550 + }, + { + "epoch": 3.59, + "learning_rate": 3.213648221886586e-05, + "loss": 1.8064, + "step": 78560 + }, + { + "epoch": 3.59, + "learning_rate": 3.213419378461257e-05, + "loss": 1.7079, + "step": 78570 + }, + { + "epoch": 3.59, + "learning_rate": 3.2131905350359285e-05, + "loss": 1.5535, + "step": 78580 + }, + { + "epoch": 3.59, + "learning_rate": 3.2129616916106006e-05, + "loss": 1.712, + "step": 78590 + }, + { + "epoch": 3.59, + "learning_rate": 3.212732848185272e-05, + "loss": 1.6997, + "step": 78600 + }, + { + "epoch": 3.59, + "learning_rate": 3.2125040047599434e-05, + "loss": 1.6238, + "step": 78610 + }, + { + "epoch": 3.59, + "learning_rate": 3.2122751613346155e-05, + "loss": 1.7036, + "step": 78620 + }, + { + "epoch": 3.59, + "learning_rate": 3.212046317909287e-05, + "loss": 1.6542, + "step": 78630 + }, + { + "epoch": 3.59, + "learning_rate": 3.211817474483958e-05, + "loss": 1.6513, + "step": 78640 + }, + { + "epoch": 3.59, + "learning_rate": 3.21158863105863e-05, + "loss": 1.9484, + "step": 78650 + }, + { + "epoch": 3.59, + "learning_rate": 3.211359787633301e-05, + "loss": 1.7921, + "step": 78660 + }, + { + "epoch": 3.59, + "learning_rate": 3.2111309442079726e-05, + "loss": 1.805, + "step": 78670 + }, + { + "epoch": 3.59, + "learning_rate": 3.210902100782645e-05, + "loss": 1.7106, + "step": 78680 + }, + { + "epoch": 3.59, + "learning_rate": 3.210673257357316e-05, + "loss": 1.6199, + "step": 78690 + }, + { + "epoch": 3.59, + "learning_rate": 3.2104444139319875e-05, + "loss": 1.6199, + "step": 78700 + }, + { + "epoch": 3.59, + "learning_rate": 3.2102155705066596e-05, + "loss": 1.6098, + "step": 78710 + }, + { + "epoch": 3.59, + "learning_rate": 3.209986727081331e-05, + "loss": 1.628, + "step": 78720 + }, + { + "epoch": 3.6, + "learning_rate": 3.2097578836560024e-05, + "loss": 1.4949, + "step": 78730 + }, + { + "epoch": 3.6, + "learning_rate": 3.2095290402306745e-05, + "loss": 1.6325, + "step": 78740 + }, + { + "epoch": 3.6, + "learning_rate": 3.209300196805346e-05, + "loss": 1.6204, + "step": 78750 + }, + { + "epoch": 3.6, + "learning_rate": 3.209071353380017e-05, + "loss": 1.5225, + "step": 78760 + }, + { + "epoch": 3.6, + "learning_rate": 3.2088425099546894e-05, + "loss": 1.8724, + "step": 78770 + }, + { + "epoch": 3.6, + "learning_rate": 3.208613666529361e-05, + "loss": 1.6619, + "step": 78780 + }, + { + "epoch": 3.6, + "learning_rate": 3.208384823104032e-05, + "loss": 1.6951, + "step": 78790 + }, + { + "epoch": 3.6, + "learning_rate": 3.2081559796787043e-05, + "loss": 1.7784, + "step": 78800 + }, + { + "epoch": 3.6, + "learning_rate": 3.207927136253376e-05, + "loss": 1.7251, + "step": 78810 + }, + { + "epoch": 3.6, + "learning_rate": 3.207698292828047e-05, + "loss": 1.7584, + "step": 78820 + }, + { + "epoch": 3.6, + "learning_rate": 3.2074694494027186e-05, + "loss": 1.7043, + "step": 78830 + }, + { + "epoch": 3.6, + "learning_rate": 3.20724060597739e-05, + "loss": 1.5972, + "step": 78840 + }, + { + "epoch": 3.6, + "learning_rate": 3.207011762552062e-05, + "loss": 1.4774, + "step": 78850 + }, + { + "epoch": 3.6, + "learning_rate": 3.2067829191267335e-05, + "loss": 1.6825, + "step": 78860 + }, + { + "epoch": 3.6, + "learning_rate": 3.206554075701405e-05, + "loss": 1.7122, + "step": 78870 + }, + { + "epoch": 3.6, + "learning_rate": 3.206325232276077e-05, + "loss": 1.6941, + "step": 78880 + }, + { + "epoch": 3.6, + "learning_rate": 3.2060963888507484e-05, + "loss": 1.5137, + "step": 78890 + }, + { + "epoch": 3.6, + "learning_rate": 3.20586754542542e-05, + "loss": 1.7409, + "step": 78900 + }, + { + "epoch": 3.6, + "learning_rate": 3.205638702000092e-05, + "loss": 1.6512, + "step": 78910 + }, + { + "epoch": 3.6, + "learning_rate": 3.205409858574763e-05, + "loss": 1.5237, + "step": 78920 + }, + { + "epoch": 3.6, + "learning_rate": 3.205181015149435e-05, + "loss": 1.6453, + "step": 78930 + }, + { + "epoch": 3.6, + "learning_rate": 3.204952171724107e-05, + "loss": 1.5692, + "step": 78940 + }, + { + "epoch": 3.61, + "learning_rate": 3.204723328298778e-05, + "loss": 1.7068, + "step": 78950 + }, + { + "epoch": 3.61, + "learning_rate": 3.20449448487345e-05, + "loss": 1.7796, + "step": 78960 + }, + { + "epoch": 3.61, + "learning_rate": 3.204265641448122e-05, + "loss": 1.6762, + "step": 78970 + }, + { + "epoch": 3.61, + "learning_rate": 3.204036798022793e-05, + "loss": 1.8082, + "step": 78980 + }, + { + "epoch": 3.61, + "learning_rate": 3.2038079545974646e-05, + "loss": 1.502, + "step": 78990 + }, + { + "epoch": 3.61, + "learning_rate": 3.203579111172137e-05, + "loss": 1.6254, + "step": 79000 + }, + { + "epoch": 3.61, + "learning_rate": 3.203350267746808e-05, + "loss": 1.598, + "step": 79010 + }, + { + "epoch": 3.61, + "learning_rate": 3.2031214243214795e-05, + "loss": 1.5459, + "step": 79020 + }, + { + "epoch": 3.61, + "learning_rate": 3.202892580896151e-05, + "loss": 1.6756, + "step": 79030 + }, + { + "epoch": 3.61, + "learning_rate": 3.202663737470822e-05, + "loss": 1.7069, + "step": 79040 + }, + { + "epoch": 3.61, + "learning_rate": 3.202434894045494e-05, + "loss": 1.7028, + "step": 79050 + }, + { + "epoch": 3.61, + "learning_rate": 3.202206050620166e-05, + "loss": 1.5658, + "step": 79060 + }, + { + "epoch": 3.61, + "learning_rate": 3.201977207194837e-05, + "loss": 1.6257, + "step": 79070 + }, + { + "epoch": 3.61, + "learning_rate": 3.2017483637695087e-05, + "loss": 1.7366, + "step": 79080 + }, + { + "epoch": 3.61, + "learning_rate": 3.201519520344181e-05, + "loss": 1.5959, + "step": 79090 + }, + { + "epoch": 3.61, + "learning_rate": 3.201290676918852e-05, + "loss": 1.6624, + "step": 79100 + }, + { + "epoch": 3.61, + "learning_rate": 3.2010618334935236e-05, + "loss": 1.7168, + "step": 79110 + }, + { + "epoch": 3.61, + "learning_rate": 3.200832990068196e-05, + "loss": 1.6676, + "step": 79120 + }, + { + "epoch": 3.61, + "learning_rate": 3.200604146642867e-05, + "loss": 1.7246, + "step": 79130 + }, + { + "epoch": 3.61, + "learning_rate": 3.2003753032175385e-05, + "loss": 1.8727, + "step": 79140 + }, + { + "epoch": 3.61, + "learning_rate": 3.2001464597922106e-05, + "loss": 1.6817, + "step": 79150 + }, + { + "epoch": 3.61, + "learning_rate": 3.199917616366882e-05, + "loss": 1.6978, + "step": 79160 + }, + { + "epoch": 3.62, + "learning_rate": 3.1996887729415534e-05, + "loss": 1.4482, + "step": 79170 + }, + { + "epoch": 3.62, + "learning_rate": 3.1994599295162255e-05, + "loss": 1.5454, + "step": 79180 + }, + { + "epoch": 3.62, + "learning_rate": 3.199231086090897e-05, + "loss": 1.6046, + "step": 79190 + }, + { + "epoch": 3.62, + "learning_rate": 3.199002242665568e-05, + "loss": 1.567, + "step": 79200 + }, + { + "epoch": 3.62, + "learning_rate": 3.1987733992402404e-05, + "loss": 1.6007, + "step": 79210 + }, + { + "epoch": 3.62, + "learning_rate": 3.198544555814912e-05, + "loss": 1.5486, + "step": 79220 + }, + { + "epoch": 3.62, + "learning_rate": 3.198315712389583e-05, + "loss": 1.5941, + "step": 79230 + }, + { + "epoch": 3.62, + "learning_rate": 3.1980868689642547e-05, + "loss": 1.6228, + "step": 79240 + }, + { + "epoch": 3.62, + "learning_rate": 3.197858025538926e-05, + "loss": 1.6974, + "step": 79250 + }, + { + "epoch": 3.62, + "learning_rate": 3.197629182113598e-05, + "loss": 1.5132, + "step": 79260 + }, + { + "epoch": 3.62, + "learning_rate": 3.1974003386882696e-05, + "loss": 1.687, + "step": 79270 + }, + { + "epoch": 3.62, + "learning_rate": 3.197171495262941e-05, + "loss": 1.6733, + "step": 79280 + }, + { + "epoch": 3.62, + "learning_rate": 3.196942651837613e-05, + "loss": 1.6182, + "step": 79290 + }, + { + "epoch": 3.62, + "learning_rate": 3.1967138084122845e-05, + "loss": 1.6824, + "step": 79300 + }, + { + "epoch": 3.62, + "learning_rate": 3.196484964986956e-05, + "loss": 1.6468, + "step": 79310 + }, + { + "epoch": 3.62, + "learning_rate": 3.196256121561628e-05, + "loss": 1.6849, + "step": 79320 + }, + { + "epoch": 3.62, + "learning_rate": 3.1960272781362994e-05, + "loss": 1.6616, + "step": 79330 + }, + { + "epoch": 3.62, + "learning_rate": 3.195798434710971e-05, + "loss": 1.6884, + "step": 79340 + }, + { + "epoch": 3.62, + "learning_rate": 3.195569591285643e-05, + "loss": 1.6277, + "step": 79350 + }, + { + "epoch": 3.62, + "learning_rate": 3.195340747860314e-05, + "loss": 1.7661, + "step": 79360 + }, + { + "epoch": 3.62, + "learning_rate": 3.195111904434986e-05, + "loss": 1.6057, + "step": 79370 + }, + { + "epoch": 3.62, + "learning_rate": 3.194883061009658e-05, + "loss": 1.8014, + "step": 79380 + }, + { + "epoch": 3.63, + "learning_rate": 3.194654217584329e-05, + "loss": 1.6242, + "step": 79390 + }, + { + "epoch": 3.63, + "learning_rate": 3.194425374159001e-05, + "loss": 1.758, + "step": 79400 + }, + { + "epoch": 3.63, + "learning_rate": 3.194196530733673e-05, + "loss": 1.6627, + "step": 79410 + }, + { + "epoch": 3.63, + "learning_rate": 3.193967687308344e-05, + "loss": 1.7027, + "step": 79420 + }, + { + "epoch": 3.63, + "learning_rate": 3.193738843883015e-05, + "loss": 1.6776, + "step": 79430 + }, + { + "epoch": 3.63, + "learning_rate": 3.193510000457687e-05, + "loss": 1.5284, + "step": 79440 + }, + { + "epoch": 3.63, + "learning_rate": 3.1932811570323584e-05, + "loss": 1.6886, + "step": 79450 + }, + { + "epoch": 3.63, + "learning_rate": 3.19305231360703e-05, + "loss": 1.6294, + "step": 79460 + }, + { + "epoch": 3.63, + "learning_rate": 3.192823470181702e-05, + "loss": 1.5885, + "step": 79470 + }, + { + "epoch": 3.63, + "learning_rate": 3.192594626756373e-05, + "loss": 1.6837, + "step": 79480 + }, + { + "epoch": 3.63, + "learning_rate": 3.192365783331045e-05, + "loss": 1.4936, + "step": 79490 + }, + { + "epoch": 3.63, + "learning_rate": 3.192136939905717e-05, + "loss": 1.61, + "step": 79500 + }, + { + "epoch": 3.63, + "learning_rate": 3.191908096480388e-05, + "loss": 1.7411, + "step": 79510 + }, + { + "epoch": 3.63, + "learning_rate": 3.1916792530550597e-05, + "loss": 1.6813, + "step": 79520 + }, + { + "epoch": 3.63, + "learning_rate": 3.191450409629732e-05, + "loss": 1.6126, + "step": 79530 + }, + { + "epoch": 3.63, + "learning_rate": 3.191221566204403e-05, + "loss": 1.9506, + "step": 79540 + }, + { + "epoch": 3.63, + "learning_rate": 3.1909927227790746e-05, + "loss": 1.5959, + "step": 79550 + }, + { + "epoch": 3.63, + "learning_rate": 3.190763879353747e-05, + "loss": 1.6521, + "step": 79560 + }, + { + "epoch": 3.63, + "learning_rate": 3.190535035928418e-05, + "loss": 1.5688, + "step": 79570 + }, + { + "epoch": 3.63, + "learning_rate": 3.1903061925030895e-05, + "loss": 1.7462, + "step": 79580 + }, + { + "epoch": 3.63, + "learning_rate": 3.1900773490777616e-05, + "loss": 1.7627, + "step": 79590 + }, + { + "epoch": 3.63, + "learning_rate": 3.189848505652433e-05, + "loss": 1.3863, + "step": 79600 + }, + { + "epoch": 3.64, + "learning_rate": 3.1896196622271044e-05, + "loss": 1.538, + "step": 79610 + }, + { + "epoch": 3.64, + "learning_rate": 3.189390818801776e-05, + "loss": 1.9543, + "step": 79620 + }, + { + "epoch": 3.64, + "learning_rate": 3.189161975376447e-05, + "loss": 1.6517, + "step": 79630 + }, + { + "epoch": 3.64, + "learning_rate": 3.1889331319511186e-05, + "loss": 1.5907, + "step": 79640 + }, + { + "epoch": 3.64, + "learning_rate": 3.188704288525791e-05, + "loss": 1.5732, + "step": 79650 + }, + { + "epoch": 3.64, + "learning_rate": 3.188475445100462e-05, + "loss": 1.5969, + "step": 79660 + }, + { + "epoch": 3.64, + "learning_rate": 3.1882466016751336e-05, + "loss": 1.4243, + "step": 79670 + }, + { + "epoch": 3.64, + "learning_rate": 3.1880177582498057e-05, + "loss": 1.6609, + "step": 79680 + }, + { + "epoch": 3.64, + "learning_rate": 3.187788914824477e-05, + "loss": 1.5267, + "step": 79690 + }, + { + "epoch": 3.64, + "learning_rate": 3.1875600713991485e-05, + "loss": 1.7148, + "step": 79700 + }, + { + "epoch": 3.64, + "learning_rate": 3.1873312279738206e-05, + "loss": 1.6603, + "step": 79710 + }, + { + "epoch": 3.64, + "learning_rate": 3.187102384548492e-05, + "loss": 1.7487, + "step": 79720 + }, + { + "epoch": 3.64, + "learning_rate": 3.1868735411231634e-05, + "loss": 1.6041, + "step": 79730 + }, + { + "epoch": 3.64, + "learning_rate": 3.1866446976978355e-05, + "loss": 1.8743, + "step": 79740 + }, + { + "epoch": 3.64, + "learning_rate": 3.186415854272507e-05, + "loss": 1.5824, + "step": 79750 + }, + { + "epoch": 3.64, + "learning_rate": 3.186187010847178e-05, + "loss": 1.5402, + "step": 79760 + }, + { + "epoch": 3.64, + "learning_rate": 3.1859581674218504e-05, + "loss": 1.6107, + "step": 79770 + }, + { + "epoch": 3.64, + "learning_rate": 3.185729323996522e-05, + "loss": 1.6749, + "step": 79780 + }, + { + "epoch": 3.64, + "learning_rate": 3.185500480571193e-05, + "loss": 1.4565, + "step": 79790 + }, + { + "epoch": 3.64, + "learning_rate": 3.185271637145865e-05, + "loss": 1.7672, + "step": 79800 + }, + { + "epoch": 3.64, + "learning_rate": 3.185042793720537e-05, + "loss": 1.6241, + "step": 79810 + }, + { + "epoch": 3.64, + "learning_rate": 3.184813950295208e-05, + "loss": 1.6916, + "step": 79820 + }, + { + "epoch": 3.65, + "learning_rate": 3.1845851068698796e-05, + "loss": 1.7266, + "step": 79830 + }, + { + "epoch": 3.65, + "learning_rate": 3.184356263444551e-05, + "loss": 1.5944, + "step": 79840 + }, + { + "epoch": 3.65, + "learning_rate": 3.184127420019223e-05, + "loss": 1.5588, + "step": 79850 + }, + { + "epoch": 3.65, + "learning_rate": 3.1838985765938945e-05, + "loss": 1.591, + "step": 79860 + }, + { + "epoch": 3.65, + "learning_rate": 3.183669733168566e-05, + "loss": 1.815, + "step": 79870 + }, + { + "epoch": 3.65, + "learning_rate": 3.183440889743238e-05, + "loss": 1.6056, + "step": 79880 + }, + { + "epoch": 3.65, + "learning_rate": 3.1832120463179094e-05, + "loss": 1.5727, + "step": 79890 + }, + { + "epoch": 3.65, + "learning_rate": 3.182983202892581e-05, + "loss": 1.7729, + "step": 79900 + }, + { + "epoch": 3.65, + "learning_rate": 3.182754359467253e-05, + "loss": 1.5837, + "step": 79910 + }, + { + "epoch": 3.65, + "learning_rate": 3.182525516041924e-05, + "loss": 1.6584, + "step": 79920 + }, + { + "epoch": 3.65, + "learning_rate": 3.182296672616596e-05, + "loss": 1.7245, + "step": 79930 + }, + { + "epoch": 3.65, + "learning_rate": 3.182067829191268e-05, + "loss": 1.579, + "step": 79940 + }, + { + "epoch": 3.65, + "learning_rate": 3.181838985765939e-05, + "loss": 1.7137, + "step": 79950 + }, + { + "epoch": 3.65, + "learning_rate": 3.1816101423406107e-05, + "loss": 1.477, + "step": 79960 + }, + { + "epoch": 3.65, + "learning_rate": 3.181381298915283e-05, + "loss": 1.8135, + "step": 79970 + }, + { + "epoch": 3.65, + "learning_rate": 3.181152455489954e-05, + "loss": 1.7322, + "step": 79980 + }, + { + "epoch": 3.65, + "learning_rate": 3.1809236120646256e-05, + "loss": 1.8194, + "step": 79990 + }, + { + "epoch": 3.65, + "learning_rate": 3.1806947686392977e-05, + "loss": 1.6555, + "step": 80000 + }, + { + "epoch": 3.65, + "learning_rate": 3.180465925213969e-05, + "loss": 1.6452, + "step": 80010 + }, + { + "epoch": 3.65, + "learning_rate": 3.18023708178864e-05, + "loss": 1.6037, + "step": 80020 + }, + { + "epoch": 3.65, + "learning_rate": 3.180008238363312e-05, + "loss": 1.6343, + "step": 80030 + }, + { + "epoch": 3.65, + "learning_rate": 3.179779394937983e-05, + "loss": 1.7176, + "step": 80040 + }, + { + "epoch": 3.66, + "learning_rate": 3.179550551512655e-05, + "loss": 1.6261, + "step": 80050 + }, + { + "epoch": 3.66, + "learning_rate": 3.179321708087327e-05, + "loss": 1.5632, + "step": 80060 + }, + { + "epoch": 3.66, + "learning_rate": 3.179092864661998e-05, + "loss": 1.6769, + "step": 80070 + }, + { + "epoch": 3.66, + "learning_rate": 3.1788640212366696e-05, + "loss": 1.622, + "step": 80080 + }, + { + "epoch": 3.66, + "learning_rate": 3.178635177811342e-05, + "loss": 1.658, + "step": 80090 + }, + { + "epoch": 3.66, + "learning_rate": 3.178406334386013e-05, + "loss": 1.5603, + "step": 80100 + }, + { + "epoch": 3.66, + "learning_rate": 3.1781774909606846e-05, + "loss": 1.6809, + "step": 80110 + }, + { + "epoch": 3.66, + "learning_rate": 3.1779486475353567e-05, + "loss": 1.701, + "step": 80120 + }, + { + "epoch": 3.66, + "learning_rate": 3.177719804110028e-05, + "loss": 1.5526, + "step": 80130 + }, + { + "epoch": 3.66, + "learning_rate": 3.1774909606846995e-05, + "loss": 1.7751, + "step": 80140 + }, + { + "epoch": 3.66, + "learning_rate": 3.1772621172593716e-05, + "loss": 1.7999, + "step": 80150 + }, + { + "epoch": 3.66, + "learning_rate": 3.177033273834043e-05, + "loss": 1.7246, + "step": 80160 + }, + { + "epoch": 3.66, + "learning_rate": 3.1768044304087144e-05, + "loss": 1.612, + "step": 80170 + }, + { + "epoch": 3.66, + "learning_rate": 3.1765755869833865e-05, + "loss": 1.4525, + "step": 80180 + }, + { + "epoch": 3.66, + "learning_rate": 3.176346743558058e-05, + "loss": 1.6197, + "step": 80190 + }, + { + "epoch": 3.66, + "learning_rate": 3.176117900132729e-05, + "loss": 1.5733, + "step": 80200 + }, + { + "epoch": 3.66, + "learning_rate": 3.1758890567074014e-05, + "loss": 1.7781, + "step": 80210 + }, + { + "epoch": 3.66, + "learning_rate": 3.175660213282072e-05, + "loss": 1.543, + "step": 80220 + }, + { + "epoch": 3.66, + "learning_rate": 3.175431369856744e-05, + "loss": 1.5559, + "step": 80230 + }, + { + "epoch": 3.66, + "learning_rate": 3.1752025264314156e-05, + "loss": 1.5848, + "step": 80240 + }, + { + "epoch": 3.66, + "learning_rate": 3.174973683006087e-05, + "loss": 1.6863, + "step": 80250 + }, + { + "epoch": 3.66, + "learning_rate": 3.174744839580759e-05, + "loss": 1.8502, + "step": 80260 + }, + { + "epoch": 3.67, + "learning_rate": 3.1745159961554306e-05, + "loss": 1.8257, + "step": 80270 + }, + { + "epoch": 3.67, + "learning_rate": 3.174287152730102e-05, + "loss": 1.7235, + "step": 80280 + }, + { + "epoch": 3.67, + "learning_rate": 3.174058309304774e-05, + "loss": 1.5162, + "step": 80290 + }, + { + "epoch": 3.67, + "learning_rate": 3.1738294658794455e-05, + "loss": 1.7164, + "step": 80300 + }, + { + "epoch": 3.67, + "learning_rate": 3.173600622454117e-05, + "loss": 1.8426, + "step": 80310 + }, + { + "epoch": 3.67, + "learning_rate": 3.173371779028789e-05, + "loss": 1.7978, + "step": 80320 + }, + { + "epoch": 3.67, + "learning_rate": 3.1731429356034604e-05, + "loss": 1.5888, + "step": 80330 + }, + { + "epoch": 3.67, + "learning_rate": 3.172914092178132e-05, + "loss": 1.5468, + "step": 80340 + }, + { + "epoch": 3.67, + "learning_rate": 3.172685248752804e-05, + "loss": 1.5859, + "step": 80350 + }, + { + "epoch": 3.67, + "learning_rate": 3.172456405327475e-05, + "loss": 1.6549, + "step": 80360 + }, + { + "epoch": 3.67, + "learning_rate": 3.172227561902147e-05, + "loss": 1.6692, + "step": 80370 + }, + { + "epoch": 3.67, + "learning_rate": 3.171998718476819e-05, + "loss": 1.6627, + "step": 80380 + }, + { + "epoch": 3.67, + "learning_rate": 3.17176987505149e-05, + "loss": 1.5038, + "step": 80390 + }, + { + "epoch": 3.67, + "learning_rate": 3.1715410316261616e-05, + "loss": 1.5498, + "step": 80400 + }, + { + "epoch": 3.67, + "learning_rate": 3.171312188200833e-05, + "loss": 1.5674, + "step": 80410 + }, + { + "epoch": 3.67, + "learning_rate": 3.1710833447755045e-05, + "loss": 1.7757, + "step": 80420 + }, + { + "epoch": 3.67, + "learning_rate": 3.170854501350176e-05, + "loss": 1.7192, + "step": 80430 + }, + { + "epoch": 3.67, + "learning_rate": 3.170625657924848e-05, + "loss": 1.7983, + "step": 80440 + }, + { + "epoch": 3.67, + "learning_rate": 3.1703968144995194e-05, + "loss": 1.8378, + "step": 80450 + }, + { + "epoch": 3.67, + "learning_rate": 3.170167971074191e-05, + "loss": 1.6136, + "step": 80460 + }, + { + "epoch": 3.67, + "learning_rate": 3.169939127648863e-05, + "loss": 1.7424, + "step": 80470 + }, + { + "epoch": 3.67, + "learning_rate": 3.169710284223534e-05, + "loss": 1.4895, + "step": 80480 + }, + { + "epoch": 3.68, + "learning_rate": 3.169481440798206e-05, + "loss": 1.7619, + "step": 80490 + }, + { + "epoch": 3.68, + "learning_rate": 3.169252597372878e-05, + "loss": 1.6696, + "step": 80500 + }, + { + "epoch": 3.68, + "learning_rate": 3.169023753947549e-05, + "loss": 1.5832, + "step": 80510 + }, + { + "epoch": 3.68, + "learning_rate": 3.1687949105222206e-05, + "loss": 1.6422, + "step": 80520 + }, + { + "epoch": 3.68, + "learning_rate": 3.168566067096893e-05, + "loss": 1.5817, + "step": 80530 + }, + { + "epoch": 3.68, + "learning_rate": 3.168337223671564e-05, + "loss": 1.5463, + "step": 80540 + }, + { + "epoch": 3.68, + "learning_rate": 3.1681083802462356e-05, + "loss": 1.7336, + "step": 80550 + }, + { + "epoch": 3.68, + "learning_rate": 3.1678795368209077e-05, + "loss": 1.6118, + "step": 80560 + }, + { + "epoch": 3.68, + "learning_rate": 3.167650693395579e-05, + "loss": 1.7218, + "step": 80570 + }, + { + "epoch": 3.68, + "learning_rate": 3.1674218499702505e-05, + "loss": 1.6078, + "step": 80580 + }, + { + "epoch": 3.68, + "learning_rate": 3.1671930065449226e-05, + "loss": 1.8719, + "step": 80590 + }, + { + "epoch": 3.68, + "learning_rate": 3.166964163119594e-05, + "loss": 1.6154, + "step": 80600 + }, + { + "epoch": 3.68, + "learning_rate": 3.1667353196942654e-05, + "loss": 1.767, + "step": 80610 + }, + { + "epoch": 3.68, + "learning_rate": 3.166506476268937e-05, + "loss": 1.6806, + "step": 80620 + }, + { + "epoch": 3.68, + "learning_rate": 3.166277632843608e-05, + "loss": 1.5629, + "step": 80630 + }, + { + "epoch": 3.68, + "learning_rate": 3.16604878941828e-05, + "loss": 1.6004, + "step": 80640 + }, + { + "epoch": 3.68, + "learning_rate": 3.165819945992952e-05, + "loss": 1.59, + "step": 80650 + }, + { + "epoch": 3.68, + "learning_rate": 3.165591102567623e-05, + "loss": 1.5849, + "step": 80660 + }, + { + "epoch": 3.68, + "learning_rate": 3.165362259142295e-05, + "loss": 1.6363, + "step": 80670 + }, + { + "epoch": 3.68, + "learning_rate": 3.1651334157169666e-05, + "loss": 1.5879, + "step": 80680 + }, + { + "epoch": 3.68, + "learning_rate": 3.164904572291638e-05, + "loss": 1.6486, + "step": 80690 + }, + { + "epoch": 3.68, + "learning_rate": 3.1646757288663095e-05, + "loss": 1.7051, + "step": 80700 + }, + { + "epoch": 3.69, + "learning_rate": 3.1644468854409816e-05, + "loss": 1.739, + "step": 80710 + }, + { + "epoch": 3.69, + "learning_rate": 3.164218042015653e-05, + "loss": 1.6861, + "step": 80720 + }, + { + "epoch": 3.69, + "learning_rate": 3.1639891985903244e-05, + "loss": 1.6649, + "step": 80730 + }, + { + "epoch": 3.69, + "learning_rate": 3.1637603551649965e-05, + "loss": 1.392, + "step": 80740 + }, + { + "epoch": 3.69, + "learning_rate": 3.163531511739668e-05, + "loss": 1.8005, + "step": 80750 + }, + { + "epoch": 3.69, + "learning_rate": 3.163302668314339e-05, + "loss": 1.6534, + "step": 80760 + }, + { + "epoch": 3.69, + "learning_rate": 3.1630738248890114e-05, + "loss": 1.4926, + "step": 80770 + }, + { + "epoch": 3.69, + "learning_rate": 3.162844981463683e-05, + "loss": 1.5988, + "step": 80780 + }, + { + "epoch": 3.69, + "learning_rate": 3.162616138038354e-05, + "loss": 1.5824, + "step": 80790 + }, + { + "epoch": 3.69, + "learning_rate": 3.162387294613026e-05, + "loss": 1.4357, + "step": 80800 + }, + { + "epoch": 3.69, + "learning_rate": 3.162158451187697e-05, + "loss": 1.6511, + "step": 80810 + }, + { + "epoch": 3.69, + "learning_rate": 3.161929607762369e-05, + "loss": 1.5261, + "step": 80820 + }, + { + "epoch": 3.69, + "learning_rate": 3.1617007643370406e-05, + "loss": 1.8589, + "step": 80830 + }, + { + "epoch": 3.69, + "learning_rate": 3.161471920911712e-05, + "loss": 1.5695, + "step": 80840 + }, + { + "epoch": 3.69, + "learning_rate": 3.161243077486384e-05, + "loss": 1.6224, + "step": 80850 + }, + { + "epoch": 3.69, + "learning_rate": 3.1610142340610555e-05, + "loss": 1.5713, + "step": 80860 + }, + { + "epoch": 3.69, + "learning_rate": 3.160785390635727e-05, + "loss": 1.4761, + "step": 80870 + }, + { + "epoch": 3.69, + "learning_rate": 3.160556547210399e-05, + "loss": 1.6306, + "step": 80880 + }, + { + "epoch": 3.69, + "learning_rate": 3.1603277037850704e-05, + "loss": 1.6891, + "step": 80890 + }, + { + "epoch": 3.69, + "learning_rate": 3.160098860359742e-05, + "loss": 1.4681, + "step": 80900 + }, + { + "epoch": 3.69, + "learning_rate": 3.159870016934414e-05, + "loss": 1.678, + "step": 80910 + }, + { + "epoch": 3.7, + "learning_rate": 3.159641173509085e-05, + "loss": 1.5292, + "step": 80920 + }, + { + "epoch": 3.7, + "learning_rate": 3.159412330083757e-05, + "loss": 1.466, + "step": 80930 + }, + { + "epoch": 3.7, + "learning_rate": 3.159183486658429e-05, + "loss": 1.7025, + "step": 80940 + }, + { + "epoch": 3.7, + "learning_rate": 3.1589546432331e-05, + "loss": 1.6771, + "step": 80950 + }, + { + "epoch": 3.7, + "learning_rate": 3.1587257998077716e-05, + "loss": 1.7219, + "step": 80960 + }, + { + "epoch": 3.7, + "learning_rate": 3.158496956382444e-05, + "loss": 1.5414, + "step": 80970 + }, + { + "epoch": 3.7, + "learning_rate": 3.158268112957115e-05, + "loss": 1.9016, + "step": 80980 + }, + { + "epoch": 3.7, + "learning_rate": 3.1580392695317866e-05, + "loss": 1.5156, + "step": 80990 + }, + { + "epoch": 3.7, + "learning_rate": 3.1578104261064586e-05, + "loss": 1.7427, + "step": 81000 + }, + { + "epoch": 3.7, + "learning_rate": 3.1575815826811294e-05, + "loss": 1.7283, + "step": 81010 + }, + { + "epoch": 3.7, + "learning_rate": 3.157352739255801e-05, + "loss": 1.7029, + "step": 81020 + }, + { + "epoch": 3.7, + "learning_rate": 3.157123895830473e-05, + "loss": 1.4823, + "step": 81030 + }, + { + "epoch": 3.7, + "learning_rate": 3.156895052405144e-05, + "loss": 1.7263, + "step": 81040 + }, + { + "epoch": 3.7, + "learning_rate": 3.156666208979816e-05, + "loss": 1.8082, + "step": 81050 + }, + { + "epoch": 3.7, + "learning_rate": 3.156437365554488e-05, + "loss": 1.4278, + "step": 81060 + }, + { + "epoch": 3.7, + "learning_rate": 3.156208522129159e-05, + "loss": 1.6411, + "step": 81070 + }, + { + "epoch": 3.7, + "learning_rate": 3.1559796787038306e-05, + "loss": 1.5808, + "step": 81080 + }, + { + "epoch": 3.7, + "learning_rate": 3.155750835278503e-05, + "loss": 1.689, + "step": 81090 + }, + { + "epoch": 3.7, + "learning_rate": 3.155521991853174e-05, + "loss": 1.5641, + "step": 81100 + }, + { + "epoch": 3.7, + "learning_rate": 3.1552931484278455e-05, + "loss": 1.7138, + "step": 81110 + }, + { + "epoch": 3.7, + "learning_rate": 3.1550643050025176e-05, + "loss": 1.5405, + "step": 81120 + }, + { + "epoch": 3.7, + "learning_rate": 3.154835461577189e-05, + "loss": 1.6927, + "step": 81130 + }, + { + "epoch": 3.71, + "learning_rate": 3.1546066181518605e-05, + "loss": 1.7158, + "step": 81140 + }, + { + "epoch": 3.71, + "learning_rate": 3.1543777747265326e-05, + "loss": 1.8111, + "step": 81150 + }, + { + "epoch": 3.71, + "learning_rate": 3.154148931301204e-05, + "loss": 1.6295, + "step": 81160 + }, + { + "epoch": 3.71, + "learning_rate": 3.1539200878758754e-05, + "loss": 1.6739, + "step": 81170 + }, + { + "epoch": 3.71, + "learning_rate": 3.1536912444505475e-05, + "loss": 1.4465, + "step": 81180 + }, + { + "epoch": 3.71, + "learning_rate": 3.153462401025219e-05, + "loss": 1.8063, + "step": 81190 + }, + { + "epoch": 3.71, + "learning_rate": 3.15323355759989e-05, + "loss": 1.6218, + "step": 81200 + }, + { + "epoch": 3.71, + "learning_rate": 3.153004714174562e-05, + "loss": 1.5621, + "step": 81210 + }, + { + "epoch": 3.71, + "learning_rate": 3.152775870749233e-05, + "loss": 1.5895, + "step": 81220 + }, + { + "epoch": 3.71, + "learning_rate": 3.152547027323905e-05, + "loss": 1.7987, + "step": 81230 + }, + { + "epoch": 3.71, + "learning_rate": 3.1523181838985766e-05, + "loss": 1.4608, + "step": 81240 + }, + { + "epoch": 3.71, + "learning_rate": 3.152089340473248e-05, + "loss": 1.681, + "step": 81250 + }, + { + "epoch": 3.71, + "learning_rate": 3.15186049704792e-05, + "loss": 1.6384, + "step": 81260 + }, + { + "epoch": 3.71, + "learning_rate": 3.1516316536225916e-05, + "loss": 1.4755, + "step": 81270 + }, + { + "epoch": 3.71, + "learning_rate": 3.151402810197263e-05, + "loss": 1.5936, + "step": 81280 + }, + { + "epoch": 3.71, + "learning_rate": 3.151173966771935e-05, + "loss": 1.5783, + "step": 81290 + }, + { + "epoch": 3.71, + "learning_rate": 3.1509451233466065e-05, + "loss": 1.7393, + "step": 81300 + }, + { + "epoch": 3.71, + "learning_rate": 3.150716279921278e-05, + "loss": 1.693, + "step": 81310 + }, + { + "epoch": 3.71, + "learning_rate": 3.15048743649595e-05, + "loss": 1.7479, + "step": 81320 + }, + { + "epoch": 3.71, + "learning_rate": 3.1502585930706214e-05, + "loss": 1.4457, + "step": 81330 + }, + { + "epoch": 3.71, + "learning_rate": 3.150029749645293e-05, + "loss": 1.584, + "step": 81340 + }, + { + "epoch": 3.71, + "learning_rate": 3.149800906219965e-05, + "loss": 1.6935, + "step": 81350 + }, + { + "epoch": 3.72, + "learning_rate": 3.149572062794636e-05, + "loss": 1.5169, + "step": 81360 + }, + { + "epoch": 3.72, + "learning_rate": 3.149343219369308e-05, + "loss": 1.6163, + "step": 81370 + }, + { + "epoch": 3.72, + "learning_rate": 3.14911437594398e-05, + "loss": 1.7047, + "step": 81380 + }, + { + "epoch": 3.72, + "learning_rate": 3.148885532518651e-05, + "loss": 1.5909, + "step": 81390 + }, + { + "epoch": 3.72, + "learning_rate": 3.148656689093322e-05, + "loss": 1.5819, + "step": 81400 + }, + { + "epoch": 3.72, + "learning_rate": 3.148427845667994e-05, + "loss": 1.6393, + "step": 81410 + }, + { + "epoch": 3.72, + "learning_rate": 3.1481990022426655e-05, + "loss": 1.5301, + "step": 81420 + }, + { + "epoch": 3.72, + "learning_rate": 3.147970158817337e-05, + "loss": 1.6759, + "step": 81430 + }, + { + "epoch": 3.72, + "learning_rate": 3.147741315392009e-05, + "loss": 1.6089, + "step": 81440 + }, + { + "epoch": 3.72, + "learning_rate": 3.1475124719666804e-05, + "loss": 1.5644, + "step": 81450 + }, + { + "epoch": 3.72, + "learning_rate": 3.147283628541352e-05, + "loss": 1.6824, + "step": 81460 + }, + { + "epoch": 3.72, + "learning_rate": 3.147054785116024e-05, + "loss": 1.7251, + "step": 81470 + }, + { + "epoch": 3.72, + "learning_rate": 3.146825941690695e-05, + "loss": 1.4724, + "step": 81480 + }, + { + "epoch": 3.72, + "learning_rate": 3.146597098265367e-05, + "loss": 1.6416, + "step": 81490 + }, + { + "epoch": 3.72, + "learning_rate": 3.146368254840039e-05, + "loss": 1.549, + "step": 81500 + }, + { + "epoch": 3.72, + "learning_rate": 3.14613941141471e-05, + "loss": 1.7365, + "step": 81510 + }, + { + "epoch": 3.72, + "learning_rate": 3.1459105679893816e-05, + "loss": 1.7722, + "step": 81520 + }, + { + "epoch": 3.72, + "learning_rate": 3.145681724564054e-05, + "loss": 1.5734, + "step": 81530 + }, + { + "epoch": 3.72, + "learning_rate": 3.145452881138725e-05, + "loss": 1.6814, + "step": 81540 + }, + { + "epoch": 3.72, + "learning_rate": 3.1452240377133965e-05, + "loss": 1.7359, + "step": 81550 + }, + { + "epoch": 3.72, + "learning_rate": 3.1449951942880686e-05, + "loss": 1.601, + "step": 81560 + }, + { + "epoch": 3.72, + "learning_rate": 3.14476635086274e-05, + "loss": 1.4886, + "step": 81570 + }, + { + "epoch": 3.73, + "learning_rate": 3.1445375074374115e-05, + "loss": 1.731, + "step": 81580 + }, + { + "epoch": 3.73, + "learning_rate": 3.1443086640120836e-05, + "loss": 1.7326, + "step": 81590 + }, + { + "epoch": 3.73, + "learning_rate": 3.144079820586754e-05, + "loss": 1.7464, + "step": 81600 + }, + { + "epoch": 3.73, + "learning_rate": 3.1438509771614264e-05, + "loss": 1.5897, + "step": 81610 + }, + { + "epoch": 3.73, + "learning_rate": 3.143622133736098e-05, + "loss": 1.5521, + "step": 81620 + }, + { + "epoch": 3.73, + "learning_rate": 3.143393290310769e-05, + "loss": 1.4957, + "step": 81630 + }, + { + "epoch": 3.73, + "learning_rate": 3.143164446885441e-05, + "loss": 1.5359, + "step": 81640 + }, + { + "epoch": 3.73, + "learning_rate": 3.142935603460113e-05, + "loss": 1.7563, + "step": 81650 + }, + { + "epoch": 3.73, + "learning_rate": 3.142706760034784e-05, + "loss": 1.5595, + "step": 81660 + }, + { + "epoch": 3.73, + "learning_rate": 3.142477916609456e-05, + "loss": 1.7068, + "step": 81670 + }, + { + "epoch": 3.73, + "learning_rate": 3.1422490731841276e-05, + "loss": 1.7296, + "step": 81680 + }, + { + "epoch": 3.73, + "learning_rate": 3.142020229758799e-05, + "loss": 1.5678, + "step": 81690 + }, + { + "epoch": 3.73, + "learning_rate": 3.141791386333471e-05, + "loss": 1.5711, + "step": 81700 + }, + { + "epoch": 3.73, + "learning_rate": 3.1415625429081425e-05, + "loss": 1.9688, + "step": 81710 + }, + { + "epoch": 3.73, + "learning_rate": 3.141333699482814e-05, + "loss": 1.5943, + "step": 81720 + }, + { + "epoch": 3.73, + "learning_rate": 3.141104856057486e-05, + "loss": 1.7062, + "step": 81730 + }, + { + "epoch": 3.73, + "learning_rate": 3.1408760126321575e-05, + "loss": 1.5979, + "step": 81740 + }, + { + "epoch": 3.73, + "learning_rate": 3.140647169206829e-05, + "loss": 1.6445, + "step": 81750 + }, + { + "epoch": 3.73, + "learning_rate": 3.140418325781501e-05, + "loss": 1.5907, + "step": 81760 + }, + { + "epoch": 3.73, + "learning_rate": 3.1401894823561724e-05, + "loss": 1.7494, + "step": 81770 + }, + { + "epoch": 3.73, + "learning_rate": 3.139960638930844e-05, + "loss": 1.6648, + "step": 81780 + }, + { + "epoch": 3.73, + "learning_rate": 3.139731795505516e-05, + "loss": 1.563, + "step": 81790 + }, + { + "epoch": 3.74, + "learning_rate": 3.1395029520801866e-05, + "loss": 1.6204, + "step": 81800 + }, + { + "epoch": 3.74, + "learning_rate": 3.139274108654858e-05, + "loss": 1.691, + "step": 81810 + }, + { + "epoch": 3.74, + "learning_rate": 3.13904526522953e-05, + "loss": 1.7794, + "step": 81820 + }, + { + "epoch": 3.74, + "learning_rate": 3.1388164218042015e-05, + "loss": 1.7596, + "step": 81830 + }, + { + "epoch": 3.74, + "learning_rate": 3.138587578378873e-05, + "loss": 1.5341, + "step": 81840 + }, + { + "epoch": 3.74, + "learning_rate": 3.138358734953545e-05, + "loss": 1.7797, + "step": 81850 + }, + { + "epoch": 3.74, + "learning_rate": 3.1381298915282165e-05, + "loss": 1.5848, + "step": 81860 + }, + { + "epoch": 3.74, + "learning_rate": 3.137901048102888e-05, + "loss": 1.6393, + "step": 81870 + }, + { + "epoch": 3.74, + "learning_rate": 3.13767220467756e-05, + "loss": 1.7898, + "step": 81880 + }, + { + "epoch": 3.74, + "learning_rate": 3.1374433612522314e-05, + "loss": 1.6338, + "step": 81890 + }, + { + "epoch": 3.74, + "learning_rate": 3.137214517826903e-05, + "loss": 1.7461, + "step": 81900 + }, + { + "epoch": 3.74, + "learning_rate": 3.136985674401575e-05, + "loss": 1.4512, + "step": 81910 + }, + { + "epoch": 3.74, + "learning_rate": 3.136756830976246e-05, + "loss": 1.6375, + "step": 81920 + }, + { + "epoch": 3.74, + "learning_rate": 3.136527987550918e-05, + "loss": 1.7403, + "step": 81930 + }, + { + "epoch": 3.74, + "learning_rate": 3.13629914412559e-05, + "loss": 1.637, + "step": 81940 + }, + { + "epoch": 3.74, + "learning_rate": 3.136070300700261e-05, + "loss": 1.7613, + "step": 81950 + }, + { + "epoch": 3.74, + "learning_rate": 3.1358414572749326e-05, + "loss": 1.6351, + "step": 81960 + }, + { + "epoch": 3.74, + "learning_rate": 3.135612613849605e-05, + "loss": 1.6075, + "step": 81970 + }, + { + "epoch": 3.74, + "learning_rate": 3.135383770424276e-05, + "loss": 1.7476, + "step": 81980 + }, + { + "epoch": 3.74, + "learning_rate": 3.135154926998947e-05, + "loss": 1.6101, + "step": 81990 + }, + { + "epoch": 3.74, + "learning_rate": 3.134926083573619e-05, + "loss": 1.5954, + "step": 82000 + }, + { + "epoch": 3.74, + "learning_rate": 3.1346972401482904e-05, + "loss": 1.5261, + "step": 82010 + }, + { + "epoch": 3.75, + "learning_rate": 3.134468396722962e-05, + "loss": 1.4762, + "step": 82020 + }, + { + "epoch": 3.75, + "learning_rate": 3.134239553297634e-05, + "loss": 1.4537, + "step": 82030 + }, + { + "epoch": 3.75, + "learning_rate": 3.134010709872305e-05, + "loss": 1.5286, + "step": 82040 + }, + { + "epoch": 3.75, + "learning_rate": 3.133781866446977e-05, + "loss": 1.5572, + "step": 82050 + }, + { + "epoch": 3.75, + "learning_rate": 3.133553023021649e-05, + "loss": 1.6852, + "step": 82060 + }, + { + "epoch": 3.75, + "learning_rate": 3.13332417959632e-05, + "loss": 1.5971, + "step": 82070 + }, + { + "epoch": 3.75, + "learning_rate": 3.1330953361709916e-05, + "loss": 1.6924, + "step": 82080 + }, + { + "epoch": 3.75, + "learning_rate": 3.132866492745664e-05, + "loss": 1.5707, + "step": 82090 + }, + { + "epoch": 3.75, + "learning_rate": 3.132637649320335e-05, + "loss": 1.5937, + "step": 82100 + }, + { + "epoch": 3.75, + "learning_rate": 3.1324088058950065e-05, + "loss": 1.5356, + "step": 82110 + }, + { + "epoch": 3.75, + "learning_rate": 3.1321799624696786e-05, + "loss": 1.5009, + "step": 82120 + }, + { + "epoch": 3.75, + "learning_rate": 3.13195111904435e-05, + "loss": 1.7457, + "step": 82130 + }, + { + "epoch": 3.75, + "learning_rate": 3.1317222756190215e-05, + "loss": 1.7035, + "step": 82140 + }, + { + "epoch": 3.75, + "learning_rate": 3.1314934321936935e-05, + "loss": 1.5706, + "step": 82150 + }, + { + "epoch": 3.75, + "learning_rate": 3.131264588768365e-05, + "loss": 1.6343, + "step": 82160 + }, + { + "epoch": 3.75, + "learning_rate": 3.1310357453430364e-05, + "loss": 1.4776, + "step": 82170 + }, + { + "epoch": 3.75, + "learning_rate": 3.1308069019177085e-05, + "loss": 1.7204, + "step": 82180 + }, + { + "epoch": 3.75, + "learning_rate": 3.130578058492379e-05, + "loss": 1.5522, + "step": 82190 + }, + { + "epoch": 3.75, + "learning_rate": 3.130349215067051e-05, + "loss": 1.5533, + "step": 82200 + }, + { + "epoch": 3.75, + "learning_rate": 3.130120371641723e-05, + "loss": 1.6042, + "step": 82210 + }, + { + "epoch": 3.75, + "learning_rate": 3.129891528216394e-05, + "loss": 1.435, + "step": 82220 + }, + { + "epoch": 3.75, + "learning_rate": 3.129662684791066e-05, + "loss": 1.61, + "step": 82230 + }, + { + "epoch": 3.76, + "learning_rate": 3.1294338413657376e-05, + "loss": 1.7631, + "step": 82240 + }, + { + "epoch": 3.76, + "learning_rate": 3.129204997940409e-05, + "loss": 1.5579, + "step": 82250 + }, + { + "epoch": 3.76, + "learning_rate": 3.128976154515081e-05, + "loss": 1.8967, + "step": 82260 + }, + { + "epoch": 3.76, + "learning_rate": 3.1287473110897525e-05, + "loss": 1.5835, + "step": 82270 + }, + { + "epoch": 3.76, + "learning_rate": 3.128518467664424e-05, + "loss": 1.6529, + "step": 82280 + }, + { + "epoch": 3.76, + "learning_rate": 3.128289624239096e-05, + "loss": 1.7418, + "step": 82290 + }, + { + "epoch": 3.76, + "learning_rate": 3.1280607808137675e-05, + "loss": 1.6163, + "step": 82300 + }, + { + "epoch": 3.76, + "learning_rate": 3.127831937388439e-05, + "loss": 1.575, + "step": 82310 + }, + { + "epoch": 3.76, + "learning_rate": 3.127603093963111e-05, + "loss": 1.4823, + "step": 82320 + }, + { + "epoch": 3.76, + "learning_rate": 3.1273742505377824e-05, + "loss": 1.6496, + "step": 82330 + }, + { + "epoch": 3.76, + "learning_rate": 3.127145407112454e-05, + "loss": 1.6012, + "step": 82340 + }, + { + "epoch": 3.76, + "learning_rate": 3.126916563687126e-05, + "loss": 1.6729, + "step": 82350 + }, + { + "epoch": 3.76, + "learning_rate": 3.126687720261797e-05, + "loss": 1.6143, + "step": 82360 + }, + { + "epoch": 3.76, + "learning_rate": 3.126458876836469e-05, + "loss": 1.5555, + "step": 82370 + }, + { + "epoch": 3.76, + "learning_rate": 3.126230033411141e-05, + "loss": 1.576, + "step": 82380 + }, + { + "epoch": 3.76, + "learning_rate": 3.1260011899858115e-05, + "loss": 1.5637, + "step": 82390 + }, + { + "epoch": 3.76, + "learning_rate": 3.125772346560483e-05, + "loss": 1.5265, + "step": 82400 + }, + { + "epoch": 3.76, + "learning_rate": 3.125543503135155e-05, + "loss": 1.5636, + "step": 82410 + }, + { + "epoch": 3.76, + "learning_rate": 3.1253146597098264e-05, + "loss": 1.7027, + "step": 82420 + }, + { + "epoch": 3.76, + "learning_rate": 3.125085816284498e-05, + "loss": 1.7364, + "step": 82430 + }, + { + "epoch": 3.76, + "learning_rate": 3.12485697285917e-05, + "loss": 1.5814, + "step": 82440 + }, + { + "epoch": 3.76, + "learning_rate": 3.1246281294338414e-05, + "loss": 1.348, + "step": 82450 + }, + { + "epoch": 3.77, + "learning_rate": 3.124399286008513e-05, + "loss": 1.4539, + "step": 82460 + }, + { + "epoch": 3.77, + "learning_rate": 3.124170442583185e-05, + "loss": 1.6529, + "step": 82470 + }, + { + "epoch": 3.77, + "learning_rate": 3.123941599157856e-05, + "loss": 1.5324, + "step": 82480 + }, + { + "epoch": 3.77, + "learning_rate": 3.123712755732528e-05, + "loss": 1.6742, + "step": 82490 + }, + { + "epoch": 3.77, + "learning_rate": 3.1234839123072e-05, + "loss": 1.6398, + "step": 82500 + }, + { + "epoch": 3.77, + "learning_rate": 3.123255068881871e-05, + "loss": 1.6706, + "step": 82510 + }, + { + "epoch": 3.77, + "learning_rate": 3.1230262254565426e-05, + "loss": 1.6767, + "step": 82520 + }, + { + "epoch": 3.77, + "learning_rate": 3.122797382031215e-05, + "loss": 1.7095, + "step": 82530 + }, + { + "epoch": 3.77, + "learning_rate": 3.122568538605886e-05, + "loss": 1.4932, + "step": 82540 + }, + { + "epoch": 3.77, + "learning_rate": 3.1223396951805575e-05, + "loss": 1.5881, + "step": 82550 + }, + { + "epoch": 3.77, + "learning_rate": 3.1221108517552296e-05, + "loss": 1.7692, + "step": 82560 + }, + { + "epoch": 3.77, + "learning_rate": 3.121882008329901e-05, + "loss": 1.5277, + "step": 82570 + }, + { + "epoch": 3.77, + "learning_rate": 3.1216531649045724e-05, + "loss": 1.5025, + "step": 82580 + }, + { + "epoch": 3.77, + "learning_rate": 3.121424321479244e-05, + "loss": 1.4976, + "step": 82590 + }, + { + "epoch": 3.77, + "learning_rate": 3.121195478053915e-05, + "loss": 1.7278, + "step": 82600 + }, + { + "epoch": 3.77, + "learning_rate": 3.1209666346285874e-05, + "loss": 1.6243, + "step": 82610 + }, + { + "epoch": 3.77, + "learning_rate": 3.120737791203259e-05, + "loss": 1.5031, + "step": 82620 + }, + { + "epoch": 3.77, + "learning_rate": 3.12050894777793e-05, + "loss": 1.6462, + "step": 82630 + }, + { + "epoch": 3.77, + "learning_rate": 3.120280104352602e-05, + "loss": 1.6389, + "step": 82640 + }, + { + "epoch": 3.77, + "learning_rate": 3.120051260927274e-05, + "loss": 1.7302, + "step": 82650 + }, + { + "epoch": 3.77, + "learning_rate": 3.119822417501945e-05, + "loss": 1.4613, + "step": 82660 + }, + { + "epoch": 3.77, + "learning_rate": 3.119593574076617e-05, + "loss": 1.4925, + "step": 82670 + }, + { + "epoch": 3.78, + "learning_rate": 3.1193647306512886e-05, + "loss": 1.5663, + "step": 82680 + }, + { + "epoch": 3.78, + "learning_rate": 3.11913588722596e-05, + "loss": 1.5533, + "step": 82690 + }, + { + "epoch": 3.78, + "learning_rate": 3.118907043800632e-05, + "loss": 1.6288, + "step": 82700 + }, + { + "epoch": 3.78, + "learning_rate": 3.1186782003753035e-05, + "loss": 1.606, + "step": 82710 + }, + { + "epoch": 3.78, + "learning_rate": 3.118449356949975e-05, + "loss": 1.4834, + "step": 82720 + }, + { + "epoch": 3.78, + "learning_rate": 3.118220513524647e-05, + "loss": 1.4742, + "step": 82730 + }, + { + "epoch": 3.78, + "learning_rate": 3.1179916700993185e-05, + "loss": 1.5828, + "step": 82740 + }, + { + "epoch": 3.78, + "learning_rate": 3.11776282667399e-05, + "loss": 1.5192, + "step": 82750 + }, + { + "epoch": 3.78, + "learning_rate": 3.117533983248662e-05, + "loss": 1.5033, + "step": 82760 + }, + { + "epoch": 3.78, + "learning_rate": 3.1173051398233334e-05, + "loss": 1.8208, + "step": 82770 + }, + { + "epoch": 3.78, + "learning_rate": 3.117076296398004e-05, + "loss": 1.4756, + "step": 82780 + }, + { + "epoch": 3.78, + "learning_rate": 3.116847452972676e-05, + "loss": 1.6055, + "step": 82790 + }, + { + "epoch": 3.78, + "learning_rate": 3.1166186095473476e-05, + "loss": 1.502, + "step": 82800 + }, + { + "epoch": 3.78, + "learning_rate": 3.116389766122019e-05, + "loss": 1.5371, + "step": 82810 + }, + { + "epoch": 3.78, + "learning_rate": 3.116160922696691e-05, + "loss": 1.5651, + "step": 82820 + }, + { + "epoch": 3.78, + "learning_rate": 3.1159320792713625e-05, + "loss": 1.6732, + "step": 82830 + }, + { + "epoch": 3.78, + "learning_rate": 3.115703235846034e-05, + "loss": 1.5495, + "step": 82840 + }, + { + "epoch": 3.78, + "learning_rate": 3.115474392420706e-05, + "loss": 1.4596, + "step": 82850 + }, + { + "epoch": 3.78, + "learning_rate": 3.1152455489953774e-05, + "loss": 1.8159, + "step": 82860 + }, + { + "epoch": 3.78, + "learning_rate": 3.115016705570049e-05, + "loss": 1.4239, + "step": 82870 + }, + { + "epoch": 3.78, + "learning_rate": 3.114787862144721e-05, + "loss": 1.7639, + "step": 82880 + }, + { + "epoch": 3.78, + "learning_rate": 3.1145590187193924e-05, + "loss": 1.4666, + "step": 82890 + }, + { + "epoch": 3.79, + "learning_rate": 3.114330175294064e-05, + "loss": 1.4336, + "step": 82900 + }, + { + "epoch": 3.79, + "learning_rate": 3.114101331868736e-05, + "loss": 1.4775, + "step": 82910 + }, + { + "epoch": 3.79, + "learning_rate": 3.113872488443407e-05, + "loss": 1.6717, + "step": 82920 + }, + { + "epoch": 3.79, + "learning_rate": 3.113643645018079e-05, + "loss": 1.4684, + "step": 82930 + }, + { + "epoch": 3.79, + "learning_rate": 3.113414801592751e-05, + "loss": 1.7577, + "step": 82940 + }, + { + "epoch": 3.79, + "learning_rate": 3.113185958167422e-05, + "loss": 1.6017, + "step": 82950 + }, + { + "epoch": 3.79, + "learning_rate": 3.1129571147420936e-05, + "loss": 1.7331, + "step": 82960 + }, + { + "epoch": 3.79, + "learning_rate": 3.112728271316766e-05, + "loss": 1.4865, + "step": 82970 + }, + { + "epoch": 3.79, + "learning_rate": 3.1124994278914364e-05, + "loss": 1.4221, + "step": 82980 + }, + { + "epoch": 3.79, + "learning_rate": 3.1122705844661085e-05, + "loss": 1.7241, + "step": 82990 + }, + { + "epoch": 3.79, + "learning_rate": 3.11204174104078e-05, + "loss": 1.4911, + "step": 83000 + }, + { + "epoch": 3.79, + "learning_rate": 3.1118128976154514e-05, + "loss": 1.7685, + "step": 83010 + }, + { + "epoch": 3.79, + "learning_rate": 3.1115840541901234e-05, + "loss": 1.5216, + "step": 83020 + }, + { + "epoch": 3.79, + "learning_rate": 3.111355210764795e-05, + "loss": 1.6569, + "step": 83030 + }, + { + "epoch": 3.79, + "learning_rate": 3.111126367339466e-05, + "loss": 1.5753, + "step": 83040 + }, + { + "epoch": 3.79, + "learning_rate": 3.110897523914138e-05, + "loss": 1.4629, + "step": 83050 + }, + { + "epoch": 3.79, + "learning_rate": 3.11066868048881e-05, + "loss": 1.7177, + "step": 83060 + }, + { + "epoch": 3.79, + "learning_rate": 3.110439837063481e-05, + "loss": 1.5228, + "step": 83070 + }, + { + "epoch": 3.79, + "learning_rate": 3.1102109936381526e-05, + "loss": 1.6546, + "step": 83080 + }, + { + "epoch": 3.79, + "learning_rate": 3.109982150212825e-05, + "loss": 1.7095, + "step": 83090 + }, + { + "epoch": 3.79, + "learning_rate": 3.109753306787496e-05, + "loss": 1.7334, + "step": 83100 + }, + { + "epoch": 3.8, + "learning_rate": 3.1095244633621675e-05, + "loss": 1.6563, + "step": 83110 + }, + { + "epoch": 3.8, + "learning_rate": 3.1092956199368396e-05, + "loss": 1.5691, + "step": 83120 + }, + { + "epoch": 3.8, + "learning_rate": 3.109066776511511e-05, + "loss": 1.4792, + "step": 83130 + }, + { + "epoch": 3.8, + "learning_rate": 3.1088379330861824e-05, + "loss": 1.5484, + "step": 83140 + }, + { + "epoch": 3.8, + "learning_rate": 3.1086090896608545e-05, + "loss": 1.7247, + "step": 83150 + }, + { + "epoch": 3.8, + "learning_rate": 3.108380246235526e-05, + "loss": 1.5339, + "step": 83160 + }, + { + "epoch": 3.8, + "learning_rate": 3.1081514028101974e-05, + "loss": 1.7184, + "step": 83170 + }, + { + "epoch": 3.8, + "learning_rate": 3.107922559384869e-05, + "loss": 1.517, + "step": 83180 + }, + { + "epoch": 3.8, + "learning_rate": 3.10769371595954e-05, + "loss": 1.7171, + "step": 83190 + }, + { + "epoch": 3.8, + "learning_rate": 3.107464872534212e-05, + "loss": 1.6156, + "step": 83200 + }, + { + "epoch": 3.8, + "learning_rate": 3.107236029108884e-05, + "loss": 1.6078, + "step": 83210 + }, + { + "epoch": 3.8, + "learning_rate": 3.107007185683555e-05, + "loss": 1.5589, + "step": 83220 + }, + { + "epoch": 3.8, + "learning_rate": 3.106778342258227e-05, + "loss": 1.6529, + "step": 83230 + }, + { + "epoch": 3.8, + "learning_rate": 3.1065494988328986e-05, + "loss": 1.534, + "step": 83240 + }, + { + "epoch": 3.8, + "learning_rate": 3.10632065540757e-05, + "loss": 1.4449, + "step": 83250 + }, + { + "epoch": 3.8, + "learning_rate": 3.106091811982242e-05, + "loss": 1.5232, + "step": 83260 + }, + { + "epoch": 3.8, + "learning_rate": 3.1058629685569135e-05, + "loss": 1.7441, + "step": 83270 + }, + { + "epoch": 3.8, + "learning_rate": 3.105634125131585e-05, + "loss": 1.5882, + "step": 83280 + }, + { + "epoch": 3.8, + "learning_rate": 3.105405281706257e-05, + "loss": 1.659, + "step": 83290 + }, + { + "epoch": 3.8, + "learning_rate": 3.1051764382809284e-05, + "loss": 1.6254, + "step": 83300 + }, + { + "epoch": 3.8, + "learning_rate": 3.1049475948556e-05, + "loss": 1.6167, + "step": 83310 + }, + { + "epoch": 3.8, + "learning_rate": 3.104718751430272e-05, + "loss": 1.6211, + "step": 83320 + }, + { + "epoch": 3.81, + "learning_rate": 3.1044899080049434e-05, + "loss": 1.6076, + "step": 83330 + }, + { + "epoch": 3.81, + "learning_rate": 3.104261064579615e-05, + "loss": 1.6271, + "step": 83340 + }, + { + "epoch": 3.81, + "learning_rate": 3.104032221154287e-05, + "loss": 1.532, + "step": 83350 + }, + { + "epoch": 3.81, + "learning_rate": 3.103803377728958e-05, + "loss": 1.8276, + "step": 83360 + }, + { + "epoch": 3.81, + "learning_rate": 3.10357453430363e-05, + "loss": 1.5781, + "step": 83370 + }, + { + "epoch": 3.81, + "learning_rate": 3.103345690878301e-05, + "loss": 1.5825, + "step": 83380 + }, + { + "epoch": 3.81, + "learning_rate": 3.1031168474529725e-05, + "loss": 1.4904, + "step": 83390 + }, + { + "epoch": 3.81, + "learning_rate": 3.102888004027644e-05, + "loss": 1.8152, + "step": 83400 + }, + { + "epoch": 3.81, + "learning_rate": 3.102659160602316e-05, + "loss": 1.6764, + "step": 83410 + }, + { + "epoch": 3.81, + "learning_rate": 3.1024303171769874e-05, + "loss": 1.612, + "step": 83420 + }, + { + "epoch": 3.81, + "learning_rate": 3.102201473751659e-05, + "loss": 1.5652, + "step": 83430 + }, + { + "epoch": 3.81, + "learning_rate": 3.101972630326331e-05, + "loss": 1.6965, + "step": 83440 + }, + { + "epoch": 3.81, + "learning_rate": 3.1017437869010024e-05, + "loss": 1.4836, + "step": 83450 + }, + { + "epoch": 3.81, + "learning_rate": 3.101514943475674e-05, + "loss": 1.6865, + "step": 83460 + }, + { + "epoch": 3.81, + "learning_rate": 3.101286100050346e-05, + "loss": 1.5092, + "step": 83470 + }, + { + "epoch": 3.81, + "learning_rate": 3.101057256625017e-05, + "loss": 1.6151, + "step": 83480 + }, + { + "epoch": 3.81, + "learning_rate": 3.100828413199689e-05, + "loss": 1.681, + "step": 83490 + }, + { + "epoch": 3.81, + "learning_rate": 3.100599569774361e-05, + "loss": 1.649, + "step": 83500 + }, + { + "epoch": 3.81, + "learning_rate": 3.100370726349032e-05, + "loss": 1.7172, + "step": 83510 + }, + { + "epoch": 3.81, + "learning_rate": 3.1001418829237036e-05, + "loss": 1.5593, + "step": 83520 + }, + { + "epoch": 3.81, + "learning_rate": 3.099913039498376e-05, + "loss": 1.589, + "step": 83530 + }, + { + "epoch": 3.81, + "learning_rate": 3.099684196073047e-05, + "loss": 1.621, + "step": 83540 + }, + { + "epoch": 3.82, + "learning_rate": 3.0994553526477185e-05, + "loss": 1.6756, + "step": 83550 + }, + { + "epoch": 3.82, + "learning_rate": 3.0992265092223906e-05, + "loss": 1.5866, + "step": 83560 + }, + { + "epoch": 3.82, + "learning_rate": 3.0989976657970613e-05, + "loss": 1.6631, + "step": 83570 + }, + { + "epoch": 3.82, + "learning_rate": 3.0987688223717334e-05, + "loss": 1.7372, + "step": 83580 + }, + { + "epoch": 3.82, + "learning_rate": 3.098539978946405e-05, + "loss": 1.6724, + "step": 83590 + }, + { + "epoch": 3.82, + "learning_rate": 3.098311135521076e-05, + "loss": 1.6043, + "step": 83600 + }, + { + "epoch": 3.82, + "learning_rate": 3.0980822920957484e-05, + "loss": 1.6385, + "step": 83610 + }, + { + "epoch": 3.82, + "learning_rate": 3.09785344867042e-05, + "loss": 1.6577, + "step": 83620 + }, + { + "epoch": 3.82, + "learning_rate": 3.097624605245091e-05, + "loss": 1.6022, + "step": 83630 + }, + { + "epoch": 3.82, + "learning_rate": 3.097395761819763e-05, + "loss": 1.602, + "step": 83640 + }, + { + "epoch": 3.82, + "learning_rate": 3.097166918394435e-05, + "loss": 1.5621, + "step": 83650 + }, + { + "epoch": 3.82, + "learning_rate": 3.096938074969106e-05, + "loss": 1.5461, + "step": 83660 + }, + { + "epoch": 3.82, + "learning_rate": 3.096709231543778e-05, + "loss": 1.3763, + "step": 83670 + }, + { + "epoch": 3.82, + "learning_rate": 3.0964803881184496e-05, + "loss": 1.8366, + "step": 83680 + }, + { + "epoch": 3.82, + "learning_rate": 3.096251544693121e-05, + "loss": 1.6344, + "step": 83690 + }, + { + "epoch": 3.82, + "learning_rate": 3.096022701267793e-05, + "loss": 1.4759, + "step": 83700 + }, + { + "epoch": 3.82, + "learning_rate": 3.0957938578424645e-05, + "loss": 1.627, + "step": 83710 + }, + { + "epoch": 3.82, + "learning_rate": 3.095565014417136e-05, + "loss": 1.7154, + "step": 83720 + }, + { + "epoch": 3.82, + "learning_rate": 3.095336170991808e-05, + "loss": 1.7742, + "step": 83730 + }, + { + "epoch": 3.82, + "learning_rate": 3.0951073275664794e-05, + "loss": 1.5504, + "step": 83740 + }, + { + "epoch": 3.82, + "learning_rate": 3.094878484141151e-05, + "loss": 1.6742, + "step": 83750 + }, + { + "epoch": 3.82, + "learning_rate": 3.094649640715823e-05, + "loss": 1.5718, + "step": 83760 + }, + { + "epoch": 3.83, + "learning_rate": 3.094420797290494e-05, + "loss": 1.6407, + "step": 83770 + }, + { + "epoch": 3.83, + "learning_rate": 3.094191953865165e-05, + "loss": 1.5559, + "step": 83780 + }, + { + "epoch": 3.83, + "learning_rate": 3.093963110439837e-05, + "loss": 1.4874, + "step": 83790 + }, + { + "epoch": 3.83, + "learning_rate": 3.0937342670145086e-05, + "loss": 1.6785, + "step": 83800 + }, + { + "epoch": 3.83, + "learning_rate": 3.09350542358918e-05, + "loss": 1.5608, + "step": 83810 + }, + { + "epoch": 3.83, + "learning_rate": 3.093276580163852e-05, + "loss": 1.4864, + "step": 83820 + }, + { + "epoch": 3.83, + "learning_rate": 3.0930477367385235e-05, + "loss": 1.549, + "step": 83830 + }, + { + "epoch": 3.83, + "learning_rate": 3.092818893313195e-05, + "loss": 1.6117, + "step": 83840 + }, + { + "epoch": 3.83, + "learning_rate": 3.092590049887867e-05, + "loss": 1.7273, + "step": 83850 + }, + { + "epoch": 3.83, + "learning_rate": 3.0923612064625384e-05, + "loss": 1.6636, + "step": 83860 + }, + { + "epoch": 3.83, + "learning_rate": 3.09213236303721e-05, + "loss": 1.7017, + "step": 83870 + }, + { + "epoch": 3.83, + "learning_rate": 3.091903519611882e-05, + "loss": 1.5771, + "step": 83880 + }, + { + "epoch": 3.83, + "learning_rate": 3.0916746761865533e-05, + "loss": 1.6924, + "step": 83890 + }, + { + "epoch": 3.83, + "learning_rate": 3.091445832761225e-05, + "loss": 1.7748, + "step": 83900 + }, + { + "epoch": 3.83, + "learning_rate": 3.091216989335897e-05, + "loss": 1.6417, + "step": 83910 + }, + { + "epoch": 3.83, + "learning_rate": 3.090988145910568e-05, + "loss": 1.7568, + "step": 83920 + }, + { + "epoch": 3.83, + "learning_rate": 3.09075930248524e-05, + "loss": 1.6695, + "step": 83930 + }, + { + "epoch": 3.83, + "learning_rate": 3.090530459059912e-05, + "loss": 1.5873, + "step": 83940 + }, + { + "epoch": 3.83, + "learning_rate": 3.090301615634583e-05, + "loss": 1.6036, + "step": 83950 + }, + { + "epoch": 3.83, + "learning_rate": 3.0900727722092546e-05, + "loss": 1.6423, + "step": 83960 + }, + { + "epoch": 3.83, + "learning_rate": 3.089843928783926e-05, + "loss": 1.573, + "step": 83970 + }, + { + "epoch": 3.83, + "learning_rate": 3.0896150853585974e-05, + "loss": 1.6002, + "step": 83980 + }, + { + "epoch": 3.84, + "learning_rate": 3.0893862419332695e-05, + "loss": 1.432, + "step": 83990 + }, + { + "epoch": 3.84, + "learning_rate": 3.089157398507941e-05, + "loss": 1.6062, + "step": 84000 + }, + { + "epoch": 3.84, + "learning_rate": 3.0889285550826123e-05, + "loss": 1.6239, + "step": 84010 + }, + { + "epoch": 3.84, + "learning_rate": 3.0886997116572844e-05, + "loss": 1.6884, + "step": 84020 + }, + { + "epoch": 3.84, + "learning_rate": 3.088470868231956e-05, + "loss": 1.7076, + "step": 84030 + }, + { + "epoch": 3.84, + "learning_rate": 3.088242024806627e-05, + "loss": 1.7639, + "step": 84040 + }, + { + "epoch": 3.84, + "learning_rate": 3.0880131813812994e-05, + "loss": 1.6165, + "step": 84050 + }, + { + "epoch": 3.84, + "learning_rate": 3.087784337955971e-05, + "loss": 1.6982, + "step": 84060 + }, + { + "epoch": 3.84, + "learning_rate": 3.087555494530642e-05, + "loss": 1.7319, + "step": 84070 + }, + { + "epoch": 3.84, + "learning_rate": 3.087326651105314e-05, + "loss": 1.5727, + "step": 84080 + }, + { + "epoch": 3.84, + "learning_rate": 3.087097807679986e-05, + "loss": 1.6315, + "step": 84090 + }, + { + "epoch": 3.84, + "learning_rate": 3.086868964254657e-05, + "loss": 1.5255, + "step": 84100 + }, + { + "epoch": 3.84, + "learning_rate": 3.086640120829329e-05, + "loss": 1.6061, + "step": 84110 + }, + { + "epoch": 3.84, + "learning_rate": 3.0864112774040006e-05, + "loss": 1.6309, + "step": 84120 + }, + { + "epoch": 3.84, + "learning_rate": 3.086182433978672e-05, + "loss": 1.3869, + "step": 84130 + }, + { + "epoch": 3.84, + "learning_rate": 3.0859535905533434e-05, + "loss": 1.5892, + "step": 84140 + }, + { + "epoch": 3.84, + "learning_rate": 3.0857247471280155e-05, + "loss": 1.8163, + "step": 84150 + }, + { + "epoch": 3.84, + "learning_rate": 3.085495903702687e-05, + "loss": 1.6678, + "step": 84160 + }, + { + "epoch": 3.84, + "learning_rate": 3.0852670602773583e-05, + "loss": 1.4402, + "step": 84170 + }, + { + "epoch": 3.84, + "learning_rate": 3.08503821685203e-05, + "loss": 1.471, + "step": 84180 + }, + { + "epoch": 3.84, + "learning_rate": 3.084809373426701e-05, + "loss": 1.6117, + "step": 84190 + }, + { + "epoch": 3.84, + "learning_rate": 3.084580530001373e-05, + "loss": 1.5127, + "step": 84200 + }, + { + "epoch": 3.85, + "learning_rate": 3.084351686576045e-05, + "loss": 2.0005, + "step": 84210 + }, + { + "epoch": 3.85, + "learning_rate": 3.084122843150716e-05, + "loss": 1.455, + "step": 84220 + }, + { + "epoch": 3.85, + "learning_rate": 3.083893999725388e-05, + "loss": 1.5731, + "step": 84230 + }, + { + "epoch": 3.85, + "learning_rate": 3.0836651563000596e-05, + "loss": 1.688, + "step": 84240 + }, + { + "epoch": 3.85, + "learning_rate": 3.083436312874731e-05, + "loss": 1.6978, + "step": 84250 + }, + { + "epoch": 3.85, + "learning_rate": 3.083207469449403e-05, + "loss": 1.4224, + "step": 84260 + }, + { + "epoch": 3.85, + "learning_rate": 3.0829786260240745e-05, + "loss": 1.6079, + "step": 84270 + }, + { + "epoch": 3.85, + "learning_rate": 3.082749782598746e-05, + "loss": 1.6621, + "step": 84280 + }, + { + "epoch": 3.85, + "learning_rate": 3.082520939173418e-05, + "loss": 1.5306, + "step": 84290 + }, + { + "epoch": 3.85, + "learning_rate": 3.0822920957480894e-05, + "loss": 1.7794, + "step": 84300 + }, + { + "epoch": 3.85, + "learning_rate": 3.082063252322761e-05, + "loss": 1.5952, + "step": 84310 + }, + { + "epoch": 3.85, + "learning_rate": 3.081834408897433e-05, + "loss": 1.6005, + "step": 84320 + }, + { + "epoch": 3.85, + "learning_rate": 3.0816055654721043e-05, + "loss": 1.5737, + "step": 84330 + }, + { + "epoch": 3.85, + "learning_rate": 3.081376722046776e-05, + "loss": 1.5151, + "step": 84340 + }, + { + "epoch": 3.85, + "learning_rate": 3.081147878621448e-05, + "loss": 1.4976, + "step": 84350 + }, + { + "epoch": 3.85, + "learning_rate": 3.0809190351961186e-05, + "loss": 1.7481, + "step": 84360 + }, + { + "epoch": 3.85, + "learning_rate": 3.08069019177079e-05, + "loss": 1.7055, + "step": 84370 + }, + { + "epoch": 3.85, + "learning_rate": 3.080461348345462e-05, + "loss": 1.7097, + "step": 84380 + }, + { + "epoch": 3.85, + "learning_rate": 3.0802325049201335e-05, + "loss": 1.6621, + "step": 84390 + }, + { + "epoch": 3.85, + "learning_rate": 3.080003661494805e-05, + "loss": 1.5151, + "step": 84400 + }, + { + "epoch": 3.85, + "learning_rate": 3.079774818069477e-05, + "loss": 1.6614, + "step": 84410 + }, + { + "epoch": 3.85, + "learning_rate": 3.0795459746441484e-05, + "loss": 1.5269, + "step": 84420 + }, + { + "epoch": 3.86, + "learning_rate": 3.07931713121882e-05, + "loss": 1.6609, + "step": 84430 + }, + { + "epoch": 3.86, + "learning_rate": 3.079088287793492e-05, + "loss": 1.6405, + "step": 84440 + }, + { + "epoch": 3.86, + "learning_rate": 3.0788594443681633e-05, + "loss": 1.4441, + "step": 84450 + }, + { + "epoch": 3.86, + "learning_rate": 3.078630600942835e-05, + "loss": 1.6238, + "step": 84460 + }, + { + "epoch": 3.86, + "learning_rate": 3.078401757517507e-05, + "loss": 1.5869, + "step": 84470 + }, + { + "epoch": 3.86, + "learning_rate": 3.078172914092178e-05, + "loss": 1.5629, + "step": 84480 + }, + { + "epoch": 3.86, + "learning_rate": 3.07794407066685e-05, + "loss": 1.5791, + "step": 84490 + }, + { + "epoch": 3.86, + "learning_rate": 3.077715227241522e-05, + "loss": 1.4857, + "step": 84500 + }, + { + "epoch": 3.86, + "learning_rate": 3.077486383816193e-05, + "loss": 1.6876, + "step": 84510 + }, + { + "epoch": 3.86, + "learning_rate": 3.0772575403908646e-05, + "loss": 1.5809, + "step": 84520 + }, + { + "epoch": 3.86, + "learning_rate": 3.077028696965537e-05, + "loss": 1.5322, + "step": 84530 + }, + { + "epoch": 3.86, + "learning_rate": 3.076799853540208e-05, + "loss": 1.6707, + "step": 84540 + }, + { + "epoch": 3.86, + "learning_rate": 3.0765710101148795e-05, + "loss": 1.5677, + "step": 84550 + }, + { + "epoch": 3.86, + "learning_rate": 3.076342166689551e-05, + "loss": 1.6818, + "step": 84560 + }, + { + "epoch": 3.86, + "learning_rate": 3.076113323264222e-05, + "loss": 1.5952, + "step": 84570 + }, + { + "epoch": 3.86, + "learning_rate": 3.0758844798388944e-05, + "loss": 1.8476, + "step": 84580 + }, + { + "epoch": 3.86, + "learning_rate": 3.075655636413566e-05, + "loss": 1.6762, + "step": 84590 + }, + { + "epoch": 3.86, + "learning_rate": 3.075426792988237e-05, + "loss": 1.5706, + "step": 84600 + }, + { + "epoch": 3.86, + "learning_rate": 3.0751979495629093e-05, + "loss": 1.651, + "step": 84610 + }, + { + "epoch": 3.86, + "learning_rate": 3.074969106137581e-05, + "loss": 1.6692, + "step": 84620 + }, + { + "epoch": 3.86, + "learning_rate": 3.074740262712252e-05, + "loss": 1.728, + "step": 84630 + }, + { + "epoch": 3.86, + "learning_rate": 3.074511419286924e-05, + "loss": 1.6344, + "step": 84640 + }, + { + "epoch": 3.87, + "learning_rate": 3.074282575861596e-05, + "loss": 1.5855, + "step": 84650 + }, + { + "epoch": 3.87, + "learning_rate": 3.074053732436267e-05, + "loss": 1.797, + "step": 84660 + }, + { + "epoch": 3.87, + "learning_rate": 3.073824889010939e-05, + "loss": 1.6284, + "step": 84670 + }, + { + "epoch": 3.87, + "learning_rate": 3.0735960455856106e-05, + "loss": 1.6258, + "step": 84680 + }, + { + "epoch": 3.87, + "learning_rate": 3.073367202160282e-05, + "loss": 1.6311, + "step": 84690 + }, + { + "epoch": 3.87, + "learning_rate": 3.073138358734954e-05, + "loss": 1.6753, + "step": 84700 + }, + { + "epoch": 3.87, + "learning_rate": 3.0729095153096255e-05, + "loss": 1.6434, + "step": 84710 + }, + { + "epoch": 3.87, + "learning_rate": 3.072680671884297e-05, + "loss": 1.6004, + "step": 84720 + }, + { + "epoch": 3.87, + "learning_rate": 3.072451828458969e-05, + "loss": 1.6652, + "step": 84730 + }, + { + "epoch": 3.87, + "learning_rate": 3.0722229850336404e-05, + "loss": 1.6171, + "step": 84740 + }, + { + "epoch": 3.87, + "learning_rate": 3.071994141608312e-05, + "loss": 1.4992, + "step": 84750 + }, + { + "epoch": 3.87, + "learning_rate": 3.071765298182983e-05, + "loss": 1.6437, + "step": 84760 + }, + { + "epoch": 3.87, + "learning_rate": 3.071536454757655e-05, + "loss": 1.6233, + "step": 84770 + }, + { + "epoch": 3.87, + "learning_rate": 3.071307611332326e-05, + "loss": 1.552, + "step": 84780 + }, + { + "epoch": 3.87, + "learning_rate": 3.071078767906998e-05, + "loss": 1.7486, + "step": 84790 + }, + { + "epoch": 3.87, + "learning_rate": 3.0708499244816696e-05, + "loss": 1.6856, + "step": 84800 + }, + { + "epoch": 3.87, + "learning_rate": 3.070621081056341e-05, + "loss": 1.5624, + "step": 84810 + }, + { + "epoch": 3.87, + "learning_rate": 3.070392237631013e-05, + "loss": 1.5479, + "step": 84820 + }, + { + "epoch": 3.87, + "learning_rate": 3.0701633942056845e-05, + "loss": 1.7122, + "step": 84830 + }, + { + "epoch": 3.87, + "learning_rate": 3.069934550780356e-05, + "loss": 1.5116, + "step": 84840 + }, + { + "epoch": 3.87, + "learning_rate": 3.069705707355028e-05, + "loss": 1.6726, + "step": 84850 + }, + { + "epoch": 3.87, + "learning_rate": 3.0694768639296994e-05, + "loss": 1.8607, + "step": 84860 + }, + { + "epoch": 3.88, + "learning_rate": 3.069248020504371e-05, + "loss": 1.6634, + "step": 84870 + }, + { + "epoch": 3.88, + "learning_rate": 3.069019177079043e-05, + "loss": 1.6995, + "step": 84880 + }, + { + "epoch": 3.88, + "learning_rate": 3.068790333653714e-05, + "loss": 1.741, + "step": 84890 + }, + { + "epoch": 3.88, + "learning_rate": 3.068561490228386e-05, + "loss": 1.7259, + "step": 84900 + }, + { + "epoch": 3.88, + "learning_rate": 3.068332646803058e-05, + "loss": 1.6651, + "step": 84910 + }, + { + "epoch": 3.88, + "learning_rate": 3.068103803377729e-05, + "loss": 1.5109, + "step": 84920 + }, + { + "epoch": 3.88, + "learning_rate": 3.067874959952401e-05, + "loss": 1.499, + "step": 84930 + }, + { + "epoch": 3.88, + "learning_rate": 3.067646116527073e-05, + "loss": 1.4333, + "step": 84940 + }, + { + "epoch": 3.88, + "learning_rate": 3.067417273101744e-05, + "loss": 1.5071, + "step": 84950 + }, + { + "epoch": 3.88, + "learning_rate": 3.0671884296764156e-05, + "loss": 1.7555, + "step": 84960 + }, + { + "epoch": 3.88, + "learning_rate": 3.066959586251087e-05, + "loss": 1.8698, + "step": 84970 + }, + { + "epoch": 3.88, + "learning_rate": 3.0667307428257584e-05, + "loss": 1.5477, + "step": 84980 + }, + { + "epoch": 3.88, + "learning_rate": 3.0665018994004305e-05, + "loss": 1.4526, + "step": 84990 + }, + { + "epoch": 3.88, + "learning_rate": 3.066273055975102e-05, + "loss": 1.5442, + "step": 85000 + }, + { + "epoch": 3.88, + "learning_rate": 3.066044212549773e-05, + "loss": 1.5944, + "step": 85010 + }, + { + "epoch": 3.88, + "learning_rate": 3.0658153691244454e-05, + "loss": 1.8074, + "step": 85020 + }, + { + "epoch": 3.88, + "learning_rate": 3.065586525699117e-05, + "loss": 1.4864, + "step": 85030 + }, + { + "epoch": 3.88, + "learning_rate": 3.065357682273788e-05, + "loss": 1.591, + "step": 85040 + }, + { + "epoch": 3.88, + "learning_rate": 3.06512883884846e-05, + "loss": 1.6127, + "step": 85050 + }, + { + "epoch": 3.88, + "learning_rate": 3.064899995423132e-05, + "loss": 1.665, + "step": 85060 + }, + { + "epoch": 3.88, + "learning_rate": 3.064671151997803e-05, + "loss": 1.7187, + "step": 85070 + }, + { + "epoch": 3.88, + "learning_rate": 3.064442308572475e-05, + "loss": 1.5117, + "step": 85080 + }, + { + "epoch": 3.89, + "learning_rate": 3.064213465147147e-05, + "loss": 1.6607, + "step": 85090 + }, + { + "epoch": 3.89, + "learning_rate": 3.063984621721818e-05, + "loss": 1.593, + "step": 85100 + }, + { + "epoch": 3.89, + "learning_rate": 3.06375577829649e-05, + "loss": 1.7184, + "step": 85110 + }, + { + "epoch": 3.89, + "learning_rate": 3.0635269348711616e-05, + "loss": 1.5801, + "step": 85120 + }, + { + "epoch": 3.89, + "learning_rate": 3.063298091445833e-05, + "loss": 1.7346, + "step": 85130 + }, + { + "epoch": 3.89, + "learning_rate": 3.063069248020505e-05, + "loss": 1.5783, + "step": 85140 + }, + { + "epoch": 3.89, + "learning_rate": 3.0628404045951765e-05, + "loss": 1.6232, + "step": 85150 + }, + { + "epoch": 3.89, + "learning_rate": 3.062611561169847e-05, + "loss": 1.7688, + "step": 85160 + }, + { + "epoch": 3.89, + "learning_rate": 3.062382717744519e-05, + "loss": 1.5577, + "step": 85170 + }, + { + "epoch": 3.89, + "learning_rate": 3.062153874319191e-05, + "loss": 1.5336, + "step": 85180 + }, + { + "epoch": 3.89, + "learning_rate": 3.061925030893862e-05, + "loss": 1.6763, + "step": 85190 + }, + { + "epoch": 3.89, + "learning_rate": 3.061696187468534e-05, + "loss": 1.6364, + "step": 85200 + }, + { + "epoch": 3.89, + "learning_rate": 3.061467344043206e-05, + "loss": 1.6471, + "step": 85210 + }, + { + "epoch": 3.89, + "learning_rate": 3.061238500617877e-05, + "loss": 1.4962, + "step": 85220 + }, + { + "epoch": 3.89, + "learning_rate": 3.061009657192549e-05, + "loss": 1.757, + "step": 85230 + }, + { + "epoch": 3.89, + "learning_rate": 3.0607808137672206e-05, + "loss": 1.6505, + "step": 85240 + }, + { + "epoch": 3.89, + "learning_rate": 3.060551970341892e-05, + "loss": 1.6898, + "step": 85250 + }, + { + "epoch": 3.89, + "learning_rate": 3.060323126916564e-05, + "loss": 1.5833, + "step": 85260 + }, + { + "epoch": 3.89, + "learning_rate": 3.0600942834912355e-05, + "loss": 1.5637, + "step": 85270 + }, + { + "epoch": 3.89, + "learning_rate": 3.059865440065907e-05, + "loss": 1.6042, + "step": 85280 + }, + { + "epoch": 3.89, + "learning_rate": 3.059636596640579e-05, + "loss": 1.666, + "step": 85290 + }, + { + "epoch": 3.9, + "learning_rate": 3.0594077532152504e-05, + "loss": 1.5845, + "step": 85300 + }, + { + "epoch": 3.9, + "learning_rate": 3.059178909789922e-05, + "loss": 1.4397, + "step": 85310 + }, + { + "epoch": 3.9, + "learning_rate": 3.058950066364594e-05, + "loss": 1.6032, + "step": 85320 + }, + { + "epoch": 3.9, + "learning_rate": 3.058721222939265e-05, + "loss": 1.7798, + "step": 85330 + }, + { + "epoch": 3.9, + "learning_rate": 3.058492379513937e-05, + "loss": 1.4187, + "step": 85340 + }, + { + "epoch": 3.9, + "learning_rate": 3.058263536088608e-05, + "loss": 1.593, + "step": 85350 + }, + { + "epoch": 3.9, + "learning_rate": 3.0580346926632796e-05, + "loss": 1.5852, + "step": 85360 + }, + { + "epoch": 3.9, + "learning_rate": 3.057805849237952e-05, + "loss": 1.4764, + "step": 85370 + }, + { + "epoch": 3.9, + "learning_rate": 3.057577005812623e-05, + "loss": 1.5485, + "step": 85380 + }, + { + "epoch": 3.9, + "learning_rate": 3.0573481623872945e-05, + "loss": 1.6224, + "step": 85390 + }, + { + "epoch": 3.9, + "learning_rate": 3.057119318961966e-05, + "loss": 1.5244, + "step": 85400 + }, + { + "epoch": 3.9, + "learning_rate": 3.056890475536638e-05, + "loss": 1.8151, + "step": 85410 + }, + { + "epoch": 3.9, + "learning_rate": 3.0566616321113094e-05, + "loss": 1.5723, + "step": 85420 + }, + { + "epoch": 3.9, + "learning_rate": 3.056432788685981e-05, + "loss": 1.737, + "step": 85430 + }, + { + "epoch": 3.9, + "learning_rate": 3.056203945260653e-05, + "loss": 1.7104, + "step": 85440 + }, + { + "epoch": 3.9, + "learning_rate": 3.055975101835324e-05, + "loss": 1.6359, + "step": 85450 + }, + { + "epoch": 3.9, + "learning_rate": 3.055746258409996e-05, + "loss": 1.5728, + "step": 85460 + }, + { + "epoch": 3.9, + "learning_rate": 3.055517414984668e-05, + "loss": 1.5713, + "step": 85470 + }, + { + "epoch": 3.9, + "learning_rate": 3.055288571559339e-05, + "loss": 1.6029, + "step": 85480 + }, + { + "epoch": 3.9, + "learning_rate": 3.0550597281340107e-05, + "loss": 1.506, + "step": 85490 + }, + { + "epoch": 3.9, + "learning_rate": 3.054830884708683e-05, + "loss": 1.5984, + "step": 85500 + }, + { + "epoch": 3.9, + "learning_rate": 3.054602041283354e-05, + "loss": 1.7362, + "step": 85510 + }, + { + "epoch": 3.91, + "learning_rate": 3.0543731978580256e-05, + "loss": 1.3713, + "step": 85520 + }, + { + "epoch": 3.91, + "learning_rate": 3.054144354432698e-05, + "loss": 1.6454, + "step": 85530 + }, + { + "epoch": 3.91, + "learning_rate": 3.053915511007369e-05, + "loss": 1.6977, + "step": 85540 + }, + { + "epoch": 3.91, + "learning_rate": 3.0536866675820405e-05, + "loss": 1.6779, + "step": 85550 + }, + { + "epoch": 3.91, + "learning_rate": 3.053457824156712e-05, + "loss": 1.5937, + "step": 85560 + }, + { + "epoch": 3.91, + "learning_rate": 3.053228980731383e-05, + "loss": 1.663, + "step": 85570 + }, + { + "epoch": 3.91, + "learning_rate": 3.0530001373060554e-05, + "loss": 1.8179, + "step": 85580 + }, + { + "epoch": 3.91, + "learning_rate": 3.052771293880727e-05, + "loss": 1.4837, + "step": 85590 + }, + { + "epoch": 3.91, + "learning_rate": 3.052542450455398e-05, + "loss": 1.6373, + "step": 85600 + }, + { + "epoch": 3.91, + "learning_rate": 3.05231360703007e-05, + "loss": 1.5309, + "step": 85610 + }, + { + "epoch": 3.91, + "learning_rate": 3.052084763604742e-05, + "loss": 1.571, + "step": 85620 + }, + { + "epoch": 3.91, + "learning_rate": 3.051855920179413e-05, + "loss": 1.5837, + "step": 85630 + }, + { + "epoch": 3.91, + "learning_rate": 3.051627076754085e-05, + "loss": 1.7315, + "step": 85640 + }, + { + "epoch": 3.91, + "learning_rate": 3.0513982333287567e-05, + "loss": 1.7759, + "step": 85650 + }, + { + "epoch": 3.91, + "learning_rate": 3.0511693899034284e-05, + "loss": 1.7038, + "step": 85660 + }, + { + "epoch": 3.91, + "learning_rate": 3.0509405464780998e-05, + "loss": 1.7181, + "step": 85670 + }, + { + "epoch": 3.91, + "learning_rate": 3.0507117030527716e-05, + "loss": 1.5641, + "step": 85680 + }, + { + "epoch": 3.91, + "learning_rate": 3.0504828596274433e-05, + "loss": 1.6964, + "step": 85690 + }, + { + "epoch": 3.91, + "learning_rate": 3.0502540162021147e-05, + "loss": 1.5317, + "step": 85700 + }, + { + "epoch": 3.91, + "learning_rate": 3.0500251727767865e-05, + "loss": 1.663, + "step": 85710 + }, + { + "epoch": 3.91, + "learning_rate": 3.0497963293514582e-05, + "loss": 1.9764, + "step": 85720 + }, + { + "epoch": 3.91, + "learning_rate": 3.0495674859261297e-05, + "loss": 1.6075, + "step": 85730 + }, + { + "epoch": 3.92, + "learning_rate": 3.0493386425008014e-05, + "loss": 1.5536, + "step": 85740 + }, + { + "epoch": 3.92, + "learning_rate": 3.0491097990754725e-05, + "loss": 1.5449, + "step": 85750 + }, + { + "epoch": 3.92, + "learning_rate": 3.0488809556501442e-05, + "loss": 1.5377, + "step": 85760 + }, + { + "epoch": 3.92, + "learning_rate": 3.0486521122248157e-05, + "loss": 1.5975, + "step": 85770 + }, + { + "epoch": 3.92, + "learning_rate": 3.0484232687994874e-05, + "loss": 1.5776, + "step": 85780 + }, + { + "epoch": 3.92, + "learning_rate": 3.048194425374159e-05, + "loss": 1.4256, + "step": 85790 + }, + { + "epoch": 3.92, + "learning_rate": 3.0479655819488306e-05, + "loss": 1.6049, + "step": 85800 + }, + { + "epoch": 3.92, + "learning_rate": 3.0477367385235023e-05, + "loss": 1.4992, + "step": 85810 + }, + { + "epoch": 3.92, + "learning_rate": 3.047507895098174e-05, + "loss": 1.6766, + "step": 85820 + }, + { + "epoch": 3.92, + "learning_rate": 3.0472790516728455e-05, + "loss": 1.6841, + "step": 85830 + }, + { + "epoch": 3.92, + "learning_rate": 3.0470502082475172e-05, + "loss": 1.7554, + "step": 85840 + }, + { + "epoch": 3.92, + "learning_rate": 3.046821364822189e-05, + "loss": 1.626, + "step": 85850 + }, + { + "epoch": 3.92, + "learning_rate": 3.0465925213968604e-05, + "loss": 1.6005, + "step": 85860 + }, + { + "epoch": 3.92, + "learning_rate": 3.046363677971532e-05, + "loss": 1.4839, + "step": 85870 + }, + { + "epoch": 3.92, + "learning_rate": 3.046134834546204e-05, + "loss": 1.5267, + "step": 85880 + }, + { + "epoch": 3.92, + "learning_rate": 3.0459059911208753e-05, + "loss": 1.7146, + "step": 85890 + }, + { + "epoch": 3.92, + "learning_rate": 3.045677147695547e-05, + "loss": 1.3962, + "step": 85900 + }, + { + "epoch": 3.92, + "learning_rate": 3.0454483042702185e-05, + "loss": 1.667, + "step": 85910 + }, + { + "epoch": 3.92, + "learning_rate": 3.0452194608448902e-05, + "loss": 1.598, + "step": 85920 + }, + { + "epoch": 3.92, + "learning_rate": 3.044990617419562e-05, + "loss": 1.6719, + "step": 85930 + }, + { + "epoch": 3.92, + "learning_rate": 3.0447617739942334e-05, + "loss": 1.4993, + "step": 85940 + }, + { + "epoch": 3.92, + "learning_rate": 3.0445329305689048e-05, + "loss": 1.5419, + "step": 85950 + }, + { + "epoch": 3.93, + "learning_rate": 3.0443040871435762e-05, + "loss": 1.7088, + "step": 85960 + }, + { + "epoch": 3.93, + "learning_rate": 3.044075243718248e-05, + "loss": 1.5784, + "step": 85970 + }, + { + "epoch": 3.93, + "learning_rate": 3.0438464002929194e-05, + "loss": 1.704, + "step": 85980 + }, + { + "epoch": 3.93, + "learning_rate": 3.043617556867591e-05, + "loss": 1.5229, + "step": 85990 + }, + { + "epoch": 3.93, + "learning_rate": 3.043388713442263e-05, + "loss": 1.4923, + "step": 86000 + }, + { + "epoch": 3.93, + "learning_rate": 3.0431598700169343e-05, + "loss": 1.6978, + "step": 86010 + }, + { + "epoch": 3.93, + "learning_rate": 3.042931026591606e-05, + "loss": 1.6283, + "step": 86020 + }, + { + "epoch": 3.93, + "learning_rate": 3.0427021831662778e-05, + "loss": 1.6685, + "step": 86030 + }, + { + "epoch": 3.93, + "learning_rate": 3.0424733397409492e-05, + "loss": 1.4577, + "step": 86040 + }, + { + "epoch": 3.93, + "learning_rate": 3.042244496315621e-05, + "loss": 1.6445, + "step": 86050 + }, + { + "epoch": 3.93, + "learning_rate": 3.0420156528902927e-05, + "loss": 1.5081, + "step": 86060 + }, + { + "epoch": 3.93, + "learning_rate": 3.041786809464964e-05, + "loss": 1.6452, + "step": 86070 + }, + { + "epoch": 3.93, + "learning_rate": 3.041557966039636e-05, + "loss": 1.4488, + "step": 86080 + }, + { + "epoch": 3.93, + "learning_rate": 3.0413291226143077e-05, + "loss": 1.6632, + "step": 86090 + }, + { + "epoch": 3.93, + "learning_rate": 3.041100279188979e-05, + "loss": 1.7079, + "step": 86100 + }, + { + "epoch": 3.93, + "learning_rate": 3.0408714357636508e-05, + "loss": 1.5663, + "step": 86110 + }, + { + "epoch": 3.93, + "learning_rate": 3.0406425923383226e-05, + "loss": 1.736, + "step": 86120 + }, + { + "epoch": 3.93, + "learning_rate": 3.040413748912994e-05, + "loss": 1.4512, + "step": 86130 + }, + { + "epoch": 3.93, + "learning_rate": 3.040184905487665e-05, + "loss": 1.4784, + "step": 86140 + }, + { + "epoch": 3.93, + "learning_rate": 3.0399560620623368e-05, + "loss": 1.5619, + "step": 86150 + }, + { + "epoch": 3.93, + "learning_rate": 3.0397272186370086e-05, + "loss": 1.6542, + "step": 86160 + }, + { + "epoch": 3.93, + "learning_rate": 3.03949837521168e-05, + "loss": 1.679, + "step": 86170 + }, + { + "epoch": 3.94, + "learning_rate": 3.0392695317863517e-05, + "loss": 1.7283, + "step": 86180 + }, + { + "epoch": 3.94, + "learning_rate": 3.0390406883610235e-05, + "loss": 1.6497, + "step": 86190 + }, + { + "epoch": 3.94, + "learning_rate": 3.038811844935695e-05, + "loss": 1.6923, + "step": 86200 + }, + { + "epoch": 3.94, + "learning_rate": 3.0385830015103666e-05, + "loss": 1.5696, + "step": 86210 + }, + { + "epoch": 3.94, + "learning_rate": 3.0383541580850384e-05, + "loss": 1.7236, + "step": 86220 + }, + { + "epoch": 3.94, + "learning_rate": 3.0381253146597098e-05, + "loss": 1.6175, + "step": 86230 + }, + { + "epoch": 3.94, + "learning_rate": 3.0378964712343816e-05, + "loss": 1.4883, + "step": 86240 + }, + { + "epoch": 3.94, + "learning_rate": 3.0376676278090533e-05, + "loss": 1.543, + "step": 86250 + }, + { + "epoch": 3.94, + "learning_rate": 3.0374387843837247e-05, + "loss": 1.6781, + "step": 86260 + }, + { + "epoch": 3.94, + "learning_rate": 3.0372099409583965e-05, + "loss": 1.5844, + "step": 86270 + }, + { + "epoch": 3.94, + "learning_rate": 3.0369810975330682e-05, + "loss": 1.6251, + "step": 86280 + }, + { + "epoch": 3.94, + "learning_rate": 3.0367522541077396e-05, + "loss": 1.5491, + "step": 86290 + }, + { + "epoch": 3.94, + "learning_rate": 3.0365234106824114e-05, + "loss": 1.4639, + "step": 86300 + }, + { + "epoch": 3.94, + "learning_rate": 3.036294567257083e-05, + "loss": 1.3997, + "step": 86310 + }, + { + "epoch": 3.94, + "learning_rate": 3.0360657238317546e-05, + "loss": 1.4729, + "step": 86320 + }, + { + "epoch": 3.94, + "learning_rate": 3.0358368804064263e-05, + "loss": 1.5635, + "step": 86330 + }, + { + "epoch": 3.94, + "learning_rate": 3.0356080369810974e-05, + "loss": 1.5176, + "step": 86340 + }, + { + "epoch": 3.94, + "learning_rate": 3.035379193555769e-05, + "loss": 1.6474, + "step": 86350 + }, + { + "epoch": 3.94, + "learning_rate": 3.0351503501304406e-05, + "loss": 1.4921, + "step": 86360 + }, + { + "epoch": 3.94, + "learning_rate": 3.0349215067051123e-05, + "loss": 1.5851, + "step": 86370 + }, + { + "epoch": 3.94, + "learning_rate": 3.034692663279784e-05, + "loss": 1.6899, + "step": 86380 + }, + { + "epoch": 3.94, + "learning_rate": 3.0344638198544555e-05, + "loss": 1.5119, + "step": 86390 + }, + { + "epoch": 3.95, + "learning_rate": 3.0342349764291272e-05, + "loss": 1.5777, + "step": 86400 + }, + { + "epoch": 3.95, + "learning_rate": 3.034006133003799e-05, + "loss": 1.6306, + "step": 86410 + }, + { + "epoch": 3.95, + "learning_rate": 3.0337772895784704e-05, + "loss": 1.7365, + "step": 86420 + }, + { + "epoch": 3.95, + "learning_rate": 3.033548446153142e-05, + "loss": 1.5093, + "step": 86430 + }, + { + "epoch": 3.95, + "learning_rate": 3.033319602727814e-05, + "loss": 1.921, + "step": 86440 + }, + { + "epoch": 3.95, + "learning_rate": 3.0330907593024853e-05, + "loss": 1.5856, + "step": 86450 + }, + { + "epoch": 3.95, + "learning_rate": 3.032861915877157e-05, + "loss": 1.6389, + "step": 86460 + }, + { + "epoch": 3.95, + "learning_rate": 3.0326330724518288e-05, + "loss": 1.6246, + "step": 86470 + }, + { + "epoch": 3.95, + "learning_rate": 3.0324042290265002e-05, + "loss": 1.5516, + "step": 86480 + }, + { + "epoch": 3.95, + "learning_rate": 3.032175385601172e-05, + "loss": 1.3934, + "step": 86490 + }, + { + "epoch": 3.95, + "learning_rate": 3.0319465421758437e-05, + "loss": 1.4802, + "step": 86500 + }, + { + "epoch": 3.95, + "learning_rate": 3.031717698750515e-05, + "loss": 1.7516, + "step": 86510 + }, + { + "epoch": 3.95, + "learning_rate": 3.031488855325187e-05, + "loss": 1.5777, + "step": 86520 + }, + { + "epoch": 3.95, + "learning_rate": 3.0312600118998587e-05, + "loss": 1.5848, + "step": 86530 + }, + { + "epoch": 3.95, + "learning_rate": 3.0310311684745297e-05, + "loss": 1.7381, + "step": 86540 + }, + { + "epoch": 3.95, + "learning_rate": 3.030802325049201e-05, + "loss": 1.6427, + "step": 86550 + }, + { + "epoch": 3.95, + "learning_rate": 3.030573481623873e-05, + "loss": 1.5542, + "step": 86560 + }, + { + "epoch": 3.95, + "learning_rate": 3.0303446381985446e-05, + "loss": 1.5026, + "step": 86570 + }, + { + "epoch": 3.95, + "learning_rate": 3.030115794773216e-05, + "loss": 1.7823, + "step": 86580 + }, + { + "epoch": 3.95, + "learning_rate": 3.0298869513478878e-05, + "loss": 1.7878, + "step": 86590 + }, + { + "epoch": 3.95, + "learning_rate": 3.0296581079225596e-05, + "loss": 1.558, + "step": 86600 + }, + { + "epoch": 3.95, + "learning_rate": 3.029429264497231e-05, + "loss": 1.5653, + "step": 86610 + }, + { + "epoch": 3.96, + "learning_rate": 3.0292004210719027e-05, + "loss": 1.6988, + "step": 86620 + }, + { + "epoch": 3.96, + "learning_rate": 3.0289715776465745e-05, + "loss": 1.7033, + "step": 86630 + }, + { + "epoch": 3.96, + "learning_rate": 3.028742734221246e-05, + "loss": 1.6915, + "step": 86640 + }, + { + "epoch": 3.96, + "learning_rate": 3.0285138907959176e-05, + "loss": 1.4103, + "step": 86650 + }, + { + "epoch": 3.96, + "learning_rate": 3.0282850473705894e-05, + "loss": 1.5359, + "step": 86660 + }, + { + "epoch": 3.96, + "learning_rate": 3.0280562039452608e-05, + "loss": 1.488, + "step": 86670 + }, + { + "epoch": 3.96, + "learning_rate": 3.0278273605199326e-05, + "loss": 1.6393, + "step": 86680 + }, + { + "epoch": 3.96, + "learning_rate": 3.0275985170946043e-05, + "loss": 1.7662, + "step": 86690 + }, + { + "epoch": 3.96, + "learning_rate": 3.0273696736692757e-05, + "loss": 1.5899, + "step": 86700 + }, + { + "epoch": 3.96, + "learning_rate": 3.0271408302439475e-05, + "loss": 1.6061, + "step": 86710 + }, + { + "epoch": 3.96, + "learning_rate": 3.0269119868186192e-05, + "loss": 1.6198, + "step": 86720 + }, + { + "epoch": 3.96, + "learning_rate": 3.0266831433932906e-05, + "loss": 1.5571, + "step": 86730 + }, + { + "epoch": 3.96, + "learning_rate": 3.0264542999679617e-05, + "loss": 1.5931, + "step": 86740 + }, + { + "epoch": 3.96, + "learning_rate": 3.0262254565426335e-05, + "loss": 1.7164, + "step": 86750 + }, + { + "epoch": 3.96, + "learning_rate": 3.0259966131173052e-05, + "loss": 1.5721, + "step": 86760 + }, + { + "epoch": 3.96, + "learning_rate": 3.0257677696919766e-05, + "loss": 1.6537, + "step": 86770 + }, + { + "epoch": 3.96, + "learning_rate": 3.0255389262666484e-05, + "loss": 1.5761, + "step": 86780 + }, + { + "epoch": 3.96, + "learning_rate": 3.02531008284132e-05, + "loss": 1.5712, + "step": 86790 + }, + { + "epoch": 3.96, + "learning_rate": 3.0250812394159916e-05, + "loss": 1.5024, + "step": 86800 + }, + { + "epoch": 3.96, + "learning_rate": 3.0248523959906633e-05, + "loss": 1.5378, + "step": 86810 + }, + { + "epoch": 3.96, + "learning_rate": 3.024623552565335e-05, + "loss": 1.4378, + "step": 86820 + }, + { + "epoch": 3.96, + "learning_rate": 3.0243947091400065e-05, + "loss": 1.7725, + "step": 86830 + }, + { + "epoch": 3.97, + "learning_rate": 3.0241658657146782e-05, + "loss": 1.5737, + "step": 86840 + }, + { + "epoch": 3.97, + "learning_rate": 3.02393702228935e-05, + "loss": 1.5655, + "step": 86850 + }, + { + "epoch": 3.97, + "learning_rate": 3.0237081788640214e-05, + "loss": 1.717, + "step": 86860 + }, + { + "epoch": 3.97, + "learning_rate": 3.023479335438693e-05, + "loss": 1.5798, + "step": 86870 + }, + { + "epoch": 3.97, + "learning_rate": 3.023250492013365e-05, + "loss": 1.516, + "step": 86880 + }, + { + "epoch": 3.97, + "learning_rate": 3.0230216485880363e-05, + "loss": 1.6399, + "step": 86890 + }, + { + "epoch": 3.97, + "learning_rate": 3.022792805162708e-05, + "loss": 1.5596, + "step": 86900 + }, + { + "epoch": 3.97, + "learning_rate": 3.0225639617373798e-05, + "loss": 1.4641, + "step": 86910 + }, + { + "epoch": 3.97, + "learning_rate": 3.0223351183120512e-05, + "loss": 1.7695, + "step": 86920 + }, + { + "epoch": 3.97, + "learning_rate": 3.0221062748867223e-05, + "loss": 1.6226, + "step": 86930 + }, + { + "epoch": 3.97, + "learning_rate": 3.021877431461394e-05, + "loss": 1.7209, + "step": 86940 + }, + { + "epoch": 3.97, + "learning_rate": 3.0216485880360658e-05, + "loss": 1.7297, + "step": 86950 + }, + { + "epoch": 3.97, + "learning_rate": 3.0214197446107372e-05, + "loss": 1.465, + "step": 86960 + }, + { + "epoch": 3.97, + "learning_rate": 3.021190901185409e-05, + "loss": 1.6571, + "step": 86970 + }, + { + "epoch": 3.97, + "learning_rate": 3.0209620577600807e-05, + "loss": 1.5508, + "step": 86980 + }, + { + "epoch": 3.97, + "learning_rate": 3.020733214334752e-05, + "loss": 1.5637, + "step": 86990 + }, + { + "epoch": 3.97, + "learning_rate": 3.020504370909424e-05, + "loss": 1.6452, + "step": 87000 + }, + { + "epoch": 3.97, + "learning_rate": 3.0202755274840956e-05, + "loss": 1.5943, + "step": 87010 + }, + { + "epoch": 3.97, + "learning_rate": 3.020046684058767e-05, + "loss": 1.8165, + "step": 87020 + }, + { + "epoch": 3.97, + "learning_rate": 3.0198178406334388e-05, + "loss": 1.4194, + "step": 87030 + }, + { + "epoch": 3.97, + "learning_rate": 3.0195889972081106e-05, + "loss": 1.6807, + "step": 87040 + }, + { + "epoch": 3.97, + "learning_rate": 3.019360153782782e-05, + "loss": 1.6564, + "step": 87050 + }, + { + "epoch": 3.98, + "learning_rate": 3.0191313103574537e-05, + "loss": 1.6347, + "step": 87060 + }, + { + "epoch": 3.98, + "learning_rate": 3.018902466932125e-05, + "loss": 1.6582, + "step": 87070 + }, + { + "epoch": 3.98, + "learning_rate": 3.018673623506797e-05, + "loss": 1.5135, + "step": 87080 + }, + { + "epoch": 3.98, + "learning_rate": 3.0184447800814686e-05, + "loss": 1.6074, + "step": 87090 + }, + { + "epoch": 3.98, + "learning_rate": 3.01821593665614e-05, + "loss": 1.5956, + "step": 87100 + }, + { + "epoch": 3.98, + "learning_rate": 3.0179870932308118e-05, + "loss": 1.9152, + "step": 87110 + }, + { + "epoch": 3.98, + "learning_rate": 3.0177582498054836e-05, + "loss": 1.5889, + "step": 87120 + }, + { + "epoch": 3.98, + "learning_rate": 3.0175294063801546e-05, + "loss": 1.6569, + "step": 87130 + }, + { + "epoch": 3.98, + "learning_rate": 3.017300562954826e-05, + "loss": 1.5528, + "step": 87140 + }, + { + "epoch": 3.98, + "learning_rate": 3.0170717195294978e-05, + "loss": 1.5454, + "step": 87150 + }, + { + "epoch": 3.98, + "learning_rate": 3.0168428761041696e-05, + "loss": 1.5983, + "step": 87160 + }, + { + "epoch": 3.98, + "learning_rate": 3.016614032678841e-05, + "loss": 1.645, + "step": 87170 + }, + { + "epoch": 3.98, + "learning_rate": 3.0163851892535127e-05, + "loss": 1.5025, + "step": 87180 + }, + { + "epoch": 3.98, + "learning_rate": 3.0161563458281845e-05, + "loss": 1.711, + "step": 87190 + }, + { + "epoch": 3.98, + "learning_rate": 3.015927502402856e-05, + "loss": 1.6259, + "step": 87200 + }, + { + "epoch": 3.98, + "learning_rate": 3.0156986589775276e-05, + "loss": 1.4857, + "step": 87210 + }, + { + "epoch": 3.98, + "learning_rate": 3.0154698155521994e-05, + "loss": 1.4839, + "step": 87220 + }, + { + "epoch": 3.98, + "learning_rate": 3.0152409721268708e-05, + "loss": 1.507, + "step": 87230 + }, + { + "epoch": 3.98, + "learning_rate": 3.0150121287015426e-05, + "loss": 1.5343, + "step": 87240 + }, + { + "epoch": 3.98, + "learning_rate": 3.0147832852762143e-05, + "loss": 1.5668, + "step": 87250 + }, + { + "epoch": 3.98, + "learning_rate": 3.0145544418508857e-05, + "loss": 1.6742, + "step": 87260 + }, + { + "epoch": 3.98, + "learning_rate": 3.0143255984255575e-05, + "loss": 1.649, + "step": 87270 + }, + { + "epoch": 3.99, + "learning_rate": 3.0140967550002292e-05, + "loss": 1.6219, + "step": 87280 + }, + { + "epoch": 3.99, + "learning_rate": 3.0138679115749006e-05, + "loss": 1.7857, + "step": 87290 + }, + { + "epoch": 3.99, + "learning_rate": 3.0136390681495724e-05, + "loss": 1.6297, + "step": 87300 + }, + { + "epoch": 3.99, + "learning_rate": 3.013410224724244e-05, + "loss": 1.5559, + "step": 87310 + }, + { + "epoch": 3.99, + "learning_rate": 3.0131813812989156e-05, + "loss": 1.6515, + "step": 87320 + }, + { + "epoch": 3.99, + "learning_rate": 3.0129525378735866e-05, + "loss": 1.5934, + "step": 87330 + }, + { + "epoch": 3.99, + "learning_rate": 3.0127236944482584e-05, + "loss": 1.729, + "step": 87340 + }, + { + "epoch": 3.99, + "learning_rate": 3.01249485102293e-05, + "loss": 1.5493, + "step": 87350 + }, + { + "epoch": 3.99, + "learning_rate": 3.0122660075976015e-05, + "loss": 1.5888, + "step": 87360 + }, + { + "epoch": 3.99, + "learning_rate": 3.0120371641722733e-05, + "loss": 1.7753, + "step": 87370 + }, + { + "epoch": 3.99, + "learning_rate": 3.011808320746945e-05, + "loss": 1.501, + "step": 87380 + }, + { + "epoch": 3.99, + "learning_rate": 3.0115794773216165e-05, + "loss": 1.8703, + "step": 87390 + }, + { + "epoch": 3.99, + "learning_rate": 3.0113506338962882e-05, + "loss": 1.8427, + "step": 87400 + }, + { + "epoch": 3.99, + "learning_rate": 3.01112179047096e-05, + "loss": 1.6393, + "step": 87410 + }, + { + "epoch": 3.99, + "learning_rate": 3.0108929470456314e-05, + "loss": 1.7296, + "step": 87420 + }, + { + "epoch": 3.99, + "learning_rate": 3.010664103620303e-05, + "loss": 1.465, + "step": 87430 + }, + { + "epoch": 3.99, + "learning_rate": 3.010435260194975e-05, + "loss": 1.4827, + "step": 87440 + }, + { + "epoch": 3.99, + "learning_rate": 3.0102064167696463e-05, + "loss": 1.492, + "step": 87450 + }, + { + "epoch": 3.99, + "learning_rate": 3.009977573344318e-05, + "loss": 1.5468, + "step": 87460 + }, + { + "epoch": 3.99, + "learning_rate": 3.0097487299189898e-05, + "loss": 1.5962, + "step": 87470 + }, + { + "epoch": 3.99, + "learning_rate": 3.0095198864936612e-05, + "loss": 1.5442, + "step": 87480 + }, + { + "epoch": 4.0, + "learning_rate": 3.009291043068333e-05, + "loss": 1.4423, + "step": 87490 + }, + { + "epoch": 4.0, + "learning_rate": 3.0090621996430047e-05, + "loss": 1.4339, + "step": 87500 + }, + { + "epoch": 4.0, + "learning_rate": 3.008833356217676e-05, + "loss": 1.5483, + "step": 87510 + }, + { + "epoch": 4.0, + "learning_rate": 3.008604512792348e-05, + "loss": 1.5705, + "step": 87520 + }, + { + "epoch": 4.0, + "learning_rate": 3.008375669367019e-05, + "loss": 1.4546, + "step": 87530 + }, + { + "epoch": 4.0, + "learning_rate": 3.0081468259416907e-05, + "loss": 1.5455, + "step": 87540 + }, + { + "epoch": 4.0, + "learning_rate": 3.007917982516362e-05, + "loss": 1.6061, + "step": 87550 + }, + { + "epoch": 4.0, + "learning_rate": 3.007689139091034e-05, + "loss": 1.5896, + "step": 87560 + }, + { + "epoch": 4.0, + "learning_rate": 3.0074602956657056e-05, + "loss": 1.4837, + "step": 87570 + }, + { + "epoch": 4.0, + "learning_rate": 3.007231452240377e-05, + "loss": 1.5007, + "step": 87580 + }, + { + "epoch": 4.0, + "learning_rate": 3.0070026088150488e-05, + "loss": 1.67, + "step": 87590 + }, + { + "epoch": 4.0, + "eval_cer": 0.6792978756320028, + "eval_em": 0.00728476821192053, + "eval_f1": 0.00728476821192053, + "eval_loss": 1.4676569700241089, + "eval_runtime": 2607.9631, + "eval_samples_per_second": 4.053, + "eval_steps_per_second": 2.026, + "eval_wer": 0.9927152317880795, + "step": 87599 + } + ], + "max_steps": 218990, + "num_train_epochs": 10, + "total_flos": 8.265838015310193e+17, + "trial_name": null, + "trial_params": null +}