diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,8 +1,8 @@ { "best_metric": null, "best_model_checkpoint": null, - "epoch": 0.9993337774816788, - "global_step": 10500, + "epoch": 1.9986675549633577, + "global_step": 21000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, @@ -6306,11 +6306,6318 @@ "learning_rate": 6.69011170872983e-05, "loss": 1.5428, "step": 10500 + }, + { + "epoch": 1.0, + "eval_loss": 1.5751161575317383, + "eval_runtime": 4381.6742, + "eval_samples_per_second": 2.433, + "step": 10507 + }, + { + "epoch": 1.0, + "learning_rate": 6.686929123834378e-05, + "loss": 1.3253, + "step": 10510 + }, + { + "epoch": 1.0, + "learning_rate": 6.683746538938927e-05, + "loss": 1.2831, + "step": 10520 + }, + { + "epoch": 1.0, + "learning_rate": 6.680563954043474e-05, + "loss": 1.4592, + "step": 10530 + }, + { + "epoch": 1.0, + "learning_rate": 6.677381369148022e-05, + "loss": 1.3726, + "step": 10540 + }, + { + "epoch": 1.0, + "learning_rate": 6.67419878425257e-05, + "loss": 1.3083, + "step": 10550 + }, + { + "epoch": 1.01, + "learning_rate": 6.671016199357119e-05, + "loss": 1.1417, + "step": 10560 + }, + { + "epoch": 1.01, + "learning_rate": 6.667833614461666e-05, + "loss": 1.8066, + "step": 10570 + }, + { + "epoch": 1.01, + "learning_rate": 6.664651029566213e-05, + "loss": 1.3587, + "step": 10580 + }, + { + "epoch": 1.01, + "learning_rate": 6.661468444670763e-05, + "loss": 1.5318, + "step": 10590 + }, + { + "epoch": 1.01, + "learning_rate": 6.65828585977531e-05, + "loss": 1.3035, + "step": 10600 + }, + { + "epoch": 1.01, + "learning_rate": 6.655103274879858e-05, + "loss": 1.2573, + "step": 10610 + }, + { + "epoch": 1.01, + "learning_rate": 6.651920689984405e-05, + "loss": 1.2519, + "step": 10620 + }, + { + "epoch": 1.01, + "learning_rate": 6.648738105088955e-05, + "loss": 1.0242, + "step": 10630 + }, + { + "epoch": 1.01, + "learning_rate": 6.645555520193502e-05, + "loss": 1.0862, + "step": 10640 + }, + { + "epoch": 1.01, + "learning_rate": 6.642372935298049e-05, + "loss": 1.3527, + "step": 10650 + }, + { + "epoch": 1.01, + "learning_rate": 6.639190350402597e-05, + "loss": 1.16, + "step": 10660 + }, + { + "epoch": 1.02, + "learning_rate": 6.636007765507145e-05, + "loss": 1.5568, + "step": 10670 + }, + { + "epoch": 1.02, + "learning_rate": 6.632825180611694e-05, + "loss": 1.2472, + "step": 10680 + }, + { + "epoch": 1.02, + "learning_rate": 6.629642595716241e-05, + "loss": 1.3669, + "step": 10690 + }, + { + "epoch": 1.02, + "learning_rate": 6.626460010820789e-05, + "loss": 0.9845, + "step": 10700 + }, + { + "epoch": 1.02, + "learning_rate": 6.623277425925338e-05, + "loss": 1.1218, + "step": 10710 + }, + { + "epoch": 1.02, + "learning_rate": 6.620094841029885e-05, + "loss": 1.4078, + "step": 10720 + }, + { + "epoch": 1.02, + "learning_rate": 6.616912256134433e-05, + "loss": 1.2828, + "step": 10730 + }, + { + "epoch": 1.02, + "learning_rate": 6.613729671238981e-05, + "loss": 1.3693, + "step": 10740 + }, + { + "epoch": 1.02, + "learning_rate": 6.610547086343528e-05, + "loss": 1.4504, + "step": 10750 + }, + { + "epoch": 1.02, + "learning_rate": 6.607364501448077e-05, + "loss": 1.2069, + "step": 10760 + }, + { + "epoch": 1.03, + "learning_rate": 6.604181916552624e-05, + "loss": 1.3613, + "step": 10770 + }, + { + "epoch": 1.03, + "learning_rate": 6.600999331657173e-05, + "loss": 1.4412, + "step": 10780 + }, + { + "epoch": 1.03, + "learning_rate": 6.59781674676172e-05, + "loss": 1.4046, + "step": 10790 + }, + { + "epoch": 1.03, + "learning_rate": 6.594634161866267e-05, + "loss": 1.2768, + "step": 10800 + }, + { + "epoch": 1.03, + "learning_rate": 6.591451576970816e-05, + "loss": 1.3946, + "step": 10810 + }, + { + "epoch": 1.03, + "learning_rate": 6.588268992075364e-05, + "loss": 1.034, + "step": 10820 + }, + { + "epoch": 1.03, + "learning_rate": 6.585086407179913e-05, + "loss": 1.1968, + "step": 10830 + }, + { + "epoch": 1.03, + "learning_rate": 6.58190382228446e-05, + "loss": 1.0514, + "step": 10840 + }, + { + "epoch": 1.03, + "learning_rate": 6.578721237389008e-05, + "loss": 1.3141, + "step": 10850 + }, + { + "epoch": 1.03, + "learning_rate": 6.575538652493556e-05, + "loss": 1.0505, + "step": 10860 + }, + { + "epoch": 1.03, + "learning_rate": 6.572356067598103e-05, + "loss": 1.3938, + "step": 10870 + }, + { + "epoch": 1.04, + "learning_rate": 6.569173482702652e-05, + "loss": 1.2018, + "step": 10880 + }, + { + "epoch": 1.04, + "learning_rate": 6.5659908978072e-05, + "loss": 1.5693, + "step": 10890 + }, + { + "epoch": 1.04, + "learning_rate": 6.562808312911747e-05, + "loss": 1.089, + "step": 10900 + }, + { + "epoch": 1.04, + "learning_rate": 6.559625728016295e-05, + "loss": 1.2522, + "step": 10910 + }, + { + "epoch": 1.04, + "learning_rate": 6.556443143120842e-05, + "loss": 1.0244, + "step": 10920 + }, + { + "epoch": 1.04, + "learning_rate": 6.553260558225391e-05, + "loss": 1.6794, + "step": 10930 + }, + { + "epoch": 1.04, + "learning_rate": 6.550077973329939e-05, + "loss": 1.3156, + "step": 10940 + }, + { + "epoch": 1.04, + "learning_rate": 6.546895388434486e-05, + "loss": 1.3705, + "step": 10950 + }, + { + "epoch": 1.04, + "learning_rate": 6.543712803539034e-05, + "loss": 1.2222, + "step": 10960 + }, + { + "epoch": 1.04, + "learning_rate": 6.540530218643583e-05, + "loss": 1.2035, + "step": 10970 + }, + { + "epoch": 1.05, + "learning_rate": 6.537347633748131e-05, + "loss": 1.3753, + "step": 10980 + }, + { + "epoch": 1.05, + "learning_rate": 6.534165048852678e-05, + "loss": 1.3482, + "step": 10990 + }, + { + "epoch": 1.05, + "learning_rate": 6.530982463957225e-05, + "loss": 1.6365, + "step": 11000 + }, + { + "epoch": 1.05, + "learning_rate": 6.527799879061775e-05, + "loss": 1.4579, + "step": 11010 + }, + { + "epoch": 1.05, + "learning_rate": 6.524617294166322e-05, + "loss": 1.389, + "step": 11020 + }, + { + "epoch": 1.05, + "learning_rate": 6.52143470927087e-05, + "loss": 1.7417, + "step": 11030 + }, + { + "epoch": 1.05, + "learning_rate": 6.518252124375417e-05, + "loss": 1.1974, + "step": 11040 + }, + { + "epoch": 1.05, + "learning_rate": 6.515069539479966e-05, + "loss": 1.281, + "step": 11050 + }, + { + "epoch": 1.05, + "learning_rate": 6.511886954584514e-05, + "loss": 1.4675, + "step": 11060 + }, + { + "epoch": 1.05, + "learning_rate": 6.508704369689061e-05, + "loss": 1.2103, + "step": 11070 + }, + { + "epoch": 1.05, + "learning_rate": 6.50552178479361e-05, + "loss": 1.2186, + "step": 11080 + }, + { + "epoch": 1.06, + "learning_rate": 6.502339199898158e-05, + "loss": 1.4346, + "step": 11090 + }, + { + "epoch": 1.06, + "learning_rate": 6.499156615002705e-05, + "loss": 1.4353, + "step": 11100 + }, + { + "epoch": 1.06, + "learning_rate": 6.495974030107253e-05, + "loss": 0.9587, + "step": 11110 + }, + { + "epoch": 1.06, + "learning_rate": 6.492791445211802e-05, + "loss": 1.2586, + "step": 11120 + }, + { + "epoch": 1.06, + "learning_rate": 6.48960886031635e-05, + "loss": 1.3769, + "step": 11130 + }, + { + "epoch": 1.06, + "learning_rate": 6.486426275420897e-05, + "loss": 1.0935, + "step": 11140 + }, + { + "epoch": 1.06, + "learning_rate": 6.483243690525444e-05, + "loss": 1.1413, + "step": 11150 + }, + { + "epoch": 1.06, + "learning_rate": 6.480061105629994e-05, + "loss": 1.4108, + "step": 11160 + }, + { + "epoch": 1.06, + "learning_rate": 6.47687852073454e-05, + "loss": 1.5105, + "step": 11170 + }, + { + "epoch": 1.06, + "learning_rate": 6.473695935839089e-05, + "loss": 1.3528, + "step": 11180 + }, + { + "epoch": 1.07, + "learning_rate": 6.470513350943636e-05, + "loss": 1.1827, + "step": 11190 + }, + { + "epoch": 1.07, + "learning_rate": 6.467330766048184e-05, + "loss": 1.6177, + "step": 11200 + }, + { + "epoch": 1.07, + "learning_rate": 6.464148181152733e-05, + "loss": 1.6265, + "step": 11210 + }, + { + "epoch": 1.07, + "learning_rate": 6.46096559625728e-05, + "loss": 1.4097, + "step": 11220 + }, + { + "epoch": 1.07, + "learning_rate": 6.457783011361828e-05, + "loss": 1.5653, + "step": 11230 + }, + { + "epoch": 1.07, + "learning_rate": 6.454600426466376e-05, + "loss": 1.1577, + "step": 11240 + }, + { + "epoch": 1.07, + "learning_rate": 6.451417841570923e-05, + "loss": 1.1006, + "step": 11250 + }, + { + "epoch": 1.07, + "learning_rate": 6.448235256675472e-05, + "loss": 1.163, + "step": 11260 + }, + { + "epoch": 1.07, + "learning_rate": 6.44505267178002e-05, + "loss": 1.4222, + "step": 11270 + }, + { + "epoch": 1.07, + "learning_rate": 6.441870086884569e-05, + "loss": 1.6539, + "step": 11280 + }, + { + "epoch": 1.07, + "learning_rate": 6.438687501989116e-05, + "loss": 1.5317, + "step": 11290 + }, + { + "epoch": 1.08, + "learning_rate": 6.435504917093664e-05, + "loss": 1.4867, + "step": 11300 + }, + { + "epoch": 1.08, + "learning_rate": 6.432322332198212e-05, + "loss": 1.577, + "step": 11310 + }, + { + "epoch": 1.08, + "learning_rate": 6.42913974730276e-05, + "loss": 1.3068, + "step": 11320 + }, + { + "epoch": 1.08, + "learning_rate": 6.425957162407308e-05, + "loss": 1.3321, + "step": 11330 + }, + { + "epoch": 1.08, + "learning_rate": 6.422774577511855e-05, + "loss": 1.2002, + "step": 11340 + }, + { + "epoch": 1.08, + "learning_rate": 6.419591992616404e-05, + "loss": 1.3479, + "step": 11350 + }, + { + "epoch": 1.08, + "learning_rate": 6.416409407720951e-05, + "loss": 1.4076, + "step": 11360 + }, + { + "epoch": 1.08, + "learning_rate": 6.413226822825498e-05, + "loss": 1.4901, + "step": 11370 + }, + { + "epoch": 1.08, + "learning_rate": 6.410044237930047e-05, + "loss": 0.9651, + "step": 11380 + }, + { + "epoch": 1.08, + "learning_rate": 6.406861653034595e-05, + "loss": 1.462, + "step": 11390 + }, + { + "epoch": 1.08, + "learning_rate": 6.403679068139144e-05, + "loss": 1.3459, + "step": 11400 + }, + { + "epoch": 1.09, + "learning_rate": 6.40049648324369e-05, + "loss": 1.1993, + "step": 11410 + }, + { + "epoch": 1.09, + "learning_rate": 6.397313898348239e-05, + "loss": 1.4077, + "step": 11420 + }, + { + "epoch": 1.09, + "learning_rate": 6.394131313452787e-05, + "loss": 1.0938, + "step": 11430 + }, + { + "epoch": 1.09, + "learning_rate": 6.390948728557334e-05, + "loss": 1.4993, + "step": 11440 + }, + { + "epoch": 1.09, + "learning_rate": 6.387766143661883e-05, + "loss": 1.31, + "step": 11450 + }, + { + "epoch": 1.09, + "learning_rate": 6.384583558766431e-05, + "loss": 1.5857, + "step": 11460 + }, + { + "epoch": 1.09, + "learning_rate": 6.381400973870978e-05, + "loss": 1.3809, + "step": 11470 + }, + { + "epoch": 1.09, + "learning_rate": 6.378218388975526e-05, + "loss": 1.6283, + "step": 11480 + }, + { + "epoch": 1.09, + "learning_rate": 6.375035804080073e-05, + "loss": 1.2548, + "step": 11490 + }, + { + "epoch": 1.09, + "learning_rate": 6.371853219184623e-05, + "loss": 1.2578, + "step": 11500 + }, + { + "epoch": 1.1, + "learning_rate": 6.36867063428917e-05, + "loss": 1.3211, + "step": 11510 + }, + { + "epoch": 1.1, + "learning_rate": 6.365488049393717e-05, + "loss": 1.1977, + "step": 11520 + }, + { + "epoch": 1.1, + "learning_rate": 6.362305464498265e-05, + "loss": 1.2643, + "step": 11530 + }, + { + "epoch": 1.1, + "learning_rate": 6.359122879602814e-05, + "loss": 1.3753, + "step": 11540 + }, + { + "epoch": 1.1, + "learning_rate": 6.355940294707362e-05, + "loss": 1.3017, + "step": 11550 + }, + { + "epoch": 1.1, + "learning_rate": 6.352757709811909e-05, + "loss": 1.3721, + "step": 11560 + }, + { + "epoch": 1.1, + "learning_rate": 6.349575124916458e-05, + "loss": 1.6016, + "step": 11570 + }, + { + "epoch": 1.1, + "learning_rate": 6.346392540021006e-05, + "loss": 1.2166, + "step": 11580 + }, + { + "epoch": 1.1, + "learning_rate": 6.343209955125553e-05, + "loss": 1.5257, + "step": 11590 + }, + { + "epoch": 1.1, + "learning_rate": 6.340027370230101e-05, + "loss": 1.5632, + "step": 11600 + }, + { + "epoch": 1.1, + "learning_rate": 6.33684478533465e-05, + "loss": 1.3745, + "step": 11610 + }, + { + "epoch": 1.11, + "learning_rate": 6.333662200439197e-05, + "loss": 1.5288, + "step": 11620 + }, + { + "epoch": 1.11, + "learning_rate": 6.330479615543745e-05, + "loss": 1.2667, + "step": 11630 + }, + { + "epoch": 1.11, + "learning_rate": 6.327297030648292e-05, + "loss": 1.5822, + "step": 11640 + }, + { + "epoch": 1.11, + "learning_rate": 6.324114445752842e-05, + "loss": 1.0847, + "step": 11650 + }, + { + "epoch": 1.11, + "learning_rate": 6.320931860857389e-05, + "loss": 1.503, + "step": 11660 + }, + { + "epoch": 1.11, + "learning_rate": 6.317749275961936e-05, + "loss": 1.4796, + "step": 11670 + }, + { + "epoch": 1.11, + "learning_rate": 6.314566691066484e-05, + "loss": 1.1655, + "step": 11680 + }, + { + "epoch": 1.11, + "learning_rate": 6.311384106171033e-05, + "loss": 1.213, + "step": 11690 + }, + { + "epoch": 1.11, + "learning_rate": 6.308201521275581e-05, + "loss": 1.5796, + "step": 11700 + }, + { + "epoch": 1.11, + "learning_rate": 6.305018936380128e-05, + "loss": 1.1776, + "step": 11710 + }, + { + "epoch": 1.12, + "learning_rate": 6.301836351484676e-05, + "loss": 1.2745, + "step": 11720 + }, + { + "epoch": 1.12, + "learning_rate": 6.298653766589225e-05, + "loss": 1.5491, + "step": 11730 + }, + { + "epoch": 1.12, + "learning_rate": 6.295471181693772e-05, + "loss": 1.3805, + "step": 11740 + }, + { + "epoch": 1.12, + "learning_rate": 6.29228859679832e-05, + "loss": 1.4238, + "step": 11750 + }, + { + "epoch": 1.12, + "learning_rate": 6.289106011902868e-05, + "loss": 1.3267, + "step": 11760 + }, + { + "epoch": 1.12, + "learning_rate": 6.285923427007415e-05, + "loss": 1.2011, + "step": 11770 + }, + { + "epoch": 1.12, + "learning_rate": 6.282740842111964e-05, + "loss": 1.2998, + "step": 11780 + }, + { + "epoch": 1.12, + "learning_rate": 6.279558257216511e-05, + "loss": 1.0914, + "step": 11790 + }, + { + "epoch": 1.12, + "learning_rate": 6.27637567232106e-05, + "loss": 1.4221, + "step": 11800 + }, + { + "epoch": 1.12, + "learning_rate": 6.273193087425607e-05, + "loss": 1.4707, + "step": 11810 + }, + { + "epoch": 1.12, + "learning_rate": 6.270010502530154e-05, + "loss": 1.2843, + "step": 11820 + }, + { + "epoch": 1.13, + "learning_rate": 6.266827917634703e-05, + "loss": 1.5337, + "step": 11830 + }, + { + "epoch": 1.13, + "learning_rate": 6.263645332739251e-05, + "loss": 1.3396, + "step": 11840 + }, + { + "epoch": 1.13, + "learning_rate": 6.2604627478438e-05, + "loss": 1.0909, + "step": 11850 + }, + { + "epoch": 1.13, + "learning_rate": 6.257280162948347e-05, + "loss": 1.4662, + "step": 11860 + }, + { + "epoch": 1.13, + "learning_rate": 6.254097578052895e-05, + "loss": 1.1194, + "step": 11870 + }, + { + "epoch": 1.13, + "learning_rate": 6.250914993157443e-05, + "loss": 1.2887, + "step": 11880 + }, + { + "epoch": 1.13, + "learning_rate": 6.24773240826199e-05, + "loss": 1.4379, + "step": 11890 + }, + { + "epoch": 1.13, + "learning_rate": 6.244549823366539e-05, + "loss": 1.2255, + "step": 11900 + }, + { + "epoch": 1.13, + "learning_rate": 6.241367238471087e-05, + "loss": 1.5839, + "step": 11910 + }, + { + "epoch": 1.13, + "learning_rate": 6.238184653575635e-05, + "loss": 1.4361, + "step": 11920 + }, + { + "epoch": 1.14, + "learning_rate": 6.235002068680182e-05, + "loss": 1.1001, + "step": 11930 + }, + { + "epoch": 1.14, + "learning_rate": 6.23181948378473e-05, + "loss": 1.5917, + "step": 11940 + }, + { + "epoch": 1.14, + "learning_rate": 6.228636898889279e-05, + "loss": 1.1532, + "step": 11950 + }, + { + "epoch": 1.14, + "learning_rate": 6.225454313993826e-05, + "loss": 1.3314, + "step": 11960 + }, + { + "epoch": 1.14, + "learning_rate": 6.222271729098375e-05, + "loss": 1.1664, + "step": 11970 + }, + { + "epoch": 1.14, + "learning_rate": 6.219089144202922e-05, + "loss": 1.1507, + "step": 11980 + }, + { + "epoch": 1.14, + "learning_rate": 6.21590655930747e-05, + "loss": 1.3231, + "step": 11990 + }, + { + "epoch": 1.14, + "learning_rate": 6.212723974412018e-05, + "loss": 1.401, + "step": 12000 + }, + { + "epoch": 1.14, + "learning_rate": 6.209541389516565e-05, + "loss": 1.148, + "step": 12010 + }, + { + "epoch": 1.14, + "learning_rate": 6.206358804621114e-05, + "loss": 1.3955, + "step": 12020 + }, + { + "epoch": 1.14, + "learning_rate": 6.203176219725662e-05, + "loss": 1.714, + "step": 12030 + }, + { + "epoch": 1.15, + "learning_rate": 6.199993634830209e-05, + "loss": 1.3327, + "step": 12040 + }, + { + "epoch": 1.15, + "learning_rate": 6.196811049934757e-05, + "loss": 1.3942, + "step": 12050 + }, + { + "epoch": 1.15, + "learning_rate": 6.193628465039304e-05, + "loss": 1.2902, + "step": 12060 + }, + { + "epoch": 1.15, + "learning_rate": 6.190445880143854e-05, + "loss": 1.2024, + "step": 12070 + }, + { + "epoch": 1.15, + "learning_rate": 6.187263295248401e-05, + "loss": 1.6921, + "step": 12080 + }, + { + "epoch": 1.15, + "learning_rate": 6.184080710352948e-05, + "loss": 1.5338, + "step": 12090 + }, + { + "epoch": 1.15, + "learning_rate": 6.180898125457496e-05, + "loss": 1.3473, + "step": 12100 + }, + { + "epoch": 1.15, + "learning_rate": 6.177715540562045e-05, + "loss": 1.1758, + "step": 12110 + }, + { + "epoch": 1.15, + "learning_rate": 6.174532955666593e-05, + "loss": 0.9934, + "step": 12120 + }, + { + "epoch": 1.15, + "learning_rate": 6.17135037077114e-05, + "loss": 1.0722, + "step": 12130 + }, + { + "epoch": 1.16, + "learning_rate": 6.168167785875689e-05, + "loss": 1.5355, + "step": 12140 + }, + { + "epoch": 1.16, + "learning_rate": 6.164985200980237e-05, + "loss": 1.1969, + "step": 12150 + }, + { + "epoch": 1.16, + "learning_rate": 6.161802616084784e-05, + "loss": 1.2552, + "step": 12160 + }, + { + "epoch": 1.16, + "learning_rate": 6.158620031189332e-05, + "loss": 1.4347, + "step": 12170 + }, + { + "epoch": 1.16, + "learning_rate": 6.155437446293881e-05, + "loss": 1.2723, + "step": 12180 + }, + { + "epoch": 1.16, + "learning_rate": 6.152254861398428e-05, + "loss": 1.3817, + "step": 12190 + }, + { + "epoch": 1.16, + "learning_rate": 6.149072276502976e-05, + "loss": 1.4304, + "step": 12200 + }, + { + "epoch": 1.16, + "learning_rate": 6.145889691607523e-05, + "loss": 1.3085, + "step": 12210 + }, + { + "epoch": 1.16, + "learning_rate": 6.142707106712073e-05, + "loss": 1.4066, + "step": 12220 + }, + { + "epoch": 1.16, + "learning_rate": 6.13952452181662e-05, + "loss": 1.2759, + "step": 12230 + }, + { + "epoch": 1.16, + "learning_rate": 6.136341936921167e-05, + "loss": 1.3005, + "step": 12240 + }, + { + "epoch": 1.17, + "learning_rate": 6.133159352025715e-05, + "loss": 1.4923, + "step": 12250 + }, + { + "epoch": 1.17, + "learning_rate": 6.129976767130264e-05, + "loss": 1.2941, + "step": 12260 + }, + { + "epoch": 1.17, + "learning_rate": 6.126794182234812e-05, + "loss": 1.188, + "step": 12270 + }, + { + "epoch": 1.17, + "learning_rate": 6.123611597339359e-05, + "loss": 1.515, + "step": 12280 + }, + { + "epoch": 1.17, + "learning_rate": 6.120429012443907e-05, + "loss": 1.1593, + "step": 12290 + }, + { + "epoch": 1.17, + "learning_rate": 6.117246427548456e-05, + "loss": 1.1917, + "step": 12300 + }, + { + "epoch": 1.17, + "learning_rate": 6.114063842653003e-05, + "loss": 1.4694, + "step": 12310 + }, + { + "epoch": 1.17, + "learning_rate": 6.110881257757551e-05, + "loss": 1.3641, + "step": 12320 + }, + { + "epoch": 1.17, + "learning_rate": 6.1076986728621e-05, + "loss": 1.0053, + "step": 12330 + }, + { + "epoch": 1.17, + "learning_rate": 6.104516087966646e-05, + "loss": 1.3955, + "step": 12340 + }, + { + "epoch": 1.18, + "learning_rate": 6.101333503071195e-05, + "loss": 1.4972, + "step": 12350 + }, + { + "epoch": 1.18, + "learning_rate": 6.0981509181757424e-05, + "loss": 1.5457, + "step": 12360 + }, + { + "epoch": 1.18, + "learning_rate": 6.094968333280291e-05, + "loss": 1.295, + "step": 12370 + }, + { + "epoch": 1.18, + "learning_rate": 6.0917857483848385e-05, + "loss": 1.3848, + "step": 12380 + }, + { + "epoch": 1.18, + "learning_rate": 6.088603163489386e-05, + "loss": 1.3321, + "step": 12390 + }, + { + "epoch": 1.18, + "learning_rate": 6.0854205785939345e-05, + "loss": 1.5353, + "step": 12400 + }, + { + "epoch": 1.18, + "learning_rate": 6.082237993698482e-05, + "loss": 1.2588, + "step": 12410 + }, + { + "epoch": 1.18, + "learning_rate": 6.0790554088030306e-05, + "loss": 1.274, + "step": 12420 + }, + { + "epoch": 1.18, + "learning_rate": 6.075872823907578e-05, + "loss": 1.1487, + "step": 12430 + }, + { + "epoch": 1.18, + "learning_rate": 6.072690239012125e-05, + "loss": 1.2118, + "step": 12440 + }, + { + "epoch": 1.18, + "learning_rate": 6.069507654116674e-05, + "loss": 1.1819, + "step": 12450 + }, + { + "epoch": 1.19, + "learning_rate": 6.066325069221221e-05, + "loss": 1.4046, + "step": 12460 + }, + { + "epoch": 1.19, + "learning_rate": 6.0631424843257704e-05, + "loss": 1.4242, + "step": 12470 + }, + { + "epoch": 1.19, + "learning_rate": 6.0599598994303174e-05, + "loss": 1.4458, + "step": 12480 + }, + { + "epoch": 1.19, + "learning_rate": 6.056777314534865e-05, + "loss": 1.7952, + "step": 12490 + }, + { + "epoch": 1.19, + "learning_rate": 6.0535947296394134e-05, + "loss": 1.6093, + "step": 12500 + }, + { + "epoch": 1.19, + "learning_rate": 6.050412144743961e-05, + "loss": 1.3165, + "step": 12510 + }, + { + "epoch": 1.19, + "learning_rate": 6.0472295598485095e-05, + "loss": 1.1477, + "step": 12520 + }, + { + "epoch": 1.19, + "learning_rate": 6.044046974953057e-05, + "loss": 1.3023, + "step": 12530 + }, + { + "epoch": 1.19, + "learning_rate": 6.0408643900576055e-05, + "loss": 1.1291, + "step": 12540 + }, + { + "epoch": 1.19, + "learning_rate": 6.037681805162153e-05, + "loss": 1.1525, + "step": 12550 + }, + { + "epoch": 1.2, + "learning_rate": 6.0344992202667e-05, + "loss": 1.4392, + "step": 12560 + }, + { + "epoch": 1.2, + "learning_rate": 6.031316635371249e-05, + "loss": 1.2571, + "step": 12570 + }, + { + "epoch": 1.2, + "learning_rate": 6.028134050475796e-05, + "loss": 1.4243, + "step": 12580 + }, + { + "epoch": 1.2, + "learning_rate": 6.024951465580345e-05, + "loss": 1.3288, + "step": 12590 + }, + { + "epoch": 1.2, + "learning_rate": 6.021768880684892e-05, + "loss": 1.2786, + "step": 12600 + }, + { + "epoch": 1.2, + "learning_rate": 6.01858629578944e-05, + "loss": 1.4158, + "step": 12610 + }, + { + "epoch": 1.2, + "learning_rate": 6.0154037108939884e-05, + "loss": 1.6776, + "step": 12620 + }, + { + "epoch": 1.2, + "learning_rate": 6.012221125998536e-05, + "loss": 1.2973, + "step": 12630 + }, + { + "epoch": 1.2, + "learning_rate": 6.0090385411030844e-05, + "loss": 1.6589, + "step": 12640 + }, + { + "epoch": 1.2, + "learning_rate": 6.005855956207632e-05, + "loss": 1.4143, + "step": 12650 + }, + { + "epoch": 1.2, + "learning_rate": 6.00267337131218e-05, + "loss": 1.1845, + "step": 12660 + }, + { + "epoch": 1.21, + "learning_rate": 5.999490786416728e-05, + "loss": 1.379, + "step": 12670 + }, + { + "epoch": 1.21, + "learning_rate": 5.996308201521276e-05, + "loss": 1.1948, + "step": 12680 + }, + { + "epoch": 1.21, + "learning_rate": 5.993125616625824e-05, + "loss": 1.4856, + "step": 12690 + }, + { + "epoch": 1.21, + "learning_rate": 5.989943031730372e-05, + "loss": 1.0585, + "step": 12700 + }, + { + "epoch": 1.21, + "learning_rate": 5.986760446834919e-05, + "loss": 1.0362, + "step": 12710 + }, + { + "epoch": 1.21, + "learning_rate": 5.983577861939468e-05, + "loss": 1.5063, + "step": 12720 + }, + { + "epoch": 1.21, + "learning_rate": 5.980395277044015e-05, + "loss": 1.3054, + "step": 12730 + }, + { + "epoch": 1.21, + "learning_rate": 5.977212692148564e-05, + "loss": 1.2419, + "step": 12740 + }, + { + "epoch": 1.21, + "learning_rate": 5.974030107253111e-05, + "loss": 1.3128, + "step": 12750 + }, + { + "epoch": 1.21, + "learning_rate": 5.970847522357659e-05, + "loss": 1.3978, + "step": 12760 + }, + { + "epoch": 1.22, + "learning_rate": 5.967664937462207e-05, + "loss": 1.3241, + "step": 12770 + }, + { + "epoch": 1.22, + "learning_rate": 5.964482352566755e-05, + "loss": 1.3458, + "step": 12780 + }, + { + "epoch": 1.22, + "learning_rate": 5.961299767671303e-05, + "loss": 1.1913, + "step": 12790 + }, + { + "epoch": 1.22, + "learning_rate": 5.958117182775851e-05, + "loss": 1.207, + "step": 12800 + }, + { + "epoch": 1.22, + "learning_rate": 5.9549345978803985e-05, + "loss": 1.8335, + "step": 12810 + }, + { + "epoch": 1.22, + "learning_rate": 5.951752012984947e-05, + "loss": 1.2501, + "step": 12820 + }, + { + "epoch": 1.22, + "learning_rate": 5.9485694280894945e-05, + "loss": 1.3696, + "step": 12830 + }, + { + "epoch": 1.22, + "learning_rate": 5.945386843194043e-05, + "loss": 1.4096, + "step": 12840 + }, + { + "epoch": 1.22, + "learning_rate": 5.9422042582985906e-05, + "loss": 1.2826, + "step": 12850 + }, + { + "epoch": 1.22, + "learning_rate": 5.9390216734031376e-05, + "loss": 1.781, + "step": 12860 + }, + { + "epoch": 1.22, + "learning_rate": 5.9358390885076866e-05, + "loss": 1.2511, + "step": 12870 + }, + { + "epoch": 1.23, + "learning_rate": 5.9326565036122336e-05, + "loss": 1.2594, + "step": 12880 + }, + { + "epoch": 1.23, + "learning_rate": 5.929473918716783e-05, + "loss": 1.2174, + "step": 12890 + }, + { + "epoch": 1.23, + "learning_rate": 5.92629133382133e-05, + "loss": 1.3863, + "step": 12900 + }, + { + "epoch": 1.23, + "learning_rate": 5.9231087489258774e-05, + "loss": 1.5468, + "step": 12910 + }, + { + "epoch": 1.23, + "learning_rate": 5.919926164030426e-05, + "loss": 1.3738, + "step": 12920 + }, + { + "epoch": 1.23, + "learning_rate": 5.9167435791349734e-05, + "loss": 1.2275, + "step": 12930 + }, + { + "epoch": 1.23, + "learning_rate": 5.913560994239522e-05, + "loss": 1.2463, + "step": 12940 + }, + { + "epoch": 1.23, + "learning_rate": 5.9103784093440695e-05, + "loss": 1.5187, + "step": 12950 + }, + { + "epoch": 1.23, + "learning_rate": 5.907195824448617e-05, + "loss": 1.2007, + "step": 12960 + }, + { + "epoch": 1.23, + "learning_rate": 5.9040132395531655e-05, + "loss": 1.5909, + "step": 12970 + }, + { + "epoch": 1.24, + "learning_rate": 5.900830654657713e-05, + "loss": 1.1711, + "step": 12980 + }, + { + "epoch": 1.24, + "learning_rate": 5.8976480697622616e-05, + "loss": 1.498, + "step": 12990 + }, + { + "epoch": 1.24, + "learning_rate": 5.894465484866809e-05, + "loss": 1.2641, + "step": 13000 + }, + { + "epoch": 1.24, + "learning_rate": 5.891282899971356e-05, + "loss": 1.4552, + "step": 13010 + }, + { + "epoch": 1.24, + "learning_rate": 5.888100315075905e-05, + "loss": 1.4062, + "step": 13020 + }, + { + "epoch": 1.24, + "learning_rate": 5.884917730180452e-05, + "loss": 1.6072, + "step": 13030 + }, + { + "epoch": 1.24, + "learning_rate": 5.8817351452850014e-05, + "loss": 1.5844, + "step": 13040 + }, + { + "epoch": 1.24, + "learning_rate": 5.8785525603895484e-05, + "loss": 1.3093, + "step": 13050 + }, + { + "epoch": 1.24, + "learning_rate": 5.875369975494096e-05, + "loss": 1.3364, + "step": 13060 + }, + { + "epoch": 1.24, + "learning_rate": 5.8721873905986444e-05, + "loss": 1.2106, + "step": 13070 + }, + { + "epoch": 1.24, + "learning_rate": 5.869004805703192e-05, + "loss": 1.4542, + "step": 13080 + }, + { + "epoch": 1.25, + "learning_rate": 5.8658222208077405e-05, + "loss": 1.3747, + "step": 13090 + }, + { + "epoch": 1.25, + "learning_rate": 5.862639635912288e-05, + "loss": 1.4218, + "step": 13100 + }, + { + "epoch": 1.25, + "learning_rate": 5.859457051016836e-05, + "loss": 1.3271, + "step": 13110 + }, + { + "epoch": 1.25, + "learning_rate": 5.856274466121384e-05, + "loss": 1.3049, + "step": 13120 + }, + { + "epoch": 1.25, + "learning_rate": 5.853091881225932e-05, + "loss": 1.1618, + "step": 13130 + }, + { + "epoch": 1.25, + "learning_rate": 5.84990929633048e-05, + "loss": 1.5606, + "step": 13140 + }, + { + "epoch": 1.25, + "learning_rate": 5.846726711435028e-05, + "loss": 1.5192, + "step": 13150 + }, + { + "epoch": 1.25, + "learning_rate": 5.843544126539575e-05, + "loss": 1.2331, + "step": 13160 + }, + { + "epoch": 1.25, + "learning_rate": 5.840361541644124e-05, + "loss": 1.3765, + "step": 13170 + }, + { + "epoch": 1.25, + "learning_rate": 5.837178956748671e-05, + "loss": 1.3304, + "step": 13180 + }, + { + "epoch": 1.26, + "learning_rate": 5.83399637185322e-05, + "loss": 1.0399, + "step": 13190 + }, + { + "epoch": 1.26, + "learning_rate": 5.830813786957767e-05, + "loss": 1.4977, + "step": 13200 + }, + { + "epoch": 1.26, + "learning_rate": 5.827631202062316e-05, + "loss": 1.2552, + "step": 13210 + }, + { + "epoch": 1.26, + "learning_rate": 5.824448617166863e-05, + "loss": 1.1272, + "step": 13220 + }, + { + "epoch": 1.26, + "learning_rate": 5.821266032271411e-05, + "loss": 1.686, + "step": 13230 + }, + { + "epoch": 1.26, + "learning_rate": 5.818083447375959e-05, + "loss": 1.3021, + "step": 13240 + }, + { + "epoch": 1.26, + "learning_rate": 5.814900862480507e-05, + "loss": 1.3472, + "step": 13250 + }, + { + "epoch": 1.26, + "learning_rate": 5.811718277585055e-05, + "loss": 1.1018, + "step": 13260 + }, + { + "epoch": 1.26, + "learning_rate": 5.808535692689603e-05, + "loss": 1.2079, + "step": 13270 + }, + { + "epoch": 1.26, + "learning_rate": 5.8053531077941506e-05, + "loss": 1.205, + "step": 13280 + }, + { + "epoch": 1.26, + "learning_rate": 5.802170522898699e-05, + "loss": 1.4772, + "step": 13290 + }, + { + "epoch": 1.27, + "learning_rate": 5.7989879380032466e-05, + "loss": 1.1362, + "step": 13300 + }, + { + "epoch": 1.27, + "learning_rate": 5.795805353107795e-05, + "loss": 1.5549, + "step": 13310 + }, + { + "epoch": 1.27, + "learning_rate": 5.792622768212343e-05, + "loss": 1.2256, + "step": 13320 + }, + { + "epoch": 1.27, + "learning_rate": 5.78944018331689e-05, + "loss": 1.1792, + "step": 13330 + }, + { + "epoch": 1.27, + "learning_rate": 5.786257598421439e-05, + "loss": 1.444, + "step": 13340 + }, + { + "epoch": 1.27, + "learning_rate": 5.783075013525986e-05, + "loss": 1.3135, + "step": 13350 + }, + { + "epoch": 1.27, + "learning_rate": 5.779892428630535e-05, + "loss": 1.2954, + "step": 13360 + }, + { + "epoch": 1.27, + "learning_rate": 5.776709843735082e-05, + "loss": 1.2679, + "step": 13370 + }, + { + "epoch": 1.27, + "learning_rate": 5.7735272588396295e-05, + "loss": 1.3775, + "step": 13380 + }, + { + "epoch": 1.27, + "learning_rate": 5.770344673944178e-05, + "loss": 1.0079, + "step": 13390 + }, + { + "epoch": 1.28, + "learning_rate": 5.7671620890487255e-05, + "loss": 1.3256, + "step": 13400 + }, + { + "epoch": 1.28, + "learning_rate": 5.763979504153274e-05, + "loss": 1.2643, + "step": 13410 + }, + { + "epoch": 1.28, + "learning_rate": 5.7607969192578216e-05, + "loss": 1.0838, + "step": 13420 + }, + { + "epoch": 1.28, + "learning_rate": 5.757614334362369e-05, + "loss": 1.1447, + "step": 13430 + }, + { + "epoch": 1.28, + "learning_rate": 5.7544317494669176e-05, + "loss": 1.0348, + "step": 13440 + }, + { + "epoch": 1.28, + "learning_rate": 5.751249164571465e-05, + "loss": 1.4069, + "step": 13450 + }, + { + "epoch": 1.28, + "learning_rate": 5.748066579676014e-05, + "loss": 1.5608, + "step": 13460 + }, + { + "epoch": 1.28, + "learning_rate": 5.7448839947805614e-05, + "loss": 1.0844, + "step": 13470 + }, + { + "epoch": 1.28, + "learning_rate": 5.7417014098851084e-05, + "loss": 1.4903, + "step": 13480 + }, + { + "epoch": 1.28, + "learning_rate": 5.7385188249896574e-05, + "loss": 1.2309, + "step": 13490 + }, + { + "epoch": 1.28, + "learning_rate": 5.7353362400942044e-05, + "loss": 1.4884, + "step": 13500 + }, + { + "epoch": 1.29, + "learning_rate": 5.7321536551987535e-05, + "loss": 1.4128, + "step": 13510 + }, + { + "epoch": 1.29, + "learning_rate": 5.7289710703033005e-05, + "loss": 1.2433, + "step": 13520 + }, + { + "epoch": 1.29, + "learning_rate": 5.725788485407848e-05, + "loss": 1.4097, + "step": 13530 + }, + { + "epoch": 1.29, + "learning_rate": 5.7226059005123965e-05, + "loss": 1.2903, + "step": 13540 + }, + { + "epoch": 1.29, + "learning_rate": 5.719423315616944e-05, + "loss": 1.2305, + "step": 13550 + }, + { + "epoch": 1.29, + "learning_rate": 5.7162407307214926e-05, + "loss": 1.2742, + "step": 13560 + }, + { + "epoch": 1.29, + "learning_rate": 5.71305814582604e-05, + "loss": 1.1847, + "step": 13570 + }, + { + "epoch": 1.29, + "learning_rate": 5.709875560930588e-05, + "loss": 1.1794, + "step": 13580 + }, + { + "epoch": 1.29, + "learning_rate": 5.706692976035136e-05, + "loss": 1.4876, + "step": 13590 + }, + { + "epoch": 1.29, + "learning_rate": 5.703510391139684e-05, + "loss": 1.3414, + "step": 13600 + }, + { + "epoch": 1.3, + "learning_rate": 5.7003278062442324e-05, + "loss": 1.6806, + "step": 13610 + }, + { + "epoch": 1.3, + "learning_rate": 5.6971452213487794e-05, + "loss": 1.167, + "step": 13620 + }, + { + "epoch": 1.3, + "learning_rate": 5.693962636453327e-05, + "loss": 1.6015, + "step": 13630 + }, + { + "epoch": 1.3, + "learning_rate": 5.6907800515578754e-05, + "loss": 1.3552, + "step": 13640 + }, + { + "epoch": 1.3, + "learning_rate": 5.687597466662423e-05, + "loss": 1.4357, + "step": 13650 + }, + { + "epoch": 1.3, + "learning_rate": 5.6844148817669715e-05, + "loss": 1.2554, + "step": 13660 + }, + { + "epoch": 1.3, + "learning_rate": 5.681232296871519e-05, + "loss": 1.3044, + "step": 13670 + }, + { + "epoch": 1.3, + "learning_rate": 5.678049711976067e-05, + "loss": 1.0156, + "step": 13680 + }, + { + "epoch": 1.3, + "learning_rate": 5.674867127080615e-05, + "loss": 1.3119, + "step": 13690 + }, + { + "epoch": 1.3, + "learning_rate": 5.671684542185163e-05, + "loss": 1.4552, + "step": 13700 + }, + { + "epoch": 1.3, + "learning_rate": 5.668501957289711e-05, + "loss": 1.3558, + "step": 13710 + }, + { + "epoch": 1.31, + "learning_rate": 5.665319372394259e-05, + "loss": 1.3309, + "step": 13720 + }, + { + "epoch": 1.31, + "learning_rate": 5.662136787498806e-05, + "loss": 1.4877, + "step": 13730 + }, + { + "epoch": 1.31, + "learning_rate": 5.658954202603355e-05, + "loss": 1.2664, + "step": 13740 + }, + { + "epoch": 1.31, + "learning_rate": 5.655771617707902e-05, + "loss": 1.2099, + "step": 13750 + }, + { + "epoch": 1.31, + "learning_rate": 5.652589032812451e-05, + "loss": 1.2328, + "step": 13760 + }, + { + "epoch": 1.31, + "learning_rate": 5.649406447916998e-05, + "loss": 1.1204, + "step": 13770 + }, + { + "epoch": 1.31, + "learning_rate": 5.646223863021546e-05, + "loss": 1.4109, + "step": 13780 + }, + { + "epoch": 1.31, + "learning_rate": 5.643041278126094e-05, + "loss": 1.1479, + "step": 13790 + }, + { + "epoch": 1.31, + "learning_rate": 5.639858693230642e-05, + "loss": 1.2953, + "step": 13800 + }, + { + "epoch": 1.31, + "learning_rate": 5.63667610833519e-05, + "loss": 1.4705, + "step": 13810 + }, + { + "epoch": 1.32, + "learning_rate": 5.633493523439738e-05, + "loss": 1.5887, + "step": 13820 + }, + { + "epoch": 1.32, + "learning_rate": 5.630310938544286e-05, + "loss": 1.2498, + "step": 13830 + }, + { + "epoch": 1.32, + "learning_rate": 5.627128353648834e-05, + "loss": 1.3203, + "step": 13840 + }, + { + "epoch": 1.32, + "learning_rate": 5.6239457687533816e-05, + "loss": 1.6888, + "step": 13850 + }, + { + "epoch": 1.32, + "learning_rate": 5.62076318385793e-05, + "loss": 1.2531, + "step": 13860 + }, + { + "epoch": 1.32, + "learning_rate": 5.6175805989624776e-05, + "loss": 1.0861, + "step": 13870 + }, + { + "epoch": 1.32, + "learning_rate": 5.614398014067026e-05, + "loss": 1.4684, + "step": 13880 + }, + { + "epoch": 1.32, + "learning_rate": 5.611215429171574e-05, + "loss": 0.8508, + "step": 13890 + }, + { + "epoch": 1.32, + "learning_rate": 5.608032844276121e-05, + "loss": 1.4018, + "step": 13900 + }, + { + "epoch": 1.32, + "learning_rate": 5.60485025938067e-05, + "loss": 1.1384, + "step": 13910 + }, + { + "epoch": 1.32, + "learning_rate": 5.601667674485217e-05, + "loss": 1.3172, + "step": 13920 + }, + { + "epoch": 1.33, + "learning_rate": 5.598485089589766e-05, + "loss": 1.3609, + "step": 13930 + }, + { + "epoch": 1.33, + "learning_rate": 5.595302504694313e-05, + "loss": 1.3021, + "step": 13940 + }, + { + "epoch": 1.33, + "learning_rate": 5.5921199197988605e-05, + "loss": 1.4604, + "step": 13950 + }, + { + "epoch": 1.33, + "learning_rate": 5.588937334903409e-05, + "loss": 1.4656, + "step": 13960 + }, + { + "epoch": 1.33, + "learning_rate": 5.5857547500079565e-05, + "loss": 1.0608, + "step": 13970 + }, + { + "epoch": 1.33, + "learning_rate": 5.582572165112505e-05, + "loss": 1.4819, + "step": 13980 + }, + { + "epoch": 1.33, + "learning_rate": 5.5793895802170526e-05, + "loss": 1.3399, + "step": 13990 + }, + { + "epoch": 1.33, + "learning_rate": 5.5762069953216e-05, + "loss": 1.1616, + "step": 14000 + }, + { + "epoch": 1.33, + "learning_rate": 5.5730244104261486e-05, + "loss": 1.2001, + "step": 14010 + }, + { + "epoch": 1.33, + "learning_rate": 5.569841825530696e-05, + "loss": 1.5204, + "step": 14020 + }, + { + "epoch": 1.34, + "learning_rate": 5.566659240635245e-05, + "loss": 1.2357, + "step": 14030 + }, + { + "epoch": 1.34, + "learning_rate": 5.5634766557397924e-05, + "loss": 1.2601, + "step": 14040 + }, + { + "epoch": 1.34, + "learning_rate": 5.5602940708443394e-05, + "loss": 1.4038, + "step": 14050 + }, + { + "epoch": 1.34, + "learning_rate": 5.5571114859488884e-05, + "loss": 1.3067, + "step": 14060 + }, + { + "epoch": 1.34, + "learning_rate": 5.5539289010534354e-05, + "loss": 1.269, + "step": 14070 + }, + { + "epoch": 1.34, + "learning_rate": 5.5507463161579845e-05, + "loss": 1.3595, + "step": 14080 + }, + { + "epoch": 1.34, + "learning_rate": 5.5475637312625315e-05, + "loss": 1.2674, + "step": 14090 + }, + { + "epoch": 1.34, + "learning_rate": 5.544381146367079e-05, + "loss": 1.2473, + "step": 14100 + }, + { + "epoch": 1.34, + "learning_rate": 5.5411985614716275e-05, + "loss": 1.5753, + "step": 14110 + }, + { + "epoch": 1.34, + "learning_rate": 5.538015976576175e-05, + "loss": 1.4645, + "step": 14120 + }, + { + "epoch": 1.34, + "learning_rate": 5.5348333916807236e-05, + "loss": 1.4572, + "step": 14130 + }, + { + "epoch": 1.35, + "learning_rate": 5.531650806785271e-05, + "loss": 1.2548, + "step": 14140 + }, + { + "epoch": 1.35, + "learning_rate": 5.528468221889819e-05, + "loss": 1.3171, + "step": 14150 + }, + { + "epoch": 1.35, + "learning_rate": 5.525285636994367e-05, + "loss": 1.3876, + "step": 14160 + }, + { + "epoch": 1.35, + "learning_rate": 5.522103052098915e-05, + "loss": 1.4725, + "step": 14170 + }, + { + "epoch": 1.35, + "learning_rate": 5.5189204672034634e-05, + "loss": 1.2703, + "step": 14180 + }, + { + "epoch": 1.35, + "learning_rate": 5.515737882308011e-05, + "loss": 1.2428, + "step": 14190 + }, + { + "epoch": 1.35, + "learning_rate": 5.512555297412558e-05, + "loss": 1.5501, + "step": 14200 + }, + { + "epoch": 1.35, + "learning_rate": 5.509372712517107e-05, + "loss": 1.4164, + "step": 14210 + }, + { + "epoch": 1.35, + "learning_rate": 5.506190127621654e-05, + "loss": 1.4017, + "step": 14220 + }, + { + "epoch": 1.35, + "learning_rate": 5.503007542726203e-05, + "loss": 1.2215, + "step": 14230 + }, + { + "epoch": 1.36, + "learning_rate": 5.49982495783075e-05, + "loss": 1.1972, + "step": 14240 + }, + { + "epoch": 1.36, + "learning_rate": 5.496642372935298e-05, + "loss": 1.4031, + "step": 14250 + }, + { + "epoch": 1.36, + "learning_rate": 5.493459788039846e-05, + "loss": 1.1811, + "step": 14260 + }, + { + "epoch": 1.36, + "learning_rate": 5.490277203144394e-05, + "loss": 1.2467, + "step": 14270 + }, + { + "epoch": 1.36, + "learning_rate": 5.487094618248942e-05, + "loss": 1.4318, + "step": 14280 + }, + { + "epoch": 1.36, + "learning_rate": 5.48391203335349e-05, + "loss": 1.3557, + "step": 14290 + }, + { + "epoch": 1.36, + "learning_rate": 5.4807294484580376e-05, + "loss": 1.3901, + "step": 14300 + }, + { + "epoch": 1.36, + "learning_rate": 5.477546863562586e-05, + "loss": 1.5302, + "step": 14310 + }, + { + "epoch": 1.36, + "learning_rate": 5.474364278667134e-05, + "loss": 1.1885, + "step": 14320 + }, + { + "epoch": 1.36, + "learning_rate": 5.471181693771682e-05, + "loss": 1.1578, + "step": 14330 + }, + { + "epoch": 1.36, + "learning_rate": 5.46799910887623e-05, + "loss": 1.3881, + "step": 14340 + }, + { + "epoch": 1.37, + "learning_rate": 5.464816523980777e-05, + "loss": 1.1401, + "step": 14350 + }, + { + "epoch": 1.37, + "learning_rate": 5.461633939085326e-05, + "loss": 1.0311, + "step": 14360 + }, + { + "epoch": 1.37, + "learning_rate": 5.458451354189873e-05, + "loss": 1.1228, + "step": 14370 + }, + { + "epoch": 1.37, + "learning_rate": 5.455268769294422e-05, + "loss": 1.2884, + "step": 14380 + }, + { + "epoch": 1.37, + "learning_rate": 5.452086184398969e-05, + "loss": 1.3827, + "step": 14390 + }, + { + "epoch": 1.37, + "learning_rate": 5.4489035995035165e-05, + "loss": 1.239, + "step": 14400 + }, + { + "epoch": 1.37, + "learning_rate": 5.445721014608065e-05, + "loss": 1.4542, + "step": 14410 + }, + { + "epoch": 1.37, + "learning_rate": 5.4425384297126126e-05, + "loss": 1.0058, + "step": 14420 + }, + { + "epoch": 1.37, + "learning_rate": 5.439355844817161e-05, + "loss": 1.3152, + "step": 14430 + }, + { + "epoch": 1.37, + "learning_rate": 5.4361732599217086e-05, + "loss": 1.4563, + "step": 14440 + }, + { + "epoch": 1.38, + "learning_rate": 5.432990675026257e-05, + "loss": 1.2788, + "step": 14450 + }, + { + "epoch": 1.38, + "learning_rate": 5.429808090130805e-05, + "loss": 1.5673, + "step": 14460 + }, + { + "epoch": 1.38, + "learning_rate": 5.4266255052353524e-05, + "loss": 1.5005, + "step": 14470 + }, + { + "epoch": 1.38, + "learning_rate": 5.423442920339901e-05, + "loss": 1.1309, + "step": 14480 + }, + { + "epoch": 1.38, + "learning_rate": 5.4202603354444484e-05, + "loss": 1.4121, + "step": 14490 + }, + { + "epoch": 1.38, + "learning_rate": 5.417077750548997e-05, + "loss": 1.3966, + "step": 14500 + }, + { + "epoch": 1.38, + "learning_rate": 5.4138951656535445e-05, + "loss": 1.1501, + "step": 14510 + }, + { + "epoch": 1.38, + "learning_rate": 5.4107125807580915e-05, + "loss": 0.9098, + "step": 14520 + }, + { + "epoch": 1.38, + "learning_rate": 5.4075299958626405e-05, + "loss": 1.123, + "step": 14530 + }, + { + "epoch": 1.38, + "learning_rate": 5.4043474109671875e-05, + "loss": 1.2539, + "step": 14540 + }, + { + "epoch": 1.38, + "learning_rate": 5.4011648260717366e-05, + "loss": 1.4045, + "step": 14550 + }, + { + "epoch": 1.39, + "learning_rate": 5.3979822411762836e-05, + "loss": 1.6323, + "step": 14560 + }, + { + "epoch": 1.39, + "learning_rate": 5.394799656280831e-05, + "loss": 1.3246, + "step": 14570 + }, + { + "epoch": 1.39, + "learning_rate": 5.3916170713853796e-05, + "loss": 1.1879, + "step": 14580 + }, + { + "epoch": 1.39, + "learning_rate": 5.388434486489927e-05, + "loss": 1.3357, + "step": 14590 + }, + { + "epoch": 1.39, + "learning_rate": 5.385251901594476e-05, + "loss": 1.2795, + "step": 14600 + }, + { + "epoch": 1.39, + "learning_rate": 5.3820693166990234e-05, + "loss": 1.521, + "step": 14610 + }, + { + "epoch": 1.39, + "learning_rate": 5.378886731803571e-05, + "loss": 1.2611, + "step": 14620 + }, + { + "epoch": 1.39, + "learning_rate": 5.3757041469081194e-05, + "loss": 1.4852, + "step": 14630 + }, + { + "epoch": 1.39, + "learning_rate": 5.372521562012667e-05, + "loss": 0.9539, + "step": 14640 + }, + { + "epoch": 1.39, + "learning_rate": 5.3693389771172155e-05, + "loss": 1.1787, + "step": 14650 + }, + { + "epoch": 1.4, + "learning_rate": 5.366156392221763e-05, + "loss": 1.3916, + "step": 14660 + }, + { + "epoch": 1.4, + "learning_rate": 5.36297380732631e-05, + "loss": 1.0306, + "step": 14670 + }, + { + "epoch": 1.4, + "learning_rate": 5.3597912224308585e-05, + "loss": 1.3695, + "step": 14680 + }, + { + "epoch": 1.4, + "learning_rate": 5.356608637535406e-05, + "loss": 1.2022, + "step": 14690 + }, + { + "epoch": 1.4, + "learning_rate": 5.3534260526399546e-05, + "loss": 1.1744, + "step": 14700 + }, + { + "epoch": 1.4, + "learning_rate": 5.350243467744502e-05, + "loss": 1.4006, + "step": 14710 + }, + { + "epoch": 1.4, + "learning_rate": 5.34706088284905e-05, + "loss": 1.3549, + "step": 14720 + }, + { + "epoch": 1.4, + "learning_rate": 5.343878297953598e-05, + "loss": 1.2094, + "step": 14730 + }, + { + "epoch": 1.4, + "learning_rate": 5.340695713058146e-05, + "loss": 1.2815, + "step": 14740 + }, + { + "epoch": 1.4, + "learning_rate": 5.3375131281626944e-05, + "loss": 1.1214, + "step": 14750 + }, + { + "epoch": 1.4, + "learning_rate": 5.334330543267242e-05, + "loss": 1.5456, + "step": 14760 + }, + { + "epoch": 1.41, + "learning_rate": 5.331147958371789e-05, + "loss": 1.3289, + "step": 14770 + }, + { + "epoch": 1.41, + "learning_rate": 5.327965373476338e-05, + "loss": 1.5725, + "step": 14780 + }, + { + "epoch": 1.41, + "learning_rate": 5.324782788580885e-05, + "loss": 1.3129, + "step": 14790 + }, + { + "epoch": 1.41, + "learning_rate": 5.321600203685434e-05, + "loss": 1.1212, + "step": 14800 + }, + { + "epoch": 1.41, + "learning_rate": 5.318417618789981e-05, + "loss": 1.5021, + "step": 14810 + }, + { + "epoch": 1.41, + "learning_rate": 5.315235033894529e-05, + "loss": 1.2091, + "step": 14820 + }, + { + "epoch": 1.41, + "learning_rate": 5.312052448999077e-05, + "loss": 1.3694, + "step": 14830 + }, + { + "epoch": 1.41, + "learning_rate": 5.308869864103625e-05, + "loss": 1.2515, + "step": 14840 + }, + { + "epoch": 1.41, + "learning_rate": 5.305687279208173e-05, + "loss": 1.2529, + "step": 14850 + }, + { + "epoch": 1.41, + "learning_rate": 5.302504694312721e-05, + "loss": 1.2047, + "step": 14860 + }, + { + "epoch": 1.42, + "learning_rate": 5.2993221094172686e-05, + "loss": 1.3786, + "step": 14870 + }, + { + "epoch": 1.42, + "learning_rate": 5.296139524521817e-05, + "loss": 1.3992, + "step": 14880 + }, + { + "epoch": 1.42, + "learning_rate": 5.292956939626365e-05, + "loss": 1.3026, + "step": 14890 + }, + { + "epoch": 1.42, + "learning_rate": 5.289774354730913e-05, + "loss": 1.2121, + "step": 14900 + }, + { + "epoch": 1.42, + "learning_rate": 5.286591769835461e-05, + "loss": 1.4507, + "step": 14910 + }, + { + "epoch": 1.42, + "learning_rate": 5.283409184940008e-05, + "loss": 1.0681, + "step": 14920 + }, + { + "epoch": 1.42, + "learning_rate": 5.280226600044557e-05, + "loss": 1.4707, + "step": 14930 + }, + { + "epoch": 1.42, + "learning_rate": 5.277044015149104e-05, + "loss": 1.3271, + "step": 14940 + }, + { + "epoch": 1.42, + "learning_rate": 5.273861430253653e-05, + "loss": 1.1235, + "step": 14950 + }, + { + "epoch": 1.42, + "learning_rate": 5.2706788453582e-05, + "loss": 1.0133, + "step": 14960 + }, + { + "epoch": 1.42, + "learning_rate": 5.2674962604627475e-05, + "loss": 1.2553, + "step": 14970 + }, + { + "epoch": 1.43, + "learning_rate": 5.264313675567296e-05, + "loss": 1.2721, + "step": 14980 + }, + { + "epoch": 1.43, + "learning_rate": 5.2611310906718436e-05, + "loss": 1.2123, + "step": 14990 + }, + { + "epoch": 1.43, + "learning_rate": 5.257948505776392e-05, + "loss": 1.4009, + "step": 15000 + }, + { + "epoch": 1.43, + "learning_rate": 5.2547659208809396e-05, + "loss": 1.2326, + "step": 15010 + }, + { + "epoch": 1.43, + "learning_rate": 5.251583335985487e-05, + "loss": 1.3902, + "step": 15020 + }, + { + "epoch": 1.43, + "learning_rate": 5.248400751090036e-05, + "loss": 1.4369, + "step": 15030 + }, + { + "epoch": 1.43, + "learning_rate": 5.2452181661945834e-05, + "loss": 1.2699, + "step": 15040 + }, + { + "epoch": 1.43, + "learning_rate": 5.242035581299132e-05, + "loss": 1.2985, + "step": 15050 + }, + { + "epoch": 1.43, + "learning_rate": 5.2388529964036794e-05, + "loss": 1.5252, + "step": 15060 + }, + { + "epoch": 1.43, + "learning_rate": 5.2356704115082264e-05, + "loss": 1.5001, + "step": 15070 + }, + { + "epoch": 1.44, + "learning_rate": 5.2324878266127755e-05, + "loss": 1.4595, + "step": 15080 + }, + { + "epoch": 1.44, + "learning_rate": 5.2293052417173225e-05, + "loss": 1.1815, + "step": 15090 + }, + { + "epoch": 1.44, + "learning_rate": 5.2261226568218715e-05, + "loss": 1.4579, + "step": 15100 + }, + { + "epoch": 1.44, + "learning_rate": 5.2229400719264185e-05, + "loss": 1.1008, + "step": 15110 + }, + { + "epoch": 1.44, + "learning_rate": 5.2197574870309676e-05, + "loss": 1.2104, + "step": 15120 + }, + { + "epoch": 1.44, + "learning_rate": 5.2165749021355146e-05, + "loss": 1.0013, + "step": 15130 + }, + { + "epoch": 1.44, + "learning_rate": 5.213392317240062e-05, + "loss": 1.2941, + "step": 15140 + }, + { + "epoch": 1.44, + "learning_rate": 5.2102097323446106e-05, + "loss": 1.2361, + "step": 15150 + }, + { + "epoch": 1.44, + "learning_rate": 5.207027147449158e-05, + "loss": 1.1462, + "step": 15160 + }, + { + "epoch": 1.44, + "learning_rate": 5.203844562553707e-05, + "loss": 1.2805, + "step": 15170 + }, + { + "epoch": 1.44, + "learning_rate": 5.2006619776582544e-05, + "loss": 1.157, + "step": 15180 + }, + { + "epoch": 1.45, + "learning_rate": 5.197479392762802e-05, + "loss": 1.2395, + "step": 15190 + }, + { + "epoch": 1.45, + "learning_rate": 5.1942968078673504e-05, + "loss": 1.2716, + "step": 15200 + }, + { + "epoch": 1.45, + "learning_rate": 5.191114222971898e-05, + "loss": 1.1911, + "step": 15210 + }, + { + "epoch": 1.45, + "learning_rate": 5.1879316380764465e-05, + "loss": 1.2859, + "step": 15220 + }, + { + "epoch": 1.45, + "learning_rate": 5.184749053180994e-05, + "loss": 0.9757, + "step": 15230 + }, + { + "epoch": 1.45, + "learning_rate": 5.181566468285541e-05, + "loss": 1.1481, + "step": 15240 + }, + { + "epoch": 1.45, + "learning_rate": 5.17838388339009e-05, + "loss": 1.3675, + "step": 15250 + }, + { + "epoch": 1.45, + "learning_rate": 5.175201298494637e-05, + "loss": 1.1877, + "step": 15260 + }, + { + "epoch": 1.45, + "learning_rate": 5.172018713599186e-05, + "loss": 1.2303, + "step": 15270 + }, + { + "epoch": 1.45, + "learning_rate": 5.168836128703733e-05, + "loss": 1.6646, + "step": 15280 + }, + { + "epoch": 1.46, + "learning_rate": 5.165653543808281e-05, + "loss": 1.4336, + "step": 15290 + }, + { + "epoch": 1.46, + "learning_rate": 5.162470958912829e-05, + "loss": 1.612, + "step": 15300 + }, + { + "epoch": 1.46, + "learning_rate": 5.159288374017377e-05, + "loss": 1.2497, + "step": 15310 + }, + { + "epoch": 1.46, + "learning_rate": 5.1561057891219253e-05, + "loss": 1.2639, + "step": 15320 + }, + { + "epoch": 1.46, + "learning_rate": 5.152923204226473e-05, + "loss": 1.3976, + "step": 15330 + }, + { + "epoch": 1.46, + "learning_rate": 5.149740619331021e-05, + "loss": 1.5085, + "step": 15340 + }, + { + "epoch": 1.46, + "learning_rate": 5.146558034435569e-05, + "loss": 1.5096, + "step": 15350 + }, + { + "epoch": 1.46, + "learning_rate": 5.143375449540117e-05, + "loss": 1.6825, + "step": 15360 + }, + { + "epoch": 1.46, + "learning_rate": 5.140192864644665e-05, + "loss": 1.265, + "step": 15370 + }, + { + "epoch": 1.46, + "learning_rate": 5.137010279749213e-05, + "loss": 1.4147, + "step": 15380 + }, + { + "epoch": 1.46, + "learning_rate": 5.13382769485376e-05, + "loss": 1.3444, + "step": 15390 + }, + { + "epoch": 1.47, + "learning_rate": 5.130645109958309e-05, + "loss": 1.1047, + "step": 15400 + }, + { + "epoch": 1.47, + "learning_rate": 5.127462525062856e-05, + "loss": 1.6598, + "step": 15410 + }, + { + "epoch": 1.47, + "learning_rate": 5.124279940167405e-05, + "loss": 1.309, + "step": 15420 + }, + { + "epoch": 1.47, + "learning_rate": 5.121097355271952e-05, + "loss": 1.148, + "step": 15430 + }, + { + "epoch": 1.47, + "learning_rate": 5.1179147703764996e-05, + "loss": 1.2568, + "step": 15440 + }, + { + "epoch": 1.47, + "learning_rate": 5.114732185481048e-05, + "loss": 1.3135, + "step": 15450 + }, + { + "epoch": 1.47, + "learning_rate": 5.111549600585596e-05, + "loss": 1.2472, + "step": 15460 + }, + { + "epoch": 1.47, + "learning_rate": 5.108367015690144e-05, + "loss": 1.3674, + "step": 15470 + }, + { + "epoch": 1.47, + "learning_rate": 5.105184430794692e-05, + "loss": 1.3011, + "step": 15480 + }, + { + "epoch": 1.47, + "learning_rate": 5.1020018458992394e-05, + "loss": 1.1521, + "step": 15490 + }, + { + "epoch": 1.48, + "learning_rate": 5.098819261003788e-05, + "loss": 1.1709, + "step": 15500 + }, + { + "epoch": 1.48, + "learning_rate": 5.0956366761083355e-05, + "loss": 1.3494, + "step": 15510 + }, + { + "epoch": 1.48, + "learning_rate": 5.092454091212884e-05, + "loss": 1.2846, + "step": 15520 + }, + { + "epoch": 1.48, + "learning_rate": 5.0892715063174315e-05, + "loss": 1.3508, + "step": 15530 + }, + { + "epoch": 1.48, + "learning_rate": 5.0860889214219785e-05, + "loss": 1.3032, + "step": 15540 + }, + { + "epoch": 1.48, + "learning_rate": 5.0829063365265276e-05, + "loss": 1.1961, + "step": 15550 + }, + { + "epoch": 1.48, + "learning_rate": 5.0797237516310746e-05, + "loss": 1.4699, + "step": 15560 + }, + { + "epoch": 1.48, + "learning_rate": 5.0765411667356236e-05, + "loss": 1.2985, + "step": 15570 + }, + { + "epoch": 1.48, + "learning_rate": 5.0733585818401706e-05, + "loss": 1.2926, + "step": 15580 + }, + { + "epoch": 1.48, + "learning_rate": 5.070175996944718e-05, + "loss": 1.3092, + "step": 15590 + }, + { + "epoch": 1.48, + "learning_rate": 5.066993412049267e-05, + "loss": 1.4662, + "step": 15600 + }, + { + "epoch": 1.49, + "learning_rate": 5.0638108271538144e-05, + "loss": 1.1928, + "step": 15610 + }, + { + "epoch": 1.49, + "learning_rate": 5.060628242258363e-05, + "loss": 1.3266, + "step": 15620 + }, + { + "epoch": 1.49, + "learning_rate": 5.0574456573629104e-05, + "loss": 1.2545, + "step": 15630 + }, + { + "epoch": 1.49, + "learning_rate": 5.054263072467458e-05, + "loss": 1.3546, + "step": 15640 + }, + { + "epoch": 1.49, + "learning_rate": 5.0510804875720065e-05, + "loss": 1.4485, + "step": 15650 + }, + { + "epoch": 1.49, + "learning_rate": 5.047897902676554e-05, + "loss": 1.0811, + "step": 15660 + }, + { + "epoch": 1.49, + "learning_rate": 5.0447153177811025e-05, + "loss": 1.256, + "step": 15670 + }, + { + "epoch": 1.49, + "learning_rate": 5.04153273288565e-05, + "loss": 1.3061, + "step": 15680 + }, + { + "epoch": 1.49, + "learning_rate": 5.038350147990197e-05, + "loss": 1.1861, + "step": 15690 + }, + { + "epoch": 1.49, + "learning_rate": 5.035167563094746e-05, + "loss": 1.3347, + "step": 15700 + }, + { + "epoch": 1.5, + "learning_rate": 5.031984978199293e-05, + "loss": 1.105, + "step": 15710 + }, + { + "epoch": 1.5, + "learning_rate": 5.028802393303842e-05, + "loss": 1.2095, + "step": 15720 + }, + { + "epoch": 1.5, + "learning_rate": 5.025619808408389e-05, + "loss": 1.0656, + "step": 15730 + }, + { + "epoch": 1.5, + "learning_rate": 5.022437223512938e-05, + "loss": 1.291, + "step": 15740 + }, + { + "epoch": 1.5, + "learning_rate": 5.0192546386174853e-05, + "loss": 1.2578, + "step": 15750 + }, + { + "epoch": 1.5, + "learning_rate": 5.016072053722033e-05, + "loss": 1.2552, + "step": 15760 + }, + { + "epoch": 1.5, + "learning_rate": 5.0128894688265814e-05, + "loss": 1.0318, + "step": 15770 + }, + { + "epoch": 1.5, + "learning_rate": 5.009706883931129e-05, + "loss": 1.2062, + "step": 15780 + }, + { + "epoch": 1.5, + "learning_rate": 5.0065242990356774e-05, + "loss": 1.1256, + "step": 15790 + }, + { + "epoch": 1.5, + "learning_rate": 5.003341714140225e-05, + "loss": 1.0347, + "step": 15800 + }, + { + "epoch": 1.5, + "learning_rate": 5.000159129244772e-05, + "loss": 1.1673, + "step": 15810 + }, + { + "epoch": 1.51, + "learning_rate": 4.9969765443493205e-05, + "loss": 1.4957, + "step": 15820 + }, + { + "epoch": 1.51, + "learning_rate": 4.993793959453868e-05, + "loss": 1.3662, + "step": 15830 + }, + { + "epoch": 1.51, + "learning_rate": 4.9906113745584166e-05, + "loss": 1.2346, + "step": 15840 + }, + { + "epoch": 1.51, + "learning_rate": 4.987428789662964e-05, + "loss": 1.2201, + "step": 15850 + }, + { + "epoch": 1.51, + "learning_rate": 4.9842462047675126e-05, + "loss": 1.2533, + "step": 15860 + }, + { + "epoch": 1.51, + "learning_rate": 4.98106361987206e-05, + "loss": 1.2212, + "step": 15870 + }, + { + "epoch": 1.51, + "learning_rate": 4.977881034976608e-05, + "loss": 1.2752, + "step": 15880 + }, + { + "epoch": 1.51, + "learning_rate": 4.9746984500811563e-05, + "loss": 1.3381, + "step": 15890 + }, + { + "epoch": 1.51, + "learning_rate": 4.971515865185704e-05, + "loss": 1.0002, + "step": 15900 + }, + { + "epoch": 1.51, + "learning_rate": 4.9683332802902524e-05, + "loss": 1.0658, + "step": 15910 + }, + { + "epoch": 1.52, + "learning_rate": 4.9651506953948e-05, + "loss": 1.3124, + "step": 15920 + }, + { + "epoch": 1.52, + "learning_rate": 4.961968110499348e-05, + "loss": 1.4023, + "step": 15930 + }, + { + "epoch": 1.52, + "learning_rate": 4.9587855256038955e-05, + "loss": 1.426, + "step": 15940 + }, + { + "epoch": 1.52, + "learning_rate": 4.955602940708444e-05, + "loss": 1.2818, + "step": 15950 + }, + { + "epoch": 1.52, + "learning_rate": 4.9524203558129915e-05, + "loss": 1.5452, + "step": 15960 + }, + { + "epoch": 1.52, + "learning_rate": 4.94923777091754e-05, + "loss": 1.5367, + "step": 15970 + }, + { + "epoch": 1.52, + "learning_rate": 4.946055186022087e-05, + "loss": 1.2374, + "step": 15980 + }, + { + "epoch": 1.52, + "learning_rate": 4.942872601126635e-05, + "loss": 1.0215, + "step": 15990 + }, + { + "epoch": 1.52, + "learning_rate": 4.939690016231183e-05, + "loss": 1.4311, + "step": 16000 + }, + { + "epoch": 1.52, + "learning_rate": 4.936507431335731e-05, + "loss": 1.419, + "step": 16010 + }, + { + "epoch": 1.52, + "learning_rate": 4.933324846440279e-05, + "loss": 1.1318, + "step": 16020 + }, + { + "epoch": 1.53, + "learning_rate": 4.930142261544827e-05, + "loss": 0.9369, + "step": 16030 + }, + { + "epoch": 1.53, + "learning_rate": 4.926959676649375e-05, + "loss": 1.2709, + "step": 16040 + }, + { + "epoch": 1.53, + "learning_rate": 4.923777091753923e-05, + "loss": 1.0934, + "step": 16050 + }, + { + "epoch": 1.53, + "learning_rate": 4.920594506858471e-05, + "loss": 0.9907, + "step": 16060 + }, + { + "epoch": 1.53, + "learning_rate": 4.917411921963019e-05, + "loss": 1.4633, + "step": 16070 + }, + { + "epoch": 1.53, + "learning_rate": 4.9142293370675665e-05, + "loss": 1.0784, + "step": 16080 + }, + { + "epoch": 1.53, + "learning_rate": 4.911046752172114e-05, + "loss": 1.5323, + "step": 16090 + }, + { + "epoch": 1.53, + "learning_rate": 4.9078641672766625e-05, + "loss": 1.2013, + "step": 16100 + }, + { + "epoch": 1.53, + "learning_rate": 4.90468158238121e-05, + "loss": 1.261, + "step": 16110 + }, + { + "epoch": 1.53, + "learning_rate": 4.9014989974857586e-05, + "loss": 1.4675, + "step": 16120 + }, + { + "epoch": 1.54, + "learning_rate": 4.8983164125903056e-05, + "loss": 1.0573, + "step": 16130 + }, + { + "epoch": 1.54, + "learning_rate": 4.895133827694854e-05, + "loss": 1.0632, + "step": 16140 + }, + { + "epoch": 1.54, + "learning_rate": 4.8919512427994016e-05, + "loss": 1.3218, + "step": 16150 + }, + { + "epoch": 1.54, + "learning_rate": 4.88876865790395e-05, + "loss": 1.0788, + "step": 16160 + }, + { + "epoch": 1.54, + "learning_rate": 4.8855860730084977e-05, + "loss": 1.3465, + "step": 16170 + }, + { + "epoch": 1.54, + "learning_rate": 4.8824034881130453e-05, + "loss": 1.2767, + "step": 16180 + }, + { + "epoch": 1.54, + "learning_rate": 4.879220903217594e-05, + "loss": 1.6768, + "step": 16190 + }, + { + "epoch": 1.54, + "learning_rate": 4.8760383183221414e-05, + "loss": 1.2286, + "step": 16200 + }, + { + "epoch": 1.54, + "learning_rate": 4.87285573342669e-05, + "loss": 1.4228, + "step": 16210 + }, + { + "epoch": 1.54, + "learning_rate": 4.8696731485312374e-05, + "loss": 1.2702, + "step": 16220 + }, + { + "epoch": 1.54, + "learning_rate": 4.866490563635786e-05, + "loss": 1.3598, + "step": 16230 + }, + { + "epoch": 1.55, + "learning_rate": 4.863307978740333e-05, + "loss": 1.2043, + "step": 16240 + }, + { + "epoch": 1.55, + "learning_rate": 4.860125393844881e-05, + "loss": 1.4265, + "step": 16250 + }, + { + "epoch": 1.55, + "learning_rate": 4.856942808949429e-05, + "loss": 1.1054, + "step": 16260 + }, + { + "epoch": 1.55, + "learning_rate": 4.853760224053977e-05, + "loss": 1.5339, + "step": 16270 + }, + { + "epoch": 1.55, + "learning_rate": 4.850577639158525e-05, + "loss": 1.2976, + "step": 16280 + }, + { + "epoch": 1.55, + "learning_rate": 4.8473950542630726e-05, + "loss": 1.4514, + "step": 16290 + }, + { + "epoch": 1.55, + "learning_rate": 4.84421246936762e-05, + "loss": 1.2413, + "step": 16300 + }, + { + "epoch": 1.55, + "learning_rate": 4.8410298844721687e-05, + "loss": 1.0662, + "step": 16310 + }, + { + "epoch": 1.55, + "learning_rate": 4.8378472995767163e-05, + "loss": 1.3955, + "step": 16320 + }, + { + "epoch": 1.55, + "learning_rate": 4.834664714681265e-05, + "loss": 1.208, + "step": 16330 + }, + { + "epoch": 1.56, + "learning_rate": 4.831482129785812e-05, + "loss": 1.4724, + "step": 16340 + }, + { + "epoch": 1.56, + "learning_rate": 4.82829954489036e-05, + "loss": 1.2841, + "step": 16350 + }, + { + "epoch": 1.56, + "learning_rate": 4.825116959994908e-05, + "loss": 1.2628, + "step": 16360 + }, + { + "epoch": 1.56, + "learning_rate": 4.821934375099456e-05, + "loss": 0.989, + "step": 16370 + }, + { + "epoch": 1.56, + "learning_rate": 4.818751790204004e-05, + "loss": 0.9159, + "step": 16380 + }, + { + "epoch": 1.56, + "learning_rate": 4.8155692053085515e-05, + "loss": 1.2558, + "step": 16390 + }, + { + "epoch": 1.56, + "learning_rate": 4.8123866204131e-05, + "loss": 1.0551, + "step": 16400 + }, + { + "epoch": 1.56, + "learning_rate": 4.8092040355176476e-05, + "loss": 1.2573, + "step": 16410 + }, + { + "epoch": 1.56, + "learning_rate": 4.806021450622196e-05, + "loss": 1.4726, + "step": 16420 + }, + { + "epoch": 1.56, + "learning_rate": 4.8028388657267436e-05, + "loss": 1.3828, + "step": 16430 + }, + { + "epoch": 1.56, + "learning_rate": 4.799656280831291e-05, + "loss": 1.2052, + "step": 16440 + }, + { + "epoch": 1.57, + "learning_rate": 4.796473695935839e-05, + "loss": 1.149, + "step": 16450 + }, + { + "epoch": 1.57, + "learning_rate": 4.7932911110403873e-05, + "loss": 1.3104, + "step": 16460 + }, + { + "epoch": 1.57, + "learning_rate": 4.790108526144935e-05, + "loss": 1.3008, + "step": 16470 + }, + { + "epoch": 1.57, + "learning_rate": 4.7869259412494834e-05, + "loss": 1.314, + "step": 16480 + }, + { + "epoch": 1.57, + "learning_rate": 4.7837433563540304e-05, + "loss": 1.2192, + "step": 16490 + }, + { + "epoch": 1.57, + "learning_rate": 4.780560771458579e-05, + "loss": 1.6693, + "step": 16500 + }, + { + "epoch": 1.57, + "learning_rate": 4.7773781865631265e-05, + "loss": 1.2332, + "step": 16510 + }, + { + "epoch": 1.57, + "learning_rate": 4.774195601667675e-05, + "loss": 1.6067, + "step": 16520 + }, + { + "epoch": 1.57, + "learning_rate": 4.7710130167722225e-05, + "loss": 1.1059, + "step": 16530 + }, + { + "epoch": 1.57, + "learning_rate": 4.767830431876771e-05, + "loss": 1.3827, + "step": 16540 + }, + { + "epoch": 1.58, + "learning_rate": 4.7646478469813186e-05, + "loss": 1.2931, + "step": 16550 + }, + { + "epoch": 1.58, + "learning_rate": 4.761465262085866e-05, + "loss": 1.4552, + "step": 16560 + }, + { + "epoch": 1.58, + "learning_rate": 4.7582826771904146e-05, + "loss": 1.1972, + "step": 16570 + }, + { + "epoch": 1.58, + "learning_rate": 4.755100092294962e-05, + "loss": 0.8767, + "step": 16580 + }, + { + "epoch": 1.58, + "learning_rate": 4.7519175073995107e-05, + "loss": 0.9475, + "step": 16590 + }, + { + "epoch": 1.58, + "learning_rate": 4.7487349225040577e-05, + "loss": 1.5473, + "step": 16600 + }, + { + "epoch": 1.58, + "learning_rate": 4.745552337608606e-05, + "loss": 1.269, + "step": 16610 + }, + { + "epoch": 1.58, + "learning_rate": 4.742369752713154e-05, + "loss": 1.3995, + "step": 16620 + }, + { + "epoch": 1.58, + "learning_rate": 4.739187167817702e-05, + "loss": 1.4818, + "step": 16630 + }, + { + "epoch": 1.58, + "learning_rate": 4.73600458292225e-05, + "loss": 1.2699, + "step": 16640 + }, + { + "epoch": 1.58, + "learning_rate": 4.7328219980267974e-05, + "loss": 1.3036, + "step": 16650 + }, + { + "epoch": 1.59, + "learning_rate": 4.729639413131345e-05, + "loss": 1.3475, + "step": 16660 + }, + { + "epoch": 1.59, + "learning_rate": 4.7264568282358935e-05, + "loss": 1.1247, + "step": 16670 + }, + { + "epoch": 1.59, + "learning_rate": 4.723274243340441e-05, + "loss": 1.2363, + "step": 16680 + }, + { + "epoch": 1.59, + "learning_rate": 4.7200916584449895e-05, + "loss": 1.4974, + "step": 16690 + }, + { + "epoch": 1.59, + "learning_rate": 4.716909073549537e-05, + "loss": 1.3437, + "step": 16700 + }, + { + "epoch": 1.59, + "learning_rate": 4.713726488654085e-05, + "loss": 1.162, + "step": 16710 + }, + { + "epoch": 1.59, + "learning_rate": 4.710543903758633e-05, + "loss": 1.3693, + "step": 16720 + }, + { + "epoch": 1.59, + "learning_rate": 4.707361318863181e-05, + "loss": 1.42, + "step": 16730 + }, + { + "epoch": 1.59, + "learning_rate": 4.704178733967729e-05, + "loss": 1.3814, + "step": 16740 + }, + { + "epoch": 1.59, + "learning_rate": 4.7009961490722763e-05, + "loss": 1.1444, + "step": 16750 + }, + { + "epoch": 1.6, + "learning_rate": 4.697813564176825e-05, + "loss": 1.1105, + "step": 16760 + }, + { + "epoch": 1.6, + "learning_rate": 4.6946309792813724e-05, + "loss": 1.2547, + "step": 16770 + }, + { + "epoch": 1.6, + "learning_rate": 4.691448394385921e-05, + "loss": 1.2134, + "step": 16780 + }, + { + "epoch": 1.6, + "learning_rate": 4.6882658094904684e-05, + "loss": 1.3874, + "step": 16790 + }, + { + "epoch": 1.6, + "learning_rate": 4.685083224595016e-05, + "loss": 1.2903, + "step": 16800 + }, + { + "epoch": 1.6, + "learning_rate": 4.681900639699564e-05, + "loss": 1.1891, + "step": 16810 + }, + { + "epoch": 1.6, + "learning_rate": 4.678718054804112e-05, + "loss": 1.1899, + "step": 16820 + }, + { + "epoch": 1.6, + "learning_rate": 4.67553546990866e-05, + "loss": 1.3783, + "step": 16830 + }, + { + "epoch": 1.6, + "learning_rate": 4.672352885013208e-05, + "loss": 1.1659, + "step": 16840 + }, + { + "epoch": 1.6, + "learning_rate": 4.669170300117756e-05, + "loss": 1.4276, + "step": 16850 + }, + { + "epoch": 1.6, + "learning_rate": 4.6659877152223036e-05, + "loss": 1.2746, + "step": 16860 + }, + { + "epoch": 1.61, + "learning_rate": 4.662805130326851e-05, + "loss": 1.222, + "step": 16870 + }, + { + "epoch": 1.61, + "learning_rate": 4.6596225454313997e-05, + "loss": 1.157, + "step": 16880 + }, + { + "epoch": 1.61, + "learning_rate": 4.6564399605359473e-05, + "loss": 1.3028, + "step": 16890 + }, + { + "epoch": 1.61, + "learning_rate": 4.653257375640496e-05, + "loss": 1.4982, + "step": 16900 + }, + { + "epoch": 1.61, + "learning_rate": 4.6500747907450434e-05, + "loss": 1.3935, + "step": 16910 + }, + { + "epoch": 1.61, + "learning_rate": 4.646892205849591e-05, + "loss": 1.5659, + "step": 16920 + }, + { + "epoch": 1.61, + "learning_rate": 4.6437096209541394e-05, + "loss": 1.3223, + "step": 16930 + }, + { + "epoch": 1.61, + "learning_rate": 4.640527036058687e-05, + "loss": 1.1122, + "step": 16940 + }, + { + "epoch": 1.61, + "learning_rate": 4.6373444511632355e-05, + "loss": 1.0243, + "step": 16950 + }, + { + "epoch": 1.61, + "learning_rate": 4.6341618662677825e-05, + "loss": 1.1851, + "step": 16960 + }, + { + "epoch": 1.62, + "learning_rate": 4.630979281372331e-05, + "loss": 1.321, + "step": 16970 + }, + { + "epoch": 1.62, + "learning_rate": 4.6277966964768786e-05, + "loss": 1.5267, + "step": 16980 + }, + { + "epoch": 1.62, + "learning_rate": 4.624614111581427e-05, + "loss": 1.0554, + "step": 16990 + }, + { + "epoch": 1.62, + "learning_rate": 4.6214315266859746e-05, + "loss": 1.2804, + "step": 17000 + }, + { + "epoch": 1.62, + "learning_rate": 4.618248941790522e-05, + "loss": 1.2965, + "step": 17010 + }, + { + "epoch": 1.62, + "learning_rate": 4.61506635689507e-05, + "loss": 1.0722, + "step": 17020 + }, + { + "epoch": 1.62, + "learning_rate": 4.611883771999618e-05, + "loss": 1.5165, + "step": 17030 + }, + { + "epoch": 1.62, + "learning_rate": 4.608701187104166e-05, + "loss": 1.441, + "step": 17040 + }, + { + "epoch": 1.62, + "learning_rate": 4.6055186022087144e-05, + "loss": 0.9771, + "step": 17050 + }, + { + "epoch": 1.62, + "learning_rate": 4.602336017313262e-05, + "loss": 1.3015, + "step": 17060 + }, + { + "epoch": 1.62, + "learning_rate": 4.59915343241781e-05, + "loss": 1.1422, + "step": 17070 + }, + { + "epoch": 1.63, + "learning_rate": 4.595970847522358e-05, + "loss": 1.3455, + "step": 17080 + }, + { + "epoch": 1.63, + "learning_rate": 4.592788262626906e-05, + "loss": 1.004, + "step": 17090 + }, + { + "epoch": 1.63, + "learning_rate": 4.589605677731454e-05, + "loss": 1.0713, + "step": 17100 + }, + { + "epoch": 1.63, + "learning_rate": 4.586423092836001e-05, + "loss": 1.0479, + "step": 17110 + }, + { + "epoch": 1.63, + "learning_rate": 4.5832405079405495e-05, + "loss": 1.4771, + "step": 17120 + }, + { + "epoch": 1.63, + "learning_rate": 4.580057923045097e-05, + "loss": 1.2325, + "step": 17130 + }, + { + "epoch": 1.63, + "learning_rate": 4.5768753381496456e-05, + "loss": 1.262, + "step": 17140 + }, + { + "epoch": 1.63, + "learning_rate": 4.573692753254193e-05, + "loss": 1.4222, + "step": 17150 + }, + { + "epoch": 1.63, + "learning_rate": 4.5705101683587416e-05, + "loss": 1.2611, + "step": 17160 + }, + { + "epoch": 1.63, + "learning_rate": 4.5673275834632887e-05, + "loss": 1.2773, + "step": 17170 + }, + { + "epoch": 1.64, + "learning_rate": 4.564144998567837e-05, + "loss": 1.3417, + "step": 17180 + }, + { + "epoch": 1.64, + "learning_rate": 4.560962413672385e-05, + "loss": 1.2499, + "step": 17190 + }, + { + "epoch": 1.64, + "learning_rate": 4.557779828776933e-05, + "loss": 1.3159, + "step": 17200 + }, + { + "epoch": 1.64, + "learning_rate": 4.554597243881481e-05, + "loss": 1.2028, + "step": 17210 + }, + { + "epoch": 1.64, + "learning_rate": 4.5514146589860284e-05, + "loss": 1.2043, + "step": 17220 + }, + { + "epoch": 1.64, + "learning_rate": 4.548232074090577e-05, + "loss": 0.967, + "step": 17230 + }, + { + "epoch": 1.64, + "learning_rate": 4.5450494891951245e-05, + "loss": 1.2259, + "step": 17240 + }, + { + "epoch": 1.64, + "learning_rate": 4.541866904299673e-05, + "loss": 1.1098, + "step": 17250 + }, + { + "epoch": 1.64, + "learning_rate": 4.5386843194042205e-05, + "loss": 1.2959, + "step": 17260 + }, + { + "epoch": 1.64, + "learning_rate": 4.535501734508768e-05, + "loss": 1.0986, + "step": 17270 + }, + { + "epoch": 1.64, + "learning_rate": 4.532319149613316e-05, + "loss": 1.2249, + "step": 17280 + }, + { + "epoch": 1.65, + "learning_rate": 4.529136564717864e-05, + "loss": 1.3841, + "step": 17290 + }, + { + "epoch": 1.65, + "learning_rate": 4.525953979822412e-05, + "loss": 0.9871, + "step": 17300 + }, + { + "epoch": 1.65, + "learning_rate": 4.52277139492696e-05, + "loss": 1.2703, + "step": 17310 + }, + { + "epoch": 1.65, + "learning_rate": 4.5195888100315073e-05, + "loss": 1.224, + "step": 17320 + }, + { + "epoch": 1.65, + "learning_rate": 4.516406225136056e-05, + "loss": 1.1882, + "step": 17330 + }, + { + "epoch": 1.65, + "learning_rate": 4.5132236402406034e-05, + "loss": 1.0581, + "step": 17340 + }, + { + "epoch": 1.65, + "learning_rate": 4.510041055345152e-05, + "loss": 1.4191, + "step": 17350 + }, + { + "epoch": 1.65, + "learning_rate": 4.5068584704496994e-05, + "loss": 1.3129, + "step": 17360 + }, + { + "epoch": 1.65, + "learning_rate": 4.503675885554247e-05, + "loss": 1.4355, + "step": 17370 + }, + { + "epoch": 1.65, + "learning_rate": 4.5004933006587955e-05, + "loss": 1.4007, + "step": 17380 + }, + { + "epoch": 1.66, + "learning_rate": 4.497310715763343e-05, + "loss": 1.235, + "step": 17390 + }, + { + "epoch": 1.66, + "learning_rate": 4.494128130867891e-05, + "loss": 1.124, + "step": 17400 + }, + { + "epoch": 1.66, + "learning_rate": 4.490945545972439e-05, + "loss": 1.3663, + "step": 17410 + }, + { + "epoch": 1.66, + "learning_rate": 4.487762961076987e-05, + "loss": 1.2598, + "step": 17420 + }, + { + "epoch": 1.66, + "learning_rate": 4.4845803761815346e-05, + "loss": 1.2365, + "step": 17430 + }, + { + "epoch": 1.66, + "learning_rate": 4.481397791286083e-05, + "loss": 1.3799, + "step": 17440 + }, + { + "epoch": 1.66, + "learning_rate": 4.4782152063906306e-05, + "loss": 1.1697, + "step": 17450 + }, + { + "epoch": 1.66, + "learning_rate": 4.475032621495179e-05, + "loss": 1.6229, + "step": 17460 + }, + { + "epoch": 1.66, + "learning_rate": 4.471850036599727e-05, + "loss": 1.3189, + "step": 17470 + }, + { + "epoch": 1.66, + "learning_rate": 4.4686674517042744e-05, + "loss": 1.3392, + "step": 17480 + }, + { + "epoch": 1.66, + "learning_rate": 4.465484866808822e-05, + "loss": 1.5232, + "step": 17490 + }, + { + "epoch": 1.67, + "learning_rate": 4.4623022819133704e-05, + "loss": 1.3666, + "step": 17500 + }, + { + "epoch": 1.67, + "learning_rate": 4.459119697017918e-05, + "loss": 1.1053, + "step": 17510 + }, + { + "epoch": 1.67, + "learning_rate": 4.4559371121224665e-05, + "loss": 1.102, + "step": 17520 + }, + { + "epoch": 1.67, + "learning_rate": 4.4527545272270135e-05, + "loss": 0.7984, + "step": 17530 + }, + { + "epoch": 1.67, + "learning_rate": 4.449571942331562e-05, + "loss": 1.175, + "step": 17540 + }, + { + "epoch": 1.67, + "learning_rate": 4.4463893574361095e-05, + "loss": 1.2059, + "step": 17550 + }, + { + "epoch": 1.67, + "learning_rate": 4.443206772540658e-05, + "loss": 1.4837, + "step": 17560 + }, + { + "epoch": 1.67, + "learning_rate": 4.4400241876452056e-05, + "loss": 1.2114, + "step": 17570 + }, + { + "epoch": 1.67, + "learning_rate": 4.436841602749753e-05, + "loss": 1.278, + "step": 17580 + }, + { + "epoch": 1.67, + "learning_rate": 4.4336590178543016e-05, + "loss": 1.2685, + "step": 17590 + }, + { + "epoch": 1.68, + "learning_rate": 4.430476432958849e-05, + "loss": 1.3156, + "step": 17600 + }, + { + "epoch": 1.68, + "learning_rate": 4.427293848063398e-05, + "loss": 1.1934, + "step": 17610 + }, + { + "epoch": 1.68, + "learning_rate": 4.4241112631679454e-05, + "loss": 1.2918, + "step": 17620 + }, + { + "epoch": 1.68, + "learning_rate": 4.420928678272493e-05, + "loss": 1.1634, + "step": 17630 + }, + { + "epoch": 1.68, + "learning_rate": 4.417746093377041e-05, + "loss": 0.9987, + "step": 17640 + }, + { + "epoch": 1.68, + "learning_rate": 4.414563508481589e-05, + "loss": 1.4489, + "step": 17650 + }, + { + "epoch": 1.68, + "learning_rate": 4.411380923586137e-05, + "loss": 1.2199, + "step": 17660 + }, + { + "epoch": 1.68, + "learning_rate": 4.408198338690685e-05, + "loss": 1.1415, + "step": 17670 + }, + { + "epoch": 1.68, + "learning_rate": 4.405015753795232e-05, + "loss": 1.5274, + "step": 17680 + }, + { + "epoch": 1.68, + "learning_rate": 4.4018331688997805e-05, + "loss": 1.21, + "step": 17690 + }, + { + "epoch": 1.68, + "learning_rate": 4.398650584004328e-05, + "loss": 0.9351, + "step": 17700 + }, + { + "epoch": 1.69, + "learning_rate": 4.3954679991088766e-05, + "loss": 1.3924, + "step": 17710 + }, + { + "epoch": 1.69, + "learning_rate": 4.392285414213424e-05, + "loss": 1.0862, + "step": 17720 + }, + { + "epoch": 1.69, + "learning_rate": 4.389102829317972e-05, + "loss": 1.2006, + "step": 17730 + }, + { + "epoch": 1.69, + "learning_rate": 4.38592024442252e-05, + "loss": 1.3581, + "step": 17740 + }, + { + "epoch": 1.69, + "learning_rate": 4.382737659527068e-05, + "loss": 1.091, + "step": 17750 + }, + { + "epoch": 1.69, + "learning_rate": 4.3795550746316164e-05, + "loss": 1.537, + "step": 17760 + }, + { + "epoch": 1.69, + "learning_rate": 4.376372489736164e-05, + "loss": 1.29, + "step": 17770 + }, + { + "epoch": 1.69, + "learning_rate": 4.373189904840712e-05, + "loss": 1.2013, + "step": 17780 + }, + { + "epoch": 1.69, + "learning_rate": 4.3700073199452594e-05, + "loss": 1.23, + "step": 17790 + }, + { + "epoch": 1.69, + "learning_rate": 4.366824735049808e-05, + "loss": 1.4847, + "step": 17800 + }, + { + "epoch": 1.7, + "learning_rate": 4.3636421501543555e-05, + "loss": 1.3086, + "step": 17810 + }, + { + "epoch": 1.7, + "learning_rate": 4.360459565258904e-05, + "loss": 1.303, + "step": 17820 + }, + { + "epoch": 1.7, + "learning_rate": 4.3572769803634515e-05, + "loss": 1.271, + "step": 17830 + }, + { + "epoch": 1.7, + "learning_rate": 4.354094395467999e-05, + "loss": 1.2053, + "step": 17840 + }, + { + "epoch": 1.7, + "learning_rate": 4.350911810572547e-05, + "loss": 1.3765, + "step": 17850 + }, + { + "epoch": 1.7, + "learning_rate": 4.347729225677095e-05, + "loss": 1.2532, + "step": 17860 + }, + { + "epoch": 1.7, + "learning_rate": 4.344546640781643e-05, + "loss": 1.2865, + "step": 17870 + }, + { + "epoch": 1.7, + "learning_rate": 4.341364055886191e-05, + "loss": 1.2211, + "step": 17880 + }, + { + "epoch": 1.7, + "learning_rate": 4.338181470990739e-05, + "loss": 1.4564, + "step": 17890 + }, + { + "epoch": 1.7, + "learning_rate": 4.334998886095287e-05, + "loss": 1.0033, + "step": 17900 + }, + { + "epoch": 1.7, + "learning_rate": 4.331816301199835e-05, + "loss": 0.962, + "step": 17910 + }, + { + "epoch": 1.71, + "learning_rate": 4.328633716304383e-05, + "loss": 1.6116, + "step": 17920 + }, + { + "epoch": 1.71, + "learning_rate": 4.3254511314089304e-05, + "loss": 1.0693, + "step": 17930 + }, + { + "epoch": 1.71, + "learning_rate": 4.322268546513478e-05, + "loss": 1.4096, + "step": 17940 + }, + { + "epoch": 1.71, + "learning_rate": 4.3190859616180265e-05, + "loss": 1.226, + "step": 17950 + }, + { + "epoch": 1.71, + "learning_rate": 4.315903376722574e-05, + "loss": 1.0396, + "step": 17960 + }, + { + "epoch": 1.71, + "learning_rate": 4.3127207918271225e-05, + "loss": 1.2501, + "step": 17970 + }, + { + "epoch": 1.71, + "learning_rate": 4.30953820693167e-05, + "loss": 1.2864, + "step": 17980 + }, + { + "epoch": 1.71, + "learning_rate": 4.306355622036218e-05, + "loss": 1.2269, + "step": 17990 + }, + { + "epoch": 1.71, + "learning_rate": 4.3031730371407656e-05, + "loss": 1.116, + "step": 18000 + }, + { + "epoch": 1.71, + "learning_rate": 4.299990452245314e-05, + "loss": 1.3539, + "step": 18010 + }, + { + "epoch": 1.72, + "learning_rate": 4.2968078673498616e-05, + "loss": 1.5695, + "step": 18020 + }, + { + "epoch": 1.72, + "learning_rate": 4.29362528245441e-05, + "loss": 1.3146, + "step": 18030 + }, + { + "epoch": 1.72, + "learning_rate": 4.290442697558957e-05, + "loss": 1.1909, + "step": 18040 + }, + { + "epoch": 1.72, + "learning_rate": 4.2872601126635054e-05, + "loss": 1.3509, + "step": 18050 + }, + { + "epoch": 1.72, + "learning_rate": 4.284077527768053e-05, + "loss": 1.3447, + "step": 18060 + }, + { + "epoch": 1.72, + "learning_rate": 4.2808949428726014e-05, + "loss": 1.258, + "step": 18070 + }, + { + "epoch": 1.72, + "learning_rate": 4.277712357977149e-05, + "loss": 1.5396, + "step": 18080 + }, + { + "epoch": 1.72, + "learning_rate": 4.274529773081697e-05, + "loss": 1.5036, + "step": 18090 + }, + { + "epoch": 1.72, + "learning_rate": 4.271347188186245e-05, + "loss": 0.9903, + "step": 18100 + }, + { + "epoch": 1.72, + "learning_rate": 4.268164603290793e-05, + "loss": 1.188, + "step": 18110 + }, + { + "epoch": 1.72, + "learning_rate": 4.264982018395341e-05, + "loss": 1.4617, + "step": 18120 + }, + { + "epoch": 1.73, + "learning_rate": 4.261799433499889e-05, + "loss": 1.5529, + "step": 18130 + }, + { + "epoch": 1.73, + "learning_rate": 4.258616848604437e-05, + "loss": 0.9911, + "step": 18140 + }, + { + "epoch": 1.73, + "learning_rate": 4.255434263708984e-05, + "loss": 1.2402, + "step": 18150 + }, + { + "epoch": 1.73, + "learning_rate": 4.2522516788135326e-05, + "loss": 1.1673, + "step": 18160 + }, + { + "epoch": 1.73, + "learning_rate": 4.24906909391808e-05, + "loss": 1.5972, + "step": 18170 + }, + { + "epoch": 1.73, + "learning_rate": 4.245886509022629e-05, + "loss": 1.3765, + "step": 18180 + }, + { + "epoch": 1.73, + "learning_rate": 4.2427039241271764e-05, + "loss": 1.0896, + "step": 18190 + }, + { + "epoch": 1.73, + "learning_rate": 4.239521339231724e-05, + "loss": 1.0541, + "step": 18200 + }, + { + "epoch": 1.73, + "learning_rate": 4.236338754336272e-05, + "loss": 1.2994, + "step": 18210 + }, + { + "epoch": 1.73, + "learning_rate": 4.23315616944082e-05, + "loss": 1.5104, + "step": 18220 + }, + { + "epoch": 1.74, + "learning_rate": 4.229973584545368e-05, + "loss": 1.7087, + "step": 18230 + }, + { + "epoch": 1.74, + "learning_rate": 4.226790999649916e-05, + "loss": 1.5083, + "step": 18240 + }, + { + "epoch": 1.74, + "learning_rate": 4.223608414754464e-05, + "loss": 1.5388, + "step": 18250 + }, + { + "epoch": 1.74, + "learning_rate": 4.2204258298590115e-05, + "loss": 1.3103, + "step": 18260 + }, + { + "epoch": 1.74, + "learning_rate": 4.21724324496356e-05, + "loss": 1.2792, + "step": 18270 + }, + { + "epoch": 1.74, + "learning_rate": 4.2140606600681076e-05, + "loss": 1.1251, + "step": 18280 + }, + { + "epoch": 1.74, + "learning_rate": 4.210878075172656e-05, + "loss": 1.2299, + "step": 18290 + }, + { + "epoch": 1.74, + "learning_rate": 4.207695490277203e-05, + "loss": 1.2919, + "step": 18300 + }, + { + "epoch": 1.74, + "learning_rate": 4.204512905381751e-05, + "loss": 1.0468, + "step": 18310 + }, + { + "epoch": 1.74, + "learning_rate": 4.201330320486299e-05, + "loss": 1.4261, + "step": 18320 + }, + { + "epoch": 1.74, + "learning_rate": 4.1981477355908474e-05, + "loss": 1.2958, + "step": 18330 + }, + { + "epoch": 1.75, + "learning_rate": 4.194965150695395e-05, + "loss": 1.3843, + "step": 18340 + }, + { + "epoch": 1.75, + "learning_rate": 4.191782565799943e-05, + "loss": 1.0905, + "step": 18350 + }, + { + "epoch": 1.75, + "learning_rate": 4.1885999809044904e-05, + "loss": 1.3191, + "step": 18360 + }, + { + "epoch": 1.75, + "learning_rate": 4.185417396009039e-05, + "loss": 1.3327, + "step": 18370 + }, + { + "epoch": 1.75, + "learning_rate": 4.1822348111135865e-05, + "loss": 0.9465, + "step": 18380 + }, + { + "epoch": 1.75, + "learning_rate": 4.179052226218135e-05, + "loss": 1.1875, + "step": 18390 + }, + { + "epoch": 1.75, + "learning_rate": 4.1758696413226825e-05, + "loss": 0.936, + "step": 18400 + }, + { + "epoch": 1.75, + "learning_rate": 4.17268705642723e-05, + "loss": 1.3193, + "step": 18410 + }, + { + "epoch": 1.75, + "learning_rate": 4.1695044715317786e-05, + "loss": 1.1251, + "step": 18420 + }, + { + "epoch": 1.75, + "learning_rate": 4.166321886636326e-05, + "loss": 1.0732, + "step": 18430 + }, + { + "epoch": 1.76, + "learning_rate": 4.1631393017408746e-05, + "loss": 1.5964, + "step": 18440 + }, + { + "epoch": 1.76, + "learning_rate": 4.159956716845422e-05, + "loss": 1.3061, + "step": 18450 + }, + { + "epoch": 1.76, + "learning_rate": 4.15677413194997e-05, + "loss": 1.5171, + "step": 18460 + }, + { + "epoch": 1.76, + "learning_rate": 4.153591547054518e-05, + "loss": 1.2409, + "step": 18470 + }, + { + "epoch": 1.76, + "learning_rate": 4.150408962159066e-05, + "loss": 1.4052, + "step": 18480 + }, + { + "epoch": 1.76, + "learning_rate": 4.147226377263614e-05, + "loss": 1.053, + "step": 18490 + }, + { + "epoch": 1.76, + "learning_rate": 4.144043792368162e-05, + "loss": 1.3229, + "step": 18500 + }, + { + "epoch": 1.76, + "learning_rate": 4.140861207472709e-05, + "loss": 1.4538, + "step": 18510 + }, + { + "epoch": 1.76, + "learning_rate": 4.1376786225772575e-05, + "loss": 1.2916, + "step": 18520 + }, + { + "epoch": 1.76, + "learning_rate": 4.134496037681805e-05, + "loss": 0.9643, + "step": 18530 + }, + { + "epoch": 1.76, + "learning_rate": 4.1313134527863535e-05, + "loss": 1.4604, + "step": 18540 + }, + { + "epoch": 1.77, + "learning_rate": 4.128130867890901e-05, + "loss": 1.2127, + "step": 18550 + }, + { + "epoch": 1.77, + "learning_rate": 4.124948282995449e-05, + "loss": 1.0547, + "step": 18560 + }, + { + "epoch": 1.77, + "learning_rate": 4.1217656980999966e-05, + "loss": 1.2186, + "step": 18570 + }, + { + "epoch": 1.77, + "learning_rate": 4.118583113204545e-05, + "loss": 1.0003, + "step": 18580 + }, + { + "epoch": 1.77, + "learning_rate": 4.1154005283090926e-05, + "loss": 1.2351, + "step": 18590 + }, + { + "epoch": 1.77, + "learning_rate": 4.112217943413641e-05, + "loss": 1.4122, + "step": 18600 + }, + { + "epoch": 1.77, + "learning_rate": 4.109035358518189e-05, + "loss": 1.438, + "step": 18610 + }, + { + "epoch": 1.77, + "learning_rate": 4.1058527736227364e-05, + "loss": 1.196, + "step": 18620 + }, + { + "epoch": 1.77, + "learning_rate": 4.102670188727285e-05, + "loss": 1.062, + "step": 18630 + }, + { + "epoch": 1.77, + "learning_rate": 4.0994876038318324e-05, + "loss": 1.3738, + "step": 18640 + }, + { + "epoch": 1.78, + "learning_rate": 4.096305018936381e-05, + "loss": 1.1569, + "step": 18650 + }, + { + "epoch": 1.78, + "learning_rate": 4.093122434040928e-05, + "loss": 1.2354, + "step": 18660 + }, + { + "epoch": 1.78, + "learning_rate": 4.089939849145476e-05, + "loss": 0.9465, + "step": 18670 + }, + { + "epoch": 1.78, + "learning_rate": 4.086757264250024e-05, + "loss": 1.1662, + "step": 18680 + }, + { + "epoch": 1.78, + "learning_rate": 4.083574679354572e-05, + "loss": 1.3165, + "step": 18690 + }, + { + "epoch": 1.78, + "learning_rate": 4.08039209445912e-05, + "loss": 1.5689, + "step": 18700 + }, + { + "epoch": 1.78, + "learning_rate": 4.0772095095636676e-05, + "loss": 1.2426, + "step": 18710 + }, + { + "epoch": 1.78, + "learning_rate": 4.074026924668215e-05, + "loss": 1.2528, + "step": 18720 + }, + { + "epoch": 1.78, + "learning_rate": 4.0708443397727636e-05, + "loss": 1.4647, + "step": 18730 + }, + { + "epoch": 1.78, + "learning_rate": 4.067661754877311e-05, + "loss": 1.0062, + "step": 18740 + }, + { + "epoch": 1.78, + "learning_rate": 4.06447916998186e-05, + "loss": 1.1861, + "step": 18750 + }, + { + "epoch": 1.79, + "learning_rate": 4.0612965850864074e-05, + "loss": 1.3053, + "step": 18760 + }, + { + "epoch": 1.79, + "learning_rate": 4.058114000190955e-05, + "loss": 1.3605, + "step": 18770 + }, + { + "epoch": 1.79, + "learning_rate": 4.0549314152955034e-05, + "loss": 1.2673, + "step": 18780 + }, + { + "epoch": 1.79, + "learning_rate": 4.051748830400051e-05, + "loss": 1.2602, + "step": 18790 + }, + { + "epoch": 1.79, + "learning_rate": 4.0485662455045995e-05, + "loss": 1.3034, + "step": 18800 + }, + { + "epoch": 1.79, + "learning_rate": 4.045383660609147e-05, + "loss": 1.2683, + "step": 18810 + }, + { + "epoch": 1.79, + "learning_rate": 4.042201075713695e-05, + "loss": 1.3468, + "step": 18820 + }, + { + "epoch": 1.79, + "learning_rate": 4.0390184908182425e-05, + "loss": 1.1144, + "step": 18830 + }, + { + "epoch": 1.79, + "learning_rate": 4.035835905922791e-05, + "loss": 1.3821, + "step": 18840 + }, + { + "epoch": 1.79, + "learning_rate": 4.0326533210273386e-05, + "loss": 1.3033, + "step": 18850 + }, + { + "epoch": 1.79, + "learning_rate": 4.029470736131887e-05, + "loss": 1.4069, + "step": 18860 + }, + { + "epoch": 1.8, + "learning_rate": 4.026288151236434e-05, + "loss": 1.3087, + "step": 18870 + }, + { + "epoch": 1.8, + "learning_rate": 4.023105566340982e-05, + "loss": 1.3872, + "step": 18880 + }, + { + "epoch": 1.8, + "learning_rate": 4.01992298144553e-05, + "loss": 1.4077, + "step": 18890 + }, + { + "epoch": 1.8, + "learning_rate": 4.0167403965500784e-05, + "loss": 1.172, + "step": 18900 + }, + { + "epoch": 1.8, + "learning_rate": 4.013557811654626e-05, + "loss": 1.2941, + "step": 18910 + }, + { + "epoch": 1.8, + "learning_rate": 4.010375226759174e-05, + "loss": 1.4621, + "step": 18920 + }, + { + "epoch": 1.8, + "learning_rate": 4.007192641863722e-05, + "loss": 1.2763, + "step": 18930 + }, + { + "epoch": 1.8, + "learning_rate": 4.00401005696827e-05, + "loss": 1.392, + "step": 18940 + }, + { + "epoch": 1.8, + "learning_rate": 4.000827472072818e-05, + "loss": 1.4797, + "step": 18950 + }, + { + "epoch": 1.8, + "learning_rate": 3.997644887177366e-05, + "loss": 0.9922, + "step": 18960 + }, + { + "epoch": 1.81, + "learning_rate": 3.9944623022819135e-05, + "loss": 1.2553, + "step": 18970 + }, + { + "epoch": 1.81, + "learning_rate": 3.991279717386461e-05, + "loss": 1.1796, + "step": 18980 + }, + { + "epoch": 1.81, + "learning_rate": 3.9880971324910096e-05, + "loss": 1.098, + "step": 18990 + }, + { + "epoch": 1.81, + "learning_rate": 3.984914547595557e-05, + "loss": 1.3635, + "step": 19000 + }, + { + "epoch": 1.81, + "learning_rate": 3.9817319627001056e-05, + "loss": 1.5246, + "step": 19010 + }, + { + "epoch": 1.81, + "learning_rate": 3.9785493778046526e-05, + "loss": 1.3422, + "step": 19020 + }, + { + "epoch": 1.81, + "learning_rate": 3.975366792909201e-05, + "loss": 1.1719, + "step": 19030 + }, + { + "epoch": 1.81, + "learning_rate": 3.972184208013749e-05, + "loss": 1.3276, + "step": 19040 + }, + { + "epoch": 1.81, + "learning_rate": 3.969001623118297e-05, + "loss": 1.4109, + "step": 19050 + }, + { + "epoch": 1.81, + "learning_rate": 3.965819038222845e-05, + "loss": 1.3793, + "step": 19060 + }, + { + "epoch": 1.81, + "learning_rate": 3.962636453327393e-05, + "loss": 1.2927, + "step": 19070 + }, + { + "epoch": 1.82, + "learning_rate": 3.95945386843194e-05, + "loss": 1.3429, + "step": 19080 + }, + { + "epoch": 1.82, + "learning_rate": 3.9562712835364885e-05, + "loss": 0.9858, + "step": 19090 + }, + { + "epoch": 1.82, + "learning_rate": 3.953088698641036e-05, + "loss": 1.4387, + "step": 19100 + }, + { + "epoch": 1.82, + "learning_rate": 3.9499061137455845e-05, + "loss": 1.4169, + "step": 19110 + }, + { + "epoch": 1.82, + "learning_rate": 3.946723528850132e-05, + "loss": 1.1409, + "step": 19120 + }, + { + "epoch": 1.82, + "learning_rate": 3.94354094395468e-05, + "loss": 1.133, + "step": 19130 + }, + { + "epoch": 1.82, + "learning_rate": 3.940358359059228e-05, + "loss": 1.2668, + "step": 19140 + }, + { + "epoch": 1.82, + "learning_rate": 3.937175774163776e-05, + "loss": 1.2295, + "step": 19150 + }, + { + "epoch": 1.82, + "learning_rate": 3.933993189268324e-05, + "loss": 1.3709, + "step": 19160 + }, + { + "epoch": 1.82, + "learning_rate": 3.930810604372872e-05, + "loss": 1.4518, + "step": 19170 + }, + { + "epoch": 1.83, + "learning_rate": 3.92762801947742e-05, + "loss": 0.9845, + "step": 19180 + }, + { + "epoch": 1.83, + "learning_rate": 3.9244454345819674e-05, + "loss": 1.2617, + "step": 19190 + }, + { + "epoch": 1.83, + "learning_rate": 3.921262849686516e-05, + "loss": 1.1381, + "step": 19200 + }, + { + "epoch": 1.83, + "learning_rate": 3.9180802647910634e-05, + "loss": 1.007, + "step": 19210 + }, + { + "epoch": 1.83, + "learning_rate": 3.914897679895612e-05, + "loss": 1.2175, + "step": 19220 + }, + { + "epoch": 1.83, + "learning_rate": 3.911715095000159e-05, + "loss": 1.2663, + "step": 19230 + }, + { + "epoch": 1.83, + "learning_rate": 3.908532510104707e-05, + "loss": 1.1347, + "step": 19240 + }, + { + "epoch": 1.83, + "learning_rate": 3.905349925209255e-05, + "loss": 1.1037, + "step": 19250 + }, + { + "epoch": 1.83, + "learning_rate": 3.902167340313803e-05, + "loss": 1.2356, + "step": 19260 + }, + { + "epoch": 1.83, + "learning_rate": 3.898984755418351e-05, + "loss": 1.355, + "step": 19270 + }, + { + "epoch": 1.83, + "learning_rate": 3.8958021705228986e-05, + "loss": 1.2678, + "step": 19280 + }, + { + "epoch": 1.84, + "learning_rate": 3.892619585627447e-05, + "loss": 1.295, + "step": 19290 + }, + { + "epoch": 1.84, + "learning_rate": 3.8894370007319946e-05, + "loss": 1.1144, + "step": 19300 + }, + { + "epoch": 1.84, + "learning_rate": 3.886254415836543e-05, + "loss": 1.0753, + "step": 19310 + }, + { + "epoch": 1.84, + "learning_rate": 3.883071830941091e-05, + "loss": 1.2428, + "step": 19320 + }, + { + "epoch": 1.84, + "learning_rate": 3.8798892460456384e-05, + "loss": 1.2343, + "step": 19330 + }, + { + "epoch": 1.84, + "learning_rate": 3.876706661150186e-05, + "loss": 1.2299, + "step": 19340 + }, + { + "epoch": 1.84, + "learning_rate": 3.8735240762547344e-05, + "loss": 1.2575, + "step": 19350 + }, + { + "epoch": 1.84, + "learning_rate": 3.870341491359282e-05, + "loss": 1.3199, + "step": 19360 + }, + { + "epoch": 1.84, + "learning_rate": 3.8671589064638305e-05, + "loss": 1.2286, + "step": 19370 + }, + { + "epoch": 1.84, + "learning_rate": 3.863976321568378e-05, + "loss": 1.1311, + "step": 19380 + }, + { + "epoch": 1.85, + "learning_rate": 3.860793736672926e-05, + "loss": 1.1787, + "step": 19390 + }, + { + "epoch": 1.85, + "learning_rate": 3.8576111517774735e-05, + "loss": 1.3663, + "step": 19400 + }, + { + "epoch": 1.85, + "learning_rate": 3.854428566882022e-05, + "loss": 1.28, + "step": 19410 + }, + { + "epoch": 1.85, + "learning_rate": 3.8512459819865696e-05, + "loss": 1.488, + "step": 19420 + }, + { + "epoch": 1.85, + "learning_rate": 3.848063397091118e-05, + "loss": 1.0685, + "step": 19430 + }, + { + "epoch": 1.85, + "learning_rate": 3.8448808121956656e-05, + "loss": 1.1896, + "step": 19440 + }, + { + "epoch": 1.85, + "learning_rate": 3.841698227300213e-05, + "loss": 1.2928, + "step": 19450 + }, + { + "epoch": 1.85, + "learning_rate": 3.838515642404762e-05, + "loss": 1.1183, + "step": 19460 + }, + { + "epoch": 1.85, + "learning_rate": 3.8353330575093094e-05, + "loss": 1.6208, + "step": 19470 + }, + { + "epoch": 1.85, + "learning_rate": 3.832150472613858e-05, + "loss": 0.9104, + "step": 19480 + }, + { + "epoch": 1.85, + "learning_rate": 3.828967887718405e-05, + "loss": 1.4046, + "step": 19490 + }, + { + "epoch": 1.86, + "learning_rate": 3.825785302822953e-05, + "loss": 1.0457, + "step": 19500 + }, + { + "epoch": 1.86, + "learning_rate": 3.822602717927501e-05, + "loss": 1.1939, + "step": 19510 + }, + { + "epoch": 1.86, + "learning_rate": 3.819420133032049e-05, + "loss": 1.0983, + "step": 19520 + }, + { + "epoch": 1.86, + "learning_rate": 3.816237548136597e-05, + "loss": 1.5828, + "step": 19530 + }, + { + "epoch": 1.86, + "learning_rate": 3.8130549632411445e-05, + "loss": 1.2187, + "step": 19540 + }, + { + "epoch": 1.86, + "learning_rate": 3.809872378345692e-05, + "loss": 1.1794, + "step": 19550 + }, + { + "epoch": 1.86, + "learning_rate": 3.8066897934502406e-05, + "loss": 0.9364, + "step": 19560 + }, + { + "epoch": 1.86, + "learning_rate": 3.803507208554788e-05, + "loss": 1.4084, + "step": 19570 + }, + { + "epoch": 1.86, + "learning_rate": 3.8003246236593366e-05, + "loss": 1.4533, + "step": 19580 + }, + { + "epoch": 1.86, + "learning_rate": 3.7971420387638836e-05, + "loss": 1.0989, + "step": 19590 + }, + { + "epoch": 1.87, + "learning_rate": 3.793959453868432e-05, + "loss": 1.1185, + "step": 19600 + }, + { + "epoch": 1.87, + "learning_rate": 3.79077686897298e-05, + "loss": 1.0785, + "step": 19610 + }, + { + "epoch": 1.87, + "learning_rate": 3.787594284077528e-05, + "loss": 1.3841, + "step": 19620 + }, + { + "epoch": 1.87, + "learning_rate": 3.784411699182076e-05, + "loss": 1.2914, + "step": 19630 + }, + { + "epoch": 1.87, + "learning_rate": 3.7812291142866234e-05, + "loss": 1.2935, + "step": 19640 + }, + { + "epoch": 1.87, + "learning_rate": 3.778046529391172e-05, + "loss": 1.0084, + "step": 19650 + }, + { + "epoch": 1.87, + "learning_rate": 3.7748639444957195e-05, + "loss": 1.2559, + "step": 19660 + }, + { + "epoch": 1.87, + "learning_rate": 3.771681359600268e-05, + "loss": 1.5865, + "step": 19670 + }, + { + "epoch": 1.87, + "learning_rate": 3.7684987747048155e-05, + "loss": 1.2636, + "step": 19680 + }, + { + "epoch": 1.87, + "learning_rate": 3.765316189809363e-05, + "loss": 1.2656, + "step": 19690 + }, + { + "epoch": 1.87, + "learning_rate": 3.762133604913911e-05, + "loss": 0.9276, + "step": 19700 + }, + { + "epoch": 1.88, + "learning_rate": 3.758951020018459e-05, + "loss": 1.5347, + "step": 19710 + }, + { + "epoch": 1.88, + "learning_rate": 3.755768435123007e-05, + "loss": 1.053, + "step": 19720 + }, + { + "epoch": 1.88, + "learning_rate": 3.752585850227555e-05, + "loss": 1.3505, + "step": 19730 + }, + { + "epoch": 1.88, + "learning_rate": 3.749403265332103e-05, + "loss": 1.33, + "step": 19740 + }, + { + "epoch": 1.88, + "learning_rate": 3.746220680436651e-05, + "loss": 0.9352, + "step": 19750 + }, + { + "epoch": 1.88, + "learning_rate": 3.7430380955411984e-05, + "loss": 1.2561, + "step": 19760 + }, + { + "epoch": 1.88, + "learning_rate": 3.739855510645747e-05, + "loss": 1.0789, + "step": 19770 + }, + { + "epoch": 1.88, + "learning_rate": 3.7366729257502944e-05, + "loss": 1.2284, + "step": 19780 + }, + { + "epoch": 1.88, + "learning_rate": 3.733490340854843e-05, + "loss": 1.2263, + "step": 19790 + }, + { + "epoch": 1.88, + "learning_rate": 3.7303077559593905e-05, + "loss": 1.0512, + "step": 19800 + }, + { + "epoch": 1.89, + "learning_rate": 3.727125171063938e-05, + "loss": 1.0489, + "step": 19810 + }, + { + "epoch": 1.89, + "learning_rate": 3.7239425861684865e-05, + "loss": 1.4009, + "step": 19820 + }, + { + "epoch": 1.89, + "learning_rate": 3.720760001273034e-05, + "loss": 1.1195, + "step": 19830 + }, + { + "epoch": 1.89, + "learning_rate": 3.7175774163775826e-05, + "loss": 0.8015, + "step": 19840 + }, + { + "epoch": 1.89, + "learning_rate": 3.7143948314821296e-05, + "loss": 1.4234, + "step": 19850 + }, + { + "epoch": 1.89, + "learning_rate": 3.711212246586678e-05, + "loss": 1.3675, + "step": 19860 + }, + { + "epoch": 1.89, + "learning_rate": 3.7080296616912256e-05, + "loss": 0.9495, + "step": 19870 + }, + { + "epoch": 1.89, + "learning_rate": 3.704847076795774e-05, + "loss": 1.1224, + "step": 19880 + }, + { + "epoch": 1.89, + "learning_rate": 3.701664491900322e-05, + "loss": 1.409, + "step": 19890 + }, + { + "epoch": 1.89, + "learning_rate": 3.6984819070048694e-05, + "loss": 1.3952, + "step": 19900 + }, + { + "epoch": 1.89, + "learning_rate": 3.695299322109417e-05, + "loss": 1.2911, + "step": 19910 + }, + { + "epoch": 1.9, + "learning_rate": 3.6921167372139654e-05, + "loss": 1.1573, + "step": 19920 + }, + { + "epoch": 1.9, + "learning_rate": 3.688934152318513e-05, + "loss": 1.2936, + "step": 19930 + }, + { + "epoch": 1.9, + "learning_rate": 3.6857515674230615e-05, + "loss": 1.2558, + "step": 19940 + }, + { + "epoch": 1.9, + "learning_rate": 3.682568982527609e-05, + "loss": 1.1733, + "step": 19950 + }, + { + "epoch": 1.9, + "learning_rate": 3.679386397632157e-05, + "loss": 1.19, + "step": 19960 + }, + { + "epoch": 1.9, + "learning_rate": 3.676203812736705e-05, + "loss": 1.022, + "step": 19970 + }, + { + "epoch": 1.9, + "learning_rate": 3.673021227841253e-05, + "loss": 1.1534, + "step": 19980 + }, + { + "epoch": 1.9, + "learning_rate": 3.669838642945801e-05, + "loss": 1.1945, + "step": 19990 + }, + { + "epoch": 1.9, + "learning_rate": 3.666656058050348e-05, + "loss": 1.2391, + "step": 20000 + }, + { + "epoch": 1.9, + "learning_rate": 3.6634734731548966e-05, + "loss": 1.133, + "step": 20010 + }, + { + "epoch": 1.91, + "learning_rate": 3.660290888259444e-05, + "loss": 1.1127, + "step": 20020 + }, + { + "epoch": 1.91, + "learning_rate": 3.657108303363993e-05, + "loss": 1.2856, + "step": 20030 + }, + { + "epoch": 1.91, + "learning_rate": 3.6539257184685404e-05, + "loss": 1.2292, + "step": 20040 + }, + { + "epoch": 1.91, + "learning_rate": 3.650743133573089e-05, + "loss": 1.5068, + "step": 20050 + }, + { + "epoch": 1.91, + "learning_rate": 3.647560548677636e-05, + "loss": 1.3691, + "step": 20060 + }, + { + "epoch": 1.91, + "learning_rate": 3.644377963782184e-05, + "loss": 1.2278, + "step": 20070 + }, + { + "epoch": 1.91, + "learning_rate": 3.641195378886732e-05, + "loss": 1.142, + "step": 20080 + }, + { + "epoch": 1.91, + "learning_rate": 3.63801279399128e-05, + "loss": 1.4245, + "step": 20090 + }, + { + "epoch": 1.91, + "learning_rate": 3.634830209095828e-05, + "loss": 1.2538, + "step": 20100 + }, + { + "epoch": 1.91, + "learning_rate": 3.6316476242003755e-05, + "loss": 1.0865, + "step": 20110 + }, + { + "epoch": 1.91, + "learning_rate": 3.628465039304923e-05, + "loss": 1.5445, + "step": 20120 + }, + { + "epoch": 1.92, + "learning_rate": 3.6252824544094716e-05, + "loss": 1.1644, + "step": 20130 + }, + { + "epoch": 1.92, + "learning_rate": 3.622099869514019e-05, + "loss": 1.1228, + "step": 20140 + }, + { + "epoch": 1.92, + "learning_rate": 3.6189172846185676e-05, + "loss": 1.2033, + "step": 20150 + }, + { + "epoch": 1.92, + "learning_rate": 3.615734699723115e-05, + "loss": 1.2266, + "step": 20160 + }, + { + "epoch": 1.92, + "learning_rate": 3.612552114827663e-05, + "loss": 1.2489, + "step": 20170 + }, + { + "epoch": 1.92, + "learning_rate": 3.6093695299322114e-05, + "loss": 1.1645, + "step": 20180 + }, + { + "epoch": 1.92, + "learning_rate": 3.606186945036759e-05, + "loss": 1.1382, + "step": 20190 + }, + { + "epoch": 1.92, + "learning_rate": 3.6030043601413074e-05, + "loss": 1.0547, + "step": 20200 + }, + { + "epoch": 1.92, + "learning_rate": 3.5998217752458544e-05, + "loss": 1.0566, + "step": 20210 + }, + { + "epoch": 1.92, + "learning_rate": 3.596639190350403e-05, + "loss": 1.305, + "step": 20220 + }, + { + "epoch": 1.93, + "learning_rate": 3.5934566054549505e-05, + "loss": 1.3903, + "step": 20230 + }, + { + "epoch": 1.93, + "learning_rate": 3.590274020559499e-05, + "loss": 1.3047, + "step": 20240 + }, + { + "epoch": 1.93, + "learning_rate": 3.5870914356640465e-05, + "loss": 1.1542, + "step": 20250 + }, + { + "epoch": 1.93, + "learning_rate": 3.583908850768594e-05, + "loss": 1.1636, + "step": 20260 + }, + { + "epoch": 1.93, + "learning_rate": 3.580726265873142e-05, + "loss": 0.9835, + "step": 20270 + }, + { + "epoch": 1.93, + "learning_rate": 3.57754368097769e-05, + "loss": 1.1189, + "step": 20280 + }, + { + "epoch": 1.93, + "learning_rate": 3.574361096082238e-05, + "loss": 1.448, + "step": 20290 + }, + { + "epoch": 1.93, + "learning_rate": 3.571178511186786e-05, + "loss": 0.9613, + "step": 20300 + }, + { + "epoch": 1.93, + "learning_rate": 3.567995926291334e-05, + "loss": 1.092, + "step": 20310 + }, + { + "epoch": 1.93, + "learning_rate": 3.564813341395882e-05, + "loss": 1.2723, + "step": 20320 + }, + { + "epoch": 1.93, + "learning_rate": 3.56163075650043e-05, + "loss": 1.37, + "step": 20330 + }, + { + "epoch": 1.94, + "learning_rate": 3.558448171604978e-05, + "loss": 1.0441, + "step": 20340 + }, + { + "epoch": 1.94, + "learning_rate": 3.555265586709526e-05, + "loss": 0.9713, + "step": 20350 + }, + { + "epoch": 1.94, + "learning_rate": 3.552083001814074e-05, + "loss": 1.1469, + "step": 20360 + }, + { + "epoch": 1.94, + "learning_rate": 3.5489004169186215e-05, + "loss": 1.1932, + "step": 20370 + }, + { + "epoch": 1.94, + "learning_rate": 3.545717832023169e-05, + "loss": 1.4208, + "step": 20380 + }, + { + "epoch": 1.94, + "learning_rate": 3.5425352471277175e-05, + "loss": 1.3295, + "step": 20390 + }, + { + "epoch": 1.94, + "learning_rate": 3.539352662232265e-05, + "loss": 1.1444, + "step": 20400 + }, + { + "epoch": 1.94, + "learning_rate": 3.5361700773368136e-05, + "loss": 1.3685, + "step": 20410 + }, + { + "epoch": 1.94, + "learning_rate": 3.5329874924413606e-05, + "loss": 1.3868, + "step": 20420 + }, + { + "epoch": 1.94, + "learning_rate": 3.529804907545909e-05, + "loss": 1.1486, + "step": 20430 + }, + { + "epoch": 1.95, + "learning_rate": 3.5266223226504566e-05, + "loss": 1.1494, + "step": 20440 + }, + { + "epoch": 1.95, + "learning_rate": 3.523439737755005e-05, + "loss": 1.3013, + "step": 20450 + }, + { + "epoch": 1.95, + "learning_rate": 3.520257152859553e-05, + "loss": 1.0297, + "step": 20460 + }, + { + "epoch": 1.95, + "learning_rate": 3.5170745679641004e-05, + "loss": 1.2975, + "step": 20470 + }, + { + "epoch": 1.95, + "learning_rate": 3.513891983068649e-05, + "loss": 1.2635, + "step": 20480 + }, + { + "epoch": 1.95, + "learning_rate": 3.5107093981731964e-05, + "loss": 1.3708, + "step": 20490 + }, + { + "epoch": 1.95, + "learning_rate": 3.507526813277745e-05, + "loss": 1.0579, + "step": 20500 + }, + { + "epoch": 1.95, + "learning_rate": 3.5043442283822925e-05, + "loss": 1.4045, + "step": 20510 + }, + { + "epoch": 1.95, + "learning_rate": 3.50116164348684e-05, + "loss": 1.2214, + "step": 20520 + }, + { + "epoch": 1.95, + "learning_rate": 3.497979058591388e-05, + "loss": 1.3855, + "step": 20530 + }, + { + "epoch": 1.95, + "learning_rate": 3.494796473695936e-05, + "loss": 1.2537, + "step": 20540 + }, + { + "epoch": 1.96, + "learning_rate": 3.491613888800484e-05, + "loss": 1.1688, + "step": 20550 + }, + { + "epoch": 1.96, + "learning_rate": 3.488431303905032e-05, + "loss": 1.2166, + "step": 20560 + }, + { + "epoch": 1.96, + "learning_rate": 3.485248719009579e-05, + "loss": 1.0007, + "step": 20570 + }, + { + "epoch": 1.96, + "learning_rate": 3.4820661341141276e-05, + "loss": 1.5996, + "step": 20580 + }, + { + "epoch": 1.96, + "learning_rate": 3.478883549218675e-05, + "loss": 1.2712, + "step": 20590 + }, + { + "epoch": 1.96, + "learning_rate": 3.475700964323224e-05, + "loss": 1.0818, + "step": 20600 + }, + { + "epoch": 1.96, + "learning_rate": 3.4725183794277714e-05, + "loss": 1.3156, + "step": 20610 + }, + { + "epoch": 1.96, + "learning_rate": 3.469335794532319e-05, + "loss": 1.0122, + "step": 20620 + }, + { + "epoch": 1.96, + "learning_rate": 3.466153209636867e-05, + "loss": 1.098, + "step": 20630 + }, + { + "epoch": 1.96, + "learning_rate": 3.462970624741415e-05, + "loss": 1.158, + "step": 20640 + }, + { + "epoch": 1.97, + "learning_rate": 3.459788039845963e-05, + "loss": 1.1666, + "step": 20650 + }, + { + "epoch": 1.97, + "learning_rate": 3.456605454950511e-05, + "loss": 1.2442, + "step": 20660 + }, + { + "epoch": 1.97, + "learning_rate": 3.453422870055059e-05, + "loss": 1.3601, + "step": 20670 + }, + { + "epoch": 1.97, + "learning_rate": 3.4502402851596065e-05, + "loss": 1.0865, + "step": 20680 + }, + { + "epoch": 1.97, + "learning_rate": 3.447057700264155e-05, + "loss": 1.121, + "step": 20690 + }, + { + "epoch": 1.97, + "learning_rate": 3.4438751153687026e-05, + "loss": 1.148, + "step": 20700 + }, + { + "epoch": 1.97, + "learning_rate": 3.440692530473251e-05, + "loss": 1.2275, + "step": 20710 + }, + { + "epoch": 1.97, + "learning_rate": 3.4375099455777986e-05, + "loss": 0.8846, + "step": 20720 + }, + { + "epoch": 1.97, + "learning_rate": 3.434327360682346e-05, + "loss": 1.3629, + "step": 20730 + }, + { + "epoch": 1.97, + "learning_rate": 3.431144775786894e-05, + "loss": 1.3894, + "step": 20740 + }, + { + "epoch": 1.97, + "learning_rate": 3.4279621908914424e-05, + "loss": 1.1238, + "step": 20750 + }, + { + "epoch": 1.98, + "learning_rate": 3.42477960599599e-05, + "loss": 1.3383, + "step": 20760 + }, + { + "epoch": 1.98, + "learning_rate": 3.4215970211005384e-05, + "loss": 1.1081, + "step": 20770 + }, + { + "epoch": 1.98, + "learning_rate": 3.4184144362050854e-05, + "loss": 1.4528, + "step": 20780 + }, + { + "epoch": 1.98, + "learning_rate": 3.415231851309634e-05, + "loss": 1.2063, + "step": 20790 + }, + { + "epoch": 1.98, + "learning_rate": 3.4120492664141815e-05, + "loss": 0.9596, + "step": 20800 + }, + { + "epoch": 1.98, + "learning_rate": 3.40886668151873e-05, + "loss": 1.4973, + "step": 20810 + }, + { + "epoch": 1.98, + "learning_rate": 3.4056840966232775e-05, + "loss": 1.2717, + "step": 20820 + }, + { + "epoch": 1.98, + "learning_rate": 3.402501511727825e-05, + "loss": 1.4137, + "step": 20830 + }, + { + "epoch": 1.98, + "learning_rate": 3.3993189268323736e-05, + "loss": 1.1622, + "step": 20840 + }, + { + "epoch": 1.98, + "learning_rate": 3.396136341936921e-05, + "loss": 0.962, + "step": 20850 + }, + { + "epoch": 1.99, + "learning_rate": 3.3929537570414696e-05, + "loss": 1.1181, + "step": 20860 + }, + { + "epoch": 1.99, + "learning_rate": 3.389771172146017e-05, + "loss": 1.2876, + "step": 20870 + }, + { + "epoch": 1.99, + "learning_rate": 3.386588587250565e-05, + "loss": 1.5209, + "step": 20880 + }, + { + "epoch": 1.99, + "learning_rate": 3.383406002355113e-05, + "loss": 1.1866, + "step": 20890 + }, + { + "epoch": 1.99, + "learning_rate": 3.380223417459661e-05, + "loss": 1.1722, + "step": 20900 + }, + { + "epoch": 1.99, + "learning_rate": 3.377040832564209e-05, + "loss": 1.1426, + "step": 20910 + }, + { + "epoch": 1.99, + "learning_rate": 3.373858247668757e-05, + "loss": 1.0596, + "step": 20920 + }, + { + "epoch": 1.99, + "learning_rate": 3.370675662773304e-05, + "loss": 1.2358, + "step": 20930 + }, + { + "epoch": 1.99, + "learning_rate": 3.3674930778778525e-05, + "loss": 0.8627, + "step": 20940 + }, + { + "epoch": 1.99, + "learning_rate": 3.3643104929824e-05, + "loss": 1.1639, + "step": 20950 + }, + { + "epoch": 1.99, + "learning_rate": 3.3611279080869485e-05, + "loss": 1.3031, + "step": 20960 + }, + { + "epoch": 2.0, + "learning_rate": 3.357945323191496e-05, + "loss": 0.9835, + "step": 20970 + }, + { + "epoch": 2.0, + "learning_rate": 3.3547627382960446e-05, + "loss": 1.3002, + "step": 20980 + }, + { + "epoch": 2.0, + "learning_rate": 3.351580153400592e-05, + "loss": 1.5188, + "step": 20990 + }, + { + "epoch": 2.0, + "learning_rate": 3.34839756850514e-05, + "loss": 1.0337, + "step": 21000 } ], "max_steps": 31521, "num_train_epochs": 3, - "total_flos": 9.955030912708116e+17, + "total_flos": 1.9910406818664284e+18, "trial_name": null, "trial_params": null }